1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[AArch64] Don't expand memcmp in strict align mode.

7aecf232 fixed the bug where we would miscompile, but we still generate
a crazy amount of code. Turn off the expansion until someone implements
an appropriate heuristic.

Differential Revision: https://reviews.llvm.org/D77599
This commit is contained in:
Eli Friedman 2020-04-06 15:17:02 -07:00
parent 029366a349
commit c20bf7d3a7
2 changed files with 11 additions and 12 deletions

View File

@@ -629,7 +629,12 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
AArch64TTIImpl::TTI::MemCmpExpansionOptions
AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
TTI::MemCmpExpansionOptions Options;
Options.AllowOverlappingLoads = !ST->requiresStrictAlign();
if (ST->requiresStrictAlign()) {
// TODO: Add cost modeling for strict align. Misaligned loads expand to
// a bunch of instructions when strict align is enabled.
return Options;
}
Options.AllowOverlappingLoads = true;
Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
Options.NumLoadsPerBlock = Options.MaxNumLoads;
// TODO: Though vector loads usually perform well on AArch64, in some targets

View File

@@ -11,12 +11,12 @@ entry:
ret i1 %ret
; CHECK-LABEL: test_b2:
; CHECK-NOT: bl bcmp
; CHECKN-NOT: bl bcmp
; CHECKN: ldr x
; CHECKN-NEXT: ldr x
; CHECKN-NEXT: ldur x
; CHECKN-NEXT: ldur x
; CHECKS-COUNT-30: ldrb w
; CHECKS: bl bcmp
}
define i1 @test_b2_align8(i8* align 8 %s1, i8* align 8 %s2) {
@@ -26,19 +26,13 @@ entry:
ret i1 %ret
; CHECK-LABEL: test_b2_align8:
; CHECK-NOT: bl bcmp
; CHECKN-NOT: bl bcmp
; CHECKN: ldr x
; CHECKN-NEXT: ldr x
; CHECKN-NEXT: ldur x
; CHECKN-NEXT: ldur x
; CHECKS: ldr x
; CHECKS-NEXT: ldr x
; CHECKS-NEXT: ldr w
; CHECKS-NEXT: ldr w
; CHECKS-NEXT: ldrh w
; CHECKS-NEXT: ldrh w
; CHECKS-NEXT: ldrb w
; CHECKS-NEXT: ldrb w
; TODO: Four loads should be within the limit, but the heuristic isn't implemented.
; CHECKS: bl bcmp
}
define i1 @test_bs(i8* %s1, i8* %s2) optsize {