mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[AArch64] Don't expand memcmp in strict align mode.
Commit 7aecf232 fixed the bug where we would miscompile, but in strict-align mode we still generate a crazy amount of code. Turn off the expansion until someone implements an appropriate heuristic.

Differential Revision: https://reviews.llvm.org/D77599
This commit is contained in:
parent
029366a349
commit
c20bf7d3a7
@@ -629,7 +629,12 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
||||
AArch64TTIImpl::TTI::MemCmpExpansionOptions
|
||||
AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
|
||||
TTI::MemCmpExpansionOptions Options;
|
||||
Options.AllowOverlappingLoads = !ST->requiresStrictAlign();
|
||||
if (ST->requiresStrictAlign()) {
|
||||
// TODO: Add cost modeling for strict align. Misaligned loads expand to
|
||||
// a bunch of instructions when strict align is enabled.
|
||||
return Options;
|
||||
}
|
||||
Options.AllowOverlappingLoads = true;
|
||||
Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
|
||||
Options.NumLoadsPerBlock = Options.MaxNumLoads;
|
||||
// TODO: Though vector loads usually perform well on AArch64, in some targets
|
||||
|
@@ -11,12 +11,12 @@ entry:
|
||||
ret i1 %ret
|
||||
|
||||
; CHECK-LABEL: test_b2:
|
||||
; CHECK-NOT: bl bcmp
|
||||
; CHECKN-NOT: bl bcmp
|
||||
; CHECKN: ldr x
|
||||
; CHECKN-NEXT: ldr x
|
||||
; CHECKN-NEXT: ldur x
|
||||
; CHECKN-NEXT: ldur x
|
||||
; CHECKS-COUNT-30: ldrb w
|
||||
; CHECKS: bl bcmp
|
||||
}
|
||||
|
||||
define i1 @test_b2_align8(i8* align 8 %s1, i8* align 8 %s2) {
|
||||
@@ -26,19 +26,13 @@ entry:
|
||||
ret i1 %ret
|
||||
|
||||
; CHECK-LABEL: test_b2_align8:
|
||||
; CHECK-NOT: bl bcmp
|
||||
; CHECKN-NOT: bl bcmp
|
||||
; CHECKN: ldr x
|
||||
; CHECKN-NEXT: ldr x
|
||||
; CHECKN-NEXT: ldur x
|
||||
; CHECKN-NEXT: ldur x
|
||||
; CHECKS: ldr x
|
||||
; CHECKS-NEXT: ldr x
|
||||
; CHECKS-NEXT: ldr w
|
||||
; CHECKS-NEXT: ldr w
|
||||
; CHECKS-NEXT: ldrh w
|
||||
; CHECKS-NEXT: ldrh w
|
||||
; CHECKS-NEXT: ldrb w
|
||||
; CHECKS-NEXT: ldrb w
|
||||
; TODO: Four loads should be within the limit, but the heuristic isn't implemented.
|
||||
; CHECKS: bl bcmp
|
||||
}
|
||||
|
||||
define i1 @test_bs(i8* %s1, i8* %s2) optsize {
|
||||
|
Loading…
x
Reference in New Issue
Block a user