diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 4724d6b8dae..e8ba30c7e92 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -629,7 +629,12 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
 AArch64TTIImpl::TTI::MemCmpExpansionOptions
 AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
   TTI::MemCmpExpansionOptions Options;
-  Options.AllowOverlappingLoads = !ST->requiresStrictAlign();
+  if (ST->requiresStrictAlign()) {
+    // TODO: Add cost modeling for strict align. Misaligned loads expand to
+    // a bunch of instructions when strict align is enabled.
+    return Options;
+  }
+  Options.AllowOverlappingLoads = true;
   Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
   Options.NumLoadsPerBlock = Options.MaxNumLoads;
   // TODO: Though vector loads usually perform well on AArch64, in some targets
diff --git a/test/CodeGen/AArch64/bcmp-inline-small.ll b/test/CodeGen/AArch64/bcmp-inline-small.ll
index a7d08565c4c..12eefa0ffe3 100644
--- a/test/CodeGen/AArch64/bcmp-inline-small.ll
+++ b/test/CodeGen/AArch64/bcmp-inline-small.ll
@@ -11,12 +11,12 @@ entry:
   ret i1 %ret
 
 ; CHECK-LABEL: test_b2:
-; CHECK-NOT:   bl bcmp
+; CHECKN-NOT:  bl bcmp
 ; CHECKN:      ldr x
 ; CHECKN-NEXT: ldr x
 ; CHECKN-NEXT: ldur x
 ; CHECKN-NEXT: ldur x
-; CHECKS-COUNT-30: ldrb w
+; CHECKS:      bl bcmp
 }
 
 define i1 @test_b2_align8(i8* align 8 %s1, i8* align 8 %s2) {
@@ -26,19 +26,13 @@ entry:
   ret i1 %ret
 
 ; CHECK-LABEL: test_b2_align8:
-; CHECK-NOT:   bl bcmp
+; CHECKN-NOT:  bl bcmp
 ; CHECKN:      ldr x
 ; CHECKN-NEXT: ldr x
 ; CHECKN-NEXT: ldur x
 ; CHECKN-NEXT: ldur x
-; CHECKS:      ldr x
-; CHECKS-NEXT: ldr x
-; CHECKS-NEXT: ldr w
-; CHECKS-NEXT: ldr w
-; CHECKS-NEXT: ldrh w
-; CHECKS-NEXT: ldrh w
-; CHECKS-NEXT: ldrb w
-; CHECKS-NEXT: ldrb w
+; TODO: Four loads should be within the limit, but the heuristic isn't implemented.
+; CHECKS:      bl bcmp
 }
 
 define i1 @test_bs(i8* %s1, i8* %s2) optsize {
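
For context, the overlapping-loads expansion that this patch now disables under strict alignment works roughly as sketched below. This is a minimal C++ illustration, not code from the patch: the 15-byte size is an inference from the two ldr/ldur pairs per buffer and the 30 byte-loads in the removed CHECKS-COUNT-30 line, and bcmp15_differs is a hypothetical helper name.

#include <cstdint>
#include <cstring>

// Illustrative sketch only: a 15-byte bcmp expanded with AllowOverlappingLoads.
// Each buffer is read with one 8-byte load at offset 0 (the ldr) and one
// overlapping 8-byte load at offset 7 (the ldur); byte 7 is covered twice.
// std::memcpy keeps the unaligned reads well-defined in C++.
static bool bcmp15_differs(const void *S1, const void *S2) {
  std::uint64_t A0, A1, B0, B1;
  std::memcpy(&A0, S1, 8);                                 // bytes 0..7  (ldr)
  std::memcpy(&B0, S2, 8);                                 // bytes 0..7  (ldr)
  std::memcpy(&A1, static_cast<const char *>(S1) + 7, 8);  // bytes 7..14 (ldur)
  std::memcpy(&B1, static_cast<const char *>(S2) + 7, 8);  // bytes 7..14 (ldur)
  return ((A0 ^ B0) | (A1 ^ B1)) != 0;  // nonzero iff any byte differs
}

When strict alignment is in effect, each of those unaligned 8-byte loads would itself be legalized into many single-byte loads (hence the 30 ldrb instructions the old CHECKS lines matched), which is why bailing out to a bcmp call is the reasonable default until cost modeling exists.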