From fa18c6d0db834712d1ebf944c4d1f02eea8a4611 Mon Sep 17 00:00:00 2001 From: Clement Courbet Date: Thu, 5 Oct 2017 08:03:39 +0000 Subject: [PATCH] Revert "[MergeICmps] Disable mergeicmps if the target does not want to handle memcmp expansion." Breaks clang-stage1-cmake-RA-incremental/llvm/test/Transforms/MergeICmps/X86/tuple-four-int8.ll This reverts commit 3038c459d67f8898ffa295d54a013b280690abfa. llvm-svn: 314972 --- lib/Transforms/Scalar/MergeICmps.cpp | 21 ++--- test/Transforms/MergeICmps/X86/lit.local.cfg | 3 - .../MergeICmps/X86/pair-int32-int32.ll | 91 ------------------- test/Transforms/MergeICmps/X86/volatile.ll | 48 ---------- .../Transforms/MergeICmps/pair-int32-int32.ll | 81 ++++++++--------- .../MergeICmps/{X86 => }/tuple-four-int8.ll | 39 ++------ test/Transforms/MergeICmps/volatile.ll | 30 ++++++ 7 files changed, 83 insertions(+), 230 deletions(-) delete mode 100644 test/Transforms/MergeICmps/X86/lit.local.cfg delete mode 100644 test/Transforms/MergeICmps/X86/pair-int32-int32.ll delete mode 100644 test/Transforms/MergeICmps/X86/volatile.ll rename test/Transforms/MergeICmps/{X86 => }/tuple-four-int8.ll (57%) create mode 100644 test/Transforms/MergeICmps/volatile.ll diff --git a/lib/Transforms/Scalar/MergeICmps.cpp b/lib/Transforms/Scalar/MergeICmps.cpp index c68cc53a518..a56cffcdfa0 100644 --- a/lib/Transforms/Scalar/MergeICmps.cpp +++ b/lib/Transforms/Scalar/MergeICmps.cpp @@ -28,8 +28,6 @@ #include #include "llvm/ADT/APSInt.h" #include "llvm/Analysis/Loads.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" @@ -43,6 +41,8 @@ namespace { #define DEBUG_TYPE "mergeicmps" +#define MERGEICMPS_DOT_ON + // A BCE atom. 
struct BCEAtom { BCEAtom() : GEP(nullptr), LoadI(nullptr), Offset() {} @@ -589,30 +589,22 @@ class MergeICmps : public FunctionPass { bool runOnFunction(Function &F) override { if (skipFunction(F)) return false; const auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); - const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - auto PA = runImpl(F, &TLI, &TTI); + auto PA = runImpl(F, &TLI); return !PA.areAllPreserved(); } private: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<TargetLibraryInfoWrapperPass>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); } - PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI, - const TargetTransformInfo *TTI); + PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI); }; -PreservedAnalyses MergeICmps::runImpl(Function &F, const TargetLibraryInfo *TLI, - const TargetTransformInfo *TTI) { +PreservedAnalyses MergeICmps::runImpl(Function &F, + const TargetLibraryInfo *TLI) { DEBUG(dbgs() << "MergeICmpsPass: " << F.getName() << "\n"); - // We only try merging comparisons if the target wants to expand memcmp later. - // The rationale is to avoid turning small chains into memcmp calls. 
- unsigned MaxLoadSize; - if (!TTI->enableMemCmpExpansion(MaxLoadSize)) return PreservedAnalyses::all(); - bool MadeChange = false; for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) { @@ -631,7 +623,6 @@ char MergeICmps::ID = 0; INITIALIZE_PASS_BEGIN(MergeICmps, "mergeicmps", "Merge contiguous icmps into a memcmp", false, false) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(MergeICmps, "mergeicmps", "Merge contiguous icmps into a memcmp", false, false) diff --git a/test/Transforms/MergeICmps/X86/lit.local.cfg b/test/Transforms/MergeICmps/X86/lit.local.cfg deleted file mode 100644 index e71f3cc4c41..00000000000 --- a/test/Transforms/MergeICmps/X86/lit.local.cfg +++ /dev/null @@ -1,3 +0,0 @@ -if not 'X86' in config.root.targets: - config.unsupported = True - diff --git a/test/Transforms/MergeICmps/X86/pair-int32-int32.ll b/test/Transforms/MergeICmps/X86/pair-int32-int32.ll deleted file mode 100644 index e3c70bcc9bb..00000000000 --- a/test/Transforms/MergeICmps/X86/pair-int32-int32.ll +++ /dev/null @@ -1,91 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86 - -%"struct.std::pair" = type { i32, i32 } - -define zeroext i1 @opeq1( -; X86-LABEL: @opeq1( -; X86-NEXT: entry: -; X86-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0 -; X86-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0 -; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[FIRST_I]] to i8* -; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[FIRST1_I]] to i8* -; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8) -; X86-NEXT: [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0 -; X86-NEXT: br label [[OPEQ1_EXIT:%.*]] -; X86: opeq1.exit: -; X86-NEXT: [[TMP1:%.*]] = 
phi i1 [ [[TMP0]], [[ENTRY:%.*]] ] -; X86-NEXT: ret i1 [[TMP1]] -; - %"struct.std::pair"* nocapture readonly dereferenceable(8) %a, - %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 { -entry: - %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0 - %0 = load i32, i32* %first.i, align 4 - %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0 - %1 = load i32, i32* %first1.i, align 4 - %cmp.i = icmp eq i32 %0, %1 - br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit - -land.rhs.i: - %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1 - %2 = load i32, i32* %second.i, align 4 - %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1 - %3 = load i32, i32* %second2.i, align 4 - %cmp3.i = icmp eq i32 %2, %3 - br label %opeq1.exit - -opeq1.exit: - %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ] - ret i1 %4 -; The entry block with zero-offset GEPs is kept, loads are removed. -; The two 4 byte loads and compares are replaced with a single 8-byte memcmp. -; The branch is now a direct branch; the other block has been removed. -; The phi is updated. -} - -; Same as above, but the two blocks are in inverse order. 
-define zeroext i1 @opeq1_inverse( -; X86-LABEL: @opeq1_inverse( -; X86-NEXT: land.rhs.i: -; X86-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0 -; X86-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0 -; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[SECOND_I]] to i8* -; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[SECOND2_I]] to i8* -; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8) -; X86-NEXT: [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0 -; X86-NEXT: br label [[OPEQ1_EXIT:%.*]] -; X86: opeq1.exit: -; X86-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[LAND_RHS_I:%.*]] ] -; X86-NEXT: ret i1 [[TMP1]] -; - %"struct.std::pair"* nocapture readonly dereferenceable(8) %a, - %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 { -entry: - %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1 - %0 = load i32, i32* %first.i, align 4 - %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1 - %1 = load i32, i32* %first1.i, align 4 - %cmp.i = icmp eq i32 %0, %1 - br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit - -land.rhs.i: - %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0 - %2 = load i32, i32* %second.i, align 4 - %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0 - %3 = load i32, i32* %second2.i, align 4 - %cmp3.i = icmp eq i32 %2, %3 - br label %opeq1.exit - -opeq1.exit: - %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ] - ret i1 %4 -; The second block with zero-offset GEPs is kept, loads are removed. -; CHECK: land.rhs.i -; The two 4 byte loads and compares are replaced with a single 8-byte memcmp. -; The branch is now a direct branch; the other block has been removed. -; The phi is updated. 
-} - - - diff --git a/test/Transforms/MergeICmps/X86/volatile.ll b/test/Transforms/MergeICmps/X86/volatile.ll deleted file mode 100644 index 3e9af6c1d8b..00000000000 --- a/test/Transforms/MergeICmps/X86/volatile.ll +++ /dev/null @@ -1,48 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s - -%"struct.std::pair" = type { i32, i32 } - -define zeroext i1 @opeq( -; CHECK-LABEL: @opeq( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4 -; CHECK-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4 -; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]] -; CHECK: land.rhs.i: -; CHECK-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A]], i64 0, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = load volatile i32, i32* [[SECOND_I]], align 4 -; CHECK-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B]], i64 0, i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4 -; CHECK-NEXT: [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT: br label [[OPEQ1_EXIT]] -; CHECK: opeq1.exit: -; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ] -; CHECK-NEXT: ret i1 [[TMP4]] -; - %"struct.std::pair"* nocapture readonly dereferenceable(8) %a, - %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 { -entry: - %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0 - %0 = load i32, i32* %first.i, align 4 - 
%first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0 - %1 = load i32, i32* %first1.i, align 4 - %cmp.i = icmp eq i32 %0, %1 - br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit - -land.rhs.i: - %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1 - %2 = load volatile i32, i32* %second.i, align 4 - %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1 - %3 = load i32, i32* %second2.i, align 4 - %cmp3.i = icmp eq i32 %2, %3 - br label %opeq1.exit - -opeq1.exit: - %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ] - ret i1 %4 -} - diff --git a/test/Transforms/MergeICmps/pair-int32-int32.ll b/test/Transforms/MergeICmps/pair-int32-int32.ll index 7544b84d861..351cb2adedf 100644 --- a/test/Transforms/MergeICmps/pair-int32-int32.ll +++ b/test/Transforms/MergeICmps/pair-int32-int32.ll @@ -1,30 +1,10 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mergeicmps -S | FileCheck %s --check-prefix=NOEXPANSION +; RUN: opt -mergeicmps -S -o - %s | FileCheck %s %"struct.std::pair" = type { i32, i32 } define zeroext i1 @opeq1( -; NOEXPANSION-LABEL: @opeq1( -; NOEXPANSION-NEXT: entry: -; NOEXPANSION-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0 -; NOEXPANSION-NEXT: [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4 -; NOEXPANSION-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0 -; NOEXPANSION-NEXT: [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4 -; NOEXPANSION-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] -; NOEXPANSION-NEXT: br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]] -; NOEXPANSION: land.rhs.i: -; NOEXPANSION-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A]], i64 0, i32 1 -; NOEXPANSION-NEXT: 
[[TMP2:%.*]] = load i32, i32* [[SECOND_I]], align 4 -; NOEXPANSION-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B]], i64 0, i32 1 -; NOEXPANSION-NEXT: [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4 -; NOEXPANSION-NEXT: [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]] -; NOEXPANSION-NEXT: br label [[OPEQ1_EXIT]] -; NOEXPANSION: opeq1.exit: -; NOEXPANSION-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ] -; NOEXPANSION-NEXT: ret i1 [[TMP4]] -; - %"struct.std::pair"* nocapture readonly dereferenceable(8) %a, - %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 { + %"struct.std::pair"* nocapture readonly dereferenceable(8) %a, + %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 { entry: %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0 %0 = load i32, i32* %first.i, align 4 @@ -44,31 +24,28 @@ land.rhs.i: opeq1.exit: %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ] ret i1 %4 +; CHECK-LABEL: @opeq1( +; The entry block with zero-offset GEPs is kept, loads are removed. +; CHECK: entry +; CHECK: getelementptr {{.*}} i32 0 +; CHECK-NOT: load +; CHECK: getelementptr {{.*}} i32 0 +; CHECK-NOT: load +; The two 4 byte loads and compares are replaced with a single 8-byte memcmp. +; CHECK: @memcmp({{.*}}8) +; CHECK: icmp eq {{.*}} 0 +; The branch is now a direct branch; the other block has been removed. +; CHECK: br label %opeq1.exit +; CHECK-NOT: br +; The phi is updated. +; CHECK: phi i1 [ %{{[^,]*}}, %entry ] +; CHECK-NEXT: ret } ; Same as above, but the two blocks are in inverse order. 
define zeroext i1 @opeq1_inverse( -; NOEXPANSION-LABEL: @opeq1_inverse( -; NOEXPANSION-NEXT: entry: -; NOEXPANSION-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 1 -; NOEXPANSION-NEXT: [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4 -; NOEXPANSION-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 1 -; NOEXPANSION-NEXT: [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4 -; NOEXPANSION-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] -; NOEXPANSION-NEXT: br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]] -; NOEXPANSION: land.rhs.i: -; NOEXPANSION-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A]], i64 0, i32 0 -; NOEXPANSION-NEXT: [[TMP2:%.*]] = load i32, i32* [[SECOND_I]], align 4 -; NOEXPANSION-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B]], i64 0, i32 0 -; NOEXPANSION-NEXT: [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4 -; NOEXPANSION-NEXT: [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]] -; NOEXPANSION-NEXT: br label [[OPEQ1_EXIT]] -; NOEXPANSION: opeq1.exit: -; NOEXPANSION-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ] -; NOEXPANSION-NEXT: ret i1 [[TMP4]] -; - %"struct.std::pair"* nocapture readonly dereferenceable(8) %a, - %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 { + %"struct.std::pair"* nocapture readonly dereferenceable(8) %a, + %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 { entry: %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1 %0 = load i32, i32* %first.i, align 4 @@ -88,6 +65,22 @@ land.rhs.i: opeq1.exit: %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ] ret i1 %4 +; CHECK-LABEL: @opeq1_inverse( +; The second block with zero-offset GEPs is 
kept, loads are removed. +; CHECK: land.rhs.i +; CHECK: getelementptr {{.*}} i32 0 +; CHECK-NOT: load +; CHECK: getelementptr {{.*}} i32 0 +; CHECK-NOT: load +; The two 4 byte loads and compares are replaced with a single 8-byte memcmp. +; CHECK: @memcmp({{.*}}8) +; CHECK: icmp eq {{.*}} 0 +; The branch is now a direct branch; the other block has been removed. +; CHECK: br label %opeq1.exit +; CHECK-NOT: br +; The phi is updated. +; CHECK: phi i1 [ %{{[^,]*}}, %land.rhs.i ] +; CHECK-NEXT: ret } diff --git a/test/Transforms/MergeICmps/X86/tuple-four-int8.ll b/test/Transforms/MergeICmps/tuple-four-int8.ll similarity index 57% rename from test/Transforms/MergeICmps/X86/tuple-four-int8.ll rename to test/Transforms/MergeICmps/tuple-four-int8.ll index 145a1361108..f5e2ab57e04 100644 --- a/test/Transforms/MergeICmps/X86/tuple-four-int8.ll +++ b/test/Transforms/MergeICmps/tuple-four-int8.ll @@ -1,5 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s +; RUN: opt -mergeicmps -S -o - %s | FileCheck %s ; This is a more involved test: clang generates this weird pattern for ; tuple. 
Right now we skip the entry block @@ -18,33 +17,8 @@ %"struct.std::_Head_base.6" = type { i8 } define zeroext i1 @opeq( -; CHECK-LABEL: @opeq( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds %"class.std::tuple", %"class.std::tuple"* [[A:%.*]], i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 -; CHECK-NEXT: [[ADD_PTR_I_I_I_I_I:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[ADD_PTR_I_I_I_I_I]], align 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds %"class.std::tuple", %"class.std::tuple"* [[B:%.*]], i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 -; CHECK-NEXT: [[ADD_PTR_I_I_I6_I_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 3 -; CHECK-NEXT: [[TMP3:%.*]] = load i8, i8* [[ADD_PTR_I_I_I6_I_I]], align 1 -; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP1]], [[TMP3]] -; CHECK-NEXT: br i1 [[CMP_I_I]], label [[LAND_RHS_I_I_I_I:%.*]], label [[OPEQ_EXIT:%.*]] -; CHECK: land.rhs.i.i.i: -; CHECK-NEXT: [[ADD_PTR_I_I_I_I_I_I_I:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 1 -; CHECK-NEXT: [[ADD_PTR_I_I_I6_I_I_I_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 1 -; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[ADD_PTR_I_I_I6_I_I_I_I]], i8* [[ADD_PTR_I_I_I_I_I_I_I]], i64 2) -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[MEMCMP]], 0 -; CHECK-NEXT: br label [[OPEQ_EXIT]] -; CHECK: land.rhs.i.i.i.i: -; CHECK-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP0]], align 1 -; CHECK-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP2]], align 1 -; CHECK-NEXT: [[CMP_I_I_I_I_I:%.*]] = icmp eq i8 [[TMP5]], [[TMP6]] -; CHECK-NEXT: br i1 [[CMP_I_I_I_I_I]], label [[LAND_RHS_I_I_I:%.*]], label [[OPEQ_EXIT]] -; CHECK: opeq.exit: -; CHECK-NEXT: [[TMP7:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP_I_I_I_I_I]], [[LAND_RHS_I_I_I_I]] ], [ [[TMP4]], [[LAND_RHS_I_I_I]] ] -; CHECK-NEXT: ret i1 [[TMP7]] -; - %"class.std::tuple"* nocapture readonly dereferenceable(4) %a, - %"class.std::tuple"* nocapture readonly 
dereferenceable(4) %b) local_unnamed_addr #1 { + %"class.std::tuple"* nocapture readonly dereferenceable(4) %a, + %"class.std::tuple"* nocapture readonly dereferenceable(4) %b) local_unnamed_addr #1 { entry: %0 = getelementptr inbounds %"class.std::tuple", %"class.std::tuple"* %a, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 %add.ptr.i.i.i.i.i = getelementptr inbounds i8, i8* %0, i64 3 @@ -80,11 +54,18 @@ land.rhs.i.i.i.i: opeq.exit: %10 = phi i1 [ false, %entry ], [ false, %land.rhs.i.i ], [ false, %land.rhs.i.i.i ], [ %cmp.i.i.i.i.i, %land.rhs.i.i.i.i ] ret i1 %10 +; CHECK-LABEL: @opeq( ; The entry block is kept as is, but the next block is now the merged comparison ; block for bytes [1,2] or the block for the head. +; CHECK: entry +; CHECK: br i1 %cmp.i.i, label %land.rhs.i.i.i{{(.i)?}}, label %opeq.exit ; The two 1 byte loads and compares at offset 1 are replaced with a single ; 2-byte memcmp. +; CHECK: land.rhs.i.i.i +; CHECK: @memcmp({{.*}}2) +; CHECK: icmp eq {{.*}} 0 ; In the end we have three blocks. 
+; CHECK: phi i1 ; CHECK-SAME %entry ; CHECK-SAME %land.rhs.i.i.i.i ; CHECK-SAME %land.rhs.i.i.i diff --git a/test/Transforms/MergeICmps/volatile.ll b/test/Transforms/MergeICmps/volatile.ll new file mode 100644 index 00000000000..1df22575c2c --- /dev/null +++ b/test/Transforms/MergeICmps/volatile.ll @@ -0,0 +1,30 @@ +; RUN: opt -mergeicmps -S -o - %s | FileCheck %s + +%"struct.std::pair" = type { i32, i32 } + +define zeroext i1 @opeq( + %"struct.std::pair"* nocapture readonly dereferenceable(8) %a, + %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 { +entry: + %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0 + %0 = load i32, i32* %first.i, align 4 + %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0 + %1 = load i32, i32* %first1.i, align 4 + %cmp.i = icmp eq i32 %0, %1 + br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit + +land.rhs.i: + %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1 + %2 = load volatile i32, i32* %second.i, align 4 + %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1 + %3 = load i32, i32* %second2.i, align 4 + %cmp3.i = icmp eq i32 %2, %3 + br label %opeq1.exit + +opeq1.exit: + %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ] + ret i1 %4 +; CHECK-LABEL: @opeq( +; CHECK-NOT: memcmp +} +