From b306bc03ed38c7e13989c5baca37a1adbdca546c Mon Sep 17 00:00:00 2001 From: Balaram Makam Date: Tue, 3 Oct 2017 22:39:24 +0000 Subject: [PATCH] [AArch64] Use LateSimplifyCFG after expanding atomic operations. Summary: After r308422 we defer optimizations that can destroy loop canonical forms to LateSimplifyCFG. Running LateSimplifyCFG after expanding atomic operations can exploit more control-flow opportunities. Reviewers: mcrosier, t.p.northover, efriedma Reviewed By: efriedma Subscribers: aemerson, rengolin, javed.absar, llvm-commits, kristof.beyls Differential Revision: https://reviews.llvm.org/D38262 llvm-svn: 314857 --- lib/Target/AArch64/AArch64TargetMachine.cpp | 2 +- test/CodeGen/AArch64/aarch64-loop-gep-opt.ll | 6 +- test/CodeGen/AArch64/cmpxchg-idioms.ll | 60 ++++++++++++++++++++ 3 files changed, 64 insertions(+), 4 deletions(-) diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index e65b382e85b..1762475ac93 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -365,7 +365,7 @@ void AArch64PassConfig::addIRPasses() { // determine whether it succeeded. We can exploit existing control-flow in // ldrex/strex loops to simplify this, but it needs tidying up. 
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) - addPass(createCFGSimplificationPass()); + addPass(createLateCFGSimplificationPass()); // Run LoopDataPrefetch // diff --git a/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll b/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll index 2b4e438a13a..1b2ed4b8952 100644 --- a/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll +++ b/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll @@ -19,9 +19,9 @@ entry: do.body.i: ; CHECK-LABEL: do.body.i: -; CHECK: %uglygep1 = getelementptr i8, i8* %uglygep, i64 %3 -; CHECK-NEXT: %4 = bitcast i8* %uglygep1 to i32* -; CHECK-NOT: %uglygep1 = getelementptr i8, i8* %uglygep, i64 1032 +; CHECK: %uglygep2 = getelementptr i8, i8* %uglygep, i64 %3 +; CHECK-NEXT: %4 = bitcast i8* %uglygep2 to i32* +; CHECK-NOT: %uglygep2 = getelementptr i8, i8* %uglygep, i64 1032 %0 = phi i32 [ 256, %entry ], [ %.be, %do.body.i.backedge ] diff --git a/test/CodeGen/AArch64/cmpxchg-idioms.ll b/test/CodeGen/AArch64/cmpxchg-idioms.ll index 0c008c26979..cae09b28979 100644 --- a/test/CodeGen/AArch64/cmpxchg-idioms.ll +++ b/test/CodeGen/AArch64/cmpxchg-idioms.ll @@ -91,3 +91,63 @@ end: declare void @bar() declare void @baz() + +define i1 @test_conditional2(i32 %a, i32 %b, i32* %c) { +; CHECK-LABEL: test_conditional2: +; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxr [[LOADED:w[0-9]+]], [x19] +; CHECK: cmp [[LOADED]], w21 +; CHECK: b.ne [[FAILED:LBB[0-9]+_[0-9]+]] + +; CHECK: stlxr [[STATUS:w[0-9]+]], w20, [x19] +; CHECK: cbnz [[STATUS]], [[LOOP]] +; CHECK: orr [[STATUS]], wzr, #0x1 +; CHECK: b [[PH:LBB[0-9]+_[0-9]+]] + +; CHECK: [[FAILED]]: +; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}} + +; Verify the preheader is simplified by LateSimplifyCFG. 
+; CHECK: [[PH]]: +; CHECK: orr w22, wzr, #0x2 +; CHECK-NOT: orr w22, wzr, #0x4 +; CHECK-NOT: cmn w22, #4 +; CHECK: b [[LOOP2:LBB[0-9]+_[0-9]+]] +; CHECK-NOT: b.ne [[LOOP2]] +; CHECK-NOT: b {{LBB[0-9]+_[0-9]+}} +; CHECK: bl _foo +entry: + %pair = cmpxchg i32* %c, i32 %a, i32 %b seq_cst seq_cst + %success = extractvalue { i32, i1 } %pair, 1 + br label %for.cond + +for.cond: ; preds = %if.end, %entry + %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ] + %changed.0.off0 = phi i1 [ %success, %entry ], [ %changed.1.off0, %if.end ] + %dec = add nsw i32 %i.0, -1 + %tobool = icmp eq i32 %i.0, 0 + br i1 %tobool, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.cond + %changed.0.off0.lcssa = phi i1 [ %changed.0.off0, %for.cond ] + ret i1 %changed.0.off0.lcssa + +for.body: ; preds = %for.cond + %or = or i32 %a, %b + %idxprom = sext i32 %dec to i64 + %arrayidx = getelementptr inbounds i32, i32* %c, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %cmp = icmp eq i32 %or, %0 + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %for.body + store i32 %or, i32* %arrayidx, align 4 + tail call void @foo() + br label %if.end + +if.end: ; preds = %for.body, %if.then + %changed.1.off0 = phi i1 [ false, %if.then ], [ %changed.0.off0, %for.body ] + br label %for.cond +} + +declare void @foo()