From 6e799c3c583c1d3fcb7dfe54b4e5e6087139ec7c Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 23 Mar 2011 22:52:04 +0000 Subject: [PATCH] Cmp peephole optimization isn't always safe for signed arithmetics. int tries = INT_MAX; while (tries > 0) { tries--; } The check should be: subs r4, #1 cmp r4, #0 bgt LBB0_1 The subs can set the overflow V bit when r4 is INT_MAX+1 (which loop canonicalization apparently does in this case). cmp #0 would have cleared it while not changing the N and Z bits. Since BGT is dependent on the V bit, i.e. (N == V) && !Z, it is not safe to eliminate the cmp #0. rdar://9172742 llvm-svn: 128179 --- lib/Target/ARM/ARMBaseInstrInfo.cpp | 46 ++++++++++++++++++++-- test/CodeGen/ARM/2011-03-23-PeepholeBug.ll | 41 +++++++++++++++++++ test/CodeGen/ARM/long_shift.ll | 6 ++- 3 files changed, 88 insertions(+), 5 deletions(-) create mode 100644 test/CodeGen/ARM/2011-03-23-PeepholeBug.ll diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index a56df304eab..b5f20a44021 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1612,11 +1612,51 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, switch (MI->getOpcode()) { default: break; case ARM::ADDri: - case ARM::ANDri: - case ARM::t2ANDri: case ARM::SUBri: case ARM::t2ADDri: - case ARM::t2SUBri: + case ARM::t2SUBri: { + // Scan forward for the use of CPSR, if it's a conditional code requires + // checking of V bit, then this is not safe to do. If we can't find the + // CPSR use (i.e. used in another block), then it's not safe to perform + // the optimization. + bool isSafe = false; + I = CmpInstr; + E = MI->getParent()->end(); + while (!isSafe && ++I != E) { + const MachineInstr &Instr = *I; + for (unsigned IO = 0, EO = Instr.getNumOperands(); + !isSafe && IO != EO; ++IO) { + const MachineOperand &MO = Instr.getOperand(IO); + if (!MO.isReg() || MO.getReg() != ARM::CPSR) + continue; + if (MO.isDef()) { + isSafe = true; + break; + } + // Condition code is after the operand before CPSR. + ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm(); + switch (CC) { + default: + isSafe = true; + break; + case ARMCC::VS: + case ARMCC::VC: + case ARMCC::GE: + case ARMCC::LT: + case ARMCC::GT: + case ARMCC::LE: + return false; + } + } + } + + if (!isSafe) + return false; + + // fallthrough + } + case ARM::ANDri: + case ARM::t2ANDri: // Toggle the optional operand to CPSR. MI->getOperand(5).setReg(ARM::CPSR); MI->getOperand(5).setIsDef(true); diff --git a/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll b/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll new file mode 100644 index 00000000000..7c9af6f5e59 --- /dev/null +++ b/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll @@ -0,0 +1,41 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s + +; subs r4, #1 +; cmp r4, 0 +; bgt +; cmp cannot be optimized away since it will clear the overflow bit. +; gt / ge, lt, le conditions all depend on V bit. +; rdar://9172742 + +define i32 @t() nounwind { +; CHECK: t: +entry: + br label %bb2 + +bb: ; preds = %bb2 + %0 = tail call i32 @rand() nounwind + %1 = icmp eq i32 %0, 50 + br i1 %1, label %bb3, label %bb1 + +bb1: ; preds = %bb + %tmp = tail call i32 @puts() nounwind + %indvar.next = add i32 %indvar, 1 + br label %bb2 + +bb2: ; preds = %bb1, %entry +; CHECK: bb2 +; CHECK: subs [[REG:r[0-9]+]], #1 +; CHECK: cmp [[REG]], #0 +; CHECK: bgt + %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ] + %tries.0 = sub i32 2147483647, %indvar + %tmp1 = icmp sgt i32 %tries.0, 0 + br i1 %tmp1, label %bb, label %bb3 + +bb3: ; preds = %bb2, %bb + ret i32 0 +} + +declare i32 @rand() + +declare i32 @puts() nounwind diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll index ab14de10ca2..d5aac2e3dda 100644 --- a/test/CodeGen/ARM/long_shift.ll +++ b/test/CodeGen/ARM/long_shift.ll @@ -24,7 +24,8 @@ define i32 @f2(i64 %x, i64 %y) { ; CHECK: f2 ; CHECK: lsr{{.*}}r2 ; CHECK-NEXT: rsb r3, r2, #32 -; CHECK-NEXT: subs r2, r2, #32 +; CHECK-NEXT: sub r2, r2, #32 +; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: orr r0, r0, r1, lsl r3 ; CHECK-NEXT: asrge r0, r1, r2 %a = ashr i64 %x, %y @@ -36,7 +37,8 @@ define i32 @f3(i64 %x, i64 %y) { ; CHECK: f3 ; CHECK: lsr{{.*}}r2 ; CHECK-NEXT: rsb r3, r2, #32 -; CHECK-NEXT: subs r2, r2, #32 +; CHECK-NEXT: sub r2, r2, #32 +; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: orr r0, r0, r1, lsl r3 ; CHECK-NEXT: lsrge r0, r1, r2 %a = lshr i64 %x, %y