Cmp peephole optimization isn't always safe for signed arithmetics.

int tries = INT_MAX; while (tries > 0) { tries--; } The check should be: subs r4, #1 cmp r4, #0 bgt LBB0_1 The subs can set the overflow V bit when r4 is INT_MAX+1 (which loop canonicalization apparently does in this case). cmp #0 would have cleared it while not changing the N and Z bits. Since BGT is dependent on the V bit, i.e. (N == V) && !Z, it is not safe to eliminate the cmp #0. rdar://9172742 llvm-svn: 128179
2024-11-23 19:23:23 +01:00 · 2011-03-23 22:52:04 +00:00 · 2011-03-23 22:52:04 +00:00 · 6e799c3c58
commit 6e799c3c58
parent 76fcfaab12
3 changed files with 88 additions and 5 deletions
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@ -1612,11 +1612,51 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
  switch (MI->getOpcode()) {
  default: break;
  case ARM::ADDri:
-  case ARM::ANDri:
-  case ARM::t2ANDri:
  case ARM::SUBri:
  case ARM::t2ADDri:
-  case ARM::t2SUBri:
+  case ARM::t2SUBri: {
+    // Scan forward for the use of CPSR, if it's a conditional code requires
+    // checking of V bit, then this is not safe to do. If we can't find the
+    // CPSR use (i.e. used in another block), then it's not safe to perform
+    // the optimization.
+    bool isSafe = false;
+    I = CmpInstr;
+    E = MI->getParent()->end();
+    while (!isSafe && ++I != E) {
+      const MachineInstr &Instr = *I;
+      for (unsigned IO = 0, EO = Instr.getNumOperands();
+           !isSafe && IO != EO; ++IO) {
+        const MachineOperand &MO = Instr.getOperand(IO);
+        if (!MO.isReg() || MO.getReg() != ARM::CPSR)
+          continue;
+        if (MO.isDef()) {
+          isSafe = true;
+          break;
+        }
+        // Condition code is after the operand before CPSR.
+        ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
+        switch (CC) {
+        default:
+          isSafe = true;
+          break;
+        case ARMCC::VS:
+        case ARMCC::VC:
+        case ARMCC::GE:
+        case ARMCC::LT:
+        case ARMCC::GT:
+        case ARMCC::LE:
+          return false;
+        }
+      }
+    }
+
+    if (!isSafe)
+      return false;
+
+    // fallthrough
+  }
+  case ARM::ANDri:
+  case ARM::t2ANDri:
    // Toggle the optional operand to CPSR.
    MI->getOperand(5).setReg(ARM::CPSR);
    MI->getOperand(5).setIsDef(true);
--- a/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll
+++ b/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll
@ -0,0 +1,41 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s
+
+; subs r4, #1
+; cmp r4, 0
+; bgt
+; cmp cannot be optimized away since it will clear the overflow bit.
+; gt / ge, lt, le conditions all depend on V bit.
+; rdar://9172742
+
+define i32 @t() nounwind {
+; CHECK: t:
+entry:
+  br label %bb2
+
+bb:                                               ; preds = %bb2
+  %0 = tail call i32 @rand() nounwind
+  %1 = icmp eq i32 %0, 50
+  br i1 %1, label %bb3, label %bb1
+
+bb1:                                              ; preds = %bb
+  %tmp = tail call i32 @puts() nounwind
+  %indvar.next = add i32 %indvar, 1
+  br label %bb2
+
+bb2:                                              ; preds = %bb1, %entry
+; CHECK: bb2
+; CHECK: subs [[REG:r[0-9]+]], #1
+; CHECK: cmp [[REG]], #0
+; CHECK: bgt
+  %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
+  %tries.0 = sub i32 2147483647, %indvar
+  %tmp1 = icmp sgt i32 %tries.0, 0
+  br i1 %tmp1, label %bb, label %bb3
+
+bb3:                                              ; preds = %bb2, %bb
+  ret i32 0
+}
+
+declare i32 @rand()
+
+declare i32 @puts() nounwind
--- a/test/CodeGen/ARM/long_shift.ll
+++ b/test/CodeGen/ARM/long_shift.ll
@ -24,7 +24,8 @@ define i32 @f2(i64 %x, i64 %y) {
 ; CHECK: f2
 ; CHECK:      lsr{{.*}}r2
 ; CHECK-NEXT: rsb     r3, r2, #32
-; CHECK-NEXT: subs    r2, r2, #32
+; CHECK-NEXT: sub     r2, r2, #32
+; CHECK-NEXT: cmp     r2, #0
 ; CHECK-NEXT: orr     r0, r0, r1, lsl r3
 ; CHECK-NEXT: asrge   r0, r1, r2
 	%a = ashr i64 %x, %y
@ -36,7 +37,8 @@ define i32 @f3(i64 %x, i64 %y) {
 ; CHECK: f3
 ; CHECK:      lsr{{.*}}r2
 ; CHECK-NEXT: rsb     r3, r2, #32
-; CHECK-NEXT: subs    r2, r2, #32
+; CHECK-NEXT: sub     r2, r2, #32
+; CHECK-NEXT: cmp     r2, #0
 ; CHECK-NEXT: orr     r0, r0, r1, lsl r3
 ; CHECK-NEXT: lsrge   r0, r1, r2
 	%a = lshr i64 %x, %y