[AArch64] Fix comparison peephole opt with non-0/1 immediate (PR51476)

This is a non-intrusive fix for https://bugs.llvm.org/show_bug.cgi?id=51476 intended for backport to the 13.x release branch. It expands on the current hack by distinguishing between CmpValue of 0, 1 and 2, where 0 and 1 have the obvious meaning and 2 means "anything else". The new optimization from D98564 should only be performed for CmpValue of 0 or 1. For main, I think we should switch the analyzeCompare() and optimizeCompare() APIs to use int64_t instead of int, which is in line with MachineOperand's notion of an immediate, and avoids this problem altogether. Differential Revision: https://reviews.llvm.org/D108076 (cherry picked from commit 81b106584f2baf33e09be2362c35c1bf2f6bfe94)
2024-11-22 02:33:06 +01:00 · 2021-08-14 23:35:27 +02:00 · 2021-08-14 23:35:27 +02:00 · e4e6f3eeff
commit e4e6f3eeff
parent 45d26b8826
3 changed files with 93 additions and 15 deletions
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@ -1120,6 +1120,16 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
  if (!MI.getOperand(1).isReg())
    return false;
  auto NormalizeCmpValue = [](int64_t Value) -> int {
    // CmpValue is a plain int, whereas a comparison immediate may need all 64
    // bits. Rather than truncating, bucket the immediate: 0 and 1 are passed
    // through as-is, and 2 stands for any other value.
    // TODO: Switch CmpValue to int64_t in the API to avoid this.
    switch (Value) {
    case 0:
    case 1:
      return static_cast<int>(Value);
    default:
      return 2;
    }
  };
  switch (MI.getOpcode()) {
  default:
    break;
@ -1155,8 +1165,7 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
-    // FIXME: In order to convert CmpValue to 0 or 1
+    CmpValue = NormalizeCmpValue(MI.getOperand(2).getImm());
    CmpValue = MI.getOperand(2).getImm() != 0;
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
@ -1165,14 +1174,9 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
-    // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
+    CmpValue = NormalizeCmpValue(AArch64_AM::decodeLogicalImmediate(
    // while the type of CmpValue is int. When converting uint64_t to int,
    // the high 32 bits of uint64_t will be lost.
    // In fact it causes a bug in spec2006-483.xalancbmk
    // CmpValue is only used to compare with zero in OptimizeCompareInstr
    CmpValue = AArch64_AM::decodeLogicalImmediate(
                   MI.getOperand(2).getImm(),
-                   MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
+                   MI.getOpcode() == AArch64::ANDSWri ? 32 : 64));
    return true;
  }
@ -1462,10 +1466,9 @@ bool AArch64InstrInfo::optimizeCompareInstr(
  if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
    return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
-  // Continue only if we have a "ri" where immediate is zero.
+  // Warning: CmpValue == 2 indicates *any* value apart from 0 or 1.
-  // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
+  assert((CmpValue == 0 || CmpValue == 1 || CmpValue == 2) &&
-  // function.
+         "CmpValue must be 0, 1, or 2!");
  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
  if (SrcReg2 != 0)
    return false;
@ -1473,9 +1476,10 @@ bool AArch64InstrInfo::optimizeCompareInstr(
  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    return false;
-  if (!CmpValue && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
+  if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
    return true;
-  return removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
+  return (CmpValue == 0 || CmpValue == 1) &&
         removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
 }
 /// Get opcode of S version of Instr.
--- a/test/CodeGen/AArch64/csinc-cmp-removal.mir
+++ b/test/CodeGen/AArch64/csinc-cmp-removal.mir
@ -307,3 +307,42 @@ body:             |
    RET_ReallyLR
 ...
 ---
 # The SUBSWri in bb.0 compares the CSINCWr result against the immediate 3.
 # The CHECK lines expect that SUBSWri to still be present in the output.
 name:            subswr_wrong_cmp_value
 tracksRegLiveness: true
 body:             |
  ; CHECK-LABEL: name: subswr_wrong_cmp_value
  ; CHECK: bb.0:
  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK:   liveins: $x1
  ; CHECK:   [[COPY:%[0-9]+]]:gpr64common = COPY $x1
  ; CHECK:   [[DEF:%[0-9]+]]:gpr64 = IMPLICIT_DEF
  ; CHECK:   [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr killed [[DEF]], [[COPY]], implicit-def $nzcv
  ; CHECK:   [[CSINCWr:%[0-9]+]]:gpr32common = CSINCWr $wzr, $wzr, 1, implicit $nzcv
  ; CHECK:   [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri killed [[CSINCWr]], 3, 0, implicit-def $nzcv
  ; CHECK:   Bcc 1, %bb.2, implicit $nzcv
  ; CHECK:   B %bb.1
  ; CHECK: bb.1:
  ; CHECK:   successors: %bb.2(0x80000000)
  ; CHECK:   B %bb.2
  ; CHECK: bb.2:
  ; CHECK:   RET_ReallyLR
  bb.0:
    liveins: $x1
    successors: %bb.1(0x40000000), %bb.2(0x40000000)
    %1:gpr64common = COPY $x1
    %2:gpr64 = IMPLICIT_DEF
    %3:gpr64 = SUBSXrr killed %2:gpr64, %1:gpr64common, implicit-def $nzcv
    %4:gpr32common = CSINCWr $wzr, $wzr, 1, implicit $nzcv
    %5:gpr32 = SUBSWri killed %4:gpr32common, 3, 0, implicit-def $nzcv
    Bcc 1, %bb.2, implicit $nzcv
    B %bb.1
  bb.1:
    successors: %bb.2(0x80000000)
    B %bb.2
  bb.2:
    RET_ReallyLR
 ...
--- a/test/CodeGen/AArch64/pr51476.ll
+++ b/test/CodeGen/AArch64/pr51476.ll
@ -0,0 +1,35 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
 ; Regression test for PR51476. %cmp2 compares the zero-extended result of
 ; %cmp1 against 3, an immediate that is neither 0 nor 1. The assertions below
 ; expect that second compare to remain in the output as `cmp w0, #3`.
 define void @test(i8 %arg) nounwind {
 ; CHECK-LABEL: test:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    and w8, w0, #0xff
 ; CHECK-NEXT:    cmp w8, #1
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    cmp w0, #3
 ; CHECK-NEXT:    strb w0, [sp, #12]
 ; CHECK-NEXT:    b.eq .LBB0_2
 ; CHECK-NEXT:  // %bb.1: // %do_call
 ; CHECK-NEXT:    bl unknown
 ; CHECK-NEXT:  .LBB0_2: // %common.ret
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
  %tmp = alloca i8
  %cmp1 = icmp ne i8 %arg, 1
  %zext = zext i1 %cmp1 to i8
  store i8 %zext, i8* %tmp
  %zext2 = load i8, i8* %tmp
  %cmp2 = icmp eq i8 %zext2, 3
  br i1 %cmp2, label %exit, label %do_call
 do_call:
  call void @unknown(i8 %zext2)
  ret void
 exit:
  ret void
 }
 declare void @unknown(i8)