[X86] Teach the isel optimization for (x << C1) op C2 to (x op (C2>>C1)) << C1 to consider cases where C2>>C1 can fit an unsigned 32-bit immediate

For 64-bit operations we should consider whether the immediate can be made to fit in an unsigned 32-bit immediate. For OR/XOR this allows us to load the immediate with MOV32ri instead of movabsq. For AND this allows us to fold the immediate. Differential Revision: https://reviews.llvm.org/D59867 llvm-svn: 357196
2025-02-01 05:01:59 +01:00 · 2019-03-28 18:05:37 +00:00 · 2019-03-28 18:05:37 +00:00 · addf5a7422
commit addf5a7422
parent 7a1f7242ef
2 changed files with 36 additions and 30 deletions
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@ -3501,39 +3501,45 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
      break;

    int64_t Val = Cst->getSExtValue();
-    uint64_t ShlVal = ShlCst->getZExtValue();
+    uint64_t ShAmt = ShlCst->getZExtValue();

    // Make sure that we don't change the operation by removing bits.
    // This only matters for OR and XOR, AND is unaffected.
-    uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1;
+    uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1;
    if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
      break;

-    MVT CstVT = NVT;
-
    // Check the minimum bitwidth for the new constant.
-    // TODO: AND32ri is the same as AND64ri32 with zext imm.
-    // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
    // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
-    if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
-      CstVT = MVT::i8;
-    else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
-      CstVT = MVT::i32;
+    auto CanShrinkImmediate = [&](int64_t &ShiftedVal) {
+      ShiftedVal = Val >> ShAmt;
+      if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) ||
+          (!isInt<32>(Val) && isInt<32>(ShiftedVal)))
+        return true;
+      // For 64-bit we can also try unsigned 32 bit immediates.
+      // AND32ri is the same as AND64ri32 with zext imm.
+      // MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
+      ShiftedVal = (uint64_t)Val >> ShAmt;
+      if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
+        return true;
+      return false;
+    };

-    // Bail if there is no smaller encoding.
-    if (NVT == CstVT)
-      break;
+    int64_t ShiftedVal;
+    if (CanShrinkImmediate(ShiftedVal)) {
+      SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT);
+      insertDAGNode(*CurDAG, SDValue(Node, 0), NewCst);
+      SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, N0->getOperand(0),
+                                         NewCst);
+      insertDAGNode(*CurDAG, SDValue(Node, 0), NewBinOp);
+      SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp,
+                                       N0->getOperand(1));
+      ReplaceNode(Node, NewSHL.getNode());
+      SelectCode(NewSHL.getNode());
+      return;
+    }

-    SDValue NewCst = CurDAG->getConstant(Val >> ShlVal, dl, NVT);
-    insertDAGNode(*CurDAG, SDValue(Node, 0), NewCst);
-    SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, N0->getOperand(0),
-                                       NewCst);
-    insertDAGNode(*CurDAG, SDValue(Node, 0), NewBinOp);
-    SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp,
-                                     N0->getOperand(1));
-    ReplaceNode(Node, NewSHL.getNode());
-    SelectCode(NewSHL.getNode());
-    return;
+    break;
  }
  case X86ISD::SMUL:
    // i16/i32/i64 are handled with isel patterns.
--- a/test/CodeGen/X86/narrow-shl-cst.ll
+++ b/test/CodeGen/X86/narrow-shl-cst.ll
@ -166,9 +166,9 @@ define i64 @test13(i64 %x, i64* %y) nounwind {
 define i64 @test14(i64 %x, i64* %y) nounwind {
 ; CHECK-LABEL: test14:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shlq $8, %rdi
-; CHECK-NEXT:    movabsq $1095216660480, %rax # imm = 0xFF00000000
-; CHECK-NEXT:    andq %rdi, %rax
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    andl $-16777216, %eax # imm = 0xFF000000
+; CHECK-NEXT:    shlq $8, %rax
 ; CHECK-NEXT:    retq
  %and = shl i64 %x, 8
  %shl = and i64 %and, 1095216660480
@ -178,9 +178,9 @@ define i64 @test14(i64 %x, i64* %y) nounwind {
 define i64 @test15(i64 %x, i64* %y) nounwind {
 ; CHECK-LABEL: test15:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shlq $8, %rdi
-; CHECK-NEXT:    movabsq $1095216660480, %rax # imm = 0xFF00000000
+; CHECK-NEXT:    movl $4278190080, %eax # imm = 0xFF000000
 ; CHECK-NEXT:    orq %rdi, %rax
+; CHECK-NEXT:    shlq $8, %rax
 ; CHECK-NEXT:    retq
  %or = shl i64 %x, 8
  %shl = or i64 %or, 1095216660480
@ -190,9 +190,9 @@ define i64 @test15(i64 %x, i64* %y) nounwind {
 define i64 @test16(i64 %x, i64* %y) nounwind {
 ; CHECK-LABEL: test16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shlq $8, %rdi
-; CHECK-NEXT:    movabsq $1095216660480, %rax # imm = 0xFF00000000
+; CHECK-NEXT:    movl $4278190080, %eax # imm = 0xFF000000
 ; CHECK-NEXT:    xorq %rdi, %rax
+; CHECK-NEXT:    shlq $8, %rax
 ; CHECK-NEXT:    retq
  %xor = shl i64 %x, 8
  %shl = xor i64 %xor, 1095216660480