mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[DAGCombiner] try to convert opposing shifts to casts
[DAGCombiner] try to convert opposing shifts to casts.

This reverses a questionable IR canonicalization when a truncate is free:

  sra (add (shl X, N1C), AddC), N1C --> sext (add (trunc X to (width - N1C)), AddC')

Proof: https://rise4fun.com/Alive/slRC

More details in PR42644: https://bugs.llvm.org/show_bug.cgi?id=42644

I limited this to pre-legalization for code simplicity, because that should be enough to reverse the IR patterns. I don't have any evidence (no regression test diffs) that we need to try this later.

Differential Revision: https://reviews.llvm.org/D65607

llvm-svn: 367710
This commit is contained in:
parent
3ca8e94f65
commit
6449e66c97
@ -7616,6 +7616,32 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
|
||||
}
|
||||
}
|
||||
|
||||
// We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
|
||||
// sra (add (shl X, N1C), AddC), N1C -->
|
||||
// sext (add (trunc X to (width - N1C)), AddC')
|
||||
if (!LegalOperations && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
|
||||
N0.getOperand(0).getOpcode() == ISD::SHL &&
|
||||
N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
|
||||
if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
|
||||
SDValue Shl = N0.getOperand(0);
|
||||
// Determine what the truncate's type would be and ask the target if that
|
||||
// is a free operation.
|
||||
LLVMContext &Ctx = *DAG.getContext();
|
||||
unsigned ShiftAmt = N1C->getZExtValue();
|
||||
EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
|
||||
if (VT.isVector())
|
||||
TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
|
||||
if (TLI.isTruncateFree(VT, TruncVT)) {
|
||||
SDLoc DL(N);
|
||||
SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
|
||||
SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
|
||||
trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
|
||||
SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
|
||||
return DAG.getSExtOrTrunc(Add, DL, VT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
|
||||
if (N1.getOpcode() == ISD::TRUNCATE &&
|
||||
N1.getOperand(0).getOpcode() == ISD::AND) {
|
||||
|
@ -78,9 +78,8 @@ entry:
|
||||
define i64 @ashr_add_shl_i32(i64 %r) {
|
||||
; CHECK-LABEL: ashr_add_shl_i32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov x8, #4294967296
|
||||
; CHECK-NEXT: add x8, x8, x0, lsl #32
|
||||
; CHECK-NEXT: asr x0, x8, #32
|
||||
; CHECK-NEXT: add w8, w0, #1 // =1
|
||||
; CHECK-NEXT: sxtw x0, w8
|
||||
; CHECK-NEXT: ret
|
||||
%conv = shl i64 %r, 32
|
||||
%sext = add i64 %conv, 4294967296
|
||||
@ -91,9 +90,8 @@ define i64 @ashr_add_shl_i32(i64 %r) {
|
||||
define i64 @ashr_add_shl_i8(i64 %r) {
|
||||
; CHECK-LABEL: ashr_add_shl_i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov x8, #72057594037927936
|
||||
; CHECK-NEXT: add x8, x8, x0, lsl #56
|
||||
; CHECK-NEXT: asr x0, x8, #56
|
||||
; CHECK-NEXT: add w8, w0, #1 // =1
|
||||
; CHECK-NEXT: sxtb x0, w8
|
||||
; CHECK-NEXT: ret
|
||||
%conv = shl i64 %r, 56
|
||||
%sext = add i64 %conv, 72057594037927936
|
||||
|
@ -168,10 +168,8 @@ define i64 @ashr_add_shl_i32(i64 %r) nounwind {
|
||||
;
|
||||
; X64-LABEL: ashr_add_shl_i32:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: shlq $32, %rdi
|
||||
; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
|
||||
; X64-NEXT: addq %rdi, %rax
|
||||
; X64-NEXT: sarq $32, %rax
|
||||
; X64-NEXT: incl %edi
|
||||
; X64-NEXT: movslq %edi, %rax
|
||||
; X64-NEXT: retq
|
||||
%conv = shl i64 %r, 32
|
||||
%sext = add i64 %conv, 4294967296
|
||||
@ -182,20 +180,17 @@ define i64 @ashr_add_shl_i32(i64 %r) nounwind {
|
||||
define i64 @ashr_add_shl_i8(i64 %r) nounwind {
|
||||
; X32-LABEL: ashr_add_shl_i8:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X32-NEXT: shll $24, %edx
|
||||
; X32-NEXT: addl $33554432, %edx # imm = 0x2000000
|
||||
; X32-NEXT: movl %edx, %eax
|
||||
; X32-NEXT: sarl $24, %eax
|
||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X32-NEXT: addb $2, %al
|
||||
; X32-NEXT: movsbl %al, %eax
|
||||
; X32-NEXT: movl %eax, %edx
|
||||
; X32-NEXT: sarl $31, %edx
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: ashr_add_shl_i8:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: shlq $56, %rdi
|
||||
; X64-NEXT: movabsq $144115188075855872, %rax # imm = 0x200000000000000
|
||||
; X64-NEXT: addq %rdi, %rax
|
||||
; X64-NEXT: sarq $56, %rax
|
||||
; X64-NEXT: addb $2, %dil
|
||||
; X64-NEXT: movsbq %dil, %rax
|
||||
; X64-NEXT: retq
|
||||
%conv = shl i64 %r, 56
|
||||
%sext = add i64 %conv, 144115188075855872
|
||||
@ -209,34 +204,31 @@ define <4 x i32> @ashr_add_shl_v4i8(<4 x i32> %r) nounwind {
|
||||
; X32-NEXT: pushl %edi
|
||||
; X32-NEXT: pushl %esi
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X32-NEXT: shll $24, %edi
|
||||
; X32-NEXT: shll $24, %esi
|
||||
; X32-NEXT: shll $24, %edx
|
||||
; X32-NEXT: shll $24, %ecx
|
||||
; X32-NEXT: addl $16777216, %ecx # imm = 0x1000000
|
||||
; X32-NEXT: addl $16777216, %edx # imm = 0x1000000
|
||||
; X32-NEXT: addl $16777216, %esi # imm = 0x1000000
|
||||
; X32-NEXT: addl $16777216, %edi # imm = 0x1000000
|
||||
; X32-NEXT: sarl $24, %edi
|
||||
; X32-NEXT: sarl $24, %esi
|
||||
; X32-NEXT: sarl $24, %edx
|
||||
; X32-NEXT: sarl $24, %ecx
|
||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
|
||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %dl
|
||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %ch
|
||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %dh
|
||||
; X32-NEXT: incb %dh
|
||||
; X32-NEXT: movsbl %dh, %esi
|
||||
; X32-NEXT: incb %ch
|
||||
; X32-NEXT: movsbl %ch, %edi
|
||||
; X32-NEXT: incb %dl
|
||||
; X32-NEXT: movsbl %dl, %edx
|
||||
; X32-NEXT: incb %cl
|
||||
; X32-NEXT: movsbl %cl, %ecx
|
||||
; X32-NEXT: movl %ecx, 12(%eax)
|
||||
; X32-NEXT: movl %edx, 8(%eax)
|
||||
; X32-NEXT: movl %esi, 4(%eax)
|
||||
; X32-NEXT: movl %edi, (%eax)
|
||||
; X32-NEXT: movl %edi, 4(%eax)
|
||||
; X32-NEXT: movl %esi, (%eax)
|
||||
; X32-NEXT: popl %esi
|
||||
; X32-NEXT: popl %edi
|
||||
; X32-NEXT: retl $4
|
||||
;
|
||||
; X64-LABEL: ashr_add_shl_v4i8:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pcmpeqd %xmm1, %xmm1
|
||||
; X64-NEXT: psubd %xmm1, %xmm0
|
||||
; X64-NEXT: pslld $24, %xmm0
|
||||
; X64-NEXT: paddd {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: psrad $24, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%conv = shl <4 x i32> %r, <i32 24, i32 24, i32 24, i32 24>
|
||||
|
Loading…
x
Reference in New Issue
Block a user