From 1746c5a1e320f0cc8892ef83ff187e9a3fc777d0 Mon Sep 17 00:00:00 2001 From: Amaury Sechet Date: Sat, 31 Aug 2019 11:40:02 +0000 Subject: [PATCH] [DAGCombiner] Match (add X, X) as (shl X, 1) when detecting rotate. Summary: The combiner transforms (shl X, 1) into (add X, X). Reviewers: craig.topper, efriedma, RKSimon, lebedev.ri Subscribers: llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D66882 llvm-svn: 370578 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 24 +++++++++++++++++++---- test/CodeGen/X86/rotate-extract-vector.ll | 14 +++++++------ test/CodeGen/X86/rotate-extract.ll | 22 +++++++-------------- 3 files changed, 35 insertions(+), 25 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7c73212c294..b37985ed6c6 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6023,6 +6023,9 @@ static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift, /// Otherwise, returns an expansion of \p ExtractFrom based on the following /// patterns: /// +/// (or (add v v) (shrl v bitwidth-1)): +/// expands (add v v) -> (shl v 1) +/// /// (or (mul v c0) (shrl (mul v c1) c2)): /// expands (mul v c0) -> (shl (mul v c1) c3) /// @@ -6045,6 +6048,23 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, "Existing shift must be valid as a rotate half"); ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask); + + // Value and Type of the shift. + SDValue OppShiftLHS = OppShift.getOperand(0); + EVT ShiftedVT = OppShiftLHS.getValueType(); + + // Amount of the existing shift. + ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1)); + + // (add v v) -> (shl v 1) + if (OppShift.getOpcode() == ISD::SRL && OppShiftCst && + ExtractFrom.getOpcode() == ISD::ADD && + ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) && + ExtractFrom.getOperand(0) == OppShiftLHS && + OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1) + return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS, + DAG.getShiftAmountConstant(1, ShiftedVT, DL)); + // Preconditions: // (or (op0 v c0) (shiftl/r (op0 v c1) c2)) // @@ -6068,15 +6088,11 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, // op0 must be the same opcode on both sides, have the same LHS argument, // and produce the same value type. - SDValue OppShiftLHS = OppShift.getOperand(0); - EVT ShiftedVT = OppShiftLHS.getValueType(); if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() || OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) || ShiftedVT != ExtractFrom.getValueType()) return SDValue(); - // Amount of the existing shift. - ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1)); // Constant mul/udiv/shift amount from the RHS of the shift's LHS op. ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1)); // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op. diff --git a/test/CodeGen/X86/rotate-extract-vector.ll b/test/CodeGen/X86/rotate-extract-vector.ll index 5f6a9ff0ffa..7b7feb3372f 100644 --- a/test/CodeGen/X86/rotate-extract-vector.ll +++ b/test/CodeGen/X86/rotate-extract-vector.ll @@ -285,9 +285,10 @@ define <2 x i64> @no_extract_udiv(<2 x i64> %i) nounwind { define <4 x i32> @extract_add_1(<4 x i32> %i) nounwind { ; CHECK-LABEL: extract_add_1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm1 -; CHECK-NEXT: vpsrld $31, %xmm0, %xmm0 -; CHECK-NEXT: vpor %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; CHECK-NEXT: vprold $1, %zmm0, %zmm0 +; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; CHECK-NEXT: vzeroupper ; CHECK-NEXT: ret{{[l|q]}} %ii = add <4 x i32> %i, %i %rhs = lshr <4 x i32> %i, @@ -298,9 +299,10 @@ define <4 x i32> @extract_add_1(<4 x i32> %i) nounwind { define <4 x i32> @extract_add_1_comut(<4 x i32> %i) nounwind { ; CHECK-LABEL: extract_add_1_comut: ; CHECK: # %bb.0: -; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm1 -; CHECK-NEXT: vpsrld $31, %xmm0, %xmm0 -; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; CHECK-NEXT: vprold $1, %zmm0, %zmm0 +; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; CHECK-NEXT: vzeroupper ; CHECK-NEXT: ret{{[l|q]}} %ii = add <4 x i32> %i, %i %lhs = lshr <4 x i32> %i, diff --git a/test/CodeGen/X86/rotate-extract.ll b/test/CodeGen/X86/rotate-extract.ll index 3963fae3679..a705773598b 100644 --- a/test/CodeGen/X86/rotate-extract.ll +++ b/test/CodeGen/X86/rotate-extract.ll @@ -270,18 +270,14 @@ define i8 @no_extract_udiv(i8 %i) nounwind { define i32 @extract_add_1(i32 %i) nounwind { ; X86-LABEL: extract_add_1: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: leal (%ecx,%ecx), %eax -; X86-NEXT: shrl $31, %ecx -; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: roll %eax ; X86-NEXT: retl ; ; X64-LABEL: extract_add_1: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: leal (%rdi,%rdi), %eax -; X64-NEXT: shrl $31, %edi -; X64-NEXT: orl %edi, %eax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: roll %eax ; X64-NEXT: retq %ii = add i32 %i, %i %rhs = lshr i32 %i, 31 @@ -293,17 +289,13 @@ define i32 @extract_add_1_comut(i32 %i) nounwind { ; X86-LABEL: extract_add_1_comut: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: leal (%eax,%eax), %ecx -; X86-NEXT: shrl $31, %eax -; X86-NEXT: orl %ecx, %eax +; X86-NEXT: roll %eax ; X86-NEXT: retl ; ; X64-LABEL: extract_add_1_comut: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: leal (%rdi,%rdi), %eax -; X64-NEXT: shrl $31, %edi -; X64-NEXT: orl %edi, %eax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: roll %eax ; X64-NEXT: retq %ii = add i32 %i, %i %lhs = lshr i32 %i, 31