mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
Fix pr11266.
On x86: (shl V, 1) -> (add V, V). Hardware support for vector shifts is sparse, and in many cases we scalarize the result. Additionally, on Sandy Bridge, padd is faster than shl. llvm-svn: 143311
This commit is contained in:
parent
eb62811647
commit
8282fc9e3b
@ -13042,7 +13042,8 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
|
||||
// fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
|
||||
// since the result of setcc_c is all zeros or all ones.
|
||||
if (N1C && N0.getOpcode() == ISD::AND &&
|
||||
if (VT.isInteger() && !VT.isVector() &&
|
||||
N1C && N0.getOpcode() == ISD::AND &&
|
||||
N0.getOperand(1).getOpcode() == ISD::Constant) {
|
||||
SDValue N00 = N0.getOperand(0);
|
||||
if (N00.getOpcode() == X86ISD::SETCC_CARRY ||
|
||||
@ -13058,6 +13059,22 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Hardware support for vector shifts is sparse which makes us scalarize the
|
||||
// vector operations in many cases. Also, on sandybridge ADD is faster than
|
||||
// shl.
|
||||
// (shl V, 1) -> add V,V
|
||||
if (isSplatVector(N1.getNode())) {
|
||||
assert(N0.getValueType().isVector() && "Invalid vector shift type");
|
||||
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(0));
|
||||
// We shift all of the values by one. In many cases we do not have
|
||||
// hardware support for this operation. This is better expressed as an ADD
|
||||
// of two values.
|
||||
if (N1C && (1 == N1C->getZExtValue())) {
|
||||
return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N0);
|
||||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
@ -13066,9 +13083,10 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
|
||||
const X86Subtarget *Subtarget) {
|
||||
EVT VT = N->getValueType(0);
|
||||
if (!VT.isVector() && VT.isInteger() &&
|
||||
N->getOpcode() == ISD::SHL)
|
||||
return PerformSHLCombine(N, DAG);
|
||||
if (N->getOpcode() == ISD::SHL) {
|
||||
SDValue V = PerformSHLCombine(N, DAG);
|
||||
if (V.getNode()) return V;
|
||||
}
|
||||
|
||||
// On X86 with SSE2 support, we can transform this to a vector shift if
|
||||
// all elements are shifted by the same amount. We can't do this in legalize
|
||||
|
20
test/CodeGen/X86/2011-10-30-padd.ll
Normal file
20
test/CodeGen/X86/2011-10-30-padd.ll
Normal file
@ -0,0 +1,20 @@
|
||||
; RUN: llc < %s -march=x86 -mcpu=corei7 | FileCheck %s
|
||||
|
||||
;CHECK: addXX_test
|
||||
;CHECK: padd
|
||||
;CHECK: ret
|
||||
|
||||
|
||||
define <16 x i8> @addXX_test(<16 x i8> %a) {
|
||||
%b = add <16 x i8> %a, %a
|
||||
ret <16 x i8> %b
|
||||
}
|
||||
|
||||
;CHECK: instcombine_test
|
||||
;CHECK: padd
|
||||
;CHECK: ret
|
||||
define <16 x i8> @instcombine_test(<16 x i8> %a) {
|
||||
%b = shl <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
||||
ret <16 x i8> %b
|
||||
}
|
||||
|
@ -6,8 +6,9 @@
|
||||
define <4 x i32> @shl4(<4 x i32> %A) nounwind {
|
||||
entry:
|
||||
; CHECK: shl4
|
||||
; CHECK: padd
|
||||
; CHECK: pslld
|
||||
; CHECK-NEXT: pslld
|
||||
; CHECK: ret
|
||||
%B = shl <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
|
||||
%C = shl <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
|
||||
%K = xor <4 x i32> %B, %C
|
||||
@ -19,6 +20,7 @@ entry:
|
||||
; CHECK: shr4
|
||||
; CHECK: psrld
|
||||
; CHECK-NEXT: psrld
|
||||
; CHECK: ret
|
||||
%B = lshr <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
|
||||
%C = lshr <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
|
||||
%K = xor <4 x i32> %B, %C
|
||||
@ -30,6 +32,7 @@ entry:
|
||||
; CHECK: sra4
|
||||
; CHECK: psrad
|
||||
; CHECK-NEXT: psrad
|
||||
; CHECK: ret
|
||||
%B = ashr <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
|
||||
%C = ashr <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
|
||||
%K = xor <4 x i32> %B, %C
|
||||
@ -41,6 +44,7 @@ entry:
|
||||
; CHECK: shl2
|
||||
; CHECK: psllq
|
||||
; CHECK-NEXT: psllq
|
||||
; CHECK: ret
|
||||
%B = shl <2 x i64> %A, < i64 2, i64 2>
|
||||
%C = shl <2 x i64> %A, < i64 9, i64 9>
|
||||
%K = xor <2 x i64> %B, %C
|
||||
@ -52,6 +56,7 @@ entry:
|
||||
; CHECK: shr2
|
||||
; CHECK: psrlq
|
||||
; CHECK-NEXT: psrlq
|
||||
; CHECK: ret
|
||||
%B = lshr <2 x i64> %A, < i64 8, i64 8>
|
||||
%C = lshr <2 x i64> %A, < i64 1, i64 1>
|
||||
%K = xor <2 x i64> %B, %C
|
||||
@ -62,8 +67,9 @@ entry:
|
||||
define <8 x i16> @shl8(<8 x i16> %A) nounwind {
|
||||
entry:
|
||||
; CHECK: shl8
|
||||
; CHECK: padd
|
||||
; CHECK: psllw
|
||||
; CHECK-NEXT: psllw
|
||||
; CHECK: ret
|
||||
%B = shl <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
|
||||
%C = shl <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
|
||||
%K = xor <8 x i16> %B, %C
|
||||
@ -75,6 +81,7 @@ entry:
|
||||
; CHECK: shr8
|
||||
; CHECK: psrlw
|
||||
; CHECK-NEXT: psrlw
|
||||
; CHECK: ret
|
||||
%B = lshr <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
|
||||
%C = lshr <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
|
||||
%K = xor <8 x i16> %B, %C
|
||||
@ -86,6 +93,7 @@ entry:
|
||||
; CHECK: sra8
|
||||
; CHECK: psraw
|
||||
; CHECK-NEXT: psraw
|
||||
; CHECK: ret
|
||||
%B = ashr <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
|
||||
%C = ashr <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
|
||||
%K = xor <8 x i16> %B, %C
|
||||
@ -100,6 +108,7 @@ entry:
|
||||
; CHECK: sll8_nosplat
|
||||
; CHECK-NOT: psll
|
||||
; CHECK-NOT: psll
|
||||
; CHECK: ret
|
||||
%B = shl <8 x i16> %A, < i16 1, i16 2, i16 3, i16 6, i16 2, i16 2, i16 2, i16 2>
|
||||
%C = shl <8 x i16> %A, < i16 9, i16 7, i16 5, i16 1, i16 4, i16 1, i16 1, i16 1>
|
||||
%K = xor <8 x i16> %B, %C
|
||||
@ -112,6 +121,7 @@ entry:
|
||||
; CHECK: shr2_nosplat
|
||||
; CHECK-NOT: psrlq
|
||||
; CHECK-NOT: psrlq
|
||||
; CHECK: ret
|
||||
%B = lshr <2 x i64> %A, < i64 8, i64 1>
|
||||
%C = lshr <2 x i64> %A, < i64 1, i64 0>
|
||||
%K = xor <2 x i64> %B, %C
|
||||
@ -125,6 +135,7 @@ define <2 x i32> @shl2_other(<2 x i32> %A) nounwind {
|
||||
entry:
|
||||
; CHECK: shl2_other
|
||||
; CHECK: psllq
|
||||
; CHECK: ret
|
||||
%B = shl <2 x i32> %A, < i32 2, i32 2>
|
||||
%C = shl <2 x i32> %A, < i32 9, i32 9>
|
||||
%K = xor <2 x i32> %B, %C
|
||||
@ -135,6 +146,7 @@ define <2 x i32> @shr2_other(<2 x i32> %A) nounwind {
|
||||
entry:
|
||||
; CHECK: shr2_other
|
||||
; CHECK: psrlq
|
||||
; CHECK: ret
|
||||
%B = lshr <2 x i32> %A, < i32 8, i32 8>
|
||||
%C = lshr <2 x i32> %A, < i32 1, i32 1>
|
||||
%K = xor <2 x i32> %B, %C
|
||||
|
Loading…
Reference in New Issue
Block a user