1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 19:23:23 +01:00

X86: Emit logical shift by constant splat of <16 x i8> as a <8 x i16> shift and zero out the bits where zeros should've been shifted in.

llvm-svn: 143315
This commit is contained in:
Benjamin Kramer 2011-10-30 17:31:21 +00:00
parent e77289b243
commit c0001c42c6
2 changed files with 44 additions and 0 deletions

View File

@ -9929,6 +9929,19 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) {
uint64_t ShiftAmt = C->getZExtValue();
if (VT == MVT::v16i8 && Op.getOpcode() == ISD::SHL) {
// Make a large shift.
SDValue SHL =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
R, DAG.getConstant(ShiftAmt, MVT::i32));
// Zero out the rightmost bits.
SmallVector<SDValue, 16> V(16, DAG.getConstant(uint8_t(-1U << ShiftAmt),
MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SHL,
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
}
if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SHL)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
@ -9944,6 +9957,19 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
R, DAG.getConstant(ShiftAmt, MVT::i32));
if (VT == MVT::v16i8 && Op.getOpcode() == ISD::SRL) {
// Make a large shift.
SDValue SRL =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
R, DAG.getConstant(ShiftAmt, MVT::i32));
// Zero out the leftmost bits.
SmallVector<SDValue, 16> V(16, DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SRL,
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
}
if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SRL)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),

View File

@ -152,3 +152,21 @@ entry:
%K = xor <2 x i32> %B, %C
ret <2 x i32> %K
}
define <16 x i8> @shl9(<16 x i8> %A) nounwind {
%B = shl <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <16 x i8> %B
; CHECK: shl9:
; CHECK: psllw $3
; CHECK: pand
; CHECK: ret
}
define <16 x i8> @shr9(<16 x i8> %A) nounwind {
%B = lshr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <16 x i8> %B
; CHECK: shr9:
; CHECK: psrlw $3
; CHECK: pand
; CHECK: ret
}