[X86] Optimize vector shifts with variable but uniform shift amounts
Summary:
For instructions such as PSLLW/PSLLD/PSLLQ a variable shift amount may be passed in an XMM register. The lower 64 bits of the register are evaluated to determine the shift amount.

This patch improves the construction of the vector containing the shift amount.

Reviewers: craig.topper, delena, RKSimon

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D28353

llvm-svn: 291120
commit 7f372e97f6 (parent 82db3cf4e4)
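For context, a minimal self-contained sketch (not part of this commit; names and constants are illustrative) of what "variable but uniform" means here: one runtime scalar shifts every lane, and PSLLD reads that amount from the lower 64 bits of an XMM register.

    // Sketch only: a uniform, variable shift whose count reaches PSLLD
    // through an XMM register. PSLLW/PSLLD/PSLLQ read the count from the
    // lower 64 bits of that register and ignore everything above them.
    #include <emmintrin.h> // SSE2
    #include <cstdio>

    int main() {
      __m128i v = _mm_set_epi32(8, 4, 2, 1);
      int amount = 3;                          // runtime value, uniform across lanes
      __m128i cnt = _mm_cvtsi32_si128(amount); // movd: count in the low bits, rest zero
      __m128i r = _mm_sll_epi32(v, cnt);       // pslld: reads only cnt's low 64 bits
      alignas(16) int out[4];
      _mm_store_si128(reinterpret_cast<__m128i *>(out), r);
      std::printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]); // 8 16 32 64
    }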
@@ -18306,27 +18306,33 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
   case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
   }
 
+  // Need to build a vector containing shift amount.
+  // SSE/AVX packed shifts only use the lower 64-bit of the shift count.
+  // +=================+============+=======================================+
+  // | ShAmt is        | HasSSE4.1? | Construct ShAmt vector as             |
+  // +=================+============+=======================================+
+  // | i64             | Yes, No    | Use ShAmt as lowest elt               |
+  // | i32             | Yes        | zero-extend in-reg                    |
+  // | (i32 zext(i16)) | Yes        | zero-extend in-reg                    |
+  // | i16/i32         | No         | v4i32 build_vector(ShAmt, 0, ud, ud)) |
+  // +=================+============+=======================================+
   const X86Subtarget &Subtarget =
       static_cast<const X86Subtarget &>(DAG.getSubtarget());
-  if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
-      ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
-    // Let the shuffle legalizer expand this shift amount node.
+  if (SVT == MVT::i64)
+    ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt);
+  else if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
+           ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
     SDValue Op0 = ShAmt.getOperand(0);
     Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(Op0), MVT::v8i16, Op0);
-    ShAmt = getShuffleVectorZeroOrUndef(Op0, 0, true, Subtarget, DAG);
+    ShAmt = DAG.getZeroExtendVectorInReg(Op0, SDLoc(Op0), MVT::v2i64);
+  } else if (Subtarget.hasSSE41() &&
+             ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+    ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt);
+    ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
   } else {
-    // Need to build a vector containing shift amount.
-    // SSE/AVX packed shifts only use the lower 64-bit of the shift count.
-    SmallVector<SDValue, 4> ShOps;
-    ShOps.push_back(ShAmt);
-    if (SVT == MVT::i32) {
-      ShOps.push_back(DAG.getConstant(0, dl, SVT));
-      ShOps.push_back(DAG.getUNDEF(SVT));
-    }
-    ShOps.push_back(DAG.getUNDEF(SVT));
-
-    MVT BVT = SVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64;
-    ShAmt = DAG.getBuildVector(BVT, dl, ShOps);
+    SmallVector<SDValue, 4> ShOps = {ShAmt, DAG.getConstant(0, dl, SVT),
+                                     DAG.getUNDEF(SVT), DAG.getUNDEF(SVT)};
+    ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps);
   }
 
   // The return type has to be a 128-bit type with the same element
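The updated tests below make the codegen effect visible: the old lowering built the count vector by zeroing a register and blending (pxor + pblendw, or vxorps + vmovss on AVX-512), while the new lowering emits a single in-register zero-extend (pmovzxdq / pmovzxwq). A small sketch of the two constructions, assuming an SSE4.1 build; the values are illustrative, and both counts drive the shift identically because only the low 64 bits are read:

    // Sketch only: two ways to build a PSRAD count vector from the lowest
    // i32 lane, mirroring the before/after CHECK lines in this commit.
    #include <smmintrin.h> // SSE4.1: _mm_blend_epi16, _mm_cvtepu32_epi64
    #include <cstdio>

    int main() {
      __m128i amt = _mm_set_epi32(-1, -1, -1, 5); // only lane 0 is the real count
      // Old-style construction: zero a register, then blend it over words
      // 2..7 so everything above the low i32 is cleared (pxor + pblendw).
      __m128i cnt_blend = _mm_blend_epi16(amt, _mm_setzero_si128(), 0xFC);
      // New-style construction: zero-extend the low i32 lanes in-register
      // (pmovzxdq), as in the updated vpmovzxdq CHECK lines below.
      __m128i cnt_zext = _mm_cvtepu32_epi64(amt);
      __m128i a = _mm_set1_epi32(-64);
      alignas(16) int x[4], y[4];
      _mm_store_si128(reinterpret_cast<__m128i *>(x), _mm_sra_epi32(a, cnt_blend));
      _mm_store_si128(reinterpret_cast<__m128i *>(y), _mm_sra_epi32(a, cnt_zext));
      std::printf("%d %d\n", x[0], y[0]); // both -2: the shifts agree
    }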
@@ -12,8 +12,7 @@ define <8 x i16> @test1(<8 x i16> %A, <8 x i16> %B) {
 ;
 ; AVX-LABEL: test1:
 ; AVX:       # BB#0: # %entry
-; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 entry:
@@ -32,8 +31,7 @@ define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) {
 ;
 ; AVX-LABEL: test2:
 ; AVX:       # BB#0: # %entry
-; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX-NEXT:    vpslld %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 entry:
@@ -68,8 +66,7 @@ define <8 x i16> @test4(<8 x i16> %A, <8 x i16> %B) {
 ;
 ; AVX-LABEL: test4:
 ; AVX:       # BB#0: # %entry
-; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 entry:
@@ -88,8 +85,7 @@ define <4 x i32> @test5(<4 x i32> %A, <4 x i32> %B) {
 ;
 ; AVX-LABEL: test5:
 ; AVX:       # BB#0: # %entry
-; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 entry:
@@ -124,8 +120,7 @@ define <8 x i16> @test7(<8 x i16> %A, <8 x i16> %B) {
 ;
 ; AVX-LABEL: test7:
 ; AVX:       # BB#0: # %entry
-; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 entry:
@@ -144,8 +139,7 @@ define <4 x i32> @test8(<4 x i32> %A, <4 x i32> %B) {
 ;
 ; AVX-LABEL: test8:
 ; AVX:       # BB#0: # %entry
-; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 entry:
@@ -87,14 +87,12 @@ define <2 x i64> @var_rotate_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; X32-SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
 ; X32-SSE-NEXT:    movdqa %xmm0, %xmm4
 ; X32-SSE-NEXT:    psllq %xmm3, %xmm4
-; X32-SSE-NEXT:    movq {{.*#+}} xmm1 = xmm1[0],zero
 ; X32-SSE-NEXT:    movdqa %xmm0, %xmm3
 ; X32-SSE-NEXT:    psllq %xmm1, %xmm3
 ; X32-SSE-NEXT:    movsd {{.*#+}} xmm4 = xmm3[0],xmm4[1]
 ; X32-SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
 ; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
 ; X32-SSE-NEXT:    psrlq %xmm3, %xmm1
-; X32-SSE-NEXT:    movq {{.*#+}} xmm2 = xmm2[0],zero
 ; X32-SSE-NEXT:    psrlq %xmm2, %xmm0
 ; X32-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
 ; X32-SSE-NEXT:    orpd %xmm4, %xmm1
@@ -90,20 +90,19 @@ define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ;
 ; X32-SSE-LABEL: var_shift_v2i64:
 ; X32-SSE:       # BB#0:
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
-; X32-SSE-NEXT:    movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
-; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
-; X32-SSE-NEXT:    psrlq %xmm2, %xmm4
-; X32-SSE-NEXT:    movq {{.*#+}} xmm5 = xmm1[0],zero
-; X32-SSE-NEXT:    psrlq %xmm5, %xmm3
-; X32-SSE-NEXT:    movsd {{.*#+}} xmm4 = xmm3[0],xmm4[1]
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    psrlq %xmm2, %xmm1
-; X32-SSE-NEXT:    psrlq %xmm5, %xmm0
-; X32-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; X32-SSE-NEXT:    xorpd %xmm4, %xmm1
-; X32-SSE-NEXT:    psubq %xmm4, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm0
+; X32-SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; X32-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
+; X32-SSE-NEXT:    movdqa %xmm2, %xmm4
+; X32-SSE-NEXT:    psrlq %xmm3, %xmm4
+; X32-SSE-NEXT:    psrlq %xmm1, %xmm2
+; X32-SSE-NEXT:    movsd {{.*#+}} xmm4 = xmm2[0],xmm4[1]
+; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
+; X32-SSE-NEXT:    psrlq %xmm3, %xmm2
+; X32-SSE-NEXT:    psrlq %xmm1, %xmm0
+; X32-SSE-NEXT:    movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
+; X32-SSE-NEXT:    xorpd %xmm4, %xmm2
+; X32-SSE-NEXT:    psubq %xmm4, %xmm2
+; X32-SSE-NEXT:    movdqa %xmm2, %xmm0
 ; X32-SSE-NEXT:    retl
   %shift = ashr <2 x i64> %a, %b
   ret <2 x i64> %shift
@@ -637,7 +636,6 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ;
 ; X32-SSE-LABEL: splatvar_shift_v2i64:
 ; X32-SSE:       # BB#0:
-; X32-SSE-NEXT:    movq {{.*#+}} xmm1 = xmm1[0],zero
 ; X32-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
 ; X32-SSE-NEXT:    psrlq %xmm1, %xmm2
 ; X32-SSE-NEXT:    psrlq %xmm1, %xmm0
@@ -659,29 +657,25 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ;
 ; SSE41-LABEL: splatvar_shift_v4i32:
 ; SSE41:       # BB#0:
-; SSE41-NEXT:    pxor %xmm2, %xmm2
-; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3,4,5,6,7]
-; SSE41-NEXT:    psrad %xmm2, %xmm0
+; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; SSE41-NEXT:    psrad %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: splatvar_shift_v4i32:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; XOP-LABEL: splatvar_shift_v4i32:
 ; XOP:       # BB#0:
-; XOP-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; XOP-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; XOP-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; XOP-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
 ; XOP-NEXT:    retq
 ;
 ; AVX512-LABEL: splatvar_shift_v4i32:
 ; AVX512:       ## BB#0:
-; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
+; AVX512-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX512-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
 ;
@@ -706,29 +700,25 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ;
 ; SSE41-LABEL: splatvar_shift_v8i16:
 ; SSE41:       # BB#0:
-; SSE41-NEXT:    pxor %xmm2, %xmm2
-; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3,4,5,6,7]
-; SSE41-NEXT:    psraw %xmm2, %xmm0
+; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; SSE41-NEXT:    psraw %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: splatvar_shift_v8i16:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; XOP-LABEL: splatvar_shift_v8i16:
 ; XOP:       # BB#0:
-; XOP-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; XOP-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; XOP-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; XOP-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
 ; XOP-NEXT:    retq
 ;
 ; AVX512-LABEL: splatvar_shift_v8i16:
 ; AVX512:       ## BB#0:
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX512-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
 ;
@@ -426,9 +426,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ; AVX1-LABEL: splatvar_shift_v8i32:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX1-NEXT:    vpsrad %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -436,16 +435,14 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ;
 ; AVX2-LABEL: splatvar_shift_v8i32:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX2-NEXT:    vpsrad %xmm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; XOPAVX1-LABEL: splatvar_shift_v8i32:
 ; XOPAVX1:       # BB#0:
-; XOPAVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; XOPAVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
 ; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; XOPAVX1-NEXT:    vpsrad %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
 ; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -453,15 +450,13 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ;
 ; XOPAVX2-LABEL: splatvar_shift_v8i32:
 ; XOPAVX2:       # BB#0:
-; XOPAVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; XOPAVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; XOPAVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; XOPAVX2-NEXT:    vpsrad %xmm1, %ymm0, %ymm0
 ; XOPAVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: splatvar_shift_v8i32:
 ; AVX512:       ## BB#0:
-; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
+; AVX512-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX512-NEXT:    vpsrad %xmm1, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -473,8 +468,7 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ; AVX1-LABEL: splatvar_shift_v16i16:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpextrw $0, %xmm1, %eax
-; AVX1-NEXT:    vmovd %eax, %xmm1
+; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX1-NEXT:    vpsraw %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -482,16 +476,14 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ;
 ; AVX2-LABEL: splatvar_shift_v16i16:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpextrw $0, %xmm1, %eax
-; AVX2-NEXT:    vmovd %eax, %xmm1
+; AVX2-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX2-NEXT:    vpsraw %xmm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; XOPAVX1-LABEL: splatvar_shift_v16i16:
 ; XOPAVX1:       # BB#0:
 ; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; XOPAVX1-NEXT:    vpextrw $0, %xmm1, %eax
-; XOPAVX1-NEXT:    vmovd %eax, %xmm1
+; XOPAVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; XOPAVX1-NEXT:    vpsraw %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
 ; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -499,15 +491,13 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ;
 ; XOPAVX2-LABEL: splatvar_shift_v16i16:
 ; XOPAVX2:       # BB#0:
-; XOPAVX2-NEXT:    vpextrw $0, %xmm1, %eax
-; XOPAVX2-NEXT:    vmovd %eax, %xmm1
+; XOPAVX2-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; XOPAVX2-NEXT:    vpsraw %xmm1, %ymm0, %ymm0
 ; XOPAVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: splatvar_shift_v16i16:
 ; AVX512:       ## BB#0:
-; AVX512-NEXT:    vpextrw $0, %xmm1, %eax
-; AVX512-NEXT:    vmovd %eax, %xmm1
+; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX512-NEXT:    vpsraw %xmm1, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -525,8 +525,7 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
 define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
 ; ALL-LABEL: splatvar_shift_v16i32:
 ; ALL:       ## BB#0:
-; ALL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; ALL-NEXT:    vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
+; ALL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; ALL-NEXT:    vpsrad %xmm1, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -537,16 +536,14 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind
 define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
 ; AVX512DQ-LABEL: splatvar_shift_v32i16:
 ; AVX512DQ:       ## BB#0:
-; AVX512DQ-NEXT:    vpextrw $0, %xmm2, %eax
-; AVX512DQ-NEXT:    vmovd %eax, %xmm2
+; AVX512DQ-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
 ; AVX512DQ-NEXT:    vpsraw %xmm2, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpsraw %xmm2, %ymm1, %ymm1
 ; AVX512DQ-NEXT:    retq
 ;
 ; AVX512BW-LABEL: splatvar_shift_v32i16:
 ; AVX512BW:       ## BB#0:
-; AVX512BW-NEXT:    vpextrw $0, %xmm1, %eax
-; AVX512BW-NEXT:    vmovd %eax, %xmm1
+; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX512BW-NEXT:    vpsraw %xmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
   %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
@@ -69,7 +69,6 @@ define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; X32-SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
 ; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
 ; X32-SSE-NEXT:    psrlq %xmm3, %xmm2
-; X32-SSE-NEXT:    movq {{.*#+}} xmm1 = xmm1[0],zero
 ; X32-SSE-NEXT:    psrlq %xmm1, %xmm0
 ; X32-SSE-NEXT:    movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
 ; X32-SSE-NEXT:    movapd %xmm2, %xmm0
@@ -493,7 +492,6 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ;
 ; X32-SSE-LABEL: splatvar_shift_v2i64:
 ; X32-SSE:       # BB#0:
-; X32-SSE-NEXT:    movq {{.*#+}} xmm1 = xmm1[0],zero
 ; X32-SSE-NEXT:    psrlq %xmm1, %xmm0
 ; X32-SSE-NEXT:    retl
   %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -511,29 +509,25 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ;
 ; SSE41-LABEL: splatvar_shift_v4i32:
 ; SSE41:       # BB#0:
-; SSE41-NEXT:    pxor %xmm2, %xmm2
-; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3,4,5,6,7]
-; SSE41-NEXT:    psrld %xmm2, %xmm0
+; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; SSE41-NEXT:    psrld %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: splatvar_shift_v4i32:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; XOP-LABEL: splatvar_shift_v4i32:
 ; XOP:       # BB#0:
-; XOP-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; XOP-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; XOP-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; XOP-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
 ; XOP-NEXT:    retq
 ;
 ; AVX512-LABEL: splatvar_shift_v4i32:
 ; AVX512:       ## BB#0:
-; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
+; AVX512-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX512-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
 ;
@@ -558,29 +552,25 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ;
 ; SSE41-LABEL: splatvar_shift_v8i16:
 ; SSE41:       # BB#0:
-; SSE41-NEXT:    pxor %xmm2, %xmm2
-; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3,4,5,6,7]
-; SSE41-NEXT:    psrlw %xmm2, %xmm0
+; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; SSE41-NEXT:    psrlw %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: splatvar_shift_v8i16:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; XOP-LABEL: splatvar_shift_v8i16:
 ; XOP:       # BB#0:
-; XOP-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; XOP-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; XOP-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; XOP-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
 ; XOP-NEXT:    retq
 ;
 ; AVX512-LABEL: splatvar_shift_v8i16:
 ; AVX512:       ## BB#0:
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX512-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
 ;
@@ -337,9 +337,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ; AVX1-LABEL: splatvar_shift_v8i32:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX1-NEXT:    vpsrld %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -347,16 +346,14 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ;
 ; AVX2-LABEL: splatvar_shift_v8i32:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX2-NEXT:    vpsrld %xmm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; XOPAVX1-LABEL: splatvar_shift_v8i32:
 ; XOPAVX1:       # BB#0:
-; XOPAVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; XOPAVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
 ; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; XOPAVX1-NEXT:    vpsrld %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
 ; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -364,15 +361,13 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ;
 ; XOPAVX2-LABEL: splatvar_shift_v8i32:
 ; XOPAVX2:       # BB#0:
-; XOPAVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; XOPAVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; XOPAVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; XOPAVX2-NEXT:    vpsrld %xmm1, %ymm0, %ymm0
 ; XOPAVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: splatvar_shift_v8i32:
 ; AVX512:       ## BB#0:
-; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
+; AVX512-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX512-NEXT:    vpsrld %xmm1, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -384,8 +379,7 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ; AVX1-LABEL: splatvar_shift_v16i16:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpextrw $0, %xmm1, %eax
-; AVX1-NEXT:    vmovd %eax, %xmm1
+; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX1-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -393,16 +387,14 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ;
 ; AVX2-LABEL: splatvar_shift_v16i16:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpextrw $0, %xmm1, %eax
-; AVX2-NEXT:    vmovd %eax, %xmm1
+; AVX2-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX2-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; XOPAVX1-LABEL: splatvar_shift_v16i16:
 ; XOPAVX1:       # BB#0:
 ; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; XOPAVX1-NEXT:    vpextrw $0, %xmm1, %eax
-; XOPAVX1-NEXT:    vmovd %eax, %xmm1
+; XOPAVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; XOPAVX1-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
 ; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -410,15 +402,13 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ;
 ; XOPAVX2-LABEL: splatvar_shift_v16i16:
 ; XOPAVX2:       # BB#0:
-; XOPAVX2-NEXT:    vpextrw $0, %xmm1, %eax
-; XOPAVX2-NEXT:    vmovd %eax, %xmm1
+; XOPAVX2-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; XOPAVX2-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
 ; XOPAVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: splatvar_shift_v16i16:
 ; AVX512:       ## BB#0:
-; AVX512-NEXT:    vpextrw $0, %xmm1, %eax
-; AVX512-NEXT:    vmovd %eax, %xmm1
+; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX512-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -505,8 +505,7 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
 define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
 ; ALL-LABEL: splatvar_shift_v16i32:
 ; ALL:       ## BB#0:
-; ALL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; ALL-NEXT:    vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
+; ALL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; ALL-NEXT:    vpsrld %xmm1, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -517,16 +516,14 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind
 define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
 ; AVX512DQ-LABEL: splatvar_shift_v32i16:
 ; AVX512DQ:       ## BB#0:
-; AVX512DQ-NEXT:    vpextrw $0, %xmm2, %eax
-; AVX512DQ-NEXT:    vmovd %eax, %xmm2
+; AVX512DQ-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
 ; AVX512DQ-NEXT:    vpsrlw %xmm2, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpsrlw %xmm2, %ymm1, %ymm1
 ; AVX512DQ-NEXT:    retq
 ;
 ; AVX512BW-LABEL: splatvar_shift_v32i16:
 ; AVX512BW:       ## BB#0:
-; AVX512BW-NEXT:    vpextrw $0, %xmm1, %eax
-; AVX512BW-NEXT:    vmovd %eax, %xmm1
+; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
   %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
@@ -67,7 +67,6 @@ define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; X32-SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
 ; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
 ; X32-SSE-NEXT:    psllq %xmm3, %xmm2
-; X32-SSE-NEXT:    movq {{.*#+}} xmm1 = xmm1[0],zero
 ; X32-SSE-NEXT:    psllq %xmm1, %xmm0
 ; X32-SSE-NEXT:    movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
 ; X32-SSE-NEXT:    movapd %xmm2, %xmm0
@@ -441,7 +440,6 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ;
 ; X32-SSE-LABEL: splatvar_shift_v2i64:
 ; X32-SSE:       # BB#0:
-; X32-SSE-NEXT:    movq {{.*#+}} xmm1 = xmm1[0],zero
 ; X32-SSE-NEXT:    psllq %xmm1, %xmm0
 ; X32-SSE-NEXT:    retl
   %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -459,29 +457,25 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ;
 ; SSE41-LABEL: splatvar_shift_v4i32:
 ; SSE41:       # BB#0:
-; SSE41-NEXT:    pxor %xmm2, %xmm2
-; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3,4,5,6,7]
-; SSE41-NEXT:    pslld %xmm2, %xmm0
+; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; SSE41-NEXT:    pslld %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: splatvar_shift_v4i32:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX-NEXT:    vpslld %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; XOP-LABEL: splatvar_shift_v4i32:
 ; XOP:       # BB#0:
-; XOP-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; XOP-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; XOP-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; XOP-NEXT:    vpslld %xmm1, %xmm0, %xmm0
 ; XOP-NEXT:    retq
 ;
 ; AVX512-LABEL: splatvar_shift_v4i32:
 ; AVX512:       ## BB#0:
-; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
+; AVX512-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX512-NEXT:    vpslld %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
 ;
@@ -506,29 +500,25 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ;
 ; SSE41-LABEL: splatvar_shift_v8i16:
 ; SSE41:       # BB#0:
-; SSE41-NEXT:    pxor %xmm2, %xmm2
-; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3,4,5,6,7]
-; SSE41-NEXT:    psllw %xmm2, %xmm0
+; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; SSE41-NEXT:    psllw %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: splatvar_shift_v8i16:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; XOP-LABEL: splatvar_shift_v8i16:
 ; XOP:       # BB#0:
-; XOP-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; XOP-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; XOP-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; XOP-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
 ; XOP-NEXT:    retq
 ;
 ; AVX512-LABEL: splatvar_shift_v8i16:
 ; AVX512:       ## BB#0:
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX512-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
 ;
@@ -301,9 +301,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ; AVX1-LABEL: splatvar_shift_v8i32:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX1-NEXT:    vpslld %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT:    vpslld %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -311,16 +310,14 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ;
 ; AVX2-LABEL: splatvar_shift_v8i32:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX2-NEXT:    vpslld %xmm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; XOPAVX1-LABEL: splatvar_shift_v8i32:
 ; XOPAVX1:       # BB#0:
-; XOPAVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; XOPAVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
 ; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; XOPAVX1-NEXT:    vpslld %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT:    vpslld %xmm1, %xmm0, %xmm0
 ; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -328,15 +325,13 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ;
 ; XOPAVX2-LABEL: splatvar_shift_v8i32:
 ; XOPAVX2:       # BB#0:
-; XOPAVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; XOPAVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; XOPAVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; XOPAVX2-NEXT:    vpslld %xmm1, %ymm0, %ymm0
 ; XOPAVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: splatvar_shift_v8i32:
 ; AVX512:       ## BB#0:
-; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
+; AVX512-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX512-NEXT:    vpslld %xmm1, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -348,8 +343,7 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ; AVX1-LABEL: splatvar_shift_v16i16:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpextrw $0, %xmm1, %eax
-; AVX1-NEXT:    vmovd %eax, %xmm1
+; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -357,16 +351,14 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ;
 ; AVX2-LABEL: splatvar_shift_v16i16:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpextrw $0, %xmm1, %eax
-; AVX2-NEXT:    vmovd %eax, %xmm1
+; AVX2-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX2-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; XOPAVX1-LABEL: splatvar_shift_v16i16:
 ; XOPAVX1:       # BB#0:
 ; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; XOPAVX1-NEXT:    vpextrw $0, %xmm1, %eax
-; XOPAVX1-NEXT:    vmovd %eax, %xmm1
+; XOPAVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; XOPAVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
 ; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -374,15 +366,13 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ;
 ; XOPAVX2-LABEL: splatvar_shift_v16i16:
 ; XOPAVX2:       # BB#0:
-; XOPAVX2-NEXT:    vpextrw $0, %xmm1, %eax
-; XOPAVX2-NEXT:    vmovd %eax, %xmm1
+; XOPAVX2-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; XOPAVX2-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
 ; XOPAVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: splatvar_shift_v16i16:
 ; AVX512:       ## BB#0:
-; AVX512-NEXT:    vpextrw $0, %xmm1, %eax
-; AVX512-NEXT:    vmovd %eax, %xmm1
+; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX512-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -502,8 +502,7 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
 define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
 ; ALL-LABEL: splatvar_shift_v16i32:
 ; ALL:       ## BB#0:
-; ALL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; ALL-NEXT:    vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
+; ALL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; ALL-NEXT:    vpslld %xmm1, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -514,16 +513,14 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind
 define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
 ; AVX512DQ-LABEL: splatvar_shift_v32i16:
 ; AVX512DQ:       ## BB#0:
-; AVX512DQ-NEXT:    vpextrw $0, %xmm2, %eax
-; AVX512DQ-NEXT:    vmovd %eax, %xmm2
+; AVX512DQ-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
 ; AVX512DQ-NEXT:    vpsllw %xmm2, %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vpsllw %xmm2, %ymm1, %ymm1
 ; AVX512DQ-NEXT:    retq
 ;
 ; AVX512BW-LABEL: splatvar_shift_v32i16:
 ; AVX512BW:       ## BB#0:
-; AVX512BW-NEXT:    vpextrw $0, %xmm1, %eax
-; AVX512BW-NEXT:    vmovd %eax, %xmm1
+; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX512BW-NEXT:    vpsllw %xmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
   %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
@@ -9,7 +9,6 @@ define void @shift1a(<2 x i64> %val, <2 x i64>* %dst, <2 x i64> %sh) nounwind {
 ; X32-LABEL: shift1a:
 ; X32:       # BB#0: # %entry
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movq {{.*#+}} xmm1 = xmm1[0],zero
 ; X32-NEXT:    psllq %xmm1, %xmm0
 ; X32-NEXT:    movdqa %xmm0, (%eax)
 ; X32-NEXT:    retl
@@ -34,7 +33,6 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, <2 x i64> %sh) nounwind {
 ; X32-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
 ; X32-NEXT:    movdqa %xmm0, %xmm3
 ; X32-NEXT:    psllq %xmm2, %xmm3
-; X32-NEXT:    movq {{.*#+}} xmm1 = xmm1[0],zero
 ; X32-NEXT:    psllq %xmm1, %xmm0
 ; X32-NEXT:    movsd {{.*#+}} xmm3 = xmm0[0],xmm3[1]
 ; X32-NEXT:    movapd %xmm3, (%eax)