
[DAGCombiner][x86] scalarize binop followed by extractelement

As noted in PR39973 and D55558:
https://bugs.llvm.org/show_bug.cgi?id=39973
...this is a partial implementation of a fold that we do as an IR canonicalization in instcombine:

// extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)

We want to have this in the DAG too because, as some of the test diffs (reductions) show,
the pattern may not be visible in IR.
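
For example, a minimal IR-level sketch of the fold (this matches the shape of the
ext_fadd_v4f32 test diff below; value names are illustrative):

%bo = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 42.0>
%ext = extractelement <4 x float> %bo, i32 2
  -->
%e = extractelement <4 x float> %x, i32 2
%ext = fadd float %e, 3.0 ; extract of the constant operand folds to 3.0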

Given that this is already an IR canonicalization, any backend that would prefer a vector op
over a scalar op is expected to already have the reverse transform in DAG lowering (not sure
if that's a realistic expectation though). The transform is gated by a TLI hook because an
existing transform in CodeGenPrepare tries to do the opposite.

Differential Revision: https://reviews.llvm.org/D55722

llvm-svn: 350354
Sanjay Patel 2019-01-03 21:31:16 +00:00
parent 3076709e1e
commit 36c33bfea9
31 changed files with 1269 additions and 1462 deletions

View File

@@ -2407,6 +2407,12 @@ public:
    return false;
  }

  /// Try to convert an extract element of a vector binary operation into an
  /// extract element followed by a scalar operation.
  virtual bool shouldScalarizeBinop(SDValue VecOp) const {
    return false;
  }

  // Return true if it is profitable to use a scalar input to a BUILD_VECTOR
  // even if the vector itself has multiple uses.
  virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const {

View File

@@ -915,9 +915,11 @@ static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
// undef's.
static bool isAnyConstantBuildVector(const SDNode *N) {
  return ISD::isBuildVectorOfConstantSDNodes(N) ||
         ISD::isBuildVectorOfConstantFPSDNodes(N);
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
  if (V.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  return isConstantOrConstantVector(V, NoOpaques) ||
         ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}

SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
@@ -15580,6 +15582,40 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
  return SDValue(EVE, 0);
}

/// Transform a vector binary operation into a scalar binary operation by moving
/// the math/logic after an extract element of a vector.
static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
                                       bool LegalOperations) {
  SDValue Vec = ExtElt->getOperand(0);
  SDValue Index = ExtElt->getOperand(1);
  auto *IndexC = dyn_cast<ConstantSDNode>(Index);
  if (!IndexC || !ISD::isBinaryOp(Vec.getNode()) || !Vec.hasOneUse())
    return SDValue();

  // Targets may want to avoid this to prevent an expensive register transfer.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.shouldScalarizeBinop(Vec))
    return SDValue();

  // Extracting an element of a vector constant is constant-folded, so this
  // transform is just replacing a vector op with a scalar op while moving the
  // extract.
  SDValue Op0 = Vec.getOperand(0);
  SDValue Op1 = Vec.getOperand(1);
  if (isAnyConstantBuildVector(Op0, true) ||
      isAnyConstantBuildVector(Op1, true)) {
    // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
    // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
    SDLoc DL(ExtElt);
    EVT VT = ExtElt->getValueType(0);
    SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
    SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
    return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
  }

  return SDValue();
}

SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  SDValue VecOp = N->getOperand(0);
  SDValue Index = N->getOperand(1);
@@ -15670,6 +15706,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
    }
  }

  if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
    return BO;

  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
@@ -17055,8 +17094,8 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
  if (!N1->hasOneUse())
    return SDValue();

  bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
  bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
  bool N0AnyConst = isAnyConstantBuildVector(N0);
  bool N1AnyConst = isAnyConstantBuildVector(N1);
  if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
    return SDValue();
  if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
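
To make the new combine concrete, here is an IR-level sketch of the commuted case where
the constant build vector is operand 0 (the shape of the ext_fsub_v4f32 test diff below;
value names are illustrative). The extract of the constant vector constant-folds to the
lane value, so only one real extract survives:

%bo = fsub <4 x float> <float 1.0, float 2.0, float 3.0, float 42.0>, %x
%ext = extractelement <4 x float> %bo, i32 1
  -->
%e = extractelement <4 x float> %x, i32 1
%ext = fsub float 2.0, %e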

View File

@@ -4875,6 +4875,18 @@ bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
  return (Index % ResVT.getVectorNumElements()) == 0;
}

bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
  // If the vector op is not supported, try to convert to scalar.
  EVT VecVT = VecOp.getValueType();
  if (!isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), VecVT))
    return true;

  // If the vector op is supported, but the scalar op is not, the transform may
  // not be worthwhile.
  EVT ScalarVT = VecVT.getScalarType();
  return isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), ScalarVT);
}

bool X86TargetLowering::isCheapToSpeculateCttz() const {
  // Speculate cttz only if we can directly use TZCNT.
  return Subtarget.hasBMI();
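
As an illustration of how this hook gates the combine (a sketch, not part of the patch):
x86 has no legal vector integer divide, so in a case like the following the vector op
fails the isOperationLegalOrCustomOrPromote check and the hook returns true, allowing the
scalarization (still subject to the one-use and constant-operand checks in
scalarizeExtractedBinop):

%bo = sdiv <4 x i32> %x, <i32 2, i32 3, i32 4, i32 5>
%ext = extractelement <4 x i32> %bo, i32 0
  -->
%e = extractelement <4 x i32> %x, i32 0
%ext = sdiv i32 %e, 2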

View File

@@ -1039,6 +1039,11 @@ namespace llvm {
    bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                 unsigned Index) const override;

    /// Scalar ops always have equal or better analysis/performance/power than
    /// the vector equivalent, so this always makes sense if the scalar op is
    /// supported.
    bool shouldScalarizeBinop(SDValue) const override;

    bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
                                      unsigned AddrSpace) const override {
      // If we can replace more than 2 scalar stores, there will be a reduction

View File

@@ -8,9 +8,8 @@ define i8 @foo(<4 x i8>* %V) {
; CHECK: # %bb.0:
; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
; CHECK-NEXT: pextrw $4, %xmm0, %eax
; CHECK-NEXT: pextrw $2, %xmm0, %eax
; CHECK-NEXT: andb $95, %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%Vp = bitcast <4 x i8>* %V to <3 x i8>*

View File

@@ -4,8 +4,8 @@
define float @ext_fadd_v4f32(<4 x float> %x) {
; CHECK-LABEL: ext_fadd_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: addps {{.*}}(%rip), %xmm0
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: addss {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%bo = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 42.0>
%ext = extractelement <4 x float> %bo, i32 2
@@ -15,9 +15,9 @@ define float @ext_fadd_v4f32(<4 x float> %x) {
define float @ext_fsub_v4f32(<4 x float> %x) {
; CHECK-LABEL: ext_fsub_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm1 = <u,2.0E+0,u,u>
; CHECK-NEXT: subps %xmm0, %xmm1
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: subss %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retq
%bo = fsub <4 x float> <float 1.0, float 2.0, float 3.0, float 42.0>, %x
@@ -28,19 +28,20 @@ define float @ext_fsub_v4f32(<4 x float> %x) {
define float @ext_fmul_v4f32(<4 x float> %x) {
; CHECK-LABEL: ext_fmul_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%bo = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 42.0>
%ext = extractelement <4 x float> %bo, i32 3
ret float %ext
}
; TODO: X / 1.0 --> X
define float @ext_fdiv_v4f32(<4 x float> %x) {
; CHECK-LABEL: ext_fdiv_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: divps %xmm1, %xmm0
; CHECK-NEXT: divss {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%bo = fdiv <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 42.0>
%ext = extractelement <4 x float> %bo, i32 0
@@ -50,9 +51,9 @@ define float @ext_fdiv_v4f32(<4 x float> %x) {
define float @ext_fdiv_v4f32_constant_op0(<4 x float> %x) {
; CHECK-LABEL: ext_fdiv_v4f32_constant_op0:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm1 = <u,2.0E+0,u,u>
; CHECK-NEXT: divps %xmm0, %xmm1
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: divss %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retq
%bo = fdiv <4 x float> <float 1.0, float 2.0, float 3.0, float 42.0>, %x

View File

@@ -211,21 +211,19 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
;
; X86-SSE42-LABEL: test_reduce_v8i16:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor LCPI2_0, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v8i16:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpxor LCPI2_0, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT: retl
;
@@ -244,21 +242,19 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
;
; X64-SSE42-LABEL: test_reduce_v8i16:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v8i16:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX-NEXT: retq
%1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -309,26 +305,24 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
;
; X86-SSE42-LABEL: test_reduce_v16i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
; X86-SSE42-NEXT: psrlw $8, %xmm2
; X86-SSE42-NEXT: pminub %xmm0, %xmm2
; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor LCPI3_0, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: xorb $127, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v16i8:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
; X86-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X86-AVX-NEXT: vpxor LCPI3_0, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX-NEXT: xorb $127, %al
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: retl
;
@@ -366,26 +360,24 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
;
; X64-SSE42-LABEL: test_reduce_v16i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
; X64-SSE42-NEXT: psrlw $8, %xmm2
; X64-SSE42-NEXT: pminub %xmm0, %xmm2
; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: xorb $127, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v16i8:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX-NEXT: xorb $127, %al
; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT: retq
%1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -736,11 +728,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-SSE42-LABEL: test_reduce_v16i16:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pmaxsw %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor LCPI6_0, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
@@ -748,11 +739,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor LCPI6_0, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
@@ -761,11 +751,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor LCPI6_0, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
@@ -787,11 +776,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-SSE42-LABEL: test_reduce_v16i16:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pmaxsw %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
@@ -799,11 +787,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
@@ -812,11 +799,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@@ -825,11 +811,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
@@ -890,14 +875,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-SSE42-LABEL: test_reduce_v32i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
; X86-SSE42-NEXT: psrlw $8, %xmm2
; X86-SSE42-NEXT: pminub %xmm0, %xmm2
; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor LCPI7_0, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: xorb $127, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
@@ -905,13 +889,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor LCPI7_0, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX1-NEXT: xorb $127, %al
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
@@ -920,13 +903,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor LCPI7_0, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX2-NEXT: xorb $127, %al
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
@@ -971,14 +953,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-SSE42-LABEL: test_reduce_v32i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
; X64-SSE42-NEXT: psrlw $8, %xmm2
; X64-SSE42-NEXT: pminub %xmm0, %xmm2
; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: xorb $127, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
@@ -986,13 +967,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT: xorb $127, %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
@@ -1001,13 +981,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT: xorb $127, %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@@ -1016,13 +995,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT: xorb $127, %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
@@ -1513,11 +1491,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-SSE42-NEXT: pmaxsw %xmm3, %xmm1
; X86-SSE42-NEXT: pmaxsw %xmm2, %xmm0
; X86-SSE42-NEXT: pmaxsw %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor LCPI10_0, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
@@ -1528,11 +1505,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor LCPI10_0, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
@@ -1542,11 +1518,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor LCPI10_0, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
@@ -1572,11 +1547,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-SSE42-NEXT: pmaxsw %xmm3, %xmm1
; X64-SSE42-NEXT: pmaxsw %xmm2, %xmm0
; X64-SSE42-NEXT: pmaxsw %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
@@ -1587,11 +1561,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
@@ -1601,11 +1574,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@@ -1616,11 +1588,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: xorl $32767, %eax ## imm = 0x7FFF
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
@@ -1696,14 +1667,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-SSE42-NEXT: pmaxsb %xmm3, %xmm1
; X86-SSE42-NEXT: pmaxsb %xmm2, %xmm0
; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
; X86-SSE42-NEXT: psrlw $8, %xmm2
; X86-SSE42-NEXT: pminub %xmm0, %xmm2
; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor LCPI11_0, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: xorb $127, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
@@ -1714,13 +1684,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor LCPI11_0, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX1-NEXT: xorb $127, %al
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
@@ -1730,13 +1699,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor LCPI11_0, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX2-NEXT: xorb $127, %al
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
@@ -1793,14 +1761,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-SSE42-NEXT: pmaxsb %xmm3, %xmm1
; X64-SSE42-NEXT: pmaxsb %xmm2, %xmm0
; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
; X64-SSE42-NEXT: psrlw $8, %xmm2
; X64-SSE42-NEXT: pminub %xmm0, %xmm2
; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: xorb $127, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
@@ -1811,13 +1778,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT: xorb $127, %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
@@ -1827,13 +1793,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT: xorb $127, %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@@ -1844,13 +1809,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT: xorb $127, %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq

View File

@@ -213,21 +213,19 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
;
; X86-SSE42-LABEL: test_reduce_v8i16:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor LCPI2_0, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v8i16:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpxor LCPI2_0, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT: retl
;
@@ -246,21 +244,19 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
;
; X64-SSE42-LABEL: test_reduce_v8i16:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v8i16:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX-NEXT: retq
%1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -311,26 +307,24 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
;
; X86-SSE42-LABEL: test_reduce_v16i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
; X86-SSE42-NEXT: psrlw $8, %xmm2
; X86-SSE42-NEXT: pminub %xmm0, %xmm2
; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor LCPI3_0, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: xorb $-128, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v16i8:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
; X86-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X86-AVX-NEXT: vpxor LCPI3_0, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX-NEXT: xorb $-128, %al
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: retl
;
@@ -368,26 +362,24 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
;
; X64-SSE42-LABEL: test_reduce_v16i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
; X64-SSE42-NEXT: psrlw $8, %xmm2
; X64-SSE42-NEXT: pminub %xmm0, %xmm2
; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: xorb $-128, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v16i8:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX-NEXT: xorb $-128, %al
; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT: retq
%1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -740,11 +732,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-SSE42-LABEL: test_reduce_v16i16:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pminsw %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor LCPI6_0, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
@@ -752,11 +743,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor LCPI6_0, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
@@ -765,11 +755,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor LCPI6_0, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
@@ -791,11 +780,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-SSE42-LABEL: test_reduce_v16i16:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pminsw %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
@@ -803,11 +791,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
@@ -816,11 +803,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@@ -829,11 +815,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
@@ -894,14 +879,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-SSE42-LABEL: test_reduce_v32i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
; X86-SSE42-NEXT: psrlw $8, %xmm2
; X86-SSE42-NEXT: pminub %xmm0, %xmm2
; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor LCPI7_0, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: xorb $-128, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
@@ -909,13 +893,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor LCPI7_0, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX1-NEXT: xorb $-128, %al
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
@@ -924,13 +907,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor LCPI7_0, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX2-NEXT: xorb $-128, %al
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
@@ -975,14 +957,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-SSE42-LABEL: test_reduce_v32i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
; X64-SSE42-NEXT: psrlw $8, %xmm2
; X64-SSE42-NEXT: pminub %xmm0, %xmm2
; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: xorb $-128, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
@@ -990,13 +971,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT: xorb $-128, %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
@@ -1005,13 +985,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT: xorb $-128, %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@@ -1020,13 +999,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT: xorb $-128, %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
@@ -1517,11 +1495,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-SSE42-NEXT: pminsw %xmm3, %xmm1
; X86-SSE42-NEXT: pminsw %xmm2, %xmm0
; X86-SSE42-NEXT: pminsw %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor LCPI10_0, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
@@ -1532,11 +1509,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor LCPI10_0, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
@@ -1546,11 +1522,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor LCPI10_0, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
@@ -1576,11 +1551,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-SSE42-NEXT: pminsw %xmm3, %xmm1
; X64-SSE42-NEXT: pminsw %xmm2, %xmm0
; X64-SSE42-NEXT: pminsw %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
@@ -1591,11 +1565,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
@@ -1605,11 +1578,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@@ -1620,11 +1592,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
@@ -1700,14 +1671,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-SSE42-NEXT: pminsb %xmm3, %xmm1
; X86-SSE42-NEXT: pminsb %xmm2, %xmm0
; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
; X86-SSE42-NEXT: psrlw $8, %xmm2
; X86-SSE42-NEXT: pminub %xmm0, %xmm2
; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor LCPI11_0, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: xorb $-128, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
@@ -1718,13 +1688,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor LCPI11_0, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX1-NEXT: xorb $-128, %al
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
@ -1734,13 +1703,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor LCPI11_0, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX2-NEXT: xorb $-128, %al
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
@ -1797,14 +1765,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-SSE42-NEXT: pminsb %xmm3, %xmm1
; X64-SSE42-NEXT: pminsb %xmm2, %xmm0
; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
; X64-SSE42-NEXT: psrlw $8, %xmm2
; X64-SSE42-NEXT: pminub %xmm0, %xmm2
; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: xorb $-128, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
@ -1815,13 +1782,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT: xorb $-128, %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
@ -1831,13 +1797,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT: xorb $-128, %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@ -1848,13 +1813,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT: xorb $-128, %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq


@ -240,18 +240,18 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v8i16:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: notl %eax
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
@ -260,8 +260,8 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: notl %eax
; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT: retl
;
@ -282,18 +282,18 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v8i16:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: notl %eax
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
@ -302,8 +302,8 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: notl %eax
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: retq
;
@ -312,8 +312,8 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: notl %eax
; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: retq
;
@ -321,8 +321,8 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: notl %eax
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: retq
%1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
@ -358,13 +358,13 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X86-SSE42-LABEL: test_reduce_v16i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
; X86-SSE42-NEXT: psrlw $8, %xmm2
; X86-SSE42-NEXT: pminub %xmm0, %xmm2
; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor %xmm0, %xmm1
; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
; X86-SSE42-NEXT: psrlw $8, %xmm0
; X86-SSE42-NEXT: pminub %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: notb %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
@ -372,11 +372,11 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
; X86-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX-NEXT: notb %al
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: retl
;
@ -399,13 +399,13 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X64-SSE42-LABEL: test_reduce_v16i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
; X64-SSE42-NEXT: psrlw $8, %xmm2
; X64-SSE42-NEXT: pminub %xmm0, %xmm2
; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor %xmm0, %xmm1
; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
; X64-SSE42-NEXT: psrlw $8, %xmm0
; X64-SSE42-NEXT: pminub %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: notb %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
@ -413,11 +413,11 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT: notb %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: retq
;
@ -425,11 +425,11 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT: notb %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: retq
;
@ -439,8 +439,8 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT: notb %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: retq
%1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@ -845,8 +845,8 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
@ -854,10 +854,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pmaxuw %xmm1, %xmm0
; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: notl %eax
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
@ -868,8 +868,8 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: notl %eax
; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
@ -881,8 +881,8 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: notl %eax
; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
@ -908,8 +908,8 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
@ -917,10 +917,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pmaxuw %xmm1, %xmm0
; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: notl %eax
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
@ -931,8 +931,8 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: notl %eax
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
@ -944,8 +944,8 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: notl %eax
; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@ -956,8 +956,8 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-AVX512-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: notl %eax
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
@ -999,13 +999,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0
; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
; X86-SSE42-NEXT: psrlw $8, %xmm2
; X86-SSE42-NEXT: pminub %xmm0, %xmm2
; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor %xmm0, %xmm1
; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
; X86-SSE42-NEXT: psrlw $8, %xmm0
; X86-SSE42-NEXT: pminub %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: notb %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
@ -1015,11 +1015,11 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX1-NEXT: notb %al
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
@ -1030,11 +1030,11 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX2-NEXT: notb %al
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
@ -1060,13 +1060,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0
; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
; X64-SSE42-NEXT: psrlw $8, %xmm2
; X64-SSE42-NEXT: pminub %xmm0, %xmm2
; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor %xmm0, %xmm1
; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
; X64-SSE42-NEXT: psrlw $8, %xmm0
; X64-SSE42-NEXT: pminub %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: notb %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
@ -1076,11 +1076,11 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT: notb %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
@ -1091,11 +1091,11 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT: notb %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@ -1108,8 +1108,8 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT: notb %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
@ -1688,8 +1688,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pxor %xmm4, %xmm1
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
; X86-SSE2-NEXT: pxor %xmm4, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
@ -1699,10 +1699,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-SSE42-NEXT: pmaxuw %xmm2, %xmm0
; X86-SSE42-NEXT: pmaxuw %xmm1, %xmm0
; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: notl %eax
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
@ -1716,8 +1716,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: notl %eax
; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
@ -1730,8 +1730,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: notl %eax
; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
@ -1761,8 +1761,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
@ -1772,10 +1772,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-SSE42-NEXT: pmaxuw %xmm2, %xmm0
; X64-SSE42-NEXT: pmaxuw %xmm1, %xmm0
; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: notl %eax
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
@ -1789,8 +1789,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: notl %eax
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
@ -1803,8 +1803,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: notl %eax
; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@ -1817,8 +1817,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-AVX512-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: notl %eax
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
@ -1867,13 +1867,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-SSE42-NEXT: pmaxub %xmm2, %xmm0
; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0
; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
; X86-SSE42-NEXT: psrlw $8, %xmm2
; X86-SSE42-NEXT: pminub %xmm0, %xmm2
; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor %xmm0, %xmm1
; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
; X86-SSE42-NEXT: psrlw $8, %xmm0
; X86-SSE42-NEXT: pminub %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: notb %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
@ -1886,11 +1886,11 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX1-NEXT: notb %al
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
@ -1902,11 +1902,11 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX2-NEXT: notb %al
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
@ -1936,13 +1936,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-SSE42-NEXT: pmaxub %xmm2, %xmm0
; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0
; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
; X64-SSE42-NEXT: psrlw $8, %xmm2
; X64-SSE42-NEXT: pminub %xmm0, %xmm2
; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor %xmm0, %xmm1
; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
; X64-SSE42-NEXT: psrlw $8, %xmm0
; X64-SSE42-NEXT: pminub %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: notb %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
@ -1955,11 +1955,11 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT: notb %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
@ -1971,11 +1971,11 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT: notb %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@ -1990,8 +1990,8 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT: notb %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq


@ -242,8 +242,8 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
@ -278,8 +278,8 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
@ -785,8 +785,8 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
@ -839,8 +839,8 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
@ -1592,8 +1592,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pxor %xmm4, %xmm1
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
; X86-SSE2-NEXT: pxor %xmm4, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
@ -1656,8 +1656,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;


@ -5,18 +5,14 @@
define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_sext:
; X32: # %bb.0:
; X32-NEXT: movl $15, %eax
; X32-NEXT: vmovd %eax, %xmm1
; X32-NEXT: vpand %xmm1, %xmm0, %xmm0
; X32-NEXT: vpextrw $0, %xmm0, %eax
; X32-NEXT: vmovd %xmm0, %eax
; X32-NEXT: andl $15, %eax
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_extract_sext:
; X64: # %bb.0:
; X64-NEXT: movl $15, %eax
; X64-NEXT: vmovd %eax, %xmm1
; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-NEXT: vpextrw $0, %xmm0, %eax
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: andl $15, %eax
; X64-NEXT: retq
%1 = and <8 x i16> %a0, <i16 15, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
%2 = extractelement <8 x i16> %1, i32 0
@ -38,8 +34,8 @@ define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
;
; X64-LABEL: knownbits_mask_extract_uitofp:
; X64: # %bb.0:
; X64-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X64-NEXT: vmovq %xmm0, %rax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
; X64-NEXT: retq
%1 = and <2 x i64> %a0, <i64 65535, i64 -1>


@ -74,8 +74,8 @@ define float @signbits_ashr_extract_sitofp_0(<2 x i64> %a0) nounwind {
;
; X64-LABEL: signbits_ashr_extract_sitofp_0:
; X64: # %bb.0:
; X64-NEXT: vpsrlq $32, %xmm0, %xmm0
; X64-NEXT: vmovq %xmm0, %rax
; X64-NEXT: shrq $32, %rax
; X64-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
; X64-NEXT: retq
%1 = ashr <2 x i64> %a0, <i64 32, i64 32>
@ -101,12 +101,9 @@ define float @signbits_ashr_extract_sitofp_1(<2 x i64> %a0) nounwind {
;
; X64-LABEL: signbits_ashr_extract_sitofp_1:
; X64: # %bb.0:
; X64-NEXT: vpsrlq $32, %xmm0, %xmm0
; X64-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483648,1]
; X64-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-NEXT: vpsubq %xmm1, %xmm0, %xmm0
; X64-NEXT: vmovq %xmm0, %rax
; X64-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
; X64-NEXT: shrq $32, %rax
; X64-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
; X64-NEXT: retq
%1 = ashr <2 x i64> %a0, <i64 32, i64 63>
%2 = extractelement <2 x i64> %1, i32 0
@ -132,13 +129,10 @@ define float @signbits_ashr_shl_extract_sitofp(<2 x i64> %a0) nounwind {
;
; X64-LABEL: signbits_ashr_shl_extract_sitofp:
; X64: # %bb.0:
; X64-NEXT: vpsrlq $61, %xmm0, %xmm0
; X64-NEXT: vmovdqa {{.*#+}} xmm1 = [4,8]
; X64-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-NEXT: vpsubq %xmm1, %xmm0, %xmm0
; X64-NEXT: vpsllq $20, %xmm0, %xmm0
; X64-NEXT: vmovq %xmm0, %rax
; X64-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
; X64-NEXT: sarq $61, %rax
; X64-NEXT: shll $20, %eax
; X64-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
; X64-NEXT: retq
%1 = ashr <2 x i64> %a0, <i64 61, i64 60>
%2 = shl <2 x i64> %1, <i64 20, i64 16>
@ -168,10 +162,8 @@ define float @signbits_ashr_insert_ashr_extract_sitofp(i64 %a0, i64 %a1) nounwin
; X64-LABEL: signbits_ashr_insert_ashr_extract_sitofp:
; X64: # %bb.0:
; X64-NEXT: sarq $30, %rdi
; X64-NEXT: vmovq %rdi, %xmm0
; X64-NEXT: vpsrlq $3, %xmm0, %xmm0
; X64-NEXT: vmovq %xmm0, %rax
; X64-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
; X64-NEXT: shrq $3, %rdi
; X64-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0
; X64-NEXT: retq
%1 = ashr i64 %a0, 30
%2 = insertelement <2 x i64> undef, i64 %1, i32 0


@ -9,7 +9,7 @@ define i64 @PR30511(<2 x double> %a) {
; CHECK: # %bb.0:
; CHECK-NEXT: addpd {{.*}}(%rip), %xmm0
; CHECK-NEXT: cvtdq2pd %xmm0, %xmm0
; CHECK-NEXT: mulpd {{.*}}(%rip), %xmm0
; CHECK-NEXT: mulsd {{.*}}(%rip), %xmm0
; CHECK-NEXT: movq %xmm0, %rax
; CHECK-NEXT: retq
%1 = fadd <2 x double> %a, <double 0x4338000000000000, double 0x4338000000000000>


@ -6,18 +6,16 @@ define i32 @test_eq_1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_eq_1:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: notl %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_eq_1:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pextrd $1, %xmm0, %eax
; SSE41-NEXT: pextrd $1, %xmm1, %eax
; SSE41-NEXT: notl %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %A, %B
%sext = sext <4 x i1> %cmp to <4 x i32>
@ -65,18 +63,16 @@ define i32 @test_ge_1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_ge_1:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: notl %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_ge_1:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pextrd $1, %xmm0, %eax
; SSE41-NEXT: pextrd $1, %xmm1, %eax
; SSE41-NEXT: notl %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %A, %B
%sext = sext <4 x i1> %cmp to <4 x i32>
@ -124,18 +120,16 @@ define i32 @test_eq_2(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_eq_2:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: notl %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_eq_2:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: pextrd $1, %xmm1, %eax
; SSE41-NEXT: pextrd $1, %xmm0, %eax
; SSE41-NEXT: notl %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %B, %A
%sext = sext <4 x i1> %cmp to <4 x i32>
@ -170,18 +164,16 @@ define i32 @test_le_2(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_le_2:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: notl %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_le_2:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: pextrd $1, %xmm1, %eax
; SSE41-NEXT: pextrd $1, %xmm0, %eax
; SSE41-NEXT: notl %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %B, %A
%sext = sext <4 x i1> %cmp to <4 x i32>


@ -2096,7 +2096,7 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movdqa (%eax), %xmm5
; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movdqa (%ecx), %xmm3
; X86-SSE-NEXT: movdqa (%ecx), %xmm2
; X86-SSE-NEXT: movdqa 16(%ecx), %xmm6
; X86-SSE-NEXT: pxor %xmm0, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
@ -2110,10 +2110,10 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm0
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm5[2,3,0,1]
; X86-SSE-NEXT: movd %xmm2, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm6[2,3,0,1]
; X86-SSE-NEXT: movd %xmm2, %esi
; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm5[2,3,0,1]
; X86-SSE-NEXT: movd %xmm3, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm6[2,3,0,1]
; X86-SSE-NEXT: movd %xmm3, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm7
@ -2122,7 +2122,7 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-SSE-NEXT: movd %xmm6, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm2
; X86-SSE-NEXT: movd %edx, %xmm3
; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,2,3]
; X86-SSE-NEXT: movd %xmm5, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,2,3]
@ -2130,60 +2130,57 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm5
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm7[0]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm4[3,1,2,3]
; X86-SSE-NEXT: movd %xmm6, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm3[3,1,2,3]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm2[3,1,2,3]
; X86-SSE-NEXT: movd %xmm6, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm6
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,0,1]
; X86-SSE-NEXT: movd %xmm7, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,0,1]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm2[2,3,0,1]
; X86-SSE-NEXT: movd %xmm7, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm7
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
; X86-SSE-NEXT: movd %xmm4, %eax
; X86-SSE-NEXT: movd %xmm3, %esi
; X86-SSE-NEXT: movd %xmm2, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,2,3]
; X86-SSE-NEXT: movd %xmm4, %eax
; X86-SSE-NEXT: movd %edx, %xmm4
; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
; X86-SSE-NEXT: movd %xmm3, %esi
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
; X86-SSE-NEXT: movd %xmm2, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm3
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; X86-SSE-NEXT: movd %edx, %xmm2
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm7[0]
; X86-SSE-NEXT: movd %xmm1, %eax
; X86-SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,0],xmm6[0,0]
; X86-SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm6[0,0]
; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [8199,8199,8199,8199]
; X86-SSE-NEXT: pmuludq %xmm1, %xmm4
; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
; X86-SSE-NEXT: pmuludq %xmm1, %xmm3
; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; X86-SSE-NEXT: pmuludq %xmm1, %xmm2
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; X86-SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,0],xmm0[0,0]
; X86-SSE-NEXT: pmuludq %xmm1, %xmm3
; X86-SSE-NEXT: pmuludq %xmm1, %xmm5
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm5[0,2,2,3]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl 32(%ecx)
; X86-SSE-NEXT: pmuludq %xmm1, %xmm2
; X86-SSE-NEXT: pmuludq %xmm1, %xmm5
; X86-SSE-NEXT: movd %edx, %xmm0
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm5[0,2,2,3]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-SSE-NEXT: movl $8199, %eax # imm = 0x2007
; X86-SSE-NEXT: movd %eax, %xmm2
; X86-SSE-NEXT: pmuludq %xmm0, %xmm2
; X86-SSE-NEXT: movd %xmm2, (%eax)
; X86-SSE-NEXT: movdqa %xmm1, (%eax)
; X86-SSE-NEXT: movdqa %xmm0, (%eax)
; X86-SSE-NEXT: movdqa %xmm4, (%eax)
; X86-SSE-NEXT: imull $8199, %edx, %eax # imm = 0x2007
; X86-SSE-NEXT: movl %eax, (%eax)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
@ -2252,14 +2249,13 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-AVX1-NEXT: vpinsrd $1, (%esp), %xmm1, %xmm1 # 4-byte Folded Reload
; X86-AVX1-NEXT: vpinsrd $2, {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 4-byte Folded Reload
; X86-AVX1-NEXT: vpinsrd $3, {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 4-byte Folded Reload
; X86-AVX1-NEXT: vmovd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 4-byte Folded Reload
; X86-AVX1-NEXT: # xmm2 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [8199,8199,8199,8199]
; X86-AVX1-NEXT: vpmulld %xmm3, %xmm0, %xmm0
; X86-AVX1-NEXT: vpmulld %xmm3, %xmm1, %xmm1
; X86-AVX1-NEXT: imull $8199, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-AVX1-NEXT: # imm = 0x2007
; X86-AVX1-NEXT: movl %eax, (%eax)
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [8199,8199,8199,8199]
; X86-AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vpmulld %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpmulld %xmm3, %xmm2, %xmm1
; X86-AVX1-NEXT: vmovd %xmm1, (%eax)
; X86-AVX1-NEXT: vmovaps %ymm0, (%eax)
; X86-AVX1-NEXT: addl $16, %esp
; X86-AVX1-NEXT: popl %esi
@ -2326,12 +2322,11 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: xorl %edx, %edx
; X86-AVX2-NEXT: divl 32(%esi)
; X86-AVX2-NEXT: vmovd %edx, %xmm0
; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [8199,8199,8199,8199,8199,8199,8199,8199]
; X86-AVX2-NEXT: vpmulld %ymm2, %ymm1, %ymm1
; X86-AVX2-NEXT: vpmulld %xmm2, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, (%eax)
; X86-AVX2-NEXT: vmovdqa %ymm1, (%eax)
; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm0 = [8199,8199,8199,8199,8199,8199,8199,8199]
; X86-AVX2-NEXT: vpmulld %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: imull $8199, %edx, %eax # imm = 0x2007
; X86-AVX2-NEXT: movl %eax, (%eax)
; X86-AVX2-NEXT: vmovdqa %ymm0, (%eax)
; X86-AVX2-NEXT: popl %esi
; X86-AVX2-NEXT: popl %edi
; X86-AVX2-NEXT: vzeroupper
@ -2345,8 +2340,8 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-SSE-NEXT: movdqa 16(%rsi), %xmm6
; X64-SSE-NEXT: pxor %xmm0, %xmm0
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-SSE-NEXT: movdqa %xmm5, %xmm4
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
; X64-SSE-NEXT: movdqa %xmm5, %xmm3
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm5[3,1,2,3]
; X64-SSE-NEXT: movd %xmm0, %eax
@ -2355,10 +2350,10 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm8
; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm5[2,3,0,1]
; X64-SSE-NEXT: movd %xmm3, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm6[2,3,0,1]
; X64-SSE-NEXT: movd %xmm3, %ecx
; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[2,3,0,1]
; X64-SSE-NEXT: movd %xmm4, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm6[2,3,0,1]
; X64-SSE-NEXT: movd %xmm4, %ecx
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm7
@ -2367,7 +2362,7 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-SSE-NEXT: movd %xmm6, %ecx
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm3
; X64-SSE-NEXT: movd %edx, %xmm4
; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,2,3]
; X64-SSE-NEXT: movd %xmm5, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,2,3]
@ -2375,16 +2370,16 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm5
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0]
; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm4[3,1,2,3]
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm7[0]
; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm3[3,1,2,3]
; X64-SSE-NEXT: movd %xmm6, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm2[3,1,2,3]
; X64-SSE-NEXT: movd %xmm6, %ecx
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm6
; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,0,1]
; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,0,1]
; X64-SSE-NEXT: movd %xmm7, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm2[2,3,0,1]
; X64-SSE-NEXT: movd %xmm7, %ecx
@ -2392,13 +2387,13 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm7
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
; X64-SSE-NEXT: movd %xmm4, %eax
; X64-SSE-NEXT: movd %xmm3, %eax
; X64-SSE-NEXT: movd %xmm2, %ecx
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm0
; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,2,3]
; X64-SSE-NEXT: movd %xmm4, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
; X64-SSE-NEXT: movd %xmm3, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
; X64-SSE-NEXT: movd %xmm2, %ecx
; X64-SSE-NEXT: xorl %edx, %edx
@ -2409,24 +2404,21 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-SSE-NEXT: movd %xmm1, %eax
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl 32(%rsi)
; X64-SSE-NEXT: movd %edx, %xmm1
; X64-SSE-NEXT: movdqa {{.*#+}} xmm4 = [8199,8199,8199,8199]
; X64-SSE-NEXT: pmuludq %xmm4, %xmm0
; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [8199,8199,8199,8199]
; X64-SSE-NEXT: pmuludq %xmm1, %xmm0
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm6[0,0]
; X64-SSE-NEXT: pmuludq %xmm4, %xmm2
; X64-SSE-NEXT: pmuludq %xmm1, %xmm2
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X64-SSE-NEXT: pmuludq %xmm4, %xmm3
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
; X64-SSE-NEXT: pmuludq %xmm1, %xmm4
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[0,2,2,3]
; X64-SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,0],xmm8[0,0]
; X64-SSE-NEXT: pmuludq %xmm4, %xmm5
; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm5[0,2,2,3]
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-SSE-NEXT: movl $8199, %eax # imm = 0x2007
; X64-SSE-NEXT: movd %eax, %xmm3
; X64-SSE-NEXT: pmuludq %xmm1, %xmm3
; X64-SSE-NEXT: movd %xmm3, (%rax)
; X64-SSE-NEXT: pmuludq %xmm1, %xmm5
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm5[0,2,2,3]
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-SSE-NEXT: imull $8199, %edx, %eax # imm = 0x2007
; X64-SSE-NEXT: movl %eax, (%rax)
; X64-SSE-NEXT: movdqa %xmm2, (%rax)
; X64-SSE-NEXT: movdqa %xmm0, (%rax)
; X64-SSE-NEXT: retq
@ -2493,11 +2485,10 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-AVX1-NEXT: vpinsrd $1, %r11d, %xmm2, %xmm2
; X64-AVX1-NEXT: vpinsrd $2, %r10d, %xmm2, %xmm2
; X64-AVX1-NEXT: vpinsrd $3, %r9d, %xmm2, %xmm2
; X64-AVX1-NEXT: vpmulld %xmm1, %xmm2, %xmm2
; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; X64-AVX1-NEXT: vmovd %r8d, %xmm2
; X64-AVX1-NEXT: vpmulld %xmm1, %xmm2, %xmm1
; X64-AVX1-NEXT: vmovd %xmm1, (%rax)
; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-AVX1-NEXT: imull $8199, %r8d, %eax # imm = 0x2007
; X64-AVX1-NEXT: movl %eax, (%rax)
; X64-AVX1-NEXT: vmovaps %ymm0, (%rax)
; X64-AVX1-NEXT: popq %rbx
; X64-AVX1-NEXT: popq %rbp
@ -2557,12 +2548,11 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: xorl %edx, %edx
; X64-AVX2-NEXT: divl 32(%rsi)
; X64-AVX2-NEXT: vmovd %edx, %xmm0
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [8199,8199,8199,8199,8199,8199,8199,8199]
; X64-AVX2-NEXT: vpmulld %ymm2, %ymm1, %ymm1
; X64-AVX2-NEXT: vpmulld %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, (%rax)
; X64-AVX2-NEXT: vmovdqa %ymm1, (%rax)
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm0 = [8199,8199,8199,8199,8199,8199,8199,8199]
; X64-AVX2-NEXT: vpmulld %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: imull $8199, %edx, %eax # imm = 0x2007
; X64-AVX2-NEXT: movl %eax, (%rax)
; X64-AVX2-NEXT: vmovdqa %ymm0, (%rax)
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
%a0 = load <9 x i16>, <9 x i16>* %p0, align 64


@ -2079,7 +2079,7 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movdqa (%eax), %xmm5
; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movdqa (%ecx), %xmm3
; X86-SSE-NEXT: movdqa (%ecx), %xmm2
; X86-SSE-NEXT: movdqa 16(%ecx), %xmm6
; X86-SSE-NEXT: pxor %xmm0, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
@ -2093,10 +2093,10 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm0
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm5[2,3,0,1]
; X86-SSE-NEXT: movd %xmm2, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm6[2,3,0,1]
; X86-SSE-NEXT: movd %xmm2, %esi
; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm5[2,3,0,1]
; X86-SSE-NEXT: movd %xmm3, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm6[2,3,0,1]
; X86-SSE-NEXT: movd %xmm3, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm7
@ -2105,7 +2105,7 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-SSE-NEXT: movd %xmm6, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm2
; X86-SSE-NEXT: movd %edx, %xmm3
; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,2,3]
; X86-SSE-NEXT: movd %xmm5, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,2,3]
@ -2113,60 +2113,57 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm5
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm7[0]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm4[3,1,2,3]
; X86-SSE-NEXT: movd %xmm6, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm3[3,1,2,3]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm2[3,1,2,3]
; X86-SSE-NEXT: movd %xmm6, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm6
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,0,1]
; X86-SSE-NEXT: movd %xmm7, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,0,1]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm2[2,3,0,1]
; X86-SSE-NEXT: movd %xmm7, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm7
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
; X86-SSE-NEXT: movd %xmm4, %eax
; X86-SSE-NEXT: movd %xmm3, %esi
; X86-SSE-NEXT: movd %xmm2, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,2,3]
; X86-SSE-NEXT: movd %xmm4, %eax
; X86-SSE-NEXT: movd %edx, %xmm4
; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
; X86-SSE-NEXT: movd %xmm3, %esi
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
; X86-SSE-NEXT: movd %xmm2, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm3
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; X86-SSE-NEXT: movd %edx, %xmm2
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm7[0]
; X86-SSE-NEXT: movd %xmm1, %eax
; X86-SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,0],xmm6[0,0]
; X86-SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm6[0,0]
; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [8199,8199,8199,8199]
; X86-SSE-NEXT: pmuludq %xmm1, %xmm4
; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
; X86-SSE-NEXT: pmuludq %xmm1, %xmm3
; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; X86-SSE-NEXT: pmuludq %xmm1, %xmm2
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; X86-SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,0],xmm0[0,0]
; X86-SSE-NEXT: pmuludq %xmm1, %xmm3
; X86-SSE-NEXT: pmuludq %xmm1, %xmm5
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm5[0,2,2,3]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl 32(%ecx)
; X86-SSE-NEXT: pmuludq %xmm1, %xmm2
; X86-SSE-NEXT: pmuludq %xmm1, %xmm5
; X86-SSE-NEXT: movd %edx, %xmm0
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm5[0,2,2,3]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-SSE-NEXT: movl $8199, %eax # imm = 0x2007
; X86-SSE-NEXT: movd %eax, %xmm2
; X86-SSE-NEXT: pmuludq %xmm0, %xmm2
; X86-SSE-NEXT: movd %xmm2, (%eax)
; X86-SSE-NEXT: movdqa %xmm1, (%eax)
; X86-SSE-NEXT: movdqa %xmm0, (%eax)
; X86-SSE-NEXT: movdqa %xmm4, (%eax)
; X86-SSE-NEXT: imull $8199, %edx, %eax # imm = 0x2007
; X86-SSE-NEXT: movl %eax, (%eax)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
@ -2235,14 +2232,13 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-AVX1-NEXT: vpinsrd $1, (%esp), %xmm1, %xmm1 # 4-byte Folded Reload
; X86-AVX1-NEXT: vpinsrd $2, {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 4-byte Folded Reload
; X86-AVX1-NEXT: vpinsrd $3, {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 4-byte Folded Reload
; X86-AVX1-NEXT: vmovd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 4-byte Folded Reload
; X86-AVX1-NEXT: # xmm2 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [8199,8199,8199,8199]
; X86-AVX1-NEXT: vpmulld %xmm3, %xmm0, %xmm0
; X86-AVX1-NEXT: vpmulld %xmm3, %xmm1, %xmm1
; X86-AVX1-NEXT: imull $8199, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-AVX1-NEXT: # imm = 0x2007
; X86-AVX1-NEXT: movl %eax, (%eax)
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [8199,8199,8199,8199]
; X86-AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vpmulld %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpmulld %xmm3, %xmm2, %xmm1
; X86-AVX1-NEXT: vmovd %xmm1, (%eax)
; X86-AVX1-NEXT: vmovaps %ymm0, (%eax)
; X86-AVX1-NEXT: addl $16, %esp
; X86-AVX1-NEXT: popl %esi
@ -2309,12 +2305,11 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: xorl %edx, %edx
; X86-AVX2-NEXT: divl 32(%esi)
; X86-AVX2-NEXT: vmovd %edx, %xmm0
; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [8199,8199,8199,8199,8199,8199,8199,8199]
; X86-AVX2-NEXT: vpmulld %ymm2, %ymm1, %ymm1
; X86-AVX2-NEXT: vpmulld %xmm2, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, (%eax)
; X86-AVX2-NEXT: vmovdqa %ymm1, (%eax)
; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm0 = [8199,8199,8199,8199,8199,8199,8199,8199]
; X86-AVX2-NEXT: vpmulld %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: imull $8199, %edx, %eax # imm = 0x2007
; X86-AVX2-NEXT: movl %eax, (%eax)
; X86-AVX2-NEXT: vmovdqa %ymm0, (%eax)
; X86-AVX2-NEXT: popl %esi
; X86-AVX2-NEXT: popl %edi
; X86-AVX2-NEXT: vzeroupper
@ -2328,8 +2323,8 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-SSE-NEXT: movdqa 16(%rsi), %xmm6
; X64-SSE-NEXT: pxor %xmm0, %xmm0
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-SSE-NEXT: movdqa %xmm5, %xmm4
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
; X64-SSE-NEXT: movdqa %xmm5, %xmm3
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm5[3,1,2,3]
; X64-SSE-NEXT: movd %xmm0, %eax
@ -2338,10 +2333,10 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm8
; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm5[2,3,0,1]
; X64-SSE-NEXT: movd %xmm3, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm6[2,3,0,1]
; X64-SSE-NEXT: movd %xmm3, %ecx
; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[2,3,0,1]
; X64-SSE-NEXT: movd %xmm4, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm6[2,3,0,1]
; X64-SSE-NEXT: movd %xmm4, %ecx
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm7
@ -2350,7 +2345,7 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-SSE-NEXT: movd %xmm6, %ecx
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm3
; X64-SSE-NEXT: movd %edx, %xmm4
; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,2,3]
; X64-SSE-NEXT: movd %xmm5, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,2,3]
@ -2358,16 +2353,16 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm5
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0]
; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm4[3,1,2,3]
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm7[0]
; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm3[3,1,2,3]
; X64-SSE-NEXT: movd %xmm6, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm2[3,1,2,3]
; X64-SSE-NEXT: movd %xmm6, %ecx
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm6
; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,0,1]
; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,0,1]
; X64-SSE-NEXT: movd %xmm7, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm2[2,3,0,1]
; X64-SSE-NEXT: movd %xmm7, %ecx
@ -2375,13 +2370,13 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm7
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
; X64-SSE-NEXT: movd %xmm4, %eax
; X64-SSE-NEXT: movd %xmm3, %eax
; X64-SSE-NEXT: movd %xmm2, %ecx
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm0
; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,2,3]
; X64-SSE-NEXT: movd %xmm4, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
; X64-SSE-NEXT: movd %xmm3, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
; X64-SSE-NEXT: movd %xmm2, %ecx
; X64-SSE-NEXT: xorl %edx, %edx
@ -2392,24 +2387,21 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-SSE-NEXT: movd %xmm1, %eax
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl 32(%rsi)
; X64-SSE-NEXT: movd %edx, %xmm1
; X64-SSE-NEXT: movdqa {{.*#+}} xmm4 = [8199,8199,8199,8199]
; X64-SSE-NEXT: pmuludq %xmm4, %xmm0
; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [8199,8199,8199,8199]
; X64-SSE-NEXT: pmuludq %xmm1, %xmm0
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm6[0,0]
; X64-SSE-NEXT: pmuludq %xmm4, %xmm2
; X64-SSE-NEXT: pmuludq %xmm1, %xmm2
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X64-SSE-NEXT: pmuludq %xmm4, %xmm3
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
; X64-SSE-NEXT: pmuludq %xmm1, %xmm4
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[0,2,2,3]
; X64-SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,0],xmm8[0,0]
; X64-SSE-NEXT: pmuludq %xmm4, %xmm5
; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm5[0,2,2,3]
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-SSE-NEXT: movl $8199, %eax # imm = 0x2007
; X64-SSE-NEXT: movd %eax, %xmm3
; X64-SSE-NEXT: pmuludq %xmm1, %xmm3
; X64-SSE-NEXT: movd %xmm3, (%rax)
; X64-SSE-NEXT: pmuludq %xmm1, %xmm5
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm5[0,2,2,3]
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-SSE-NEXT: imull $8199, %edx, %eax # imm = 0x2007
; X64-SSE-NEXT: movl %eax, (%rax)
; X64-SSE-NEXT: movdqa %xmm2, (%rax)
; X64-SSE-NEXT: movdqa %xmm0, (%rax)
; X64-SSE-NEXT: retq
@ -2476,11 +2468,10 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-AVX1-NEXT: vpinsrd $1, %r11d, %xmm2, %xmm2
; X64-AVX1-NEXT: vpinsrd $2, %r10d, %xmm2, %xmm2
; X64-AVX1-NEXT: vpinsrd $3, %r9d, %xmm2, %xmm2
; X64-AVX1-NEXT: vpmulld %xmm1, %xmm2, %xmm2
; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; X64-AVX1-NEXT: vmovd %r8d, %xmm2
; X64-AVX1-NEXT: vpmulld %xmm1, %xmm2, %xmm1
; X64-AVX1-NEXT: vmovd %xmm1, (%rax)
; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-AVX1-NEXT: imull $8199, %r8d, %eax # imm = 0x2007
; X64-AVX1-NEXT: movl %eax, (%rax)
; X64-AVX1-NEXT: vmovaps %ymm0, (%rax)
; X64-AVX1-NEXT: popq %rbx
; X64-AVX1-NEXT: popq %rbp
@ -2540,12 +2531,11 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: xorl %edx, %edx
; X64-AVX2-NEXT: divl 32(%rsi)
; X64-AVX2-NEXT: vmovd %edx, %xmm0
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [8199,8199,8199,8199,8199,8199,8199,8199]
; X64-AVX2-NEXT: vpmulld %ymm2, %ymm1, %ymm1
; X64-AVX2-NEXT: vpmulld %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, (%rax)
; X64-AVX2-NEXT: vmovdqa %ymm1, (%rax)
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm0 = [8199,8199,8199,8199,8199,8199,8199,8199]
; X64-AVX2-NEXT: vpmulld %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: imull $8199, %edx, %eax # imm = 0x2007
; X64-AVX2-NEXT: movl %eax, (%rax)
; X64-AVX2-NEXT: vmovdqa %ymm0, (%rax)
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
%a0 = load <9 x i16>, <9 x i16>* %p0, align 64
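
The PR34947 diffs above are the fold in action for the ninth, scalar element: the remainder that used to be inserted into a vector, multiplied by the splatted 8199 with pmuludq/vpmulld, and extracted again is now multiplied directly in a GPR. A minimal C++ model of the new scalar tail (hypothetical name, an illustration rather than LLVM code):

#include <cstdint>

// rem models the value left in %edx by the trailing divl; the old
// insert/pmuludq/extract sequence collapses to a single imull.
uint32_t pr34947_tail(uint32_t rem) {
  return rem * 8199u; // imull $8199, %edx, %eax
}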

@ -1294,12 +1294,13 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
; SSE2-NEXT: movss {{.*#+}} xmm4 = xmm2[0],xmm4[1,2,3]
; SSE2-NEXT: movdqa %xmm0, %xmm5
; SSE2-NEXT: pslld %xmm4, %xmm5
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [32,32,32,32]
; SSE2-NEXT: psubd %xmm2, %xmm4
; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3]
; SSE2-NEXT: psrld %xmm3, %xmm1
; SSE2-NEXT: movd %xmm2, %eax
; SSE2-NEXT: movl $32, %ecx
; SSE2-NEXT: subl %eax, %ecx
; SSE2-NEXT: movd %ecx, %xmm4
; SSE2-NEXT: psrld %xmm4, %xmm1
; SSE2-NEXT: por %xmm5, %xmm1
; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pandn %xmm1, %xmm2
; SSE2-NEXT: por %xmm2, %xmm0
@ -1471,12 +1472,13 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
; X32-SSE-NEXT: movss {{.*#+}} xmm4 = xmm2[0],xmm4[1,2,3]
; X32-SSE-NEXT: movdqa %xmm0, %xmm5
; X32-SSE-NEXT: pslld %xmm4, %xmm5
; X32-SSE-NEXT: movdqa {{.*#+}} xmm4 = [32,32,32,32]
; X32-SSE-NEXT: psubd %xmm2, %xmm4
; X32-SSE-NEXT: pcmpeqd %xmm3, %xmm2
; X32-SSE-NEXT: movss {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3]
; X32-SSE-NEXT: psrld %xmm3, %xmm1
; X32-SSE-NEXT: movd %xmm2, %eax
; X32-SSE-NEXT: movl $32, %ecx
; X32-SSE-NEXT: subl %eax, %ecx
; X32-SSE-NEXT: movd %ecx, %xmm4
; X32-SSE-NEXT: psrld %xmm4, %xmm1
; X32-SSE-NEXT: por %xmm5, %xmm1
; X32-SSE-NEXT: pcmpeqd %xmm3, %xmm2
; X32-SSE-NEXT: pand %xmm2, %xmm0
; X32-SSE-NEXT: pandn %xmm1, %xmm2
; X32-SSE-NEXT: por %xmm2, %xmm0

@ -783,18 +783,16 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind
define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: xorps %xmm2, %xmm2
; SSE2-NEXT: xorps %xmm3, %xmm3
; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: pslld %xmm3, %xmm4
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [32,32,32,32]
; SSE2-NEXT: psubd %xmm1, %xmm3
; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3]
; SSE2-NEXT: psrld %xmm2, %xmm0
; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: andl $31, %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pslld %xmm1, %xmm2
; SSE2-NEXT: movl $32, %ecx
; SSE2-NEXT: subl %eax, %ecx
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: psrld %xmm1, %xmm0
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_funnnel_v4i32:
@ -882,18 +880,16 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind
;
; X32-SSE-LABEL: splatvar_funnnel_v4i32:
; X32-SSE: # %bb.0:
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1
; X32-SSE-NEXT: xorps %xmm2, %xmm2
; X32-SSE-NEXT: xorps %xmm3, %xmm3
; X32-SSE-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; X32-SSE-NEXT: movdqa %xmm0, %xmm4
; X32-SSE-NEXT: pslld %xmm3, %xmm4
; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [32,32,32,32]
; X32-SSE-NEXT: psubd %xmm1, %xmm3
; X32-SSE-NEXT: movss {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3]
; X32-SSE-NEXT: psrld %xmm2, %xmm0
; X32-SSE-NEXT: por %xmm4, %xmm0
; X32-SSE-NEXT: movd %xmm1, %eax
; X32-SSE-NEXT: andl $31, %eax
; X32-SSE-NEXT: movd %eax, %xmm1
; X32-SSE-NEXT: movdqa %xmm0, %xmm2
; X32-SSE-NEXT: pslld %xmm1, %xmm2
; X32-SSE-NEXT: movl $32, %ecx
; X32-SSE-NEXT: subl %eax, %ecx
; X32-SSE-NEXT: movd %ecx, %xmm1
; X32-SSE-NEXT: psrld %xmm1, %xmm0
; X32-SSE-NEXT: por %xmm2, %xmm0
; X32-SSE-NEXT: retl
%splat = shufflevector <4 x i32> %amt, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %splat)
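
In the splatvar_funnnel_v4i32 diff just above, the splatted rotate amount is now extracted to a GPR first, so the pand mask and the subtract from the [32,32,32,32] splat become scalar andl/subl. A C++ sketch of the per-lane math the new SSE2 sequence computes (a model under stated assumptions, not LLVM code; the ternary stands in for x86 vector shifts, which produce 0 for counts of 32 or more):

#include <cstdint>

uint32_t rotl32_model(uint32_t x, uint32_t amt) {
  uint32_t n = amt & 31;               // andl $31, %eax
  uint32_t hi = x << n;                // pslld
  uint32_t lo = n ? x >> (32 - n) : 0; // psrld by 32 - n; a count of 32 shifts out to 0
  return hi | lo;                      // por
}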

@ -1309,12 +1309,13 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
; SSE2-NEXT: movss {{.*#+}} xmm4 = xmm2[0],xmm4[1,2,3]
; SSE2-NEXT: movdqa %xmm1, %xmm5
; SSE2-NEXT: psrld %xmm4, %xmm5
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [32,32,32,32]
; SSE2-NEXT: psubd %xmm2, %xmm4
; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3]
; SSE2-NEXT: pslld %xmm3, %xmm0
; SSE2-NEXT: movd %xmm2, %eax
; SSE2-NEXT: movl $32, %ecx
; SSE2-NEXT: subl %eax, %ecx
; SSE2-NEXT: movd %ecx, %xmm4
; SSE2-NEXT: pslld %xmm4, %xmm0
; SSE2-NEXT: por %xmm5, %xmm0
; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pandn %xmm0, %xmm2
; SSE2-NEXT: por %xmm1, %xmm2
@ -1485,12 +1486,13 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
; X32-SSE-NEXT: movss {{.*#+}} xmm4 = xmm2[0],xmm4[1,2,3]
; X32-SSE-NEXT: movdqa %xmm1, %xmm5
; X32-SSE-NEXT: psrld %xmm4, %xmm5
; X32-SSE-NEXT: movdqa {{.*#+}} xmm4 = [32,32,32,32]
; X32-SSE-NEXT: psubd %xmm2, %xmm4
; X32-SSE-NEXT: pcmpeqd %xmm3, %xmm2
; X32-SSE-NEXT: movss {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3]
; X32-SSE-NEXT: pslld %xmm3, %xmm0
; X32-SSE-NEXT: movd %xmm2, %eax
; X32-SSE-NEXT: movl $32, %ecx
; X32-SSE-NEXT: subl %eax, %ecx
; X32-SSE-NEXT: movd %ecx, %xmm4
; X32-SSE-NEXT: pslld %xmm4, %xmm0
; X32-SSE-NEXT: por %xmm5, %xmm0
; X32-SSE-NEXT: pcmpeqd %xmm3, %xmm2
; X32-SSE-NEXT: pand %xmm2, %xmm1
; X32-SSE-NEXT: pandn %xmm0, %xmm2
; X32-SSE-NEXT: por %xmm1, %xmm2

@ -828,20 +828,17 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind
define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE2-NEXT: xorps %xmm2, %xmm2
; SSE2-NEXT: pxor %xmm3, %xmm3
; SSE2-NEXT: psubd %xmm1, %xmm3
; SSE2-NEXT: pand {{.*}}(%rip), %xmm3
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3]
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: pslld %xmm1, %xmm4
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [32,32,32,32]
; SSE2-NEXT: psubd %xmm3, %xmm1
; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; SSE2-NEXT: psrld %xmm2, %xmm0
; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: negl %eax
; SSE2-NEXT: andl $31, %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pslld %xmm1, %xmm2
; SSE2-NEXT: movl $32, %ecx
; SSE2-NEXT: subl %eax, %ecx
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: psrld %xmm1, %xmm0
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_funnnel_v4i32:
@ -939,20 +936,17 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind
;
; X32-SSE-LABEL: splatvar_funnnel_v4i32:
; X32-SSE: # %bb.0:
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X32-SSE-NEXT: xorps %xmm2, %xmm2
; X32-SSE-NEXT: pxor %xmm3, %xmm3
; X32-SSE-NEXT: psubd %xmm1, %xmm3
; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm3
; X32-SSE-NEXT: pxor %xmm1, %xmm1
; X32-SSE-NEXT: movss {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3]
; X32-SSE-NEXT: movdqa %xmm0, %xmm4
; X32-SSE-NEXT: pslld %xmm1, %xmm4
; X32-SSE-NEXT: movdqa {{.*#+}} xmm1 = [32,32,32,32]
; X32-SSE-NEXT: psubd %xmm3, %xmm1
; X32-SSE-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; X32-SSE-NEXT: psrld %xmm2, %xmm0
; X32-SSE-NEXT: por %xmm4, %xmm0
; X32-SSE-NEXT: movd %xmm1, %eax
; X32-SSE-NEXT: negl %eax
; X32-SSE-NEXT: andl $31, %eax
; X32-SSE-NEXT: movd %eax, %xmm1
; X32-SSE-NEXT: movdqa %xmm0, %xmm2
; X32-SSE-NEXT: pslld %xmm1, %xmm2
; X32-SSE-NEXT: movl $32, %ecx
; X32-SSE-NEXT: subl %eax, %ecx
; X32-SSE-NEXT: movd %ecx, %xmm1
; X32-SSE-NEXT: psrld %xmm1, %xmm0
; X32-SSE-NEXT: por %xmm2, %xmm0
; X32-SSE-NEXT: retl
%splat = shufflevector <4 x i32> %amt, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %splat)
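
The fshr variant above differs only in the leading negl: the right rotate is done as a left rotate by the negated amount. Extending the sketch from the fshl file (same assumptions and caveats):

#include <cstdint>

uint32_t rotr32_model(uint32_t x, uint32_t amt) {
  uint32_t n = (0u - amt) & 31;        // negl %eax; andl $31, %eax
  uint32_t hi = x << n;                // pslld
  uint32_t lo = n ? x >> (32 - n) : 0; // psrld
  return hi | lo;                      // rotr(x, amt) == rotl(x, -amt mod 32)
}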

@ -20,9 +20,8 @@ define <4 x i32*> @AGEP0(i32* %ptr) nounwind {
define i32 @AGEP1(<4 x i32*> %param) nounwind {
; CHECK-LABEL: AGEP1:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddd {{\.LCPI.*}}, %xmm0, %xmm0
; CHECK-NEXT: vpextrd $3, %xmm0, %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: vextractps $3, %xmm0, %eax
; CHECK-NEXT: movl 16(%eax), %eax
; CHECK-NEXT: retl
%A2 = getelementptr i32, <4 x i32*> %param, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
%k = extractelement <4 x i32*> %A2, i32 3
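
AGEP1 may be the most direct illustration in the tests: once the vector add is scalarized past the extract, lane 3's constant offset (i32 4, i.e. 16 bytes) folds into the load's displacement, so vpaddd + vpextrd + movl (%eax) becomes vextractps + movl 16(%eax). A C++ model of the resulting scalar address math (hypothetical function; it assumes, as the asm shows, that the test loads through %k):

#include <cstdint>

int32_t agep1_model(int32_t *const lanes[4]) {
  const int32_t *p = lanes[3]; // vextractps $3, %xmm0, %eax
  return p[4];                 // movl 16(%eax), %eax
}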

@ -1120,31 +1120,28 @@ define i16 @test_v8i16(<8 x i16> %a0) {
;
; SSE41-LABEL: test_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
%1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %a0)
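
The smax reductions in this file lean on a bias trick: x ^ 0x7fff maps signed order onto reversed unsigned order, so the signed max becomes an unsigned min that phminposuw can compute, and the un-bias xor on the extracted element is exactly what gets scalarized here into xorl $32767 (xorb $127 in the byte tests; the smin file further down uses the order-preserving x ^ 0x8000 the same way). A scalar C++ model of the identity (an illustration of the math, not the vector code path):

#include <algorithm>
#include <cstdint>

int16_t smax_v8i16_model(const int16_t (&v)[8]) {
  uint16_t m = 0xffff; // the biased encoding of INT16_MIN
  for (int16_t x : v)
    m = std::min<uint16_t>(m, uint16_t(x) ^ 0x7fff); // pxor, then phminposuw
  return int16_t(m ^ 0x7fff); // after the fold, a scalar xorl $32767
}
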
@ -1169,11 +1166,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; SSE41-LABEL: test_v16i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pmaxsw %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1181,11 +1177,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1194,11 +1189,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1207,11 +1201,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -1241,11 +1234,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; SSE41-NEXT: pmaxsw %xmm3, %xmm1
; SSE41-NEXT: pmaxsw %xmm2, %xmm0
; SSE41-NEXT: pmaxsw %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1256,11 +1248,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1270,11 +1261,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1285,11 +1275,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -1327,11 +1316,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; SSE41-NEXT: pmaxsw %xmm4, %xmm0
; SSE41-NEXT: pmaxsw %xmm2, %xmm0
; SSE41-NEXT: pmaxsw %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1348,11 +1336,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsw %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1364,11 +1351,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1380,11 +1366,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -1597,38 +1582,35 @@ define i8 @test_v16i8(<16 x i8> %a0) {
;
; SSE41-LABEL: test_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: xorb $127, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: xorb $127, %al
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %a0)
@ -1676,14 +1658,13 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; SSE41-LABEL: test_v32i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pmaxsb %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: xorb $127, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -1691,13 +1672,12 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: xorb $127, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1706,13 +1686,12 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: xorb $127, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1721,13 +1700,12 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -1788,14 +1766,13 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; SSE41-NEXT: pmaxsb %xmm3, %xmm1
; SSE41-NEXT: pmaxsb %xmm2, %xmm0
; SSE41-NEXT: pmaxsb %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: xorb $127, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -1806,13 +1783,12 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: xorb $127, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1822,13 +1798,12 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: xorb $127, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1839,13 +1814,12 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -1930,14 +1904,13 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; SSE41-NEXT: pmaxsb %xmm4, %xmm0
; SSE41-NEXT: pmaxsb %xmm2, %xmm0
; SSE41-NEXT: pmaxsb %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: xorb $127, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -1954,13 +1927,12 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsb %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: xorb $127, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1972,13 +1944,12 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: xorb $127, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1990,13 +1961,12 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq

@ -1340,31 +1340,28 @@ define i16 @test_v8i16(<8 x i16> %a0) {
;
; SSE41-LABEL: test_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
%1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %a0)
@ -1389,11 +1386,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; SSE41-LABEL: test_v16i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pmaxsw %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1401,11 +1397,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1414,11 +1409,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1427,11 +1421,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -1461,11 +1454,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; SSE41-NEXT: pmaxsw %xmm3, %xmm1
; SSE41-NEXT: pmaxsw %xmm2, %xmm0
; SSE41-NEXT: pmaxsw %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1476,11 +1468,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1490,11 +1481,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1505,11 +1495,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -1547,11 +1536,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; SSE41-NEXT: pmaxsw %xmm4, %xmm0
; SSE41-NEXT: pmaxsw %xmm2, %xmm0
; SSE41-NEXT: pmaxsw %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1568,11 +1556,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsw %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1584,11 +1571,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1600,11 +1586,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -1947,38 +1932,35 @@ define i8 @test_v16i8(<16 x i8> %a0) {
;
; SSE41-LABEL: test_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: xorb $127, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: xorb $127, %al
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %a0)
@ -2026,14 +2008,13 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; SSE41-LABEL: test_v32i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pmaxsb %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: xorb $127, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -2041,13 +2022,12 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: xorb $127, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -2056,13 +2036,12 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: xorb $127, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -2071,13 +2050,12 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -2138,14 +2116,13 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; SSE41-NEXT: pmaxsb %xmm3, %xmm1
; SSE41-NEXT: pmaxsb %xmm2, %xmm0
; SSE41-NEXT: pmaxsb %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: xorb $127, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -2156,13 +2133,12 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: xorb $127, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -2172,13 +2148,12 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: xorb $127, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -2189,13 +2164,12 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -2280,14 +2254,13 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; SSE41-NEXT: pmaxsb %xmm4, %xmm0
; SSE41-NEXT: pmaxsb %xmm2, %xmm0
; SSE41-NEXT: pmaxsb %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: xorb $127, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -2304,13 +2277,12 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsb %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: xorb $127, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -2322,13 +2294,12 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: xorb $127, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -2340,13 +2311,12 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq

@ -1119,31 +1119,28 @@ define i16 @test_v8i16(<8 x i16> %a0) {
;
; SSE41-LABEL: test_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
%1 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %a0)
@ -1168,11 +1165,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; SSE41-LABEL: test_v16i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pminsw %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1180,11 +1176,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1193,11 +1188,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1206,11 +1200,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -1240,11 +1233,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; SSE41-NEXT: pminsw %xmm3, %xmm1
; SSE41-NEXT: pminsw %xmm2, %xmm0
; SSE41-NEXT: pminsw %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1255,11 +1247,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1269,11 +1260,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1284,11 +1274,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -1326,11 +1315,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; SSE41-NEXT: pminsw %xmm4, %xmm0
; SSE41-NEXT: pminsw %xmm2, %xmm0
; SSE41-NEXT: pminsw %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1347,11 +1335,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpminsw %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1363,11 +1350,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1379,11 +1365,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -1596,38 +1581,35 @@ define i8 @test_v16i8(<16 x i8> %a0) {
;
; SSE41-LABEL: test_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: xorb $-128, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: xorb $-128, %al
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: xorb $-128, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %a0)
@ -1675,14 +1657,13 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; SSE41-LABEL: test_v32i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pminsb %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: xorb $-128, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -1690,13 +1671,12 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: xorb $-128, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1705,13 +1685,12 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: xorb $-128, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1720,13 +1699,12 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: xorb $-128, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -1787,14 +1765,13 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; SSE41-NEXT: pminsb %xmm3, %xmm1
; SSE41-NEXT: pminsb %xmm2, %xmm0
; SSE41-NEXT: pminsb %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: xorb $-128, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -1805,13 +1782,12 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: xorb $-128, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1821,13 +1797,12 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: xorb $-128, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1838,13 +1813,12 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: xorb $-128, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -1929,14 +1903,13 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; SSE41-NEXT: pminsb %xmm4, %xmm0
; SSE41-NEXT: pminsb %xmm2, %xmm0
; SSE41-NEXT: pminsb %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: xorb $-128, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -1953,13 +1926,12 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpminsb %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: xorb $-128, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1971,13 +1943,12 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: xorb $-128, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1989,13 +1960,12 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: xorb $-128, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq


@ -1339,31 +1339,28 @@ define i16 @test_v8i16(<8 x i16> %a0) {
;
; SSE41-LABEL: test_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
%1 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %a0)
@ -1388,11 +1385,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; SSE41-LABEL: test_v16i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pminsw %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1400,11 +1396,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1413,11 +1408,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1426,11 +1420,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -1460,11 +1453,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; SSE41-NEXT: pminsw %xmm3, %xmm1
; SSE41-NEXT: pminsw %xmm2, %xmm0
; SSE41-NEXT: pminsw %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1475,11 +1467,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1489,11 +1480,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1504,11 +1494,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -1546,11 +1535,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; SSE41-NEXT: pminsw %xmm4, %xmm0
; SSE41-NEXT: pminsw %xmm2, %xmm0
; SSE41-NEXT: pminsw %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1567,11 +1555,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpminsw %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1583,11 +1570,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1599,11 +1585,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -1946,38 +1931,35 @@ define i8 @test_v16i8(<16 x i8> %a0) {
;
; SSE41-LABEL: test_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: xorb $-128, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: xorb $-128, %al
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: xorb $-128, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %a0)
@ -2025,14 +2007,13 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; SSE41-LABEL: test_v32i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pminsb %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: xorb $-128, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -2040,13 +2021,12 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: xorb $-128, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -2055,13 +2035,12 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: xorb $-128, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -2070,13 +2049,12 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: xorb $-128, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -2137,14 +2115,13 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; SSE41-NEXT: pminsb %xmm3, %xmm1
; SSE41-NEXT: pminsb %xmm2, %xmm0
; SSE41-NEXT: pminsb %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: xorb $-128, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -2155,13 +2132,12 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: xorb $-128, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -2171,13 +2147,12 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: xorb $-128, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -2188,13 +2163,12 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: xorb $-128, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -2279,14 +2253,13 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; SSE41-NEXT: pminsb %xmm4, %xmm0
; SSE41-NEXT: pminsb %xmm2, %xmm0
; SSE41-NEXT: pminsb %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: xorb $-128, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -2303,13 +2276,12 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpminsb %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: xorb $-128, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -2321,13 +2293,12 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: xorb $-128, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -2339,13 +2310,12 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: xorb $-128, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq


@ -1166,8 +1166,8 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1212,8 +1212,8 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pmaxsw %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1269,18 +1269,18 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: notl %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1289,8 +1289,8 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: notl %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
;
@ -1299,8 +1299,8 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: notl %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -1309,8 +1309,8 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: retq
%1 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %a0)
@ -1339,8 +1339,8 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1348,10 +1348,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: pmaxuw %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: notl %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1362,8 +1362,8 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: notl %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1375,8 +1375,8 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: notl %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1387,8 +1387,8 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512BW-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: notl %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -1399,8 +1399,8 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -1434,8 +1434,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1445,10 +1445,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; SSE41-NEXT: pmaxuw %xmm2, %xmm0
; SSE41-NEXT: pmaxuw %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: notl %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1462,8 +1462,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: notl %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1476,8 +1476,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: notl %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1490,8 +1490,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512BW-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: notl %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -1504,8 +1504,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -1547,8 +1547,8 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: pmaxsw %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1562,10 +1562,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; SSE41-NEXT: pmaxuw %xmm2, %xmm0
; SSE41-NEXT: pmaxuw %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: notl %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1585,8 +1585,8 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: notl %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1601,8 +1601,8 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: notl %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1616,8 +1616,8 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512BW-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: notl %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -1631,8 +1631,8 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -1806,13 +1806,13 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; SSE41-LABEL: test_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminub %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -1820,11 +1820,11 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: notb %al
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@ -1835,8 +1835,8 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -1847,8 +1847,8 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %a0)
@ -1877,13 +1877,13 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: pmaxub %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminub %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -1893,11 +1893,11 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: notb %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1908,11 +1908,11 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: notb %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1925,8 +1925,8 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -1939,8 +1939,8 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -1974,13 +1974,13 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; SSE41-NEXT: pmaxub %xmm2, %xmm0
; SSE41-NEXT: pmaxub %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminub %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -1993,11 +1993,11 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: notb %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -2009,11 +2009,11 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: notb %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -2028,8 +2028,8 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -2044,8 +2044,8 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -2087,13 +2087,13 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; SSE41-NEXT: pmaxub %xmm2, %xmm0
; SSE41-NEXT: pmaxub %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminub %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -2112,11 +2112,11 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT: vpmaxub %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: notb %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -2130,11 +2130,11 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: notb %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -2150,8 +2150,8 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -2167,8 +2167,8 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
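
Note on the diffs above: these i8 umax reductions show the new fold directly. The final complement, previously a full-width vector op (vpxor against an all-ones register, or vpternlogq $15 on the AVX512 targets), is now a scalar notb on the byte that vpextrb pulls out. A minimal IR sketch of the underlying pattern (the function name is illustrative, not taken from the test suite):

define i8 @extract_of_not(<16 x i8> %x) {
  %not = xor <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %e = extractelement <16 x i8> %not, i32 0
  ret i8 %e
}

Since the xor operand is a constant build vector, the combine rewrites extractelement (xor X, -1), 0 as xor (extractelement X, 0), -1, which selects to pextrb plus notb.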

View File

@ -1389,18 +1389,18 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: notl %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1409,8 +1409,8 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: notl %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
;
@ -1419,8 +1419,8 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: notl %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -1429,8 +1429,8 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: retq
%1 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %a0)
@ -1459,8 +1459,8 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1468,10 +1468,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: pmaxuw %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: notl %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1482,8 +1482,8 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: notl %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1495,8 +1495,8 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: notl %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1507,8 +1507,8 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512BW-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: notl %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -1519,8 +1519,8 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -1554,8 +1554,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1565,10 +1565,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; SSE41-NEXT: pmaxuw %xmm2, %xmm0
; SSE41-NEXT: pmaxuw %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: notl %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1582,8 +1582,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: notl %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1596,8 +1596,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: notl %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1610,8 +1610,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512BW-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: notl %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -1624,8 +1624,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -1667,8 +1667,8 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: pmaxsw %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1682,10 +1682,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; SSE41-NEXT: pmaxuw %xmm2, %xmm0
; SSE41-NEXT: pmaxuw %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: notl %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1705,8 +1705,8 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: notl %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1721,8 +1721,8 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: notl %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1736,8 +1736,8 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512BW-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: notl %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -1751,8 +1751,8 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -2052,13 +2052,13 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; SSE41-LABEL: test_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminub %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -2066,11 +2066,11 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: notb %al
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@ -2081,8 +2081,8 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -2093,8 +2093,8 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %a0)
@ -2123,13 +2123,13 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: pmaxub %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminub %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -2139,11 +2139,11 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: notb %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -2154,11 +2154,11 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: notb %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -2171,8 +2171,8 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -2185,8 +2185,8 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -2220,13 +2220,13 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; SSE41-NEXT: pmaxub %xmm2, %xmm0
; SSE41-NEXT: pmaxub %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminub %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -2239,11 +2239,11 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: notb %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -2255,11 +2255,11 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: notb %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -2274,8 +2274,8 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -2290,8 +2290,8 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -2333,13 +2333,13 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; SSE41-NEXT: pmaxub %xmm2, %xmm0
; SSE41-NEXT: pmaxub %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminub %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -2358,11 +2358,11 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT: vpmaxub %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: notb %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -2376,11 +2376,11 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: notb %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -2396,8 +2396,8 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -2413,8 +2413,8 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
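
The i16 reductions follow the same shape in two flavors. The SSE2 paths compute unsigned max via a sign-bit bias around pmaxsw, and the final un-biasing xor now happens after the movd extract as xorl $32768 instead of a vector pxor. The SSE41/AVX/AVX512 paths run phminposuw on the complemented vector and now fold the trailing complement into a scalar notl. An illustrative IR reduction of the SSE2 case (hypothetical name, not from the tests):

define i16 @extract_of_unbias(<8 x i16> %x) {
  %unbias = xor <8 x i16> %x, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
  %e = extractelement <8 x i16> %unbias, i32 0
  ret i16 %e
}

-32768 is 0x8000 as an i16, so the scalarized form is exactly the movd plus xorl $32768 sequence seen in the new output.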

View File

@ -1165,8 +1165,8 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1211,8 +1211,8 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pminsw %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1268,8 +1268,8 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1319,8 +1319,8 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1391,8 +1391,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1479,8 +1479,8 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: pminsw %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;

View File

@ -1388,8 +1388,8 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1439,8 +1439,8 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1511,8 +1511,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1599,8 +1599,8 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: pminsw %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
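
The two unsigned-min reduction files above carry the identical change: pminsw operates on sign-biased values, and the un-biasing xor migrates from the vector side (pxor with the bias register) to the scalar side (xorl $32768) after movd. After the fold, the tail of each reduction is equivalent to this scalar form (a sketch, not test-suite code):

define i16 @after_scalarization(<8 x i16> %biased) {
  %e = extractelement <8 x i16> %biased, i32 0
  %r = xor i16 %e, -32768
  ret i16 %r
}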

View File

@ -738,18 +738,16 @@ define <2 x i64> @splatvar_rotate_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
define <4 x i32> @splatvar_rotate_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: splatvar_rotate_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: xorps %xmm2, %xmm2
; SSE2-NEXT: xorps %xmm3, %xmm3
; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: pslld %xmm3, %xmm4
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [32,32,32,32]
; SSE2-NEXT: psubd %xmm1, %xmm3
; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3]
; SSE2-NEXT: psrld %xmm2, %xmm0
; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: andl $31, %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pslld %xmm1, %xmm2
; SSE2-NEXT: movl $32, %ecx
; SSE2-NEXT: subl %eax, %ecx
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: psrld %xmm1, %xmm0
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_rotate_v4i32:
@ -837,18 +835,16 @@ define <4 x i32> @splatvar_rotate_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
;
; X32-SSE-LABEL: splatvar_rotate_v4i32:
; X32-SSE: # %bb.0:
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1
; X32-SSE-NEXT: xorps %xmm2, %xmm2
; X32-SSE-NEXT: xorps %xmm3, %xmm3
; X32-SSE-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; X32-SSE-NEXT: movdqa %xmm0, %xmm4
; X32-SSE-NEXT: pslld %xmm3, %xmm4
; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [32,32,32,32]
; X32-SSE-NEXT: psubd %xmm1, %xmm3
; X32-SSE-NEXT: movss {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3]
; X32-SSE-NEXT: psrld %xmm2, %xmm0
; X32-SSE-NEXT: por %xmm4, %xmm0
; X32-SSE-NEXT: movd %xmm1, %eax
; X32-SSE-NEXT: andl $31, %eax
; X32-SSE-NEXT: movd %eax, %xmm1
; X32-SSE-NEXT: movdqa %xmm0, %xmm2
; X32-SSE-NEXT: pslld %xmm1, %xmm2
; X32-SSE-NEXT: movl $32, %ecx
; X32-SSE-NEXT: subl %eax, %ecx
; X32-SSE-NEXT: movd %ecx, %xmm1
; X32-SSE-NEXT: psrld %xmm1, %xmm0
; X32-SSE-NEXT: por %xmm2, %xmm0
; X32-SSE-NEXT: retl
%splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
%splat32 = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %splat
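
splatvar_rotate_v4i32 benefits twice: both vector binops feeding the extracted shift amount (the and with <31,31,31,31> and the sub from <32,32,32,32>) are scalarized, so the amounts are computed with andl $31 and movl $32 / subl in GPRs and only moved back into an XMM register for pslld/psrld. A hand-reduced IR sketch of the subtraction half, mirroring the test source above (the function name is hypothetical):

define i32 @extract_sub_splat(<4 x i32> %b) {
  %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
  %sub = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %splat
  %amt = extractelement <4 x i32> %sub, i32 0
  ret i32 %amt
}

Here extractelement (sub C, X), 0 becomes sub 32, (extractelement X, 0), matching the new movl $32 / subl %eax, %ecx sequence.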

View File

@ -420,26 +420,14 @@ define i32 @PR17487(i1 %tobool) {
; X64-LIN-LABEL: PR17487:
; X64-LIN: # %bb.0:
; X64-LIN-NEXT: movd %edi, %xmm0
; X64-LIN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X64-LIN-NEXT: pandn {{.*}}(%rip), %xmm0
; X64-LIN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-LIN-NEXT: movq %xmm0, %rcx
; X64-LIN-NEXT: xorl %eax, %eax
; X64-LIN-NEXT: cmpq $1, %rcx
; X64-LIN-NEXT: setne %al
; X64-LIN-NEXT: pextrw $0, %xmm0, %eax
; X64-LIN-NEXT: andl $1, %eax
; X64-LIN-NEXT: retq
;
; X64-WIN-LABEL: PR17487:
; X64-WIN: # %bb.0:
; X64-WIN-NEXT: andb $1, %cl
; X64-WIN-NEXT: movzbl %cl, %eax
; X64-WIN-NEXT: movd %eax, %xmm0
; X64-WIN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X64-WIN-NEXT: pandn __xmm@{{.*}}(%rip), %xmm0
; X64-WIN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-WIN-NEXT: movq %xmm0, %rcx
; X64-WIN-NEXT: xorl %eax, %eax
; X64-WIN-NEXT: cmpq $1, %rcx
; X64-WIN-NEXT: setne %al
; X64-WIN-NEXT: retq
%tmp = insertelement <2 x i1> undef, i1 %tobool, i32 1
%tmp1 = zext <2 x i1> %tmp to <2 x i64>