[DAGCombiner][x86] scalarize binop followed by extractelement
As noted in PR39973 and D55558:
https://bugs.llvm.org/show_bug.cgi?id=39973
...this is a partial implementation of a fold that we do as an IR canonicalization in instcombine:

// extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)

We want to have this in the DAG too because as we can see in some of the test diffs (reductions), the pattern may not be visible in IR.

Given that this is already an IR canonicalization, any backend that would prefer a vector op over a scalar op is expected to already have the reverse transform in DAG lowering (not sure if that's a realistic expectation though).

The transform is limited with a TLI hook because there's an existing transform in CodeGenPrepare that tries to do the opposite transform.

Differential Revision: https://reviews.llvm.org/D55722

llvm-svn: 350354
This commit is contained in:
parent 3076709e1e
commit 36c33bfea9
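To make the quoted fold concrete before reading the diff, here is a minimal IR illustration (a hypothetical function, not one of this patch's tests) of the canonicalization that instcombine already performs:

  define float @example(<4 x float> %x, <4 x float> %y) {
    ; extractelement of a binop...
    %b = fmul <4 x float> %x, %y
    %e = extractelement <4 x float> %b, i32 2
    ret float %e
  }

  ; ...becomes a binop of extractelements:
  define float @example_scalarized(<4 x float> %x, <4 x float> %y) {
    %x2 = extractelement <4 x float> %x, i32 2
    %y2 = extractelement <4 x float> %y, i32 2
    %r = fmul float %x2, %y2
    ret float %r
  }

The patch below performs the same move on SelectionDAG nodes, gated by the new shouldScalarizeBinop() hook.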
@@ -2407,6 +2407,12 @@ public:
     return false;
   }
 
+  /// Try to convert an extract element of a vector binary operation into an
+  /// extract element followed by a scalar operation.
+  virtual bool shouldScalarizeBinop(SDValue VecOp) const {
+    return false;
+  }
+
   // Return true if it is profitable to use a scalar input to a BUILD_VECTOR
   // even if the vector itself has multiple uses.
   virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const {
@@ -915,9 +915,11 @@ static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
 
 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
 // undef's.
-static bool isAnyConstantBuildVector(const SDNode *N) {
-  return ISD::isBuildVectorOfConstantSDNodes(N) ||
-         ISD::isBuildVectorOfConstantFPSDNodes(N);
+static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
+  if (V.getOpcode() != ISD::BUILD_VECTOR)
+    return false;
+  return isConstantOrConstantVector(V, NoOpaques) ||
+         ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
 }
 
 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
@@ -15580,6 +15582,40 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
   return SDValue(EVE, 0);
 }
 
+/// Transform a vector binary operation into a scalar binary operation by moving
+/// the math/logic after an extract element of a vector.
+static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
+                                       bool LegalOperations) {
+  SDValue Vec = ExtElt->getOperand(0);
+  SDValue Index = ExtElt->getOperand(1);
+  auto *IndexC = dyn_cast<ConstantSDNode>(Index);
+  if (!IndexC || !ISD::isBinaryOp(Vec.getNode()) || !Vec.hasOneUse())
+    return SDValue();
+
+  // Targets may want to avoid this to prevent an expensive register transfer.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.shouldScalarizeBinop(Vec))
+    return SDValue();
+
+  // Extracting an element of a vector constant is constant-folded, so this
+  // transform is just replacing a vector op with a scalar op while moving the
+  // extract.
+  SDValue Op0 = Vec.getOperand(0);
+  SDValue Op1 = Vec.getOperand(1);
+  if (isAnyConstantBuildVector(Op0, true) ||
+      isAnyConstantBuildVector(Op1, true)) {
+    // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
+    // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
+    SDLoc DL(ExtElt);
+    EVT VT = ExtElt->getValueType(0);
+    SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
+    SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
+    return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
+  }
+
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
   SDValue VecOp = N->getOperand(0);
   SDValue Index = N->getOperand(1);
@@ -15670,6 +15706,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
     }
   }
 
+  if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
+    return BO;
+
   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
   // We only perform this optimization before the op legalization phase because
   // we may introduce new vector instructions which are not backed by TD
@@ -17055,8 +17094,8 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
   if (!N1->hasOneUse())
     return SDValue();
 
-  bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
-  bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
+  bool N0AnyConst = isAnyConstantBuildVector(N0);
+  bool N1AnyConst = isAnyConstantBuildVector(N1);
   if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
     return SDValue();
   if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
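Note that scalarizeExtractedBinop() above only fires when one binop operand is a constant build vector. A sketch of why that keeps the transform cheap (illustrative IR, mirroring the ext_fadd_v4f32 test further down): for

  %bo = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 42.0>
  %ext = extractelement <4 x float> %bo, i32 2

the rewritten form needs only one new extract, because the extract of the constant operand constant-folds to the immediate 3.0:

  %e = extractelement <4 x float> %x, i32 2
  %ext = fadd float %e, 3.0

With two variable operands the rewrite would create two extracts, a different cost trade-off that this initial patch does not take on.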
@@ -4875,6 +4875,18 @@ bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
   return (Index % ResVT.getVectorNumElements()) == 0;
 }
 
+bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
+  // If the vector op is not supported, try to convert to scalar.
+  EVT VecVT = VecOp.getValueType();
+  if (!isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), VecVT))
+    return true;
+
+  // If the vector op is supported, but the scalar op is not, the transform may
+  // not be worthwhile.
+  EVT ScalarVT = VecVT.getScalarType();
+  return isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), ScalarVT);
+}
+
 bool X86TargetLowering::isCheapToSpeculateCttz() const {
   // Speculate cttz only if we can directly use TZCNT.
   return Subtarget.hasBMI();
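A sketch of the first case the hook checks (an assumed example, not one of this patch's tests): a logic op on a narrow, unsupported vector type is a scalarization candidate, which is what the first test diff below exercises:

  define i8 @ext_and_v4i8(<4 x i8> %x) {
    %b = and <4 x i8> %x, <i8 95, i8 95, i8 95, i8 95>
    %e = extractelement <4 x i8> %b, i32 2
    ret i8 %e
  }

Because the vector 'and' on <4 x i8> is not directly supported, the combine moves the extract ahead of the op and the mask becomes a scalar 'andb $95', as the updated CHECK lines in that test show.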
@@ -1039,6 +1039,11 @@ namespace llvm {
     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                  unsigned Index) const override;
 
+    /// Scalar ops always have equal or better analysis/performance/power than
+    /// the vector equivalent, so this always makes sense if the scalar op is
+    /// supported.
+    bool shouldScalarizeBinop(SDValue) const override;
+
     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
                                       unsigned AddrSpace) const override {
       // If we can replace more than 2 scalar stores, there will be a reduction
@@ -8,9 +8,8 @@ define i8 @foo(<4 x i8>* %V) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
-; CHECK-NEXT: pextrw $4, %xmm0, %eax
+; CHECK-NEXT: pextrw $2, %xmm0, %eax
+; CHECK-NEXT: andb $95, %al
 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
 ; CHECK-NEXT: retq
   %Vp = bitcast <4 x i8>* %V to <3 x i8>*
@@ -4,8 +4,8 @@
 define float @ext_fadd_v4f32(<4 x float> %x) {
 ; CHECK-LABEL: ext_fadd_v4f32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: addps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: addss {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
   %bo = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 42.0>
   %ext = extractelement <4 x float> %bo, i32 2
@@ -15,9 +15,9 @@ define float @ext_fadd_v4f32(<4 x float> %x) {
 define float @ext_fsub_v4f32(<4 x float> %x) {
 ; CHECK-LABEL: ext_fsub_v4f32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: movaps {{.*#+}} xmm1 = <u,2.0E+0,u,u>
-; CHECK-NEXT: subps %xmm0, %xmm1
-; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: subss %xmm0, %xmm1
 ; CHECK-NEXT: movaps %xmm1, %xmm0
 ; CHECK-NEXT: retq
   %bo = fsub <4 x float> <float 1.0, float 2.0, float 3.0, float 42.0>, %x
@@ -28,19 +28,20 @@ define float @ext_fsub_v4f32(<4 x float> %x) {
 define float @ext_fmul_v4f32(<4 x float> %x) {
 ; CHECK-LABEL: ext_fmul_v4f32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
   %bo = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 42.0>
   %ext = extractelement <4 x float> %bo, i32 3
   ret float %ext
 }
 
+; TODO: X / 1.0 --> X
+
 define float @ext_fdiv_v4f32(<4 x float> %x) {
 ; CHECK-LABEL: ext_fdiv_v4f32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: divps %xmm1, %xmm0
+; CHECK-NEXT: divss {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
   %bo = fdiv <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 42.0>
   %ext = extractelement <4 x float> %bo, i32 0
@@ -50,9 +51,9 @@ define float @ext_fdiv_v4f32(<4 x float> %x) {
 define float @ext_fdiv_v4f32_constant_op0(<4 x float> %x) {
 ; CHECK-LABEL: ext_fdiv_v4f32_constant_op0:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: movaps {{.*#+}} xmm1 = <u,2.0E+0,u,u>
-; CHECK-NEXT: divps %xmm0, %xmm1
-; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: divss %xmm0, %xmm1
 ; CHECK-NEXT: movaps %xmm1, %xmm0
 ; CHECK-NEXT: retq
   %bo = fdiv <4 x float> <float 1.0, float 2.0, float 3.0, float 42.0>, %x
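The reduction tests that follow illustrate the commit message's point that the pattern may only appear in the DAG. In IR, these functions are shuffle/compare/select ladders ending in a plain extract, e.g. (abbreviated from the v8i16 test below; the result name is illustrative):

  %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  ...
  %r = extractelement <8 x i16> %minmax, i32 0

There is no extract-of-binop to fold at the IR level. Only after x86 lowering rewrites signed min/max via phminposuw (xor with a sign-adjusting constant, unsigned min, xor back) does an extract of a vector xor appear, and this combine then turns the trailing vector pxor into the scalar 'xorl'/'xorb' seen in the updated checks.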
@@ -211,21 +211,19 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
 ;
 ; X86-SSE42-LABEL: test_reduce_v8i16:
 ; X86-SSE42: ## %bb.0:
-; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor LCPI2_0, %xmm0
 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
 ; X86-SSE42-NEXT: movd %xmm0, %eax
+; X86-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-SSE42-NEXT: retl
 ;
 ; X86-AVX-LABEL: test_reduce_v8i16:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor LCPI2_0, %xmm0, %xmm0
 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X86-AVX-NEXT: vmovd %xmm0, %eax
+; X86-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT: retl
 ;
@@ -244,21 +242,19 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
 ;
 ; X64-SSE42-LABEL: test_reduce_v8i16:
 ; X64-SSE42: ## %bb.0:
-; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
 ; X64-SSE42-NEXT: movd %xmm0, %eax
+; X64-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-SSE42-NEXT: retq
 ;
 ; X64-AVX-LABEL: test_reduce_v8i16:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X64-AVX-NEXT: vmovd %xmm0, %eax
+; X64-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-AVX-NEXT: retq
   %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -309,26 +305,24 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
 ;
 ; X86-SSE42-LABEL: test_reduce_v16i8:
 ; X86-SSE42: ## %bb.0:
-; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X86-SSE42-NEXT: psrlw $8, %xmm2
-; X86-SSE42-NEXT: pminub %xmm0, %xmm2
-; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor LCPI3_0, %xmm0
+; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE42-NEXT: psrlw $8, %xmm1
+; X86-SSE42-NEXT: pminub %xmm0, %xmm1
+; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: xorb $127, %al
 ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-SSE42-NEXT: retl
 ;
 ; X86-AVX-LABEL: test_reduce_v16i8:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X86-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor LCPI3_0, %xmm0, %xmm0
+; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: xorb $127, %al
 ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT: retl
 ;
@@ -366,26 +360,24 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
 ;
 ; X64-SSE42-LABEL: test_reduce_v16i8:
 ; X64-SSE42: ## %bb.0:
-; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X64-SSE42-NEXT: psrlw $8, %xmm2
-; X64-SSE42-NEXT: pminub %xmm0, %xmm2
-; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
+; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
+; X64-SSE42-NEXT: psrlw $8, %xmm1
+; X64-SSE42-NEXT: pminub %xmm0, %xmm1
+; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: xorb $127, %al
 ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-SSE42-NEXT: retq
 ;
 ; X64-AVX-LABEL: test_reduce_v16i8:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT: xorb $127, %al
 ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-AVX-NEXT: retq
   %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -736,11 +728,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
 ; X86-SSE42-LABEL: test_reduce_v16i16:
 ; X86-SSE42: ## %bb.0:
 ; X86-SSE42-NEXT: pmaxsw %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor LCPI6_0, %xmm0
 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
 ; X86-SSE42-NEXT: movd %xmm0, %eax
+; X86-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-SSE42-NEXT: retl
 ;
@@ -748,11 +739,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
 ; X86-AVX1: ## %bb.0:
 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; X86-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor LCPI6_0, %xmm0, %xmm0
 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
+; X86-AVX1-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-AVX1-NEXT: vzeroupper
 ; X86-AVX1-NEXT: retl
@@ -761,11 +751,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
 ; X86-AVX2: ## %bb.0:
 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; X86-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor LCPI6_0, %xmm0, %xmm0
 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
+; X86-AVX2-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-AVX2-NEXT: vzeroupper
 ; X86-AVX2-NEXT: retl
@@ -787,11 +776,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
 ; X64-SSE42-LABEL: test_reduce_v16i16:
 ; X64-SSE42: ## %bb.0:
 ; X64-SSE42-NEXT: pmaxsw %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
 ; X64-SSE42-NEXT: movd %xmm0, %eax
+; X64-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-SSE42-NEXT: retq
 ;
@@ -799,11 +787,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
 ; X64-AVX1: ## %bb.0:
 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; X64-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT: vmovd %xmm0, %eax
+; X64-AVX1-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-AVX1-NEXT: vzeroupper
 ; X64-AVX1-NEXT: retq
@@ -812,11 +799,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
 ; X64-AVX2: ## %bb.0:
 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; X64-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT: vmovd %xmm0, %eax
+; X64-AVX2-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-AVX2-NEXT: vzeroupper
 ; X64-AVX2-NEXT: retq
@@ -825,11 +811,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
 ; X64-AVX512: ## %bb.0:
 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; X64-AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-AVX512-NEXT: vzeroupper
 ; X64-AVX512-NEXT: retq
@@ -890,14 +875,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
 ; X86-SSE42-LABEL: test_reduce_v32i8:
 ; X86-SSE42: ## %bb.0:
 ; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X86-SSE42-NEXT: psrlw $8, %xmm2
-; X86-SSE42-NEXT: pminub %xmm0, %xmm2
-; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor LCPI7_0, %xmm0
+; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE42-NEXT: psrlw $8, %xmm1
+; X86-SSE42-NEXT: pminub %xmm0, %xmm1
+; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: xorb $127, %al
 ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-SSE42-NEXT: retl
 ;
@@ -905,13 +889,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
 ; X86-AVX1: ## %bb.0:
 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor LCPI7_0, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX1-NEXT: xorb $127, %al
 ; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-AVX1-NEXT: vzeroupper
 ; X86-AVX1-NEXT: retl
@@ -920,13 +903,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
 ; X86-AVX2: ## %bb.0:
 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; X86-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor LCPI7_0, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX2-NEXT: xorb $127, %al
 ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-AVX2-NEXT: vzeroupper
 ; X86-AVX2-NEXT: retl
@@ -971,14 +953,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
 ; X64-SSE42-LABEL: test_reduce_v32i8:
 ; X64-SSE42: ## %bb.0:
 ; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X64-SSE42-NEXT: psrlw $8, %xmm2
-; X64-SSE42-NEXT: pminub %xmm0, %xmm2
-; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
+; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
+; X64-SSE42-NEXT: psrlw $8, %xmm1
+; X64-SSE42-NEXT: pminub %xmm0, %xmm1
+; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: xorb $127, %al
 ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-SSE42-NEXT: retq
 ;
@@ -986,13 +967,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
 ; X64-AVX1: ## %bb.0:
 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: xorb $127, %al
 ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-AVX1-NEXT: vzeroupper
 ; X64-AVX1-NEXT: retq
@@ -1001,13 +981,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
 ; X64-AVX2: ## %bb.0:
 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; X64-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: xorb $127, %al
 ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-AVX2-NEXT: vzeroupper
 ; X64-AVX2-NEXT: retq
@@ -1016,13 +995,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
 ; X64-AVX512: ## %bb.0:
 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; X64-AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: xorb $127, %al
 ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-AVX512-NEXT: vzeroupper
 ; X64-AVX512-NEXT: retq
@@ -1513,11 +1491,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X86-SSE42-NEXT: pmaxsw %xmm3, %xmm1
 ; X86-SSE42-NEXT: pmaxsw %xmm2, %xmm0
 ; X86-SSE42-NEXT: pmaxsw %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor LCPI10_0, %xmm0
 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
 ; X86-SSE42-NEXT: movd %xmm0, %eax
+; X86-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-SSE42-NEXT: retl
 ;
@@ -1528,11 +1505,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X86-AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
 ; X86-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
 ; X86-AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0
-; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor LCPI10_0, %xmm0, %xmm0
 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
+; X86-AVX1-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-AVX1-NEXT: vzeroupper
 ; X86-AVX1-NEXT: retl
@@ -1542,11 +1518,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X86-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; X86-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor LCPI10_0, %xmm0, %xmm0
 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
+; X86-AVX2-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-AVX2-NEXT: vzeroupper
 ; X86-AVX2-NEXT: retl
@@ -1572,11 +1547,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X64-SSE42-NEXT: pmaxsw %xmm3, %xmm1
 ; X64-SSE42-NEXT: pmaxsw %xmm2, %xmm0
 ; X64-SSE42-NEXT: pmaxsw %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
 ; X64-SSE42-NEXT: movd %xmm0, %eax
+; X64-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-SSE42-NEXT: retq
 ;
@@ -1587,11 +1561,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X64-AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
 ; X64-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0
-; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT: vmovd %xmm0, %eax
+; X64-AVX1-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-AVX1-NEXT: vzeroupper
 ; X64-AVX1-NEXT: retq
@@ -1601,11 +1574,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X64-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; X64-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT: vmovd %xmm0, %eax
+; X64-AVX2-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-AVX2-NEXT: vzeroupper
 ; X64-AVX2-NEXT: retq
@@ -1616,11 +1588,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X64-AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; X64-AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-AVX512-NEXT: vzeroupper
 ; X64-AVX512-NEXT: retq
@@ -1696,14 +1667,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 ; X86-SSE42-NEXT: pmaxsb %xmm3, %xmm1
 ; X86-SSE42-NEXT: pmaxsb %xmm2, %xmm0
 ; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X86-SSE42-NEXT: psrlw $8, %xmm2
-; X86-SSE42-NEXT: pminub %xmm0, %xmm2
-; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor LCPI11_0, %xmm0
+; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE42-NEXT: psrlw $8, %xmm1
+; X86-SSE42-NEXT: pminub %xmm0, %xmm1
+; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: xorb $127, %al
 ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-SSE42-NEXT: retl
 ;
@@ -1714,13 +1684,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 ; X86-AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
 ; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
 ; X86-AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
-; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor LCPI11_0, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX1-NEXT: xorb $127, %al
 ; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-AVX1-NEXT: vzeroupper
 ; X86-AVX1-NEXT: retl
@@ -1730,13 +1699,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 ; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; X86-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor LCPI11_0, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX2-NEXT: xorb $127, %al
 ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-AVX2-NEXT: vzeroupper
 ; X86-AVX2-NEXT: retl
@@ -1793,14 +1761,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 ; X64-SSE42-NEXT: pmaxsb %xmm3, %xmm1
 ; X64-SSE42-NEXT: pmaxsb %xmm2, %xmm0
 ; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X64-SSE42-NEXT: psrlw $8, %xmm2
-; X64-SSE42-NEXT: pminub %xmm0, %xmm2
-; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
+; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
+; X64-SSE42-NEXT: psrlw $8, %xmm1
+; X64-SSE42-NEXT: pminub %xmm0, %xmm1
+; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: xorb $127, %al
 ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-SSE42-NEXT: retq
 ;
@@ -1811,13 +1778,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 ; X64-AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
 ; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
-; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: xorb $127, %al
 ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-AVX1-NEXT: vzeroupper
 ; X64-AVX1-NEXT: retq
@@ -1827,13 +1793,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 ; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; X64-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: xorb $127, %al
 ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-AVX2-NEXT: vzeroupper
 ; X64-AVX2-NEXT: retq
@@ -1844,13 +1809,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 ; X64-AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; X64-AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: xorb $127, %al
 ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-AVX512-NEXT: vzeroupper
 ; X64-AVX512-NEXT: retq
@@ -213,21 +213,19 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
 ;
 ; X86-SSE42-LABEL: test_reduce_v8i16:
 ; X86-SSE42: ## %bb.0:
-; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor LCPI2_0, %xmm0
 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
 ; X86-SSE42-NEXT: movd %xmm0, %eax
+; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
 ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-SSE42-NEXT: retl
 ;
 ; X86-AVX-LABEL: test_reduce_v8i16:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor LCPI2_0, %xmm0, %xmm0
 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X86-AVX-NEXT: vmovd %xmm0, %eax
+; X86-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000
 ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT: retl
 ;
@@ -246,21 +244,19 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
 ;
 ; X64-SSE42-LABEL: test_reduce_v8i16:
 ; X64-SSE42: ## %bb.0:
-; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
 ; X64-SSE42-NEXT: movd %xmm0, %eax
+; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
 ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-SSE42-NEXT: retq
 ;
 ; X64-AVX-LABEL: test_reduce_v8i16:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X64-AVX-NEXT: vmovd %xmm0, %eax
+; X64-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000
 ; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-AVX-NEXT: retq
   %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -311,26 +307,24 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
 ;
 ; X86-SSE42-LABEL: test_reduce_v16i8:
 ; X86-SSE42: ## %bb.0:
-; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X86-SSE42-NEXT: psrlw $8, %xmm2
-; X86-SSE42-NEXT: pminub %xmm0, %xmm2
-; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor LCPI3_0, %xmm0
+; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE42-NEXT: psrlw $8, %xmm1
+; X86-SSE42-NEXT: pminub %xmm0, %xmm1
+; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: xorb $-128, %al
 ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-SSE42-NEXT: retl
 ;
 ; X86-AVX-LABEL: test_reduce_v16i8:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X86-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor LCPI3_0, %xmm0, %xmm0
+; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: xorb $-128, %al
 ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT: retl
 ;
@@ -368,26 +362,24 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
 ;
 ; X64-SSE42-LABEL: test_reduce_v16i8:
 ; X64-SSE42: ## %bb.0:
-; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X64-SSE42-NEXT: psrlw $8, %xmm2
-; X64-SSE42-NEXT: pminub %xmm0, %xmm2
-; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
+; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
+; X64-SSE42-NEXT: psrlw $8, %xmm1
+; X64-SSE42-NEXT: pminub %xmm0, %xmm1
+; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: xorb $-128, %al
 ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-SSE42-NEXT: retq
 ;
 ; X64-AVX-LABEL: test_reduce_v16i8:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT: xorb $-128, %al
 ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-AVX-NEXT: retq
   %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -740,11 +732,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
 ; X86-SSE42-LABEL: test_reduce_v16i16:
 ; X86-SSE42: ## %bb.0:
 ; X86-SSE42-NEXT: pminsw %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor LCPI6_0, %xmm0
 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
 ; X86-SSE42-NEXT: movd %xmm0, %eax
+; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
 ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-SSE42-NEXT: retl
 ;
@@ -752,11 +743,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
 ; X86-AVX1: ## %bb.0:
 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor LCPI6_0, %xmm0, %xmm0
 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
+; X86-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000
 ; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-AVX1-NEXT: vzeroupper
 ; X86-AVX1-NEXT: retl
@@ -765,11 +755,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
 ; X86-AVX2: ## %bb.0:
 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; X86-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor LCPI6_0, %xmm0, %xmm0
 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
+; X86-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000
 ; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-AVX2-NEXT: vzeroupper
 ; X86-AVX2-NEXT: retl
@@ -791,11 +780,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
 ; X64-SSE42-LABEL: test_reduce_v16i16:
 ; X64-SSE42: ## %bb.0:
 ; X64-SSE42-NEXT: pminsw %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
 ; X64-SSE42-NEXT: movd %xmm0, %eax
+; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
 ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-SSE42-NEXT: retq
 ;
@@ -803,11 +791,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
 ; X64-AVX1: ## %bb.0:
 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT: vmovd %xmm0, %eax
+; X64-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000
 ; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-AVX1-NEXT: vzeroupper
 ; X64-AVX1-NEXT: retq
@@ -816,11 +803,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
 ; X64-AVX2: ## %bb.0:
 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; X64-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT: vmovd %xmm0, %eax
+; X64-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000
 ; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-AVX2-NEXT: vzeroupper
 ; X64-AVX2-NEXT: retq
@@ -829,11 +815,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
 ; X64-AVX512: ## %bb.0:
 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; X64-AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000
 ; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-AVX512-NEXT: vzeroupper
 ; X64-AVX512-NEXT: retq
@@ -894,14 +879,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
 ; X86-SSE42-LABEL: test_reduce_v32i8:
 ; X86-SSE42: ## %bb.0:
 ; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
-; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X86-SSE42-NEXT: psrlw $8, %xmm2
-; X86-SSE42-NEXT: pminub %xmm0, %xmm2
-; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X86-SSE42-NEXT: pxor %xmm1, %xmm0
+; X86-SSE42-NEXT: pxor LCPI7_0, %xmm0
+; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE42-NEXT: psrlw $8, %xmm1
+; X86-SSE42-NEXT: pminub %xmm0, %xmm1
+; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X86-SSE42-NEXT: xorb $-128, %al
 ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-SSE42-NEXT: retl
 ;
@@ -909,13 +893,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
 ; X86-AVX1: ## %bb.0:
 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor LCPI7_0, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX1-NEXT: xorb $-128, %al
 ; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-AVX1-NEXT: vzeroupper
 ; X86-AVX1-NEXT: retl
@@ -924,13 +907,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
 ; X86-AVX2: ## %bb.0:
 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpxor LCPI7_0, %xmm0, %xmm0
+; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX2-NEXT: xorb $-128, %al
 ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-AVX2-NEXT: vzeroupper
 ; X86-AVX2-NEXT: retl
@@ -975,14 +957,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
 ; X64-SSE42-LABEL: test_reduce_v32i8:
 ; X64-SSE42: ## %bb.0:
 ; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
-; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
-; X64-SSE42-NEXT: psrlw $8, %xmm2
-; X64-SSE42-NEXT: pminub %xmm0, %xmm2
-; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
-; X64-SSE42-NEXT: pxor %xmm1, %xmm0
+; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
+; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
+; X64-SSE42-NEXT: psrlw $8, %xmm1
+; X64-SSE42-NEXT: pminub %xmm0, %xmm1
+; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
+; X64-SSE42-NEXT: xorb $-128, %al
 ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-SSE42-NEXT: retq
 ;
@@ -990,13 +971,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
 ; X64-AVX1: ## %bb.0:
 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: xorb $-128, %al
 ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-AVX1-NEXT: vzeroupper
 ; X64-AVX1-NEXT: retq
@@ -1005,13 +985,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
 ; X64-AVX2: ## %bb.0:
 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: xorb $-128, %al
 ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-AVX2-NEXT: vzeroupper
 ; X64-AVX2-NEXT: retq
@@ -1020,13 +999,12 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
 ; X64-AVX512: ## %bb.0:
 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
-; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
-; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: xorb $-128, %al
 ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-AVX512-NEXT: vzeroupper
 ; X64-AVX512-NEXT: retq
@ -1517,11 +1495,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-SSE42-NEXT: pminsw %xmm3, %xmm1
; X86-SSE42-NEXT: pminsw %xmm2, %xmm0
; X86-SSE42-NEXT: pminsw %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor LCPI10_0, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
@ -1532,11 +1509,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor LCPI10_0, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
@ -1546,11 +1522,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor LCPI10_0, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
@ -1576,11 +1551,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-SSE42-NEXT: pminsw %xmm3, %xmm1
; X64-SSE42-NEXT: pminsw %xmm2, %xmm0
; X64-SSE42-NEXT: pminsw %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
@ -1591,11 +1565,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
@ -1605,11 +1578,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@ -1620,11 +1592,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
@ -1700,14 +1671,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-SSE42-NEXT: pminsb %xmm3, %xmm1
; X86-SSE42-NEXT: pminsb %xmm2, %xmm0
; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
; X86-SSE42-NEXT: psrlw $8, %xmm2
; X86-SSE42-NEXT: pminub %xmm0, %xmm2
; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor LCPI11_0, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: xorb $-128, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
@ -1718,13 +1688,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor LCPI11_0, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX1-NEXT: xorb $-128, %al
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
@ -1734,13 +1703,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor LCPI11_0, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX2-NEXT: xorb $-128, %al
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
@ -1797,14 +1765,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-SSE42-NEXT: pminsb %xmm3, %xmm1
; X64-SSE42-NEXT: pminsb %xmm2, %xmm0
; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
; X64-SSE42-NEXT: psrlw $8, %xmm2
; X64-SSE42-NEXT: pminub %xmm0, %xmm2
; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: xorb $-128, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
@ -1815,13 +1782,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT: xorb $-128, %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
@ -1831,13 +1797,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT: xorb $-128, %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@ -1848,13 +1813,12 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT: xorb $-128, %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq

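The reduction diffs above all show the same effect: the bitwise xor that used to be applied to the whole vector before the final extract is now applied to the extracted scalar instead (vpxor plus vpextrb becomes vpextrb plus xorb $-128), removing a constant-pool vector load. A hand-reduced IR sketch of that shape (the function name is made up for illustration; in the checked-in reduction tests the pattern only materializes in the DAG after the reduction is expanded, which is why the IR-level instcombine fold cannot catch it):

define i8 @extract_xor_sketch(<16 x i8> %x) {
  ; xor with a splat constant, then extract lane 0
  %v = xor <16 x i8> %x, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
  %r = extractelement <16 x i8> %v, i32 0
  ret i8 %r
}

With the new combine the xor is rewritten as a scalar operation on the extracted lane, so x86 can emit roughly pextrb followed by xorb $-128 instead of a vector pxor against a constant-pool load.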
@ -240,18 +240,18 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v8i16:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: notl %eax
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
@ -260,8 +260,8 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: notl %eax
; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT: retl
;
@ -282,18 +282,18 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v8i16:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: notl %eax
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
@ -302,8 +302,8 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: notl %eax
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: retq
;
@ -312,8 +312,8 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: notl %eax
; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: retq
;
@ -321,8 +321,8 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: notl %eax
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: retq
%1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
@ -358,13 +358,13 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X86-SSE42-LABEL: test_reduce_v16i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
; X86-SSE42-NEXT: psrlw $8, %xmm2
; X86-SSE42-NEXT: pminub %xmm0, %xmm2
; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor %xmm0, %xmm1
; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
; X86-SSE42-NEXT: psrlw $8, %xmm0
; X86-SSE42-NEXT: pminub %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: notb %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
@ -372,11 +372,11 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
; X86-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX-NEXT: notb %al
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: retl
;
@ -399,13 +399,13 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X64-SSE42-LABEL: test_reduce_v16i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
; X64-SSE42-NEXT: psrlw $8, %xmm2
; X64-SSE42-NEXT: pminub %xmm0, %xmm2
; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor %xmm0, %xmm1
; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
; X64-SSE42-NEXT: psrlw $8, %xmm0
; X64-SSE42-NEXT: pminub %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: notb %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
@ -413,11 +413,11 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT: notb %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: retq
;
@ -425,11 +425,11 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT: notb %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: retq
;
@ -439,8 +439,8 @@ define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT: notb %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: retq
%1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@ -845,8 +845,8 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
@ -854,10 +854,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pmaxuw %xmm1, %xmm0
; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: notl %eax
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
@ -868,8 +868,8 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: notl %eax
; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
@ -881,8 +881,8 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: notl %eax
; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
@ -908,8 +908,8 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
@ -917,10 +917,10 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pmaxuw %xmm1, %xmm0
; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: notl %eax
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
@ -931,8 +931,8 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: notl %eax
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
@ -944,8 +944,8 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: notl %eax
; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@ -956,8 +956,8 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-AVX512-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: notl %eax
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
@ -999,13 +999,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0
; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
; X86-SSE42-NEXT: psrlw $8, %xmm2
; X86-SSE42-NEXT: pminub %xmm0, %xmm2
; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor %xmm0, %xmm1
; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
; X86-SSE42-NEXT: psrlw $8, %xmm0
; X86-SSE42-NEXT: pminub %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: notb %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
@ -1015,11 +1015,11 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX1-NEXT: notb %al
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
@ -1030,11 +1030,11 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX2-NEXT: notb %al
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
@ -1060,13 +1060,13 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0
; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
; X64-SSE42-NEXT: psrlw $8, %xmm2
; X64-SSE42-NEXT: pminub %xmm0, %xmm2
; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor %xmm0, %xmm1
; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
; X64-SSE42-NEXT: psrlw $8, %xmm0
; X64-SSE42-NEXT: pminub %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: notb %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
@ -1076,11 +1076,11 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT: notb %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
@ -1091,11 +1091,11 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT: notb %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@ -1108,8 +1108,8 @@ define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT: notb %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
@ -1688,8 +1688,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pxor %xmm4, %xmm1
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
; X86-SSE2-NEXT: pxor %xmm4, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
@ -1699,10 +1699,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-SSE42-NEXT: pmaxuw %xmm2, %xmm0
; X86-SSE42-NEXT: pmaxuw %xmm1, %xmm0
; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: notl %eax
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
@ -1716,8 +1716,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: notl %eax
; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
@ -1730,8 +1730,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: notl %eax
; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
@ -1761,8 +1761,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
@ -1772,10 +1772,10 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-SSE42-NEXT: pmaxuw %xmm2, %xmm0
; X64-SSE42-NEXT: pmaxuw %xmm1, %xmm0
; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: notl %eax
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
@ -1789,8 +1789,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: notl %eax
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
@ -1803,8 +1803,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: notl %eax
; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@ -1817,8 +1817,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-AVX512-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: notl %eax
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
@ -1867,13 +1867,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-SSE42-NEXT: pmaxub %xmm2, %xmm0
; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0
; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
; X86-SSE42-NEXT: psrlw $8, %xmm2
; X86-SSE42-NEXT: pminub %xmm0, %xmm2
; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pxor %xmm0, %xmm1
; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
; X86-SSE42-NEXT: psrlw $8, %xmm0
; X86-SSE42-NEXT: pminub %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: notb %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
@ -1886,11 +1886,11 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX1-NEXT: notb %al
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
@ -1902,11 +1902,11 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX2-NEXT: notb %al
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
@ -1936,13 +1936,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-SSE42-NEXT: pmaxub %xmm2, %xmm0
; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0
; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
; X64-SSE42-NEXT: psrlw $8, %xmm2
; X64-SSE42-NEXT: pminub %xmm0, %xmm2
; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pxor %xmm0, %xmm1
; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
; X64-SSE42-NEXT: psrlw $8, %xmm0
; X64-SSE42-NEXT: pminub %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: notb %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
@ -1955,11 +1955,11 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT: notb %al
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
@ -1971,11 +1971,11 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT: notb %al
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
@ -1990,8 +1990,8 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT: notb %al
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq

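In the unsigned reduction diffs above the vector constant is all-ones, so once the xor is moved after the extract it folds further into a plain not of the scalar (vpxor plus vmovd becomes vmovd plus notl, or notb for the byte cases). A minimal sketch of that shape (hypothetical function, not one of the checked-in tests; instcombine would do this rewrite at the IR level, the DAG combine covers cases where the xor only appears after legalization):

define i16 @extract_not_sketch(<8 x i16> %x) {
  ; xor with all-ones is bitwise not, then extract lane 0
  %v = xor <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %r = extractelement <8 x i16> %v, i32 0
  ret i16 %r
}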
@ -242,8 +242,8 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
@ -278,8 +278,8 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
@ -785,8 +785,8 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
@ -839,8 +839,8 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
@ -1592,8 +1592,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pxor %xmm4, %xmm1
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
; X86-SSE2-NEXT: pxor %xmm4, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
@ -1656,8 +1656,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;

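The i16 min/max reductions use the sign-bit-flip trick to get unsigned semantics, so there the moved constant is the sign-bit splat and the scalarized op shows up as xorl $32768 on the extracted value. Sketch of that last step (hypothetical function, mirroring the tail of the reductions above):

define i16 @extract_signflip_sketch(<8 x i16> %x) {
  ; flip the sign bit of every lane, then extract lane 0
  %v = xor <8 x i16> %x, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
  %r = extractelement <8 x i16> %v, i32 0
  ret i16 %r
}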
@ -5,18 +5,14 @@
define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_sext:
; X32: # %bb.0:
; X32-NEXT: movl $15, %eax
; X32-NEXT: vmovd %eax, %xmm1
; X32-NEXT: vpand %xmm1, %xmm0, %xmm0
; X32-NEXT: vpextrw $0, %xmm0, %eax
; X32-NEXT: vmovd %xmm0, %eax
; X32-NEXT: andl $15, %eax
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_extract_sext:
; X64: # %bb.0:
; X64-NEXT: movl $15, %eax
; X64-NEXT: vmovd %eax, %xmm1
; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-NEXT: vpextrw $0, %xmm0, %eax
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: andl $15, %eax
; X64-NEXT: retq
%1 = and <8 x i16> %a0, <i16 15, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
%2 = extractelement <8 x i16> %1, i32 0
@ -38,8 +34,8 @@ define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
;
; X64-LABEL: knownbits_mask_extract_uitofp:
; X64: # %bb.0:
; X64-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X64-NEXT: vmovq %xmm0, %rax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
; X64-NEXT: retq
%1 = and <2 x i64> %a0, <i64 65535, i64 -1>

@ -74,8 +74,8 @@ define float @signbits_ashr_extract_sitofp_0(<2 x i64> %a0) nounwind {
;
; X64-LABEL: signbits_ashr_extract_sitofp_0:
; X64: # %bb.0:
; X64-NEXT: vpsrlq $32, %xmm0, %xmm0
; X64-NEXT: vmovq %xmm0, %rax
; X64-NEXT: shrq $32, %rax
; X64-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
; X64-NEXT: retq
%1 = ashr <2 x i64> %a0, <i64 32, i64 32>
@ -101,12 +101,9 @@ define float @signbits_ashr_extract_sitofp_1(<2 x i64> %a0) nounwind {
;
; X64-LABEL: signbits_ashr_extract_sitofp_1:
; X64: # %bb.0:
; X64-NEXT: vpsrlq $32, %xmm0, %xmm0
; X64-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483648,1]
; X64-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-NEXT: vpsubq %xmm1, %xmm0, %xmm0
; X64-NEXT: vmovq %xmm0, %rax
; X64-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
; X64-NEXT: shrq $32, %rax
; X64-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
; X64-NEXT: retq
%1 = ashr <2 x i64> %a0, <i64 32, i64 63>
%2 = extractelement <2 x i64> %1, i32 0
@ -132,13 +129,10 @@ define float @signbits_ashr_shl_extract_sitofp(<2 x i64> %a0) nounwind {
;
; X64-LABEL: signbits_ashr_shl_extract_sitofp:
; X64: # %bb.0:
; X64-NEXT: vpsrlq $61, %xmm0, %xmm0
; X64-NEXT: vmovdqa {{.*#+}} xmm1 = [4,8]
; X64-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-NEXT: vpsubq %xmm1, %xmm0, %xmm0
; X64-NEXT: vpsllq $20, %xmm0, %xmm0
; X64-NEXT: vmovq %xmm0, %rax
; X64-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
; X64-NEXT: sarq $61, %rax
; X64-NEXT: shll $20, %eax
; X64-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
; X64-NEXT: retq
%1 = ashr <2 x i64> %a0, <i64 61, i64 60>
%2 = shl <2 x i64> %1, <i64 20, i64 16>
@ -168,10 +162,8 @@ define float @signbits_ashr_insert_ashr_extract_sitofp(i64 %a0, i64 %a1) nounwin
; X64-LABEL: signbits_ashr_insert_ashr_extract_sitofp:
; X64: # %bb.0:
; X64-NEXT: sarq $30, %rdi
; X64-NEXT: vmovq %rdi, %xmm0
; X64-NEXT: vpsrlq $3, %xmm0, %xmm0
; X64-NEXT: vmovq %xmm0, %rax
; X64-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
; X64-NEXT: shrq $3, %rdi
; X64-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0
; X64-NEXT: retq
%1 = ashr i64 %a0, 30
%2 = insertelement <2 x i64> undef, i64 %1, i32 0

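The known-signbits diffs above show the combine firing on shifts as well: the vector shift plus the xor/sub sign-extension dance collapses into a plain scalar sarq or shrq once the lane is in a GPR. Whether a given binop is moved is gated by the shouldScalarizeBinop() target hook this patch adds; the x86 override (not shown in this excerpt) opts in for cases like these. A reduced IR sketch of the shift shape (hypothetical function, not one of the checked-in tests):

define float @extract_ashr_sketch(<2 x i64> %x) {
  ; arithmetic shift right, extract lane 0, convert to float
  %s = ashr <2 x i64> %x, <i64 61, i64 61>
  %e = extractelement <2 x i64> %s, i32 0
  %f = sitofp i64 %e to float
  ret float %f
}

With the combine enabled the shift is performed on the extracted 64-bit scalar (roughly vmovq then sarq $61) instead of in the vector domain.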
@ -9,7 +9,7 @@ define i64 @PR30511(<2 x double> %a) {
; CHECK: # %bb.0:
; CHECK-NEXT: addpd {{.*}}(%rip), %xmm0
; CHECK-NEXT: cvtdq2pd %xmm0, %xmm0
; CHECK-NEXT: mulpd {{.*}}(%rip), %xmm0
; CHECK-NEXT: mulsd {{.*}}(%rip), %xmm0
; CHECK-NEXT: movq %xmm0, %rax
; CHECK-NEXT: retq
%1 = fadd <2 x double> %a, <double 0x4338000000000000, double 0x4338000000000000>

@ -6,18 +6,16 @@ define i32 @test_eq_1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_eq_1:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: notl %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_eq_1:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pextrd $1, %xmm0, %eax
; SSE41-NEXT: pextrd $1, %xmm1, %eax
; SSE41-NEXT: notl %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %A, %B
%sext = sext <4 x i1> %cmp to <4 x i32>
@ -65,18 +63,16 @@ define i32 @test_ge_1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_ge_1:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: notl %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_ge_1:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pextrd $1, %xmm0, %eax
; SSE41-NEXT: pextrd $1, %xmm1, %eax
; SSE41-NEXT: notl %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %A, %B
%sext = sext <4 x i1> %cmp to <4 x i32>
@ -124,18 +120,16 @@ define i32 @test_eq_2(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_eq_2:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: notl %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_eq_2:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: pextrd $1, %xmm1, %eax
; SSE41-NEXT: pextrd $1, %xmm0, %eax
; SSE41-NEXT: notl %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %B, %A
%sext = sext <4 x i1> %cmp to <4 x i32>
@ -170,18 +164,16 @@ define i32 @test_le_2(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_le_2:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: notl %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_le_2:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: pextrd $1, %xmm1, %eax
; SSE41-NEXT: pextrd $1, %xmm0, %eax
; SSE41-NEXT: notl %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %B, %A
%sext = sext <4 x i1> %cmp to <4 x i32>

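The PR34947 hunks below show the combine firing on a multiply: the last remaining scalar lane no longer round-trips through a one-element vector pmuludq, it is multiplied directly in a GPR with imull $8199. A reduced IR sketch of that shape (hypothetical function, not the actual test, which builds the vector from udiv remainders):

define i32 @extract_mul_sketch(<4 x i32> %x) {
  ; multiply by a splat constant, then extract lane 0
  %m = mul <4 x i32> %x, <i32 8199, i32 8199, i32 8199, i32 8199>
  %r = extractelement <4 x i32> %m, i32 0
  ret i32 %r
}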
@ -2096,7 +2096,7 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movdqa (%eax), %xmm5
; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movdqa (%ecx), %xmm3
; X86-SSE-NEXT: movdqa (%ecx), %xmm2
; X86-SSE-NEXT: movdqa 16(%ecx), %xmm6
; X86-SSE-NEXT: pxor %xmm0, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
@ -2110,10 +2110,10 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm0
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm5[2,3,0,1]
; X86-SSE-NEXT: movd %xmm2, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm6[2,3,0,1]
; X86-SSE-NEXT: movd %xmm2, %esi
; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm5[2,3,0,1]
; X86-SSE-NEXT: movd %xmm3, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm6[2,3,0,1]
; X86-SSE-NEXT: movd %xmm3, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm7
@ -2122,7 +2122,7 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-SSE-NEXT: movd %xmm6, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm2
; X86-SSE-NEXT: movd %edx, %xmm3
; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,2,3]
; X86-SSE-NEXT: movd %xmm5, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,2,3]
@ -2130,60 +2130,57 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm5
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm7[0]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm4[3,1,2,3]
; X86-SSE-NEXT: movd %xmm6, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm3[3,1,2,3]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm2[3,1,2,3]
; X86-SSE-NEXT: movd %xmm6, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm6
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,0,1]
; X86-SSE-NEXT: movd %xmm7, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,0,1]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm2[2,3,0,1]
; X86-SSE-NEXT: movd %xmm7, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm7
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
; X86-SSE-NEXT: movd %xmm4, %eax
; X86-SSE-NEXT: movd %xmm3, %esi
; X86-SSE-NEXT: movd %xmm2, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,2,3]
; X86-SSE-NEXT: movd %xmm4, %eax
; X86-SSE-NEXT: movd %edx, %xmm4
; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
; X86-SSE-NEXT: movd %xmm3, %esi
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
; X86-SSE-NEXT: movd %xmm2, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm3
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; X86-SSE-NEXT: movd %edx, %xmm2
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm7[0]
; X86-SSE-NEXT: movd %xmm1, %eax
; X86-SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,0],xmm6[0,0]
; X86-SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm6[0,0]
; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [8199,8199,8199,8199]
; X86-SSE-NEXT: pmuludq %xmm1, %xmm4
; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
; X86-SSE-NEXT: pmuludq %xmm1, %xmm3
; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; X86-SSE-NEXT: pmuludq %xmm1, %xmm2
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; X86-SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,0],xmm0[0,0]
; X86-SSE-NEXT: pmuludq %xmm1, %xmm3
; X86-SSE-NEXT: pmuludq %xmm1, %xmm5
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm5[0,2,2,3]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl 32(%ecx)
; X86-SSE-NEXT: pmuludq %xmm1, %xmm2
; X86-SSE-NEXT: pmuludq %xmm1, %xmm5
; X86-SSE-NEXT: movd %edx, %xmm0
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm5[0,2,2,3]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-SSE-NEXT: movl $8199, %eax # imm = 0x2007
; X86-SSE-NEXT: movd %eax, %xmm2
; X86-SSE-NEXT: pmuludq %xmm0, %xmm2
; X86-SSE-NEXT: movd %xmm2, (%eax)
; X86-SSE-NEXT: movdqa %xmm1, (%eax)
; X86-SSE-NEXT: movdqa %xmm0, (%eax)
; X86-SSE-NEXT: movdqa %xmm4, (%eax)
; X86-SSE-NEXT: imull $8199, %edx, %eax # imm = 0x2007
; X86-SSE-NEXT: movl %eax, (%eax)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
@ -2252,14 +2249,13 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-AVX1-NEXT: vpinsrd $1, (%esp), %xmm1, %xmm1 # 4-byte Folded Reload
; X86-AVX1-NEXT: vpinsrd $2, {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 4-byte Folded Reload
; X86-AVX1-NEXT: vpinsrd $3, {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 4-byte Folded Reload
; X86-AVX1-NEXT: vmovd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 4-byte Folded Reload
; X86-AVX1-NEXT: # xmm2 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [8199,8199,8199,8199]
; X86-AVX1-NEXT: vpmulld %xmm3, %xmm0, %xmm0
; X86-AVX1-NEXT: vpmulld %xmm3, %xmm1, %xmm1
; X86-AVX1-NEXT: imull $8199, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-AVX1-NEXT: # imm = 0x2007
; X86-AVX1-NEXT: movl %eax, (%eax)
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [8199,8199,8199,8199]
; X86-AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vpmulld %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpmulld %xmm3, %xmm2, %xmm1
; X86-AVX1-NEXT: vmovd %xmm1, (%eax)
; X86-AVX1-NEXT: vmovaps %ymm0, (%eax)
; X86-AVX1-NEXT: addl $16, %esp
; X86-AVX1-NEXT: popl %esi
@ -2326,12 +2322,11 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: xorl %edx, %edx
; X86-AVX2-NEXT: divl 32(%esi)
; X86-AVX2-NEXT: vmovd %edx, %xmm0
; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [8199,8199,8199,8199,8199,8199,8199,8199]
; X86-AVX2-NEXT: vpmulld %ymm2, %ymm1, %ymm1
; X86-AVX2-NEXT: vpmulld %xmm2, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, (%eax)
; X86-AVX2-NEXT: vmovdqa %ymm1, (%eax)
; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm0 = [8199,8199,8199,8199,8199,8199,8199,8199]
; X86-AVX2-NEXT: vpmulld %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: imull $8199, %edx, %eax # imm = 0x2007
; X86-AVX2-NEXT: movl %eax, (%eax)
; X86-AVX2-NEXT: vmovdqa %ymm0, (%eax)
; X86-AVX2-NEXT: popl %esi
; X86-AVX2-NEXT: popl %edi
; X86-AVX2-NEXT: vzeroupper
@ -2345,8 +2340,8 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-SSE-NEXT: movdqa 16(%rsi), %xmm6
; X64-SSE-NEXT: pxor %xmm0, %xmm0
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-SSE-NEXT: movdqa %xmm5, %xmm4
|
||||
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
|
||||
; X64-SSE-NEXT: movdqa %xmm5, %xmm3
|
||||
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
|
||||
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm5[3,1,2,3]
|
||||
; X64-SSE-NEXT: movd %xmm0, %eax
|
||||
@ -2355,10 +2350,10 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
|
||||
; X64-SSE-NEXT: xorl %edx, %edx
|
||||
; X64-SSE-NEXT: divl %ecx
|
||||
; X64-SSE-NEXT: movd %edx, %xmm8
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm5[2,3,0,1]
|
||||
; X64-SSE-NEXT: movd %xmm3, %eax
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm6[2,3,0,1]
|
||||
; X64-SSE-NEXT: movd %xmm3, %ecx
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[2,3,0,1]
|
||||
; X64-SSE-NEXT: movd %xmm4, %eax
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm6[2,3,0,1]
|
||||
; X64-SSE-NEXT: movd %xmm4, %ecx
|
||||
; X64-SSE-NEXT: xorl %edx, %edx
|
||||
; X64-SSE-NEXT: divl %ecx
|
||||
; X64-SSE-NEXT: movd %edx, %xmm7
|
||||
@ -2367,7 +2362,7 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
|
||||
; X64-SSE-NEXT: movd %xmm6, %ecx
|
||||
; X64-SSE-NEXT: xorl %edx, %edx
|
||||
; X64-SSE-NEXT: divl %ecx
|
||||
; X64-SSE-NEXT: movd %edx, %xmm3
|
||||
; X64-SSE-NEXT: movd %edx, %xmm4
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,2,3]
|
||||
; X64-SSE-NEXT: movd %xmm5, %eax
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,2,3]
|
||||
@ -2375,16 +2370,16 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
|
||||
; X64-SSE-NEXT: xorl %edx, %edx
|
||||
; X64-SSE-NEXT: divl %ecx
|
||||
; X64-SSE-NEXT: movd %edx, %xmm5
|
||||
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
|
||||
; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0]
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm4[3,1,2,3]
|
||||
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
|
||||
; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm7[0]
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm3[3,1,2,3]
|
||||
; X64-SSE-NEXT: movd %xmm6, %eax
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm2[3,1,2,3]
|
||||
; X64-SSE-NEXT: movd %xmm6, %ecx
|
||||
; X64-SSE-NEXT: xorl %edx, %edx
|
||||
; X64-SSE-NEXT: divl %ecx
|
||||
; X64-SSE-NEXT: movd %edx, %xmm6
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,0,1]
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,0,1]
|
||||
; X64-SSE-NEXT: movd %xmm7, %eax
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm2[2,3,0,1]
|
||||
; X64-SSE-NEXT: movd %xmm7, %ecx
|
||||
@ -2392,13 +2387,13 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
|
||||
; X64-SSE-NEXT: divl %ecx
|
||||
; X64-SSE-NEXT: movd %edx, %xmm7
|
||||
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
|
||||
; X64-SSE-NEXT: movd %xmm4, %eax
|
||||
; X64-SSE-NEXT: movd %xmm3, %eax
|
||||
; X64-SSE-NEXT: movd %xmm2, %ecx
|
||||
; X64-SSE-NEXT: xorl %edx, %edx
|
||||
; X64-SSE-NEXT: divl %ecx
|
||||
; X64-SSE-NEXT: movd %edx, %xmm0
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,2,3]
|
||||
; X64-SSE-NEXT: movd %xmm4, %eax
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
|
||||
; X64-SSE-NEXT: movd %xmm3, %eax
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
|
||||
; X64-SSE-NEXT: movd %xmm2, %ecx
|
||||
; X64-SSE-NEXT: xorl %edx, %edx
|
||||
@ -2409,24 +2404,21 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
|
||||
; X64-SSE-NEXT: movd %xmm1, %eax
|
||||
; X64-SSE-NEXT: xorl %edx, %edx
|
||||
; X64-SSE-NEXT: divl 32(%rsi)
|
||||
; X64-SSE-NEXT: movd %edx, %xmm1
|
||||
; X64-SSE-NEXT: movdqa {{.*#+}} xmm4 = [8199,8199,8199,8199]
|
||||
; X64-SSE-NEXT: pmuludq %xmm4, %xmm0
|
||||
; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [8199,8199,8199,8199]
|
||||
; X64-SSE-NEXT: pmuludq %xmm1, %xmm0
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; X64-SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm6[0,0]
|
||||
; X64-SSE-NEXT: pmuludq %xmm4, %xmm2
|
||||
; X64-SSE-NEXT: pmuludq %xmm1, %xmm2
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
|
||||
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; X64-SSE-NEXT: pmuludq %xmm4, %xmm3
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
|
||||
; X64-SSE-NEXT: pmuludq %xmm1, %xmm4
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[0,2,2,3]
|
||||
; X64-SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,0],xmm8[0,0]
|
||||
; X64-SSE-NEXT: pmuludq %xmm4, %xmm5
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm5[0,2,2,3]
|
||||
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
|
||||
; X64-SSE-NEXT: movl $8199, %eax # imm = 0x2007
|
||||
; X64-SSE-NEXT: movd %eax, %xmm3
|
||||
; X64-SSE-NEXT: pmuludq %xmm1, %xmm3
|
||||
; X64-SSE-NEXT: movd %xmm3, (%rax)
|
||||
; X64-SSE-NEXT: pmuludq %xmm1, %xmm5
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm5[0,2,2,3]
|
||||
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; X64-SSE-NEXT: imull $8199, %edx, %eax # imm = 0x2007
|
||||
; X64-SSE-NEXT: movl %eax, (%rax)
|
||||
; X64-SSE-NEXT: movdqa %xmm2, (%rax)
|
||||
; X64-SSE-NEXT: movdqa %xmm0, (%rax)
|
||||
; X64-SSE-NEXT: retq
|
||||
@ -2493,11 +2485,10 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
|
||||
; X64-AVX1-NEXT: vpinsrd $1, %r11d, %xmm2, %xmm2
|
||||
; X64-AVX1-NEXT: vpinsrd $2, %r10d, %xmm2, %xmm2
|
||||
; X64-AVX1-NEXT: vpinsrd $3, %r9d, %xmm2, %xmm2
|
||||
; X64-AVX1-NEXT: vpmulld %xmm1, %xmm2, %xmm2
|
||||
; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
|
||||
; X64-AVX1-NEXT: vmovd %r8d, %xmm2
|
||||
; X64-AVX1-NEXT: vpmulld %xmm1, %xmm2, %xmm1
|
||||
; X64-AVX1-NEXT: vmovd %xmm1, (%rax)
|
||||
; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; X64-AVX1-NEXT: imull $8199, %r8d, %eax # imm = 0x2007
|
||||
; X64-AVX1-NEXT: movl %eax, (%rax)
|
||||
; X64-AVX1-NEXT: vmovaps %ymm0, (%rax)
|
||||
; X64-AVX1-NEXT: popq %rbx
|
||||
; X64-AVX1-NEXT: popq %rbp
|
||||
@ -2557,12 +2548,11 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
|
||||
; X64-AVX2-NEXT: vmovd %xmm0, %eax
|
||||
; X64-AVX2-NEXT: xorl %edx, %edx
|
||||
; X64-AVX2-NEXT: divl 32(%rsi)
|
||||
; X64-AVX2-NEXT: vmovd %edx, %xmm0
|
||||
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [8199,8199,8199,8199,8199,8199,8199,8199]
|
||||
; X64-AVX2-NEXT: vpmulld %ymm2, %ymm1, %ymm1
|
||||
; X64-AVX2-NEXT: vpmulld %xmm2, %xmm0, %xmm0
|
||||
; X64-AVX2-NEXT: vmovd %xmm0, (%rax)
|
||||
; X64-AVX2-NEXT: vmovdqa %ymm1, (%rax)
|
||||
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm0 = [8199,8199,8199,8199,8199,8199,8199,8199]
|
||||
; X64-AVX2-NEXT: vpmulld %ymm0, %ymm1, %ymm0
|
||||
; X64-AVX2-NEXT: imull $8199, %edx, %eax # imm = 0x2007
|
||||
; X64-AVX2-NEXT: movl %eax, (%rax)
|
||||
; X64-AVX2-NEXT: vmovdqa %ymm0, (%rax)
|
||||
; X64-AVX2-NEXT: vzeroupper
|
||||
; X64-AVX2-NEXT: retq
|
||||
%a0 = load <9 x i16>, <9 x i16>* %p0, align 64
|
||||
|
@ -2079,7 +2079,7 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movdqa (%eax), %xmm5
; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movdqa (%ecx), %xmm3
; X86-SSE-NEXT: movdqa (%ecx), %xmm2
; X86-SSE-NEXT: movdqa 16(%ecx), %xmm6
; X86-SSE-NEXT: pxor %xmm0, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
@ -2093,10 +2093,10 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm0
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm5[2,3,0,1]
; X86-SSE-NEXT: movd %xmm2, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm6[2,3,0,1]
; X86-SSE-NEXT: movd %xmm2, %esi
; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm5[2,3,0,1]
; X86-SSE-NEXT: movd %xmm3, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm6[2,3,0,1]
; X86-SSE-NEXT: movd %xmm3, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm7
@ -2105,7 +2105,7 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-SSE-NEXT: movd %xmm6, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm2
; X86-SSE-NEXT: movd %edx, %xmm3
; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,2,3]
; X86-SSE-NEXT: movd %xmm5, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,2,3]
@ -2113,60 +2113,57 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm5
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm7[0]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm4[3,1,2,3]
; X86-SSE-NEXT: movd %xmm6, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm3[3,1,2,3]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm2[3,1,2,3]
; X86-SSE-NEXT: movd %xmm6, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm6
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,0,1]
; X86-SSE-NEXT: movd %xmm7, %eax
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,0,1]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm2[2,3,0,1]
; X86-SSE-NEXT: movd %xmm7, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm7
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
; X86-SSE-NEXT: movd %xmm4, %eax
; X86-SSE-NEXT: movd %xmm3, %esi
; X86-SSE-NEXT: movd %xmm2, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,2,3]
; X86-SSE-NEXT: movd %xmm4, %eax
; X86-SSE-NEXT: movd %edx, %xmm4
; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
; X86-SSE-NEXT: movd %xmm3, %esi
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
; X86-SSE-NEXT: movd %xmm2, %esi
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl %esi
; X86-SSE-NEXT: movd %edx, %xmm3
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; X86-SSE-NEXT: movd %edx, %xmm2
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm7[0]
; X86-SSE-NEXT: movd %xmm1, %eax
; X86-SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,0],xmm6[0,0]
; X86-SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm6[0,0]
; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [8199,8199,8199,8199]
; X86-SSE-NEXT: pmuludq %xmm1, %xmm4
; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
; X86-SSE-NEXT: pmuludq %xmm1, %xmm3
; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; X86-SSE-NEXT: pmuludq %xmm1, %xmm2
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; X86-SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,0],xmm0[0,0]
; X86-SSE-NEXT: pmuludq %xmm1, %xmm3
; X86-SSE-NEXT: pmuludq %xmm1, %xmm5
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm5[0,2,2,3]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE-NEXT: xorl %edx, %edx
; X86-SSE-NEXT: divl 32(%ecx)
; X86-SSE-NEXT: pmuludq %xmm1, %xmm2
; X86-SSE-NEXT: pmuludq %xmm1, %xmm5
; X86-SSE-NEXT: movd %edx, %xmm0
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm5[0,2,2,3]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-SSE-NEXT: movl $8199, %eax # imm = 0x2007
; X86-SSE-NEXT: movd %eax, %xmm2
; X86-SSE-NEXT: pmuludq %xmm0, %xmm2
; X86-SSE-NEXT: movd %xmm2, (%eax)
; X86-SSE-NEXT: movdqa %xmm1, (%eax)
; X86-SSE-NEXT: movdqa %xmm0, (%eax)
; X86-SSE-NEXT: movdqa %xmm4, (%eax)
; X86-SSE-NEXT: imull $8199, %edx, %eax # imm = 0x2007
; X86-SSE-NEXT: movl %eax, (%eax)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
@ -2235,14 +2232,13 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-AVX1-NEXT: vpinsrd $1, (%esp), %xmm1, %xmm1 # 4-byte Folded Reload
; X86-AVX1-NEXT: vpinsrd $2, {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 4-byte Folded Reload
; X86-AVX1-NEXT: vpinsrd $3, {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 4-byte Folded Reload
; X86-AVX1-NEXT: vmovd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 4-byte Folded Reload
; X86-AVX1-NEXT: # xmm2 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [8199,8199,8199,8199]
; X86-AVX1-NEXT: vpmulld %xmm3, %xmm0, %xmm0
; X86-AVX1-NEXT: vpmulld %xmm3, %xmm1, %xmm1
; X86-AVX1-NEXT: imull $8199, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-AVX1-NEXT: # imm = 0x2007
; X86-AVX1-NEXT: movl %eax, (%eax)
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [8199,8199,8199,8199]
; X86-AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vpmulld %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpmulld %xmm3, %xmm2, %xmm1
; X86-AVX1-NEXT: vmovd %xmm1, (%eax)
; X86-AVX1-NEXT: vmovaps %ymm0, (%eax)
; X86-AVX1-NEXT: addl $16, %esp
; X86-AVX1-NEXT: popl %esi
@ -2309,12 +2305,11 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: xorl %edx, %edx
; X86-AVX2-NEXT: divl 32(%esi)
; X86-AVX2-NEXT: vmovd %edx, %xmm0
; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [8199,8199,8199,8199,8199,8199,8199,8199]
; X86-AVX2-NEXT: vpmulld %ymm2, %ymm1, %ymm1
; X86-AVX2-NEXT: vpmulld %xmm2, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, (%eax)
; X86-AVX2-NEXT: vmovdqa %ymm1, (%eax)
; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm0 = [8199,8199,8199,8199,8199,8199,8199,8199]
; X86-AVX2-NEXT: vpmulld %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: imull $8199, %edx, %eax # imm = 0x2007
; X86-AVX2-NEXT: movl %eax, (%eax)
; X86-AVX2-NEXT: vmovdqa %ymm0, (%eax)
; X86-AVX2-NEXT: popl %esi
; X86-AVX2-NEXT: popl %edi
; X86-AVX2-NEXT: vzeroupper
@ -2328,8 +2323,8 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-SSE-NEXT: movdqa 16(%rsi), %xmm6
; X64-SSE-NEXT: pxor %xmm0, %xmm0
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-SSE-NEXT: movdqa %xmm5, %xmm4
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
; X64-SSE-NEXT: movdqa %xmm5, %xmm3
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm5[3,1,2,3]
; X64-SSE-NEXT: movd %xmm0, %eax
@ -2338,10 +2333,10 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm8
; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm5[2,3,0,1]
; X64-SSE-NEXT: movd %xmm3, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm6[2,3,0,1]
; X64-SSE-NEXT: movd %xmm3, %ecx
; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[2,3,0,1]
; X64-SSE-NEXT: movd %xmm4, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm6[2,3,0,1]
; X64-SSE-NEXT: movd %xmm4, %ecx
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm7
@ -2350,7 +2345,7 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-SSE-NEXT: movd %xmm6, %ecx
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm3
; X64-SSE-NEXT: movd %edx, %xmm4
; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,2,3]
; X64-SSE-NEXT: movd %xmm5, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,2,3]
@ -2358,16 +2353,16 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm5
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0]
; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm4[3,1,2,3]
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm7[0]
; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm3[3,1,2,3]
; X64-SSE-NEXT: movd %xmm6, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm2[3,1,2,3]
; X64-SSE-NEXT: movd %xmm6, %ecx
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm6
; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,0,1]
; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,0,1]
; X64-SSE-NEXT: movd %xmm7, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm2[2,3,0,1]
; X64-SSE-NEXT: movd %xmm7, %ecx
@ -2375,13 +2370,13 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm7
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
; X64-SSE-NEXT: movd %xmm4, %eax
; X64-SSE-NEXT: movd %xmm3, %eax
; X64-SSE-NEXT: movd %xmm2, %ecx
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl %ecx
; X64-SSE-NEXT: movd %edx, %xmm0
; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,2,3]
; X64-SSE-NEXT: movd %xmm4, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
; X64-SSE-NEXT: movd %xmm3, %eax
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
; X64-SSE-NEXT: movd %xmm2, %ecx
; X64-SSE-NEXT: xorl %edx, %edx
@ -2392,24 +2387,21 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-SSE-NEXT: movd %xmm1, %eax
; X64-SSE-NEXT: xorl %edx, %edx
; X64-SSE-NEXT: divl 32(%rsi)
; X64-SSE-NEXT: movd %edx, %xmm1
; X64-SSE-NEXT: movdqa {{.*#+}} xmm4 = [8199,8199,8199,8199]
; X64-SSE-NEXT: pmuludq %xmm4, %xmm0
; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [8199,8199,8199,8199]
; X64-SSE-NEXT: pmuludq %xmm1, %xmm0
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm6[0,0]
; X64-SSE-NEXT: pmuludq %xmm4, %xmm2
; X64-SSE-NEXT: pmuludq %xmm1, %xmm2
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X64-SSE-NEXT: pmuludq %xmm4, %xmm3
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
; X64-SSE-NEXT: pmuludq %xmm1, %xmm4
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[0,2,2,3]
; X64-SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,0],xmm8[0,0]
; X64-SSE-NEXT: pmuludq %xmm4, %xmm5
; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm5[0,2,2,3]
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-SSE-NEXT: movl $8199, %eax # imm = 0x2007
; X64-SSE-NEXT: movd %eax, %xmm3
; X64-SSE-NEXT: pmuludq %xmm1, %xmm3
; X64-SSE-NEXT: movd %xmm3, (%rax)
; X64-SSE-NEXT: pmuludq %xmm1, %xmm5
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm5[0,2,2,3]
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-SSE-NEXT: imull $8199, %edx, %eax # imm = 0x2007
; X64-SSE-NEXT: movl %eax, (%rax)
; X64-SSE-NEXT: movdqa %xmm2, (%rax)
; X64-SSE-NEXT: movdqa %xmm0, (%rax)
; X64-SSE-NEXT: retq
@ -2476,11 +2468,10 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-AVX1-NEXT: vpinsrd $1, %r11d, %xmm2, %xmm2
; X64-AVX1-NEXT: vpinsrd $2, %r10d, %xmm2, %xmm2
; X64-AVX1-NEXT: vpinsrd $3, %r9d, %xmm2, %xmm2
; X64-AVX1-NEXT: vpmulld %xmm1, %xmm2, %xmm2
; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; X64-AVX1-NEXT: vmovd %r8d, %xmm2
; X64-AVX1-NEXT: vpmulld %xmm1, %xmm2, %xmm1
; X64-AVX1-NEXT: vmovd %xmm1, (%rax)
; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-AVX1-NEXT: imull $8199, %r8d, %eax # imm = 0x2007
; X64-AVX1-NEXT: movl %eax, (%rax)
; X64-AVX1-NEXT: vmovaps %ymm0, (%rax)
; X64-AVX1-NEXT: popq %rbx
; X64-AVX1-NEXT: popq %rbp
@ -2540,12 +2531,11 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: xorl %edx, %edx
; X64-AVX2-NEXT: divl 32(%rsi)
; X64-AVX2-NEXT: vmovd %edx, %xmm0
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [8199,8199,8199,8199,8199,8199,8199,8199]
; X64-AVX2-NEXT: vpmulld %ymm2, %ymm1, %ymm1
; X64-AVX2-NEXT: vpmulld %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, (%rax)
; X64-AVX2-NEXT: vmovdqa %ymm1, (%rax)
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm0 = [8199,8199,8199,8199,8199,8199,8199,8199]
; X64-AVX2-NEXT: vpmulld %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: imull $8199, %edx, %eax # imm = 0x2007
; X64-AVX2-NEXT: movl %eax, (%rax)
; X64-AVX2-NEXT: vmovdqa %ymm0, (%rax)
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
%a0 = load <9 x i16>, <9 x i16>* %p0, align 64
|
@ -1294,12 +1294,13 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
; SSE2-NEXT: movss {{.*#+}} xmm4 = xmm2[0],xmm4[1,2,3]
; SSE2-NEXT: movdqa %xmm0, %xmm5
; SSE2-NEXT: pslld %xmm4, %xmm5
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [32,32,32,32]
; SSE2-NEXT: psubd %xmm2, %xmm4
; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3]
; SSE2-NEXT: psrld %xmm3, %xmm1
; SSE2-NEXT: movd %xmm2, %eax
; SSE2-NEXT: movl $32, %ecx
; SSE2-NEXT: subl %eax, %ecx
; SSE2-NEXT: movd %ecx, %xmm4
; SSE2-NEXT: psrld %xmm4, %xmm1
; SSE2-NEXT: por %xmm5, %xmm1
; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pandn %xmm1, %xmm2
; SSE2-NEXT: por %xmm2, %xmm0
@ -1471,12 +1472,13 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
; X32-SSE-NEXT: movss {{.*#+}} xmm4 = xmm2[0],xmm4[1,2,3]
; X32-SSE-NEXT: movdqa %xmm0, %xmm5
; X32-SSE-NEXT: pslld %xmm4, %xmm5
; X32-SSE-NEXT: movdqa {{.*#+}} xmm4 = [32,32,32,32]
; X32-SSE-NEXT: psubd %xmm2, %xmm4
; X32-SSE-NEXT: pcmpeqd %xmm3, %xmm2
; X32-SSE-NEXT: movss {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3]
; X32-SSE-NEXT: psrld %xmm3, %xmm1
; X32-SSE-NEXT: movd %xmm2, %eax
; X32-SSE-NEXT: movl $32, %ecx
; X32-SSE-NEXT: subl %eax, %ecx
; X32-SSE-NEXT: movd %ecx, %xmm4
; X32-SSE-NEXT: psrld %xmm4, %xmm1
; X32-SSE-NEXT: por %xmm5, %xmm1
; X32-SSE-NEXT: pcmpeqd %xmm3, %xmm2
; X32-SSE-NEXT: pand %xmm2, %xmm0
; X32-SSE-NEXT: pandn %xmm1, %xmm2
; X32-SSE-NEXT: por %xmm2, %xmm0
|
@ -783,18 +783,16 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind
define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: xorps %xmm2, %xmm2
; SSE2-NEXT: xorps %xmm3, %xmm3
; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: pslld %xmm3, %xmm4
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [32,32,32,32]
; SSE2-NEXT: psubd %xmm1, %xmm3
; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3]
; SSE2-NEXT: psrld %xmm2, %xmm0
; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: andl $31, %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pslld %xmm1, %xmm2
; SSE2-NEXT: movl $32, %ecx
; SSE2-NEXT: subl %eax, %ecx
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: psrld %xmm1, %xmm0
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_funnnel_v4i32:
@ -882,18 +880,16 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind
;
; X32-SSE-LABEL: splatvar_funnnel_v4i32:
; X32-SSE: # %bb.0:
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1
; X32-SSE-NEXT: xorps %xmm2, %xmm2
; X32-SSE-NEXT: xorps %xmm3, %xmm3
; X32-SSE-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; X32-SSE-NEXT: movdqa %xmm0, %xmm4
; X32-SSE-NEXT: pslld %xmm3, %xmm4
; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [32,32,32,32]
; X32-SSE-NEXT: psubd %xmm1, %xmm3
; X32-SSE-NEXT: movss {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3]
; X32-SSE-NEXT: psrld %xmm2, %xmm0
; X32-SSE-NEXT: por %xmm4, %xmm0
; X32-SSE-NEXT: movd %xmm1, %eax
; X32-SSE-NEXT: andl $31, %eax
; X32-SSE-NEXT: movd %eax, %xmm1
; X32-SSE-NEXT: movdqa %xmm0, %xmm2
; X32-SSE-NEXT: pslld %xmm1, %xmm2
; X32-SSE-NEXT: movl $32, %ecx
; X32-SSE-NEXT: subl %eax, %ecx
; X32-SSE-NEXT: movd %ecx, %xmm1
; X32-SSE-NEXT: psrld %xmm1, %xmm0
; X32-SSE-NEXT: por %xmm2, %xmm0
; X32-SSE-NEXT: retl
%splat = shufflevector <4 x i32> %amt, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %splat)
|
@ -1309,12 +1309,13 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
; SSE2-NEXT: movss {{.*#+}} xmm4 = xmm2[0],xmm4[1,2,3]
; SSE2-NEXT: movdqa %xmm1, %xmm5
; SSE2-NEXT: psrld %xmm4, %xmm5
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [32,32,32,32]
; SSE2-NEXT: psubd %xmm2, %xmm4
; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3]
; SSE2-NEXT: pslld %xmm3, %xmm0
; SSE2-NEXT: movd %xmm2, %eax
; SSE2-NEXT: movl $32, %ecx
; SSE2-NEXT: subl %eax, %ecx
; SSE2-NEXT: movd %ecx, %xmm4
; SSE2-NEXT: pslld %xmm4, %xmm0
; SSE2-NEXT: por %xmm5, %xmm0
; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pandn %xmm0, %xmm2
; SSE2-NEXT: por %xmm1, %xmm2
@ -1485,12 +1486,13 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
; X32-SSE-NEXT: movss {{.*#+}} xmm4 = xmm2[0],xmm4[1,2,3]
; X32-SSE-NEXT: movdqa %xmm1, %xmm5
; X32-SSE-NEXT: psrld %xmm4, %xmm5
; X32-SSE-NEXT: movdqa {{.*#+}} xmm4 = [32,32,32,32]
; X32-SSE-NEXT: psubd %xmm2, %xmm4
; X32-SSE-NEXT: pcmpeqd %xmm3, %xmm2
; X32-SSE-NEXT: movss {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3]
; X32-SSE-NEXT: pslld %xmm3, %xmm0
; X32-SSE-NEXT: movd %xmm2, %eax
; X32-SSE-NEXT: movl $32, %ecx
; X32-SSE-NEXT: subl %eax, %ecx
; X32-SSE-NEXT: movd %ecx, %xmm4
; X32-SSE-NEXT: pslld %xmm4, %xmm0
; X32-SSE-NEXT: por %xmm5, %xmm0
; X32-SSE-NEXT: pcmpeqd %xmm3, %xmm2
; X32-SSE-NEXT: pand %xmm2, %xmm1
; X32-SSE-NEXT: pandn %xmm0, %xmm2
; X32-SSE-NEXT: por %xmm1, %xmm2
|
@ -828,20 +828,17 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind
define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE2-NEXT: xorps %xmm2, %xmm2
; SSE2-NEXT: pxor %xmm3, %xmm3
; SSE2-NEXT: psubd %xmm1, %xmm3
; SSE2-NEXT: pand {{.*}}(%rip), %xmm3
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3]
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: pslld %xmm1, %xmm4
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [32,32,32,32]
; SSE2-NEXT: psubd %xmm3, %xmm1
; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; SSE2-NEXT: psrld %xmm2, %xmm0
; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: negl %eax
; SSE2-NEXT: andl $31, %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pslld %xmm1, %xmm2
; SSE2-NEXT: movl $32, %ecx
; SSE2-NEXT: subl %eax, %ecx
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: psrld %xmm1, %xmm0
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_funnnel_v4i32:
@ -939,20 +936,17 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind
;
; X32-SSE-LABEL: splatvar_funnnel_v4i32:
; X32-SSE: # %bb.0:
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X32-SSE-NEXT: xorps %xmm2, %xmm2
; X32-SSE-NEXT: pxor %xmm3, %xmm3
; X32-SSE-NEXT: psubd %xmm1, %xmm3
; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm3
; X32-SSE-NEXT: pxor %xmm1, %xmm1
; X32-SSE-NEXT: movss {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3]
; X32-SSE-NEXT: movdqa %xmm0, %xmm4
; X32-SSE-NEXT: pslld %xmm1, %xmm4
; X32-SSE-NEXT: movdqa {{.*#+}} xmm1 = [32,32,32,32]
; X32-SSE-NEXT: psubd %xmm3, %xmm1
; X32-SSE-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; X32-SSE-NEXT: psrld %xmm2, %xmm0
; X32-SSE-NEXT: por %xmm4, %xmm0
; X32-SSE-NEXT: movd %xmm1, %eax
; X32-SSE-NEXT: negl %eax
; X32-SSE-NEXT: andl $31, %eax
; X32-SSE-NEXT: movd %eax, %xmm1
; X32-SSE-NEXT: movdqa %xmm0, %xmm2
; X32-SSE-NEXT: pslld %xmm1, %xmm2
; X32-SSE-NEXT: movl $32, %ecx
; X32-SSE-NEXT: subl %eax, %ecx
; X32-SSE-NEXT: movd %ecx, %xmm1
; X32-SSE-NEXT: psrld %xmm1, %xmm0
; X32-SSE-NEXT: por %xmm2, %xmm0
; X32-SSE-NEXT: retl
%splat = shufflevector <4 x i32> %amt, <4 x i32> undef, <4 x i32> zeroinitializer
%res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %splat)
|
@ -20,9 +20,8 @@ define <4 x i32*> @AGEP0(i32* %ptr) nounwind {
define i32 @AGEP1(<4 x i32*> %param) nounwind {
; CHECK-LABEL: AGEP1:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddd {{\.LCPI.*}}, %xmm0, %xmm0
; CHECK-NEXT: vpextrd $3, %xmm0, %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: vextractps $3, %xmm0, %eax
; CHECK-NEXT: movl 16(%eax), %eax
; CHECK-NEXT: retl
%A2 = getelementptr i32, <4 x i32*> %param, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
%k = extractelement <4 x i32*> %A2, i32 3
|
@ -1120,31 +1120,28 @@ define i16 @test_v8i16(<8 x i16> %a0) {
;
; SSE41-LABEL: test_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
%1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %a0)
@ -1169,11 +1166,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; SSE41-LABEL: test_v16i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pmaxsw %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1181,11 +1177,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1194,11 +1189,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1207,11 +1201,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -1241,11 +1234,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; SSE41-NEXT: pmaxsw %xmm3, %xmm1
; SSE41-NEXT: pmaxsw %xmm2, %xmm0
; SSE41-NEXT: pmaxsw %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1256,11 +1248,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1270,11 +1261,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1285,11 +1275,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -1327,11 +1316,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; SSE41-NEXT: pmaxsw %xmm4, %xmm0
; SSE41-NEXT: pmaxsw %xmm2, %xmm0
; SSE41-NEXT: pmaxsw %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1348,11 +1336,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsw %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1364,11 +1351,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1380,11 +1366,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -1597,38 +1582,35 @@ define i8 @test_v16i8(<16 x i8> %a0) {
;
; SSE41-LABEL: test_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: xorb $127, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: xorb $127, %al
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %a0)
@ -1676,14 +1658,13 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; SSE41-LABEL: test_v32i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pmaxsb %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: xorb $127, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -1691,13 +1672,12 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: xorb $127, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1706,13 +1686,12 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: xorb $127, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1721,13 +1700,12 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -1788,14 +1766,13 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; SSE41-NEXT: pmaxsb %xmm3, %xmm1
; SSE41-NEXT: pmaxsb %xmm2, %xmm0
; SSE41-NEXT: pmaxsb %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: xorb $127, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -1806,13 +1783,12 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: xorb $127, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1822,13 +1798,12 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: xorb $127, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1839,13 +1814,12 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -1930,14 +1904,13 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; SSE41-NEXT: pmaxsb %xmm4, %xmm0
; SSE41-NEXT: pmaxsb %xmm2, %xmm0
; SSE41-NEXT: pmaxsb %xmm1, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $8, %xmm1
; SSE41-NEXT: pminub %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: xorb $127, %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -1954,13 +1927,12 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmaxsb %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: xorb $127, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1972,13 +1944,12 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: xorb $127, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1990,13 +1961,12 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
|
@ -1340,31 +1340,28 @@ define i16 @test_v8i16(<8 x i16> %a0) {
;
; SSE41-LABEL: test_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    phminposuw %xmm0, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    xorl $32767, %eax # imm = 0x7FFF
; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v8i16:
; AVX: # %bb.0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vphminposuw %xmm0, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    xorl $32767, %eax # imm = 0x7FFF
; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8i16:
; AVX512: # %bb.0:
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    retq
%1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %a0)
@ -1389,11 +1386,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; SSE41-LABEL: test_v16i16:
; SSE41: # %bb.0:
; SSE41-NEXT:    pmaxsw %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    phminposuw %xmm0, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    xorl $32767, %eax # imm = 0x7FFF
; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE41-NEXT:    retq
;
@ -1401,11 +1397,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    xorl $32767, %eax # imm = 0x7FFF
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
@ -1414,11 +1409,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    xorl $32767, %eax # imm = 0x7FFF
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
@ -1427,11 +1421,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
@ -1461,11 +1454,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; SSE41-NEXT:    pmaxsw %xmm3, %xmm1
; SSE41-NEXT:    pmaxsw %xmm2, %xmm0
; SSE41-NEXT:    pmaxsw %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    phminposuw %xmm0, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    xorl $32767, %eax # imm = 0x7FFF
; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE41-NEXT:    retq
;
@ -1476,11 +1468,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX1-NEXT:    vpmaxsw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmaxsw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    xorl $32767, %eax # imm = 0x7FFF
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
@ -1490,11 +1481,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX2-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    xorl $32767, %eax # imm = 0x7FFF
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
@ -1505,11 +1495,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
@ -1547,11 +1536,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; SSE41-NEXT:    pmaxsw %xmm4, %xmm0
; SSE41-NEXT:    pmaxsw %xmm2, %xmm0
; SSE41-NEXT:    pmaxsw %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    phminposuw %xmm0, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    xorl $32767, %eax # imm = 0x7FFF
; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE41-NEXT:    retq
;
@ -1568,11 +1556,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX1-NEXT:    vpmaxsw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmaxsw %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    xorl $32767, %eax # imm = 0x7FFF
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
@ -1584,11 +1571,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX2-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    xorl $32767, %eax # imm = 0x7FFF
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
@ -1600,11 +1586,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
@ -1947,38 +1932,35 @@ define i8 @test_v16i8(<16 x i8> %a0) {
;
; SSE41-LABEL: test_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrlw $8, %xmm2
; SSE41-NEXT:    pminub %xmm0, %xmm2
; SSE41-NEXT:    phminposuw %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrlw $8, %xmm1
; SSE41-NEXT:    pminub %xmm0, %xmm1
; SSE41-NEXT:    phminposuw %xmm1, %xmm0
; SSE41-NEXT:    pextrb $0, %xmm0, %eax
; SSE41-NEXT:    xorb $127, %al
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v16i8:
; AVX: # %bb.0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vphminposuw %xmm0, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpextrb $0, %xmm0, %eax
; AVX-NEXT:    xorb $127, %al
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16i8:
; AVX512: # %bb.0:
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    xorb $127, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
%1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %a0)
@ -2026,14 +2008,13 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; SSE41-LABEL: test_v32i8:
; SSE41: # %bb.0:
; SSE41-NEXT:    pmaxsb %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrlw $8, %xmm2
; SSE41-NEXT:    pminub %xmm0, %xmm2
; SSE41-NEXT:    phminposuw %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrlw $8, %xmm1
; SSE41-NEXT:    pminub %xmm0, %xmm1
; SSE41-NEXT:    phminposuw %xmm1, %xmm0
; SSE41-NEXT:    pextrb $0, %xmm0, %eax
; SSE41-NEXT:    xorb $127, %al
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
@ -2041,13 +2022,12 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    xorb $127, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
@ -2056,13 +2036,12 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    xorb $127, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
@ -2071,13 +2050,12 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    xorb $127, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
@ -2138,14 +2116,13 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; SSE41-NEXT:    pmaxsb %xmm3, %xmm1
; SSE41-NEXT:    pmaxsb %xmm2, %xmm0
; SSE41-NEXT:    pmaxsb %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrlw $8, %xmm2
; SSE41-NEXT:    pminub %xmm0, %xmm2
; SSE41-NEXT:    phminposuw %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrlw $8, %xmm1
; SSE41-NEXT:    pminub %xmm0, %xmm1
; SSE41-NEXT:    phminposuw %xmm1, %xmm0
; SSE41-NEXT:    pextrb $0, %xmm0, %eax
; SSE41-NEXT:    xorb $127, %al
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
@ -2156,13 +2133,12 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT:    vpmaxsb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmaxsb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    xorb $127, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
@ -2172,13 +2148,12 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    xorb $127, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
@ -2189,13 +2164,12 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    xorb $127, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
@ -2280,14 +2254,13 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; SSE41-NEXT:    pmaxsb %xmm4, %xmm0
; SSE41-NEXT:    pmaxsb %xmm2, %xmm0
; SSE41-NEXT:    pmaxsb %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrlw $8, %xmm2
; SSE41-NEXT:    pminub %xmm0, %xmm2
; SSE41-NEXT:    phminposuw %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrlw $8, %xmm1
; SSE41-NEXT:    pminub %xmm0, %xmm1
; SSE41-NEXT:    phminposuw %xmm1, %xmm0
; SSE41-NEXT:    pextrb $0, %xmm0, %eax
; SSE41-NEXT:    xorb $127, %al
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
@ -2304,13 +2277,12 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT:    vpmaxsb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmaxsb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    xorb $127, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
@ -2322,13 +2294,12 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    xorb $127, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
@ -2340,13 +2311,12 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    xorb $127, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
@ -1119,31 +1119,28 @@ define i16 @test_v8i16(<8 x i16> %a0) {
;
; SSE41-LABEL: test_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    phminposuw %xmm0, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    xorl $32768, %eax # imm = 0x8000
; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v8i16:
; AVX: # %bb.0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vphminposuw %xmm0, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    xorl $32768, %eax # imm = 0x8000
; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8i16:
; AVX512: # %bb.0:
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    retq
%1 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %a0)
@ -1168,11 +1165,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; SSE41-LABEL: test_v16i16:
; SSE41: # %bb.0:
; SSE41-NEXT:    pminsw %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    phminposuw %xmm0, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    xorl $32768, %eax # imm = 0x8000
; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE41-NEXT:    retq
;
@ -1180,11 +1176,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    xorl $32768, %eax # imm = 0x8000
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
@ -1193,11 +1188,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    xorl $32768, %eax # imm = 0x8000
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
@ -1206,11 +1200,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
@ -1240,11 +1233,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; SSE41-NEXT:    pminsw %xmm3, %xmm1
; SSE41-NEXT:    pminsw %xmm2, %xmm0
; SSE41-NEXT:    pminsw %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    phminposuw %xmm0, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    xorl $32768, %eax # imm = 0x8000
; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE41-NEXT:    retq
;
@ -1255,11 +1247,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX1-NEXT:    vpminsw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpminsw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    xorl $32768, %eax # imm = 0x8000
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
@ -1269,11 +1260,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX2-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    xorl $32768, %eax # imm = 0x8000
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
@ -1284,11 +1274,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
@ -1326,11 +1315,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; SSE41-NEXT:    pminsw %xmm4, %xmm0
; SSE41-NEXT:    pminsw %xmm2, %xmm0
; SSE41-NEXT:    pminsw %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    phminposuw %xmm0, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    xorl $32768, %eax # imm = 0x8000
; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE41-NEXT:    retq
;
@ -1347,11 +1335,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX1-NEXT:    vpminsw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpminsw %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    xorl $32768, %eax # imm = 0x8000
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
@ -1363,11 +1350,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX2-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    xorl $32768, %eax # imm = 0x8000
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
@ -1379,11 +1365,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
@ -1596,38 +1581,35 @@ define i8 @test_v16i8(<16 x i8> %a0) {
;
; SSE41-LABEL: test_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrlw $8, %xmm2
; SSE41-NEXT:    pminub %xmm0, %xmm2
; SSE41-NEXT:    phminposuw %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrlw $8, %xmm1
; SSE41-NEXT:    pminub %xmm0, %xmm1
; SSE41-NEXT:    phminposuw %xmm1, %xmm0
; SSE41-NEXT:    pextrb $0, %xmm0, %eax
; SSE41-NEXT:    xorb $-128, %al
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v16i8:
; AVX: # %bb.0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vphminposuw %xmm0, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpextrb $0, %xmm0, %eax
; AVX-NEXT:    xorb $-128, %al
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16i8:
; AVX512: # %bb.0:
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    xorb $-128, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
%1 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %a0)
@ -1675,14 +1657,13 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; SSE41-LABEL: test_v32i8:
; SSE41: # %bb.0:
; SSE41-NEXT:    pminsb %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrlw $8, %xmm2
; SSE41-NEXT:    pminub %xmm0, %xmm2
; SSE41-NEXT:    phminposuw %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrlw $8, %xmm1
; SSE41-NEXT:    pminub %xmm0, %xmm1
; SSE41-NEXT:    phminposuw %xmm1, %xmm0
; SSE41-NEXT:    pextrb $0, %xmm0, %eax
; SSE41-NEXT:    xorb $-128, %al
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
@ -1690,13 +1671,12 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    xorb $-128, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
@ -1705,13 +1685,12 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    xorb $-128, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
@ -1720,13 +1699,12 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    xorb $-128, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
@ -1787,14 +1765,13 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; SSE41-NEXT:    pminsb %xmm3, %xmm1
; SSE41-NEXT:    pminsb %xmm2, %xmm0
; SSE41-NEXT:    pminsb %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrlw $8, %xmm2
; SSE41-NEXT:    pminub %xmm0, %xmm2
; SSE41-NEXT:    phminposuw %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrlw $8, %xmm1
; SSE41-NEXT:    pminub %xmm0, %xmm1
; SSE41-NEXT:    phminposuw %xmm1, %xmm0
; SSE41-NEXT:    pextrb $0, %xmm0, %eax
; SSE41-NEXT:    xorb $-128, %al
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
@ -1805,13 +1782,12 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT:    vpminsb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpminsb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    xorb $-128, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
@ -1821,13 +1797,12 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    xorb $-128, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
@ -1838,13 +1813,12 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    xorb $-128, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
@ -1929,14 +1903,13 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; SSE41-NEXT:    pminsb %xmm4, %xmm0
; SSE41-NEXT:    pminsb %xmm2, %xmm0
; SSE41-NEXT:    pminsb %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrlw $8, %xmm2
; SSE41-NEXT:    pminub %xmm0, %xmm2
; SSE41-NEXT:    phminposuw %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrlw $8, %xmm1
; SSE41-NEXT:    pminub %xmm0, %xmm1
; SSE41-NEXT:    phminposuw %xmm1, %xmm0
; SSE41-NEXT:    pextrb $0, %xmm0, %eax
; SSE41-NEXT:    xorb $-128, %al
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
@ -1953,13 +1926,12 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT:    vpminsb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpminsb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    xorb $-128, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
@ -1971,13 +1943,12 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    xorb $-128, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
@ -1989,13 +1960,12 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX512-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    xorb $-128, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
@ -1339,31 +1339,28 @@ define i16 @test_v8i16(<8 x i16> %a0) {
;
; SSE41-LABEL: test_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    phminposuw %xmm0, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    xorl $32768, %eax # imm = 0x8000
; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v8i16:
; AVX: # %bb.0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vphminposuw %xmm0, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    xorl $32768, %eax # imm = 0x8000
; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8i16:
; AVX512: # %bb.0:
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    retq
%1 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %a0)
@ -1388,11 +1385,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; SSE41-LABEL: test_v16i16:
; SSE41: # %bb.0:
; SSE41-NEXT:    pminsw %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    phminposuw %xmm0, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    xorl $32768, %eax # imm = 0x8000
; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE41-NEXT:    retq
;
@ -1400,11 +1396,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    xorl $32768, %eax # imm = 0x8000
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
@ -1413,11 +1408,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    xorl $32768, %eax # imm = 0x8000
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
@ -1426,11 +1420,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
@ -1460,11 +1453,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; SSE41-NEXT:    pminsw %xmm3, %xmm1
; SSE41-NEXT:    pminsw %xmm2, %xmm0
; SSE41-NEXT:    pminsw %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    phminposuw %xmm0, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    xorl $32768, %eax # imm = 0x8000
; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE41-NEXT:    retq
;
@ -1475,11 +1467,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX1-NEXT:    vpminsw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpminsw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    xorl $32768, %eax # imm = 0x8000
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
@ -1489,11 +1480,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX2-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    xorl $32768, %eax # imm = 0x8000
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
@ -1504,11 +1494,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
@ -1546,11 +1535,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; SSE41-NEXT:    pminsw %xmm4, %xmm0
; SSE41-NEXT:    pminsw %xmm2, %xmm0
; SSE41-NEXT:    pminsw %xmm1, %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    phminposuw %xmm0, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    xorl $32768, %eax # imm = 0x8000
; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE41-NEXT:    retq
;
@ -1567,11 +1555,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX1-NEXT:    vpminsw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpminsw %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    xorl $32768, %eax # imm = 0x8000
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
@ -1583,11 +1570,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX2-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    xorl $32768, %eax # imm = 0x8000
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
@ -1599,11 +1585,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
@ -1946,38 +1931,35 @@ define i8 @test_v16i8(<16 x i8> %a0) {
;
; SSE41-LABEL: test_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrlw $8, %xmm2
; SSE41-NEXT:    pminub %xmm0, %xmm2
; SSE41-NEXT:    phminposuw %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrlw $8, %xmm1
; SSE41-NEXT:    pminub %xmm0, %xmm1
; SSE41-NEXT:    phminposuw %xmm1, %xmm0
; SSE41-NEXT:    pextrb $0, %xmm0, %eax
; SSE41-NEXT:    xorb $-128, %al
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v16i8:
; AVX: # %bb.0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm2
; AVX-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vphminposuw %xmm0, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpextrb $0, %xmm0, %eax
; AVX-NEXT:    xorb $-128, %al
|
||||
; AVX-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: test_v16i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
|
||||
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
|
||||
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX512-NEXT: xorb $-128, %al
|
||||
; AVX512-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512-NEXT: retq
|
||||
%1 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %a0)
|
||||
@ -2025,14 +2007,13 @@ define i8 @test_v32i8(<32 x i8> %a0) {
|
||||
; SSE41-LABEL: test_v32i8:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pminsb %xmm1, %xmm0
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE41-NEXT: psrlw $8, %xmm2
|
||||
; SSE41-NEXT: pminub %xmm0, %xmm2
|
||||
; SSE41-NEXT: phminposuw %xmm2, %xmm0
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm0
|
||||
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: psrlw $8, %xmm1
|
||||
; SSE41-NEXT: pminub %xmm0, %xmm1
|
||||
; SSE41-NEXT: phminposuw %xmm1, %xmm0
|
||||
; SSE41-NEXT: pextrb $0, %xmm0, %eax
|
||||
; SSE41-NEXT: xorb $-128, %al
|
||||
; SSE41-NEXT: # kill: def $al killed $al killed $eax
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -2040,13 +2021,12 @@ define i8 @test_v32i8(<32 x i8> %a0) {
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
|
||||
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX1-NEXT: xorb $-128, %al
|
||||
; AVX1-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX1-NEXT: vzeroupper
|
||||
; AVX1-NEXT: retq
|
||||
@ -2055,13 +2035,12 @@ define i8 @test_v32i8(<32 x i8> %a0) {
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
|
||||
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
|
||||
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX2-NEXT: xorb $-128, %al
|
||||
; AVX2-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
@ -2070,13 +2049,12 @@ define i8 @test_v32i8(<32 x i8> %a0) {
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
|
||||
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
|
||||
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX512-NEXT: xorb $-128, %al
|
||||
; AVX512-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
|
||||
@ -2137,14 +2115,13 @@ define i8 @test_v64i8(<64 x i8> %a0) {
|
||||
; SSE41-NEXT: pminsb %xmm3, %xmm1
|
||||
; SSE41-NEXT: pminsb %xmm2, %xmm0
|
||||
; SSE41-NEXT: pminsb %xmm1, %xmm0
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE41-NEXT: psrlw $8, %xmm2
|
||||
; SSE41-NEXT: pminub %xmm0, %xmm2
|
||||
; SSE41-NEXT: phminposuw %xmm2, %xmm0
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm0
|
||||
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: psrlw $8, %xmm1
|
||||
; SSE41-NEXT: pminub %xmm0, %xmm1
|
||||
; SSE41-NEXT: phminposuw %xmm1, %xmm0
|
||||
; SSE41-NEXT: pextrb $0, %xmm0, %eax
|
||||
; SSE41-NEXT: xorb $-128, %al
|
||||
; SSE41-NEXT: # kill: def $al killed $al killed $eax
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -2155,13 +2132,12 @@ define i8 @test_v64i8(<64 x i8> %a0) {
|
||||
; AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
|
||||
; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
|
||||
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX1-NEXT: xorb $-128, %al
|
||||
; AVX1-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX1-NEXT: vzeroupper
|
||||
; AVX1-NEXT: retq
|
||||
@ -2171,13 +2147,12 @@ define i8 @test_v64i8(<64 x i8> %a0) {
|
||||
; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
|
||||
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
|
||||
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX2-NEXT: xorb $-128, %al
|
||||
; AVX2-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
@ -2188,13 +2163,12 @@ define i8 @test_v64i8(<64 x i8> %a0) {
|
||||
; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
|
||||
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
|
||||
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX512-NEXT: xorb $-128, %al
|
||||
; AVX512-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
|
||||
@ -2279,14 +2253,13 @@ define i8 @test_v128i8(<128 x i8> %a0) {
|
||||
; SSE41-NEXT: pminsb %xmm4, %xmm0
|
||||
; SSE41-NEXT: pminsb %xmm2, %xmm0
|
||||
; SSE41-NEXT: pminsb %xmm1, %xmm0
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE41-NEXT: psrlw $8, %xmm2
|
||||
; SSE41-NEXT: pminub %xmm0, %xmm2
|
||||
; SSE41-NEXT: phminposuw %xmm2, %xmm0
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm0
|
||||
; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE41-NEXT: psrlw $8, %xmm1
|
||||
; SSE41-NEXT: pminub %xmm0, %xmm1
|
||||
; SSE41-NEXT: phminposuw %xmm1, %xmm0
|
||||
; SSE41-NEXT: pextrb $0, %xmm0, %eax
|
||||
; SSE41-NEXT: xorb $-128, %al
|
||||
; SSE41-NEXT: # kill: def $al killed $al killed $eax
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
@ -2303,13 +2276,12 @@ define i8 @test_v128i8(<128 x i8> %a0) {
|
||||
; AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpminsb %xmm4, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
|
||||
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX1-NEXT: xorb $-128, %al
|
||||
; AVX1-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX1-NEXT: vzeroupper
|
||||
; AVX1-NEXT: retq
|
||||
@ -2321,13 +2293,12 @@ define i8 @test_v128i8(<128 x i8> %a0) {
|
||||
; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
|
||||
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
|
||||
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX2-NEXT: xorb $-128, %al
|
||||
; AVX2-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
@ -2339,13 +2310,12 @@ define i8 @test_v128i8(<128 x i8> %a0) {
|
||||
; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
|
||||
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
|
||||
; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vphminposuw %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX512-NEXT: xorb $-128, %al
|
||||
; AVX512-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
@ -1166,8 +1166,8 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1212,8 +1212,8 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pmaxsw %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1269,18 +1269,18 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: notl %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1289,8 +1289,8 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: notl %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
;
@ -1299,8 +1299,8 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: notl %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -1309,8 +1309,8 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: retq
%1 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %a0)
@ -1339,8 +1339,8 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1348,10 +1348,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: pmaxuw %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: notl %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1362,8 +1362,8 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: notl %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1375,8 +1375,8 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: notl %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1387,8 +1387,8 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512BW-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: notl %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -1399,8 +1399,8 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -1434,8 +1434,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1445,10 +1445,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; SSE41-NEXT: pmaxuw %xmm2, %xmm0
; SSE41-NEXT: pmaxuw %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: notl %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1462,8 +1462,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: notl %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1476,8 +1476,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: notl %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1490,8 +1490,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512BW-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: notl %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -1504,8 +1504,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -1547,8 +1547,8 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: pmaxsw %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1562,10 +1562,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; SSE41-NEXT: pmaxuw %xmm2, %xmm0
; SSE41-NEXT: pmaxuw %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: notl %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1585,8 +1585,8 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: notl %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1601,8 +1601,8 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: notl %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1616,8 +1616,8 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512BW-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: notl %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -1631,8 +1631,8 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -1806,13 +1806,13 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; SSE41-LABEL: test_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminub %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -1820,11 +1820,11 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: notb %al
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@ -1835,8 +1835,8 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -1847,8 +1847,8 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %a0)
@ -1877,13 +1877,13 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: pmaxub %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminub %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -1893,11 +1893,11 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: notb %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1908,11 +1908,11 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: notb %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1925,8 +1925,8 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -1939,8 +1939,8 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -1974,13 +1974,13 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; SSE41-NEXT: pmaxub %xmm2, %xmm0
; SSE41-NEXT: pmaxub %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminub %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -1993,11 +1993,11 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: notb %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -2009,11 +2009,11 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: notb %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -2028,8 +2028,8 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -2044,8 +2044,8 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -2087,13 +2087,13 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; SSE41-NEXT: pmaxub %xmm2, %xmm0
; SSE41-NEXT: pmaxub %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminub %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -2112,11 +2112,11 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT: vpmaxub %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: notb %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -2130,11 +2130,11 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: notb %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -2150,8 +2150,8 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -2167,8 +2167,8 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -1389,18 +1389,18 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: notl %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1409,8 +1409,8 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: notl %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
;
@ -1419,8 +1419,8 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: notl %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -1429,8 +1429,8 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: retq
%1 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %a0)
@ -1459,8 +1459,8 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1468,10 +1468,10 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: pmaxuw %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: notl %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1482,8 +1482,8 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: notl %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1495,8 +1495,8 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: notl %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1507,8 +1507,8 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512BW-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: notl %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -1519,8 +1519,8 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -1554,8 +1554,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1565,10 +1565,10 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; SSE41-NEXT: pmaxuw %xmm2, %xmm0
; SSE41-NEXT: pmaxuw %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: notl %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1582,8 +1582,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: notl %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1596,8 +1596,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: notl %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1610,8 +1610,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512BW-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: notl %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -1624,8 +1624,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -1667,8 +1667,8 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: pmaxsw %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1682,10 +1682,10 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; SSE41-NEXT: pmaxuw %xmm2, %xmm0
; SSE41-NEXT: pmaxuw %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: phminposuw %xmm1, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: notl %eax
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: retq
;
@ -1705,8 +1705,8 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: notl %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1721,8 +1721,8 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: notl %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1736,8 +1736,8 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512BW-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: notl %eax
; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -1751,8 +1751,8 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -2052,13 +2052,13 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; SSE41-LABEL: test_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminub %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -2066,11 +2066,11 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX: # %bb.0:
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT: vphminposuw %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: notb %al
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@ -2081,8 +2081,8 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -2093,8 +2093,8 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: retq
%1 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %a0)
@ -2123,13 +2123,13 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: pmaxub %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminub %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -2139,11 +2139,11 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: notb %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -2154,11 +2154,11 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: notb %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -2171,8 +2171,8 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -2185,8 +2185,8 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -2220,13 +2220,13 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; SSE41-NEXT: pmaxub %xmm2, %xmm0
; SSE41-NEXT: pmaxub %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminub %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -2239,11 +2239,11 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: notb %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -2255,11 +2255,11 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: notb %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -2274,8 +2274,8 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@ -2290,8 +2290,8 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -2333,13 +2333,13 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; SSE41-NEXT: pmaxub %xmm2, %xmm0
; SSE41-NEXT: pmaxub %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: pminub %xmm0, %xmm2
; SSE41-NEXT: phminposuw %xmm2, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: pminub %xmm1, %xmm0
; SSE41-NEXT: phminposuw %xmm0, %xmm0
; SSE41-NEXT: pextrb $0, %xmm0, %eax
; SSE41-NEXT: notb %al
; SSE41-NEXT: # kill: def $al killed $al killed $eax
; SSE41-NEXT: retq
;
@ -2358,11 +2358,11 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX1-NEXT: vpmaxub %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vphminposuw %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpextrb $0, %xmm0, %eax
; AVX1-NEXT: notb %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -2376,11 +2376,11 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vphminposuw %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: notb %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -2396,8 +2396,8 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX512BW-NEXT: notb %al
|
||||
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
@ -2413,8 +2413,8 @@ define i8 @test_v128i8(<128 x i8> %a0) {
|
||||
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX512VL-NEXT: notb %al
|
||||
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512VL-NEXT: vzeroupper
|
||||
; AVX512VL-NEXT: retq
|
||||
|
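Every one of the byte-reduction hunks above changes in the same way: the vector NOT (a vpxor against an all-ones register, or vpternlogq $15 on AVX512) that used to sit between vphminposuw and the vpextrb is now a single scalar notb on %al after the extract. A minimal LLVM IR sketch of the shape being rewritten, using a hypothetical standalone function rather than the reduce-intrinsic tests in this file:

define i8 @not_then_extract(<16 x i8> %x) {
  ; Before the combine: NOT the whole vector, then extract lane 0.
  %not = xor <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %e = extractelement <16 x i8> %not, i32 0
  ; After the combine (at the DAG level): extract lane 0, then NOT the
  ; scalar, which is what the new notb %al lines show.
  ret i8 %e
}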
@ -1165,8 +1165,8 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1211,8 +1211,8 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pminsw %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1268,8 +1268,8 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1319,8 +1319,8 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1391,8 +1391,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1479,8 +1479,8 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: pminsw %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1388,8 +1388,8 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1439,8 +1439,8 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1511,8 +1511,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: pminsw %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1599,8 +1599,8 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; SSE2-NEXT: psrld $16, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: pminsw %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm8, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
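The SSE2 word-reduction hunks in the two files above show the same scalarization for xor: the final sign-bit flip, previously a vector pxor with a splat of 0x8000 ahead of the movd, becomes the scalar xorl $32768, %eax after it. A sketch of the underlying pattern, with a hypothetical function name standing in for the reduce-intrinsic tests:

define i16 @signflip_then_extract(<8 x i16> %x) {
  ; xor with a splat of the sign bit, then extract lane 0 ...
  %flip = xor <8 x i16> %x, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
  %e = extractelement <8 x i16> %flip, i32 0
  ; ... becomes extract lane 0, then a scalar xor of bit 15 (xorl $32768).
  ret i16 %e
}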
@ -738,18 +738,16 @@ define <2 x i64> @splatvar_rotate_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
define <4 x i32> @splatvar_rotate_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: splatvar_rotate_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: xorps %xmm2, %xmm2
; SSE2-NEXT: xorps %xmm3, %xmm3
; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: pslld %xmm3, %xmm4
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [32,32,32,32]
; SSE2-NEXT: psubd %xmm1, %xmm3
; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3]
; SSE2-NEXT: psrld %xmm2, %xmm0
; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: andl $31, %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pslld %xmm1, %xmm2
; SSE2-NEXT: movl $32, %ecx
; SSE2-NEXT: subl %eax, %ecx
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: psrld %xmm1, %xmm0
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_rotate_v4i32:
@ -837,18 +835,16 @@ define <4 x i32> @splatvar_rotate_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
;
; X32-SSE-LABEL: splatvar_rotate_v4i32:
; X32-SSE: # %bb.0:
; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1
; X32-SSE-NEXT: xorps %xmm2, %xmm2
; X32-SSE-NEXT: xorps %xmm3, %xmm3
; X32-SSE-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; X32-SSE-NEXT: movdqa %xmm0, %xmm4
; X32-SSE-NEXT: pslld %xmm3, %xmm4
; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [32,32,32,32]
; X32-SSE-NEXT: psubd %xmm1, %xmm3
; X32-SSE-NEXT: movss {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3]
; X32-SSE-NEXT: psrld %xmm2, %xmm0
; X32-SSE-NEXT: por %xmm4, %xmm0
; X32-SSE-NEXT: movd %xmm1, %eax
; X32-SSE-NEXT: andl $31, %eax
; X32-SSE-NEXT: movd %eax, %xmm1
; X32-SSE-NEXT: movdqa %xmm0, %xmm2
; X32-SSE-NEXT: pslld %xmm1, %xmm2
; X32-SSE-NEXT: movl $32, %ecx
; X32-SSE-NEXT: subl %eax, %ecx
; X32-SSE-NEXT: movd %ecx, %xmm1
; X32-SSE-NEXT: psrld %xmm1, %xmm0
; X32-SSE-NEXT: por %xmm2, %xmm0
; X32-SSE-NEXT: retl
%splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
%splat32 = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %splat
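For the splatvar rotates, the amount computation moves out of the vector unit: instead of blending the splatted amount and 32-minus-amount into xmm registers with movss and psubd, the new code extracts the amount once (movd %xmm1, %eax), masks and subtracts in GPRs (andl $31; movl $32; subl), and feeds the scalar counts straight to pslld/psrld. Only the tail of the test body is visible in the hunk; a hedged reconstruction, assuming the usual shl/lshr/or rotate expansion around the two IR lines that do appear:

define <4 x i32> @splatvar_rotate_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
  ; Splat the rotate amount and form 32 - amount (these two lines are in the diff).
  %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
  %splat32 = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %splat
  ; Assumed continuation: rotate left via shift, shift, or.
  %shl = shl <4 x i32> %a, %splat
  %lshr = lshr <4 x i32> %a, %splat32
  %rot = or <4 x i32> %shl, %lshr
  ret <4 x i32> %rot
}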
@ -420,26 +420,14 @@ define i32 @PR17487(i1 %tobool) {
; X64-LIN-LABEL: PR17487:
; X64-LIN: # %bb.0:
; X64-LIN-NEXT: movd %edi, %xmm0
; X64-LIN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X64-LIN-NEXT: pandn {{.*}}(%rip), %xmm0
; X64-LIN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-LIN-NEXT: movq %xmm0, %rcx
; X64-LIN-NEXT: xorl %eax, %eax
; X64-LIN-NEXT: cmpq $1, %rcx
; X64-LIN-NEXT: setne %al
; X64-LIN-NEXT: pextrw $0, %xmm0, %eax
; X64-LIN-NEXT: andl $1, %eax
; X64-LIN-NEXT: retq
;
; X64-WIN-LABEL: PR17487:
; X64-WIN: # %bb.0:
; X64-WIN-NEXT: andb $1, %cl
; X64-WIN-NEXT: movzbl %cl, %eax
; X64-WIN-NEXT: movd %eax, %xmm0
; X64-WIN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X64-WIN-NEXT: pandn __xmm@{{.*}}(%rip), %xmm0
; X64-WIN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-WIN-NEXT: movq %xmm0, %rcx
; X64-WIN-NEXT: xorl %eax, %eax
; X64-WIN-NEXT: cmpq $1, %rcx
; X64-WIN-NEXT: setne %al
; X64-WIN-NEXT: retq
%tmp = insertelement <2 x i1> undef, i1 %tobool, i32 1
%tmp1 = zext <2 x i1> %tmp to <2 x i64>
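PR17487 benefits the same way: the old code moved a whole qword of the pandn result to a GPR and compared it against 1 with cmpq/setne, while the new code extracts the interesting lane (pextrw $0) and masks it with a scalar andl $1. A reduced sketch of an extract of a vector logic op being scalarized, with hypothetical names since the test's own body is truncated in the hunk:

define i32 @extract_of_and(<2 x i64> %x) {
  ; Vector AND followed by an extract of lane 1 ...
  %masked = and <2 x i64> %x, <i64 1, i64 1>
  %e = extractelement <2 x i64> %masked, i32 1
  ; ... can instead extract lane 1 and AND in a GPR (andl $1, %eax).
  %t = trunc i64 %e to i32
  ret i32 %t
}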