mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[X86][SSE] Add X86ISD::PACKSS\PACKUS to SimplifyDemandedVectorEltsForTargetNode vector splitting
llvm-svn: 359673
This commit is contained in:
parent
baa9adae36
commit
8ad8285736
@ -33410,15 +33410,21 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
|
||||
case X86ISD::PSHUFB:
|
||||
case X86ISD::UNPCKL:
|
||||
case X86ISD::UNPCKH:
|
||||
// Saturated Packs.
|
||||
case X86ISD::PACKSS:
|
||||
case X86ISD::PACKUS:
|
||||
// Horizontal Ops.
|
||||
case X86ISD::HADD:
|
||||
case X86ISD::HSUB:
|
||||
case X86ISD::FHADD:
|
||||
case X86ISD::FHSUB: {
|
||||
SDLoc DL(Op);
|
||||
MVT ExtVT = VT.getSimpleVT();
|
||||
ExtVT = MVT::getVectorVT(ExtVT.getScalarType(),
|
||||
128 / ExtVT.getScalarSizeInBits());
|
||||
SDValue Ext0 = extract128BitVector(Op.getOperand(0), 0, TLO.DAG, DL);
|
||||
SDValue Ext1 = extract128BitVector(Op.getOperand(1), 0, TLO.DAG, DL);
|
||||
SDValue ExtOp = TLO.DAG.getNode(Opc, DL, Ext0.getValueType(), Ext0, Ext1);
|
||||
SDValue ExtOp = TLO.DAG.getNode(Opc, DL, ExtVT, Ext0, Ext1);
|
||||
SDValue UndefVec = TLO.DAG.getUNDEF(VT);
|
||||
SDValue Insert = insert128BitVector(UndefVec, ExtOp, 0, TLO.DAG, DL);
|
||||
return TLO.CombineTo(Op, Insert);
|
||||
|
@ -2090,41 +2090,39 @@ define i8 @test_v32i8(<32 x i8> %a0) {
|
||||
;
|
||||
; AVX2-LABEL: test_v32i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
|
||||
; AVX2-NEXT: vpmullw %ymm3, %ymm1, %ymm3
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX2-NEXT: vpand %ymm1, %ymm3, %ymm3
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
|
||||
; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX2-NEXT: vpmullw %xmm2, %xmm3, %xmm2
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm3
|
||||
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm3
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm2
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX2-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX2-NEXT: vzeroupper
|
||||
@ -2188,41 +2186,39 @@ define i8 @test_v32i8(<32 x i8> %a0) {
|
||||
;
|
||||
; AVX512DQ-LABEL: test_v32i8:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm3, %ymm1, %ymm3
|
||||
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQ-NEXT: vpand %ymm1, %ymm3, %ymm3
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQ-NEXT: vpmullw %xmm2, %xmm3, %xmm2
|
||||
; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
|
||||
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsrld $16, %xmm0, %xmm3
|
||||
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512DQ-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsrlw $8, %xmm0, %xmm3
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpsrld $16, %xmm0, %xmm2
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
@ -2231,40 +2227,38 @@ define i8 @test_v32i8(<32 x i8> %a0) {
|
||||
; AVX512DQVL-LABEL: test_v32i8:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm3, %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm2, %xmm3, %xmm2
|
||||
; AVX512DQVL-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpsrld $16, %xmm0, %xmm2
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpsrlw $8, %xmm0, %xmm2
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512DQVL-NEXT: vzeroupper
|
||||
@ -2459,48 +2453,47 @@ define i8 @test_v64i8(<64 x i8> %a0) {
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX2-NEXT: vpmullw %ymm2, %ymm3, %ymm3
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX2-NEXT: vpand %ymm2, %ymm3, %ymm3
|
||||
; AVX2-NEXT: vpmullw %ymm2, %ymm3, %ymm2
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX2-NEXT: vpmullw %ymm4, %ymm3, %ymm3
|
||||
; AVX2-NEXT: vpand %ymm2, %ymm3, %ymm3
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX2-NEXT: vpmullw %xmm2, %xmm3, %xmm2
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm3
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm2
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm3
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX2-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX2-NEXT: vzeroupper
|
||||
@ -2614,48 +2607,47 @@ define i8 @test_v64i8(<64 x i8> %a0) {
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm2, %ymm3, %ymm3
|
||||
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQ-NEXT: vpand %ymm2, %ymm3, %ymm3
|
||||
; AVX512DQ-NEXT: vpmullw %ymm2, %ymm3, %ymm2
|
||||
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm4, %ymm3, %ymm3
|
||||
; AVX512DQ-NEXT: vpand %ymm2, %ymm3, %ymm3
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQ-NEXT: vpmullw %xmm2, %xmm3, %xmm2
|
||||
; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
|
||||
; AVX512DQ-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsrld $16, %xmm0, %xmm3
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpsrld $16, %xmm0, %xmm2
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsrlw $8, %xmm0, %xmm3
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
@ -2674,39 +2666,38 @@ define i8 @test_v64i8(<64 x i8> %a0) {
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm4, %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm2, %xmm3, %xmm2
|
||||
; AVX512DQVL-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpsrld $16, %xmm0, %xmm2
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpsrlw $8, %xmm0, %xmm2
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512DQVL-NEXT: vzeroupper
|
||||
@ -3019,66 +3010,65 @@ define i8 @test_v128i8(<128 x i8> %a0) {
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX2-NEXT: vpmullw %ymm4, %ymm5, %ymm5
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm5, %ymm5
|
||||
; AVX2-NEXT: vpmullw %ymm4, %ymm5, %ymm4
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm5, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %ymm5, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm3[8],ymm0[8],ymm3[9],ymm0[9],ymm3[10],ymm0[10],ymm3[11],ymm0[11],ymm3[12],ymm0[12],ymm3[13],ymm0[13],ymm3[14],ymm0[14],ymm3[15],ymm0[15],ymm3[24],ymm0[24],ymm3[25],ymm0[25],ymm3[26],ymm0[26],ymm3[27],ymm0[27],ymm3[28],ymm0[28],ymm3[29],ymm0[29],ymm3[30],ymm0[30],ymm3[31],ymm0[31]
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX2-NEXT: vpmullw %ymm2, %ymm5, %ymm2
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX2-NEXT: vpmullw %ymm2, %ymm4, %ymm2
|
||||
; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0],ymm0[0],ymm3[1],ymm0[1],ymm3[2],ymm0[2],ymm3[3],ymm0[3],ymm3[4],ymm0[4],ymm3[5],ymm0[5],ymm3[6],ymm0[6],ymm3[7],ymm0[7],ymm3[16],ymm0[16],ymm3[17],ymm0[17],ymm3[18],ymm0[18],ymm3[19],ymm0[19],ymm3[20],ymm0[20],ymm3[21],ymm0[21],ymm3[22],ymm0[22],ymm3[23],ymm0[23]
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX2-NEXT: vpmullw %ymm3, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpand %ymm5, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpackuswb %ymm2, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX2-NEXT: vpmullw %ymm2, %ymm3, %ymm2
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %ymm5, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX2-NEXT: vpmullw %ymm3, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX2-NEXT: vpmullw %xmm2, %xmm3, %xmm2
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm2
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX2-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX2-NEXT: vzeroupper
|
||||
@ -3210,66 +3200,65 @@ define i8 @test_v128i8(<128 x i8> %a0) {
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm4, %ymm5, %ymm5
|
||||
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQ-NEXT: vpand %ymm4, %ymm5, %ymm5
|
||||
; AVX512DQ-NEXT: vpmullw %ymm4, %ymm5, %ymm4
|
||||
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQ-NEXT: vpand %ymm5, %ymm4, %ymm4
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm5, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %ymm5, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm3[8],ymm0[8],ymm3[9],ymm0[9],ymm3[10],ymm0[10],ymm3[11],ymm0[11],ymm3[12],ymm0[12],ymm3[13],ymm0[13],ymm3[14],ymm0[14],ymm3[15],ymm0[15],ymm3[24],ymm0[24],ymm3[25],ymm0[25],ymm3[26],ymm0[26],ymm3[27],ymm0[27],ymm3[28],ymm0[28],ymm3[29],ymm0[29],ymm3[30],ymm0[30],ymm3[31],ymm0[31]
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm2, %ymm5, %ymm2
|
||||
; AVX512DQ-NEXT: vpand %ymm4, %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm2, %ymm4, %ymm2
|
||||
; AVX512DQ-NEXT: vpand %ymm5, %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0],ymm0[0],ymm3[1],ymm0[1],ymm3[2],ymm0[2],ymm3[3],ymm0[3],ymm3[4],ymm0[4],ymm3[5],ymm0[5],ymm3[6],ymm0[6],ymm3[7],ymm0[7],ymm3[16],ymm0[16],ymm3[17],ymm0[17],ymm3[18],ymm0[18],ymm3[19],ymm0[19],ymm3[20],ymm0[20],ymm3[21],ymm0[21],ymm3[22],ymm0[22],ymm3[23],ymm0[23]
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm3, %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vpand %ymm4, %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vpand %ymm5, %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm2, %ymm3, %ymm2
|
||||
; AVX512DQ-NEXT: vpand %ymm4, %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpand %ymm5, %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %ymm5, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm3, %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpand %ymm4, %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQ-NEXT: vpmullw %xmm2, %xmm3, %xmm2
|
||||
; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpsrld $16, %xmm0, %xmm2
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsrlw $8, %xmm0, %xmm2
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
@ -3279,66 +3268,65 @@ define i8 @test_v128i8(<128 x i8> %a0) {
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm4, %ymm5, %ymm5
|
||||
; AVX512DQVL-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQVL-NEXT: vpand %ymm4, %ymm5, %ymm5
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm4, %ymm5, %ymm4
|
||||
; AVX512DQVL-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQVL-NEXT: vpand %ymm5, %ymm4, %ymm4
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm5, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm5, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm3[8],ymm0[8],ymm3[9],ymm0[9],ymm3[10],ymm0[10],ymm3[11],ymm0[11],ymm3[12],ymm0[12],ymm3[13],ymm0[13],ymm3[14],ymm0[14],ymm3[15],ymm0[15],ymm3[24],ymm0[24],ymm3[25],ymm0[25],ymm3[26],ymm0[26],ymm3[27],ymm0[27],ymm3[28],ymm0[28],ymm3[29],ymm0[29],ymm3[30],ymm0[30],ymm3[31],ymm0[31]
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm5, %ymm2
|
||||
; AVX512DQVL-NEXT: vpand %ymm4, %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm4, %ymm2
|
||||
; AVX512DQVL-NEXT: vpand %ymm5, %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0],ymm0[0],ymm3[1],ymm0[1],ymm3[2],ymm0[2],ymm3[3],ymm0[3],ymm3[4],ymm0[4],ymm3[5],ymm0[5],ymm3[6],ymm0[6],ymm3[7],ymm0[7],ymm3[16],ymm0[16],ymm3[17],ymm0[17],ymm3[18],ymm0[18],ymm3[19],ymm0[19],ymm3[20],ymm0[20],ymm3[21],ymm0[21],ymm3[22],ymm0[22],ymm3[23],ymm0[23]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm3, %ymm1, %ymm1
|
||||
; AVX512DQVL-NEXT: vpand %ymm4, %ymm1, %ymm1
|
||||
; AVX512DQVL-NEXT: vpand %ymm5, %ymm1, %ymm1
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm2, %ymm1, %ymm1
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm3, %ymm2
|
||||
; AVX512DQVL-NEXT: vpand %ymm4, %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpand %ymm5, %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm5, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm3, %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpand %ymm4, %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm2, %xmm3, %xmm2
|
||||
; AVX512DQVL-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpsrld $16, %xmm0, %xmm2
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpsrlw $8, %xmm0, %xmm2
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512DQVL-NEXT: vzeroupper
|
||||
|
@ -2042,41 +2042,39 @@ define i8 @test_v32i8(<32 x i8> %a0) {
|
||||
;
|
||||
; AVX2-LABEL: test_v32i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
|
||||
; AVX2-NEXT: vpmullw %ymm3, %ymm1, %ymm3
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX2-NEXT: vpand %ymm1, %ymm3, %ymm3
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
|
||||
; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX2-NEXT: vpmullw %xmm2, %xmm3, %xmm2
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm3
|
||||
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm3
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm2
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX2-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX2-NEXT: vzeroupper
|
||||
@ -2140,41 +2138,39 @@ define i8 @test_v32i8(<32 x i8> %a0) {
|
||||
;
|
||||
; AVX512DQ-LABEL: test_v32i8:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm3, %ymm1, %ymm3
|
||||
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQ-NEXT: vpand %ymm1, %ymm3, %ymm3
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQ-NEXT: vpmullw %xmm2, %xmm3, %xmm2
|
||||
; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
|
||||
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsrld $16, %xmm0, %xmm3
|
||||
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512DQ-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsrlw $8, %xmm0, %xmm3
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpsrld $16, %xmm0, %xmm2
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
@ -2183,40 +2179,38 @@ define i8 @test_v32i8(<32 x i8> %a0) {
|
||||
; AVX512DQVL-LABEL: test_v32i8:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm3, %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm2, %xmm3, %xmm2
|
||||
; AVX512DQVL-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpsrld $16, %xmm0, %xmm2
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpsrlw $8, %xmm0, %xmm2
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512DQVL-NEXT: vzeroupper
|
||||
@ -2411,48 +2405,47 @@ define i8 @test_v64i8(<64 x i8> %a0) {
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX2-NEXT: vpmullw %ymm2, %ymm3, %ymm3
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX2-NEXT: vpand %ymm2, %ymm3, %ymm3
|
||||
; AVX2-NEXT: vpmullw %ymm2, %ymm3, %ymm2
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX2-NEXT: vpmullw %ymm4, %ymm3, %ymm3
|
||||
; AVX2-NEXT: vpand %ymm2, %ymm3, %ymm3
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX2-NEXT: vpmullw %xmm2, %xmm3, %xmm2
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm3
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm2
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm3
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX2-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX2-NEXT: vzeroupper
|
||||
@ -2566,48 +2559,47 @@ define i8 @test_v64i8(<64 x i8> %a0) {
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm2, %ymm3, %ymm3
|
||||
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQ-NEXT: vpand %ymm2, %ymm3, %ymm3
|
||||
; AVX512DQ-NEXT: vpmullw %ymm2, %ymm3, %ymm2
|
||||
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm4, %ymm3, %ymm3
|
||||
; AVX512DQ-NEXT: vpand %ymm2, %ymm3, %ymm3
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQ-NEXT: vpmullw %xmm2, %xmm3, %xmm2
|
||||
; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
|
||||
; AVX512DQ-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsrld $16, %xmm0, %xmm3
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpsrld $16, %xmm0, %xmm2
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsrlw $8, %xmm0, %xmm3
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
@ -2626,39 +2618,38 @@ define i8 @test_v64i8(<64 x i8> %a0) {
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm4, %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm2, %xmm3, %xmm2
|
||||
; AVX512DQVL-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpsrld $16, %xmm0, %xmm2
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpsrlw $8, %xmm0, %xmm2
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm3, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512DQVL-NEXT: vzeroupper
|
||||
@ -2971,66 +2962,65 @@ define i8 @test_v128i8(<128 x i8> %a0) {
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX2-NEXT: vpmullw %ymm4, %ymm5, %ymm5
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm5, %ymm5
|
||||
; AVX2-NEXT: vpmullw %ymm4, %ymm5, %ymm4
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm5, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %ymm5, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm3[8],ymm0[8],ymm3[9],ymm0[9],ymm3[10],ymm0[10],ymm3[11],ymm0[11],ymm3[12],ymm0[12],ymm3[13],ymm0[13],ymm3[14],ymm0[14],ymm3[15],ymm0[15],ymm3[24],ymm0[24],ymm3[25],ymm0[25],ymm3[26],ymm0[26],ymm3[27],ymm0[27],ymm3[28],ymm0[28],ymm3[29],ymm0[29],ymm3[30],ymm0[30],ymm3[31],ymm0[31]
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX2-NEXT: vpmullw %ymm2, %ymm5, %ymm2
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX2-NEXT: vpmullw %ymm2, %ymm4, %ymm2
|
||||
; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0],ymm0[0],ymm3[1],ymm0[1],ymm3[2],ymm0[2],ymm3[3],ymm0[3],ymm3[4],ymm0[4],ymm3[5],ymm0[5],ymm3[6],ymm0[6],ymm3[7],ymm0[7],ymm3[16],ymm0[16],ymm3[17],ymm0[17],ymm3[18],ymm0[18],ymm3[19],ymm0[19],ymm3[20],ymm0[20],ymm3[21],ymm0[21],ymm3[22],ymm0[22],ymm3[23],ymm0[23]
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX2-NEXT: vpmullw %ymm3, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpand %ymm5, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpackuswb %ymm2, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX2-NEXT: vpmullw %ymm2, %ymm3, %ymm2
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %ymm5, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX2-NEXT: vpmullw %ymm3, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX2-NEXT: vpmullw %xmm2, %xmm3, %xmm2
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm2
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
|
||||
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX2-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX2-NEXT: vzeroupper
|
||||
@ -3162,66 +3152,65 @@ define i8 @test_v128i8(<128 x i8> %a0) {
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm4, %ymm5, %ymm5
|
||||
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQ-NEXT: vpand %ymm4, %ymm5, %ymm5
|
||||
; AVX512DQ-NEXT: vpmullw %ymm4, %ymm5, %ymm4
|
||||
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQ-NEXT: vpand %ymm5, %ymm4, %ymm4
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm5, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %ymm5, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm3[8],ymm0[8],ymm3[9],ymm0[9],ymm3[10],ymm0[10],ymm3[11],ymm0[11],ymm3[12],ymm0[12],ymm3[13],ymm0[13],ymm3[14],ymm0[14],ymm3[15],ymm0[15],ymm3[24],ymm0[24],ymm3[25],ymm0[25],ymm3[26],ymm0[26],ymm3[27],ymm0[27],ymm3[28],ymm0[28],ymm3[29],ymm0[29],ymm3[30],ymm0[30],ymm3[31],ymm0[31]
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm2, %ymm5, %ymm2
|
||||
; AVX512DQ-NEXT: vpand %ymm4, %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm2, %ymm4, %ymm2
|
||||
; AVX512DQ-NEXT: vpand %ymm5, %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0],ymm0[0],ymm3[1],ymm0[1],ymm3[2],ymm0[2],ymm3[3],ymm0[3],ymm3[4],ymm0[4],ymm3[5],ymm0[5],ymm3[6],ymm0[6],ymm3[7],ymm0[7],ymm3[16],ymm0[16],ymm3[17],ymm0[17],ymm3[18],ymm0[18],ymm3[19],ymm0[19],ymm3[20],ymm0[20],ymm3[21],ymm0[21],ymm3[22],ymm0[22],ymm3[23],ymm0[23]
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm3, %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vpand %ymm4, %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vpand %ymm5, %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm2, %ymm3, %ymm2
|
||||
; AVX512DQ-NEXT: vpand %ymm4, %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpand %ymm5, %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %ymm5, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm3, %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpand %ymm4, %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQ-NEXT: vpmullw %xmm2, %xmm3, %xmm2
|
||||
; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpsrld $16, %xmm0, %xmm2
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsrlw $8, %xmm0, %xmm2
|
||||
; AVX512DQ-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
@ -3231,66 +3220,65 @@ define i8 @test_v128i8(<128 x i8> %a0) {
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm4, %ymm5, %ymm5
|
||||
; AVX512DQVL-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQVL-NEXT: vpand %ymm4, %ymm5, %ymm5
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm4, %ymm5, %ymm4
|
||||
; AVX512DQVL-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQVL-NEXT: vpand %ymm5, %ymm4, %ymm4
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm5, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm5, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm3[8],ymm0[8],ymm3[9],ymm0[9],ymm3[10],ymm0[10],ymm3[11],ymm0[11],ymm3[12],ymm0[12],ymm3[13],ymm0[13],ymm3[14],ymm0[14],ymm3[15],ymm0[15],ymm3[24],ymm0[24],ymm3[25],ymm0[25],ymm3[26],ymm0[26],ymm3[27],ymm0[27],ymm3[28],ymm0[28],ymm3[29],ymm0[29],ymm3[30],ymm0[30],ymm3[31],ymm0[31]
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm5, %ymm2
|
||||
; AVX512DQVL-NEXT: vpand %ymm4, %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm4, %ymm2
|
||||
; AVX512DQVL-NEXT: vpand %ymm5, %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0],ymm0[0],ymm3[1],ymm0[1],ymm3[2],ymm0[2],ymm3[3],ymm0[3],ymm3[4],ymm0[4],ymm3[5],ymm0[5],ymm3[6],ymm0[6],ymm3[7],ymm0[7],ymm3[16],ymm0[16],ymm3[17],ymm0[17],ymm3[18],ymm0[18],ymm3[19],ymm0[19],ymm3[20],ymm0[20],ymm3[21],ymm0[21],ymm3[22],ymm0[22],ymm3[23],ymm0[23]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm3, %ymm1, %ymm1
|
||||
; AVX512DQVL-NEXT: vpand %ymm4, %ymm1, %ymm1
|
||||
; AVX512DQVL-NEXT: vpand %ymm5, %ymm1, %ymm1
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm2, %ymm1, %ymm1
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm3, %ymm2
|
||||
; AVX512DQVL-NEXT: vpand %ymm4, %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpand %ymm5, %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm5, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm3, %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpand %ymm4, %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm2, %xmm3, %xmm2
|
||||
; AVX512DQVL-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm2, %xmm2
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpsrld $16, %xmm0, %xmm2
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpsrlw $8, %xmm0, %xmm2
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm2, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpand %xmm3, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpsrlw $8, %xmm0, %xmm1
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
||||
; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpextrb $0, %xmm0, %eax
|
||||
; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512DQVL-NEXT: vzeroupper
|
||||
|
Loading…
Reference in New Issue
Block a user