mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
[X86][XOP] createVariablePermute - use VPPERM for v32i8 variable permutes
llvm-svn: 327213
This commit is contained in:
parent
3b1dee4eab
commit
504e94710f
@ -7972,6 +7972,16 @@ SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
|
||||
case MVT::v32i8:
|
||||
if (Subtarget.hasVLX() && Subtarget.hasVBMI())
|
||||
Opcode = X86ISD::VPERMV;
|
||||
else if (Subtarget.hasXOP()) {
|
||||
SDValue LoSrc = extract128BitVector(SrcVec, 0, DAG, DL);
|
||||
SDValue HiSrc = extract128BitVector(SrcVec, 16, DAG, DL);
|
||||
SDValue LoIdx = extract128BitVector(IndicesVec, 0, DAG, DL);
|
||||
SDValue HiIdx = extract128BitVector(IndicesVec, 16, DAG, DL);
|
||||
return DAG.getNode(
|
||||
ISD::CONCAT_VECTORS, DL, VT,
|
||||
DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, LoSrc, HiSrc, LoIdx),
|
||||
DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, LoSrc, HiSrc, HiIdx));
|
||||
}
|
||||
break;
|
||||
case MVT::v16i16:
|
||||
if (Subtarget.hasVLX() && Subtarget.hasBWI())
|
||||
|
@ -673,129 +673,11 @@ define <16 x i16> @var_shuffle_v16i16(<16 x i16> %v, <16 x i16> %indices) nounwi
|
||||
define <32 x i8> @var_shuffle_v32i8(<32 x i8> %v, <32 x i8> %indices) nounwind {
|
||||
; XOP-LABEL: var_shuffle_v32i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: pushq %rbp
|
||||
; XOP-NEXT: movq %rsp, %rbp
|
||||
; XOP-NEXT: andq $-32, %rsp
|
||||
; XOP-NEXT: subq $64, %rsp
|
||||
; XOP-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||
; XOP-NEXT: vpextrb $0, %xmm2, %eax
|
||||
; XOP-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: movzbl (%rsp,%rax), %eax
|
||||
; XOP-NEXT: vmovd %eax, %xmm0
|
||||
; XOP-NEXT: vpextrb $1, %xmm2, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: movzbl (%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $2, %xmm2, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: movzbl (%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $3, %xmm2, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: movzbl (%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $4, %xmm2, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: movzbl (%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $5, %xmm2, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: movzbl (%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $6, %xmm2, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: movzbl (%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $7, %xmm2, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: movzbl (%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $8, %xmm2, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: movzbl (%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $9, %xmm2, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: movzbl (%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $10, %xmm2, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: movzbl (%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $11, %xmm2, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: movzbl (%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $12, %xmm2, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: movzbl (%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $13, %xmm2, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: movzbl (%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $14, %xmm2, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: movzbl (%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $15, %xmm2, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: movzbl (%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $0, %xmm1, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: movzbl (%rsp,%rax), %eax
|
||||
; XOP-NEXT: vmovd %eax, %xmm2
|
||||
; XOP-NEXT: vpextrb $1, %xmm1, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: vpinsrb $1, (%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $2, %xmm1, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: vpinsrb $2, (%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $3, %xmm1, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: vpinsrb $3, (%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $4, %xmm1, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: vpinsrb $4, (%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $5, %xmm1, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: vpinsrb $5, (%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $6, %xmm1, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: vpinsrb $6, (%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $7, %xmm1, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: vpinsrb $7, (%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $8, %xmm1, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: vpinsrb $8, (%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $9, %xmm1, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: vpinsrb $9, (%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $10, %xmm1, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: vpinsrb $10, (%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $11, %xmm1, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: vpinsrb $11, (%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $12, %xmm1, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: vpinsrb $12, (%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $13, %xmm1, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: vpinsrb $13, (%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $14, %xmm1, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: vpinsrb $14, (%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $15, %xmm1, %eax
|
||||
; XOP-NEXT: andl $31, %eax
|
||||
; XOP-NEXT: movzbl (%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; XOP-NEXT: movq %rbp, %rsp
|
||||
; XOP-NEXT: popq %rbp
|
||||
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||
; XOP-NEXT: vpperm %xmm2, %xmm3, %xmm0, %xmm2
|
||||
; XOP-NEXT: vpperm %xmm1, %xmm3, %xmm0, %xmm0
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: var_shuffle_v32i8:
|
||||
@ -2376,122 +2258,9 @@ define <32 x i8> @var_shuffle_v32i8_from_v16i8(<16 x i8> %v, <32 x i8> %indices)
|
||||
; XOP-LABEL: var_shuffle_v32i8_from_v16i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||
; XOP-NEXT: vpextrb $0, %xmm2, %eax
|
||||
; XOP-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
|
||||
; XOP-NEXT: vmovd %eax, %xmm0
|
||||
; XOP-NEXT: vpextrb $1, %xmm2, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $2, %xmm2, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $3, %xmm2, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $4, %xmm2, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $5, %xmm2, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $6, %xmm2, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $7, %xmm2, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $8, %xmm2, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $9, %xmm2, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $10, %xmm2, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $11, %xmm2, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $12, %xmm2, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $13, %xmm2, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $14, %xmm2, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $15, %xmm2, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpextrb $0, %xmm1, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
|
||||
; XOP-NEXT: vmovd %eax, %xmm2
|
||||
; XOP-NEXT: vpextrb $1, %xmm1, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: vpinsrb $1, -24(%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $2, %xmm1, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: vpinsrb $2, -24(%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $3, %xmm1, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: vpinsrb $3, -24(%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $4, %xmm1, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: vpinsrb $4, -24(%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $5, %xmm1, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: vpinsrb $5, -24(%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $6, %xmm1, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: vpinsrb $6, -24(%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $7, %xmm1, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: vpinsrb $7, -24(%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $8, %xmm1, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: vpinsrb $8, -24(%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $9, %xmm1, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: vpinsrb $9, -24(%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $10, %xmm1, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: vpinsrb $10, -24(%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $11, %xmm1, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: vpinsrb $11, -24(%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $12, %xmm1, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: vpinsrb $12, -24(%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $13, %xmm1, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: vpinsrb $13, -24(%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $14, %xmm1, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: vpinsrb $14, -24(%rsp,%rax), %xmm2, %xmm2
|
||||
; XOP-NEXT: vpextrb $15, %xmm1, %eax
|
||||
; XOP-NEXT: andl $15, %eax
|
||||
; XOP-NEXT: movzbl -24(%rsp,%rax), %eax
|
||||
; XOP-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; XOP-NEXT: vpperm %xmm2, %xmm0, %xmm0, %xmm2
|
||||
; XOP-NEXT: vpperm %xmm1, %xmm0, %xmm0, %xmm0
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: var_shuffle_v32i8_from_v16i8:
|
||||
|
Loading…
x
Reference in New Issue
Block a user