1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-21 03:53:04 +02:00

[AVX512] The vpermi2 instructions require an integer vector for the index vector. This is reflected correctly in the intrinsics, but was not refelected in the isel patterns.

For the floating point types, this requires adding a bitcast to the index vector when its passed through to the output.

llvm-svn: 254277
This commit is contained in:
Craig Topper 2015-11-30 00:13:24 +00:00
parent 8b4411c523
commit 32bf88a844
3 changed files with 66 additions and 35 deletions

View File

@ -16354,7 +16354,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
if (IntrData->Type == VPERM_3OP_MASKZ)
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
else
PassThru = Src2;
PassThru = DAG.getBitcast(VT, Src2);
// Swap Src1 and Src2 in the node creation
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,

View File

@ -276,6 +276,22 @@ multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(vselect _.KRCWM:$mask, RHS, _.RC:$src1)>;
// Similar to AVX512_maskable_3rc but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
X86VectorVTInfo InVT,
dag Outs, dag NonTiedIns, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS> :
AVX512_maskable_common<O, F, OutVT, Outs,
!con((ins InVT.RC:$src1), NonTiedIns),
!con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
!con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(vselect InVT.KRCWM:$mask, RHS,
(bitconvert InVT.RC:$src1))>;
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag NonTiedIns, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
@ -1136,71 +1152,82 @@ defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
avx512vl_i64_info, VK8>, VEX_W;
//===----------------------------------------------------------------------===//
// -- VPERM2I - 3 source operands form --
// -- VPERMI2 - 3 source operands form --
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _> {
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst" in {
defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V,
(_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V,
AVX5128IBase;
let mayLoad = 1 in
defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, _.RC:$src2,
(_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2,
(_.VT (bitconvert (_.LdFrag addr:$src3)))))>,
EVEX_4V, AVX5128IBase;
}
}
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _> {
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let mayLoad = 1, Constraints = "$src1 = $dst" in
defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(_.VT (OpNode _.RC:$src1,
(_.VT (X86VPermi2X IdxVT.RC:$src1,
_.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>,
AVX5128IBase, EVEX_4V, EVEX_B;
}
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
SDNode OpNode, AVX512VLVectorVTInfo VTInfo> {
defm NAME: avx512_perm_i<opc, OpcodeStr, OpNode, VTInfo.info512>,
avx512_perm_i_mb<opc, OpcodeStr, OpNode, VTInfo.info512>, EVEX_V512;
AVX512VLVectorVTInfo VTInfo,
AVX512VLVectorVTInfo ShuffleMask> {
defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512,
ShuffleMask.info512>,
avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info512,
ShuffleMask.info512>, EVEX_V512;
let Predicates = [HasVLX] in {
defm NAME#128: avx512_perm_i<opc, OpcodeStr, OpNode, VTInfo.info128>,
avx512_perm_i_mb<opc, OpcodeStr, OpNode, VTInfo.info128>, EVEX_V128;
defm NAME#256: avx512_perm_i<opc, OpcodeStr, OpNode, VTInfo.info256>,
avx512_perm_i_mb<opc, OpcodeStr, OpNode, VTInfo.info256>, EVEX_V256;
defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128,
ShuffleMask.info128>,
avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info128,
ShuffleMask.info128>, EVEX_V128;
defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256,
ShuffleMask.info256>,
avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info256,
ShuffleMask.info256>, EVEX_V256;
}
}
multiclass avx512_perm_i_sizes_w<bits<8> opc, string OpcodeStr,
SDNode OpNode, AVX512VLVectorVTInfo VTInfo> {
AVX512VLVectorVTInfo VTInfo,
AVX512VLVectorVTInfo Idx> {
let Predicates = [HasBWI] in
defm NAME: avx512_perm_i<opc, OpcodeStr, OpNode, VTInfo.info512>, EVEX_V512;
defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512,
Idx.info512>, EVEX_V512;
let Predicates = [HasBWI, HasVLX] in {
defm NAME#128: avx512_perm_i<opc, OpcodeStr, OpNode, VTInfo.info128>, EVEX_V128;
defm NAME#256: avx512_perm_i<opc, OpcodeStr, OpNode, VTInfo.info256>, EVEX_V256;
defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128,
Idx.info128>, EVEX_V128;
defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256,
Idx.info256>, EVEX_V256;
}
}
defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", X86VPermi2X,
avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", X86VPermi2X,
avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W : avx512_perm_i_sizes_w<0x75, "vpermi2w", X86VPermi2X,
avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", X86VPermi2X,
avx512vl_f32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", X86VPermi2X,
avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d",
avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q",
avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W : avx512_perm_i_sizes_w<0x75, "vpermi2w",
avx512vl_i16_info, avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps",
avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd",
avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
// VPERMT
// VPERMT2
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst" in {

View File

@ -292,8 +292,6 @@ def X86insertqi : SDNode<"X86ISD::INSERTQI",
def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>;
def SDTShuff3Op : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>;
def SDTShuff2OpM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameSizeAs<0,2>,
@ -389,7 +387,13 @@ def X86VPermt2 : SDNode<"X86ISD::VPERMV3",
SDTCisSameSizeAs<0,2>,
SDTCisSameAs<0,3>]>, []>;
def X86VPermi2X : SDNode<"X86ISD::VPERMIV3", SDTShuff3Op>;
def X86VPermi2X : SDNode<"X86ISD::VPERMIV3",
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<1>,
SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>,
SDTCisSameSizeAs<0,1>,
SDTCisSameAs<0,2>,
SDTCisSameAs<0,3>]>, []>;
def X86vpternlog : SDNode<"X86ISD::VPTERNLOG", SDTTernlog>;
def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;