mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
AVX-512: Fixed a bug in zero-extending i1 vectors (Skylake-avx512).
The bug caused a failure in the instruction selection phase. Differential Revision: http://reviews.llvm.org/D17924 llvm-svn: 263111
This commit is contained in:
parent
d55567ce3a
commit
2f71b82e0e
@ -13948,16 +13948,21 @@ static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
|
||||
return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
|
||||
|
||||
assert(InVT.getVectorElementType() == MVT::i1);
|
||||
MVT ExtVT = NumElts == 8 ? MVT::v8i64 : MVT::v16i32;
|
||||
|
||||
// Extend VT if the target is 256 or 128bit vector and VLX is not supported.
|
||||
MVT ExtVT = VT;
|
||||
if (!VT.is512BitVector() && !Subtarget.hasVLX())
|
||||
ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
|
||||
|
||||
SDValue One =
|
||||
DAG.getConstant(APInt(ExtVT.getScalarSizeInBits(), 1), DL, ExtVT);
|
||||
SDValue Zero =
|
||||
DAG.getConstant(APInt::getNullValue(ExtVT.getScalarSizeInBits()), DL, ExtVT);
|
||||
|
||||
SDValue V = DAG.getNode(ISD::VSELECT, DL, ExtVT, In, One, Zero);
|
||||
if (VT.is512BitVector())
|
||||
return V;
|
||||
return DAG.getNode(X86ISD::VTRUNC, DL, VT, V);
|
||||
SDValue SelectedVal = DAG.getNode(ISD::VSELECT, DL, ExtVT, In, One, Zero);
|
||||
if (VT == ExtVT)
|
||||
return SelectedVal;
|
||||
return DAG.getNode(X86ISD::VTRUNC, DL, VT, SelectedVal);
|
||||
}
|
||||
|
||||
static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
|
||||
@ -15047,16 +15052,15 @@ static SDValue LowerBoolVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
|
||||
}
|
||||
}
|
||||
|
||||
static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG,
|
||||
const X86Subtarget &Subtarget) {
|
||||
static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
|
||||
|
||||
SDValue Op0 = Op.getOperand(0);
|
||||
SDValue Op1 = Op.getOperand(1);
|
||||
SDValue CC = Op.getOperand(2);
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
SDLoc dl(Op);
|
||||
|
||||
assert(Op0.getSimpleValueType().getVectorElementType().getSizeInBits() >= 8 &&
|
||||
Op.getSimpleValueType().getVectorElementType() == MVT::i1 &&
|
||||
assert(VT.getVectorElementType() == MVT::i1 &&
|
||||
"Cannot set masked compare for this operation");
|
||||
|
||||
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
|
||||
@ -15194,24 +15198,24 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
|
||||
if (VT.is256BitVector() && !Subtarget.hasInt256())
|
||||
return Lower256IntVSETCC(Op, DAG);
|
||||
|
||||
// Operands are boolean (vectors of i1)
|
||||
MVT OpVT = Op1.getSimpleValueType();
|
||||
if (OpVT.getVectorElementType() == MVT::i1)
|
||||
return LowerBoolVSETCC_AVX512(Op, DAG);
|
||||
|
||||
bool MaskResult = (VT.getVectorElementType() == MVT::i1);
|
||||
if (Subtarget.hasAVX512()) {
|
||||
if (Op1.getSimpleValueType().is512BitVector() ||
|
||||
(Subtarget.hasBWI() && Subtarget.hasVLX()) ||
|
||||
(MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32))
|
||||
return LowerIntVSETCC_AVX512(Op, DAG, Subtarget);
|
||||
|
||||
// The result is boolean, but operands are int/float
|
||||
if (VT.getVectorElementType() == MVT::i1) {
|
||||
// In AVX-512 architecture setcc returns mask with i1 elements,
|
||||
// But there is no compare instruction for i8 and i16 elements in KNL.
|
||||
// We are not talking about 512-bit operands in this case, these
|
||||
// types are illegal.
|
||||
if (MaskResult &&
|
||||
(OpVT.getVectorElementType().getSizeInBits() < 32 &&
|
||||
OpVT.getVectorElementType().getSizeInBits() >= 8))
|
||||
// In this case use SSE compare
|
||||
bool UseAVX512Inst =
|
||||
(OpVT.is512BitVector() ||
|
||||
OpVT.getVectorElementType().getSizeInBits() >= 32 ||
|
||||
(Subtarget.hasBWI() && Subtarget.hasVLX()));
|
||||
|
||||
if (UseAVX512Inst)
|
||||
return LowerIntVSETCC_AVX512(Op, DAG);
|
||||
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, VT,
|
||||
DAG.getNode(ISD::SETCC, dl, OpVT, Op0, Op1, CC));
|
||||
}
|
||||
|
@ -1879,3 +1879,47 @@ define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) {
|
||||
%2 = bitcast <8 x i32> %1 to <4 x i64>
|
||||
ret <4 x i64> %2
|
||||
}
|
||||
|
||||
define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
|
||||
; KNL-LABEL: zext_64xi1_to_64xi8:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
|
||||
; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
||||
; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||
; KNL-NEXT: vpcmpeqb %ymm3, %ymm1, %ymm1
|
||||
; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: zext_64xi1_to_64xi8:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpcmpeqb %zmm1, %zmm0, %k1
|
||||
; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
|
||||
; SKX-NEXT: retq
|
||||
%mask = icmp eq <64 x i8> %x, %y
|
||||
%1 = zext <64 x i1> %mask to <64 x i8>
|
||||
ret <64 x i8> %1
|
||||
}
|
||||
|
||||
define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 {
|
||||
; KNL-LABEL: zext_4xi1_to_4x32:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
|
||||
; KNL-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpand %xmm2, %xmm0, %xmm0
|
||||
; KNL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
|
||||
; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
|
||||
; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: zext_4xi1_to_4x32:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vmovdqa64 {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
|
||||
; SKX-NEXT: vpandq %xmm2, %xmm1, %xmm1
|
||||
; SKX-NEXT: vpandq %xmm2, %xmm0, %xmm0
|
||||
; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
|
||||
; SKX-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; SKX-NEXT: retq
|
||||
%mask = icmp eq <4 x i8> %x, %y
|
||||
%1 = zext <4 x i1> %mask to <4 x i32>
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user