1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-21 20:12:56 +02:00

AVX-512: Fixed a bug in i1 vector zero extending. (Skylake-avx512)

(failed on instruction selection phase)

Differential Revision: http://reviews.llvm.org/D17924

llvm-svn: 263111
This commit is contained in:
Elena Demikhovsky 2016-03-10 13:44:22 +00:00
parent d55567ce3a
commit 2f71b82e0e
2 changed files with 71 additions and 23 deletions

View File

@ -13948,16 +13948,21 @@ static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
assert(InVT.getVectorElementType() == MVT::i1);
MVT ExtVT = NumElts == 8 ? MVT::v8i64 : MVT::v16i32;
// Extend VT if the target is 256 or 128bit vector and VLX is not supported.
MVT ExtVT = VT;
if (!VT.is512BitVector() && !Subtarget.hasVLX())
ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
SDValue One =
DAG.getConstant(APInt(ExtVT.getScalarSizeInBits(), 1), DL, ExtVT);
SDValue Zero =
DAG.getConstant(APInt::getNullValue(ExtVT.getScalarSizeInBits()), DL, ExtVT);
SDValue V = DAG.getNode(ISD::VSELECT, DL, ExtVT, In, One, Zero);
if (VT.is512BitVector())
return V;
return DAG.getNode(X86ISD::VTRUNC, DL, VT, V);
SDValue SelectedVal = DAG.getNode(ISD::VSELECT, DL, ExtVT, In, One, Zero);
if (VT == ExtVT)
return SelectedVal;
return DAG.getNode(X86ISD::VTRUNC, DL, VT, SelectedVal);
}
static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
@ -15047,16 +15052,15 @@ static SDValue LowerBoolVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
}
}
static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
assert(Op0.getSimpleValueType().getVectorElementType().getSizeInBits() >= 8 &&
Op.getSimpleValueType().getVectorElementType() == MVT::i1 &&
assert(VT.getVectorElementType() == MVT::i1 &&
"Cannot set masked compare for this operation");
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
@ -15194,26 +15198,26 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
if (VT.is256BitVector() && !Subtarget.hasInt256())
return Lower256IntVSETCC(Op, DAG);
// Operands are boolean (vectors of i1)
MVT OpVT = Op1.getSimpleValueType();
if (OpVT.getVectorElementType() == MVT::i1)
return LowerBoolVSETCC_AVX512(Op, DAG);
bool MaskResult = (VT.getVectorElementType() == MVT::i1);
if (Subtarget.hasAVX512()) {
if (Op1.getSimpleValueType().is512BitVector() ||
(Subtarget.hasBWI() && Subtarget.hasVLX()) ||
(MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32))
return LowerIntVSETCC_AVX512(Op, DAG, Subtarget);
// The result is boolean, but operands are int/float
if (VT.getVectorElementType() == MVT::i1) {
// In AVX-512 architecture setcc returns mask with i1 elements,
// But there is no compare instruction for i8 and i16 elements in KNL.
// We are not talking about 512-bit operands in this case, these
// types are illegal.
if (MaskResult &&
(OpVT.getVectorElementType().getSizeInBits() < 32 &&
OpVT.getVectorElementType().getSizeInBits() >= 8))
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::SETCC, dl, OpVT, Op0, Op1, CC));
// In this case use SSE compare
bool UseAVX512Inst =
(OpVT.is512BitVector() ||
OpVT.getVectorElementType().getSizeInBits() >= 32 ||
(Subtarget.hasBWI() && Subtarget.hasVLX()));
if (UseAVX512Inst)
return LowerIntVSETCC_AVX512(Op, DAG);
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::SETCC, dl, OpVT, Op0, Op1, CC));
}
// Lower using XOP integer comparisons.

View File

@ -1879,3 +1879,47 @@ define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) {
%2 = bitcast <8 x i32> %1 to <4 x i64>
ret <4 x i64> %2
}
define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
; KNL-LABEL: zext_64xi1_to_64xi8:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0
; KNL-NEXT: vpcmpeqb %ymm3, %ymm1, %ymm1
; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
; KNL-NEXT: retq
;
; SKX-LABEL: zext_64xi1_to_64xi8:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpeqb %zmm1, %zmm0, %k1
; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
; SKX-NEXT: retq
%mask = icmp eq <64 x i8> %x, %y
%1 = zext <64 x i1> %mask to <64 x i8>
ret <64 x i8> %1
}
define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 {
; KNL-LABEL: zext_4xi1_to_4x32:
; KNL: ## BB#0:
; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; KNL-NEXT: vpand %xmm2, %xmm1, %xmm1
; KNL-NEXT: vpand %xmm2, %xmm0, %xmm0
; KNL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_4xi1_to_4x32:
; SKX: ## BB#0:
; SKX-NEXT: vmovdqa64 {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SKX-NEXT: vpandq %xmm2, %xmm1, %xmm1
; SKX-NEXT: vpandq %xmm2, %xmm0, %xmm0
; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; SKX-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; SKX-NEXT: retq
%mask = icmp eq <4 x i8> %x, %y
%1 = zext <4 x i1> %mask to <4 x i32>
ret <4 x i32> %1
}