mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 02:52:53 +02:00
[LV][X86] Support of AVX2 Gathers code generation and update the LV with this
This patch depends on D35348 (https://reviews.llvm.org/D35348). It adds support for pattern selection of masked gathers on AVX2 (X86\AVX2 code gen) and updates LoopVectorize to generate gathers for AVX2 processors. Reviewers: delena, zvi, RKSimon, craig.topper, aaboud, igorb. Reviewed By: delena, RKSimon. Differential Revision: https://reviews.llvm.org/D35772 (llvm-svn: 318641)
This commit is contained in:
parent
35278fe922
commit
059fc817b2
@ -970,6 +970,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
setOperationAction(ISD::BITREVERSE, VT, Custom);
|
||||
}
|
||||
|
||||
// Special handling for masked gather of 2 elements
|
||||
if (Subtarget.hasAVX2() && !Subtarget.hasAVX512())
|
||||
setOperationAction(ISD::MGATHER, MVT::v2i64, Custom);
|
||||
|
||||
if (!Subtarget.useSoftFloat() && Subtarget.hasFp256()) {
|
||||
bool HasInt256 = Subtarget.hasInt256();
|
||||
|
||||
@ -24301,8 +24305,8 @@ static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget,
|
||||
|
||||
static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
assert(Subtarget.hasAVX512() &&
|
||||
"MGATHER/MSCATTER are supported on AVX-512 arch only");
|
||||
assert(Subtarget.hasAVX2() &&
|
||||
"MGATHER/MSCATTER are supported on AVX-512/AVX-2 arch only");
|
||||
|
||||
MaskedGatherSDNode *N = cast<MaskedGatherSDNode>(Op.getNode());
|
||||
SDLoc dl(Op);
|
||||
@ -24316,7 +24320,7 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op");
|
||||
|
||||
if (!Subtarget.hasVLX() && !VT.is512BitVector() &&
|
||||
if (Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
|
||||
!Index.getSimpleValueType().is512BitVector()) {
|
||||
// AVX512F supports only 512-bit vectors. Either data or index should
|
||||
// be 512 bit wide. If now the both index and data are 256-bit, but
|
||||
@ -24359,7 +24363,7 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
|
||||
SDValue RetOps[] = {Extract, NewGather.getValue(1)};
|
||||
return DAG.getMergeValues(RetOps, dl);
|
||||
}
|
||||
if (N->getMemoryVT() == MVT::v2i32 && Subtarget.hasVLX()) {
|
||||
if (N->getMemoryVT() == MVT::v2i32) {
|
||||
// There is a special case when the return type v2i32 is illegal and
|
||||
// the type legalizer extended it to v2i64. Without this conversion we end up
|
||||
// with VPGATHERQQ (reading q-words from the memory) instead of VPGATHERQD.
|
||||
@ -24367,16 +24371,26 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
|
||||
// with index v2i64 and value type v4i32.
|
||||
assert(VT == MVT::v2i64 && Src0.getValueType() == MVT::v2i64 &&
|
||||
"Unexpected type in masked gather");
|
||||
Src0 = DAG.getVectorShuffle(MVT::v4i32, dl,
|
||||
DAG.getBitcast(MVT::v4i32, Src0),
|
||||
DAG.getUNDEF(MVT::v4i32), { 0, 2, -1, -1 });
|
||||
Src0 =
|
||||
DAG.getVectorShuffle(MVT::v4i32, dl, DAG.getBitcast(MVT::v4i32, Src0),
|
||||
DAG.getUNDEF(MVT::v4i32), { 0, 2, -1, -1 });
|
||||
// The mask should match the destination type. Extending mask with zeroes
|
||||
// is not necessary since instruction itself reads only two values from
|
||||
// memory.
|
||||
SDVTList VTList;
|
||||
if (Subtarget.hasVLX()) {
|
||||
Mask = ExtendToType(Mask, MVT::v4i1, DAG, false);
|
||||
VTList = DAG.getVTList(MVT::v4i32, MVT::v2i1, MVT::Other);
|
||||
}
|
||||
else {
|
||||
Mask =
|
||||
DAG.getVectorShuffle(MVT::v4i32, dl, DAG.getBitcast(MVT::v4i32, Mask),
|
||||
DAG.getUNDEF(MVT::v4i32), {0, 2, -1, -1});
|
||||
VTList = DAG.getVTList(MVT::v4i32, MVT::Other);
|
||||
}
|
||||
SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
|
||||
SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
|
||||
DAG.getVTList(MVT::v4i32, MVT::v2i1, MVT::Other), Ops, dl,
|
||||
N->getMemoryVT(), N->getMemOperand());
|
||||
VTList, Ops, dl, N->getMemoryVT(), N->getMemOperand());
|
||||
|
||||
SDValue Sext = getExtendInVec(X86ISD::VSEXT, dl, MVT::v2i64,
|
||||
NewGather.getValue(0), DAG);
|
||||
|
@ -1101,3 +1101,91 @@ def masked_truncstore_us_vi32 : PatFrag<(ops node:$src1, node:$src2, node:$src3)
|
||||
(X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
|
||||
return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
|
||||
}]>;
|
||||
|
||||
// AVX2 special nodes
|
||||
// masked gather of AVX2 where mask elements are i32
|
||||
def avx2_x86_masked_gather_32 : SDNode<"X86ISD::MGATHER",
|
||||
SDTypeProfile<2, 3, [
|
||||
SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<1, 3>,
|
||||
SDTCisPtrTy<4>, SDTCVecEltisVT<1, i32>, SDTCisSameNumEltsAs<0, 1>]>,
|
||||
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
|
||||
|
||||
def avx2_masked_gather_32 : SDNode<"ISD::MGATHER",
|
||||
SDTypeProfile<2, 3, [
|
||||
SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<1, 3>,
|
||||
SDTCisPtrTy<4>, SDTCVecEltisVT<1, i32>, SDTCisSameNumEltsAs<0, 1>]>,
|
||||
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
|
||||
|
||||
// masked gather of AVX2 where mask elements are i64
|
||||
def avx2_masked_gather_64 : SDNode<"ISD::MGATHER",
|
||||
SDTypeProfile<2, 3, [
|
||||
SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<1, 3>,
|
||||
SDTCisPtrTy<4>, SDTCVecEltisVT<1, i64>, SDTCisSameNumEltsAs<0, 1>]>,
|
||||
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
|
||||
|
||||
// dword gathers
|
||||
def avx2_mvpgatherdd_ps_xmm : PatFrag<(ops node:$src1, node:$src2, node:$src3),
|
||||
(avx2_masked_gather_32 node:$src1, node:$src2, node:$src3) , [{
|
||||
if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
|
||||
return (Mgt->getIndex().getValueType() == MVT::v4i32 ||
|
||||
Mgt->getBasePtr().getValueType() == MVT::v4i32);
|
||||
return false;
|
||||
}]>;
|
||||
|
||||
def avx2_mvpgatherqd_ps_xmm : PatFrag<(ops node:$src1, node:$src2, node:$src3),
|
||||
(avx2_x86_masked_gather_32 node:$src1, node:$src2, node:$src3) , [{
|
||||
if (X86MaskedGatherSDNode *Mgt = dyn_cast<X86MaskedGatherSDNode>(N))
|
||||
return (Mgt->getIndex().getValueType() == MVT::v2i64 ||
|
||||
Mgt->getBasePtr().getValueType() == MVT::v2i64);
|
||||
return false;
|
||||
}]>;
|
||||
|
||||
def avx2_mvpgatherdd_ps_ymm : PatFrag<(ops node:$src1, node:$src2, node:$src3),
|
||||
(avx2_masked_gather_32 node:$src1, node:$src2, node:$src3) , [{
|
||||
if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
|
||||
return (Mgt->getIndex().getValueType() == MVT::v8i32 ||
|
||||
Mgt->getBasePtr().getValueType() == MVT::v8i32);
|
||||
return false;
|
||||
}]>;
|
||||
|
||||
def avx2_mvpgatherqd_ps_ymm : PatFrag<(ops node:$src1, node:$src2, node:$src3),
|
||||
(avx2_masked_gather_32 node:$src1, node:$src2, node:$src3) , [{
|
||||
if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
|
||||
return (Mgt->getIndex().getValueType() == MVT::v4i64 ||
|
||||
Mgt->getBasePtr().getValueType() == MVT::v4i64);
|
||||
return false;
|
||||
}]>;
|
||||
|
||||
// qwords
|
||||
def avx2_mvpgatherdq_pd_xmm : PatFrag<(ops node:$src1, node:$src2, node:$src3),
|
||||
(avx2_masked_gather_64 node:$src1, node:$src2, node:$src3) , [{
|
||||
if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
|
||||
return (Mgt->getIndex().getValueType() == MVT::v2i32 ||
|
||||
Mgt->getBasePtr().getValueType() == MVT::v2i32);
|
||||
return false;
|
||||
}]>;
|
||||
|
||||
def avx2_mvpgatherqq_pd_xmm : PatFrag<(ops node:$src1, node:$src2, node:$src3),
|
||||
(avx2_masked_gather_64 node:$src1, node:$src2, node:$src3) , [{
|
||||
if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
|
||||
return (Mgt->getIndex().getValueType() == MVT::v2i64 ||
|
||||
Mgt->getBasePtr().getValueType() == MVT::v2i64) &&
|
||||
Mgt->getMemoryVT().is128BitVector();
|
||||
return false;
|
||||
}]>;
|
||||
|
||||
def avx2_mvpgatherdq_pd_ymm : PatFrag<(ops node:$src1, node:$src2, node:$src3),
|
||||
(avx2_masked_gather_64 node:$src1, node:$src2, node:$src3) , [{
|
||||
if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
|
||||
return (Mgt->getIndex().getValueType() == MVT::v4i32 ||
|
||||
Mgt->getBasePtr().getValueType() == MVT::v4i32);
|
||||
return false;
|
||||
}]>;
|
||||
|
||||
def avx2_mvpgatherqq_pd_ymm : PatFrag<(ops node:$src1, node:$src2, node:$src3),
|
||||
(avx2_masked_gather_64 node:$src1, node:$src2, node:$src3) , [{
|
||||
if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
|
||||
return (Mgt->getIndex().getValueType() == MVT::v4i64 ||
|
||||
Mgt->getBasePtr().getValueType() == MVT::v4i64);
|
||||
return false;
|
||||
}]>;
|
||||
|
@ -8326,36 +8326,52 @@ let Predicates = [HasAVX2, NoVLX] in {
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VGATHER - GATHER Operations
|
||||
multiclass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256,
|
||||
multiclass avx2_gather<bits<8> opc, string OpcodeStr, ValueType VTx,
|
||||
ValueType VTy, PatFrag GatherNode128,
|
||||
PatFrag GatherNode256, RegisterClass RC256,
|
||||
X86MemOperand memop128, X86MemOperand memop256> {
|
||||
def rm : AVX28I<opc, MRMSrcMem4VOp3, (outs VR128:$dst, VR128:$mask_wb),
|
||||
(ins VR128:$src1, memop128:$src2, VR128:$mask),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
|
||||
[]>, VEX;
|
||||
[(set (VTx VR128:$dst), VR128:$mask_wb,
|
||||
(GatherNode128 (VTx VR128:$src1), VR128:$mask,
|
||||
vectoraddr:$src2))]>, VEX;
|
||||
def Yrm : AVX28I<opc, MRMSrcMem4VOp3, (outs RC256:$dst, RC256:$mask_wb),
|
||||
(ins RC256:$src1, memop256:$src2, RC256:$mask),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
|
||||
[]>, VEX, VEX_L;
|
||||
[(set (VTy RC256:$dst), RC256:$mask_wb,
|
||||
(GatherNode256 (VTy RC256:$src1), RC256:$mask,
|
||||
vectoraddr:$src2))]>, VEX, VEX_L;
|
||||
}
|
||||
|
||||
let mayLoad = 1, hasSideEffects = 0, Constraints
|
||||
= "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
|
||||
in {
|
||||
defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", VR256, vx128mem, vx256mem>, VEX_W;
|
||||
defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", VR256, vx128mem, vy256mem>, VEX_W;
|
||||
defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", VR256, vx128mem, vy256mem>;
|
||||
defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", VR128, vx64mem, vy128mem>;
|
||||
let Predicates = [UseAVX2] in {
|
||||
let mayLoad = 1, hasSideEffects = 0, Constraints
|
||||
= "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
|
||||
in {
|
||||
defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", v2i64, v4i64, avx2_mvpgatherdq_pd_xmm,
|
||||
avx2_mvpgatherdq_pd_ymm, VR256, vx128mem, vx256mem>, VEX_W;
|
||||
defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", v2i64, v4i64, avx2_mvpgatherqq_pd_xmm,
|
||||
avx2_mvpgatherqq_pd_ymm, VR256, vx128mem, vy256mem>, VEX_W;
|
||||
defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", v4i32, v8i32, avx2_mvpgatherdd_ps_xmm,
|
||||
avx2_mvpgatherdd_ps_ymm, VR256, vx128mem, vy256mem>;
|
||||
defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", v4i32, v4i32, avx2_mvpgatherqd_ps_xmm,
|
||||
avx2_mvpgatherqd_ps_ymm, VR128, vx64mem, vy128mem>;
|
||||
|
||||
let ExeDomain = SSEPackedDouble in {
|
||||
defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, vx128mem, vx256mem>, VEX_W;
|
||||
defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, vx128mem, vy256mem>, VEX_W;
|
||||
}
|
||||
let ExeDomain = SSEPackedDouble in {
|
||||
defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", v2f64, v4f64, avx2_mvpgatherdq_pd_xmm,
|
||||
avx2_mvpgatherdq_pd_ymm, VR256, vx128mem, vx256mem>, VEX_W;
|
||||
defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", v2f64, v4f64, avx2_mvpgatherqq_pd_xmm,
|
||||
avx2_mvpgatherqq_pd_ymm, VR256, vx128mem, vy256mem>, VEX_W;
|
||||
}
|
||||
|
||||
let ExeDomain = SSEPackedSingle in {
|
||||
defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, vx128mem, vy256mem>;
|
||||
defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", VR128, vx64mem, vy128mem>;
|
||||
let ExeDomain = SSEPackedSingle in {
|
||||
defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", v4f32, v8f32, avx2_mvpgatherdd_ps_xmm,
|
||||
avx2_mvpgatherdd_ps_ymm, VR256, vx128mem, vy256mem>;
|
||||
defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", v4f32, v4f32, avx2_mvpgatherqd_ps_xmm,
|
||||
avx2_mvpgatherqd_ps_ymm, VR128, vx64mem, vy128mem>;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -51,17 +51,9 @@ enum Style {
|
||||
} // end namespace PICStyles
|
||||
|
||||
class X86Subtarget final : public X86GenSubtargetInfo {
|
||||
protected:
|
||||
enum X86SSEEnum {
|
||||
NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
|
||||
};
|
||||
|
||||
enum X863DNowEnum {
|
||||
NoThreeDNow, MMX, ThreeDNow, ThreeDNowA
|
||||
};
|
||||
|
||||
public:
|
||||
enum X86ProcFamilyEnum {
|
||||
Others,
|
||||
Others,
|
||||
IntelAtom,
|
||||
IntelSLM,
|
||||
IntelGLM,
|
||||
@ -74,6 +66,15 @@ protected:
|
||||
IntelIcelake,
|
||||
};
|
||||
|
||||
protected:
|
||||
enum X86SSEEnum {
|
||||
NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
|
||||
};
|
||||
|
||||
enum X863DNowEnum {
|
||||
NoThreeDNow, MMX, ThreeDNow, ThreeDNowA
|
||||
};
|
||||
|
||||
/// X86 processor family: Intel Atom, and others
|
||||
X86ProcFamilyEnum X86ProcFamily;
|
||||
|
||||
|
@ -2368,8 +2368,9 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
|
||||
|
||||
// Trying to reduce IndexSize to 32 bits for vector 16.
|
||||
// By default the IndexSize is equal to pointer size.
|
||||
unsigned IndexSize = (VF >= 16) ? getIndexSizeInBits(Ptr, DL) :
|
||||
DL.getPointerSizeInBits();
|
||||
unsigned IndexSize = (ST->hasAVX512() && VF >= 16)
|
||||
? getIndexSizeInBits(Ptr, DL)
|
||||
: DL.getPointerSizeInBits();
|
||||
|
||||
Type *IndexVTy = VectorType::get(IntegerType::get(SrcVTy->getContext(),
|
||||
IndexSize), VF);
|
||||
@ -2385,7 +2386,9 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
|
||||
|
||||
// The gather / scatter cost is given by Intel architects. It is a rough
|
||||
// number since we are looking at one instruction in a time.
|
||||
const int GSOverhead = 2;
|
||||
const int GSOverhead = (Opcode == Instruction::Load)
|
||||
? ST->getGatherOverhead()
|
||||
: ST->getScatterOverhead();
|
||||
return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
|
||||
Alignment, AddressSpace);
|
||||
}
|
||||
@ -2456,7 +2459,7 @@ int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy,
|
||||
// the mask vector will add more instructions. Right now we give the scalar
|
||||
// cost of vector-4 for KNL. TODO: Check, maybe the gather/scatter instruction
|
||||
// is better in the VariableMask case.
|
||||
if (VF == 2 || (VF == 4 && !ST->hasVLX()))
|
||||
if (ST->hasAVX512() && (VF == 2 || (VF == 4 && !ST->hasVLX())))
|
||||
Scalarize = true;
|
||||
|
||||
if (Scalarize)
|
||||
@ -2515,11 +2518,15 @@ bool X86TTIImpl::isLegalMaskedGather(Type *DataTy) {
|
||||
int DataWidth = isa<PointerType>(ScalarTy) ?
|
||||
DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits();
|
||||
|
||||
// AVX-512 allows gather and scatter
|
||||
return (DataWidth == 32 || DataWidth == 64) && ST->hasAVX512();
|
||||
// AVX-512 and Skylake AVX2 allow gather; scatter additionally requires AVX-512
|
||||
return (DataWidth == 32 || DataWidth == 64) && (ST->hasAVX512() ||
|
||||
ST->getProcFamily() == X86Subtarget::IntelSkylake);
|
||||
}
|
||||
|
||||
bool X86TTIImpl::isLegalMaskedScatter(Type *DataType) {
|
||||
// AVX2 doesn't support scatter
|
||||
if (!ST->hasAVX512())
|
||||
return false;
|
||||
return isLegalMaskedGather(DataType);
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -cost-model -analyze < %s | FileCheck %s --check-prefix=AVX2
|
||||
; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=skylake -cost-model -analyze < %s | FileCheck %s --check-prefix=SKL
|
||||
; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=knl -cost-model -analyze < %s | FileCheck %s --check-prefix=KNL
|
||||
; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=skx -cost-model -analyze < %s | FileCheck %s --check-prefix=SKX
|
||||
|
||||
@ -72,6 +73,9 @@ define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x
|
||||
; AVX2-LABEL: test_gather_2f64
|
||||
; AVX2: Found an estimated cost of 7 {{.*}}.gather
|
||||
|
||||
; SKL-LABEL: test_gather_2f64
|
||||
; SKL: Found an estimated cost of 4 {{.*}}.gather
|
||||
|
||||
; KNL-LABEL: test_gather_2f64
|
||||
; KNL: Found an estimated cost of 7 {{.*}}.gather
|
||||
|
||||
@ -88,6 +92,9 @@ define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %
|
||||
; AVX2-LABEL: test_gather_4i32
|
||||
; AVX2: Found an estimated cost of 16 {{.*}}.gather
|
||||
|
||||
; SKL-LABEL: test_gather_4i32
|
||||
; SKL: Found an estimated cost of 6 {{.*}}.gather
|
||||
|
||||
; KNL-LABEL: test_gather_4i32
|
||||
; KNL: Found an estimated cost of 16 {{.*}}.gather
|
||||
|
||||
@ -103,6 +110,9 @@ define <4 x i32> @test_gather_4i32_const_mask(<4 x i32*> %ptrs, <4 x i32> %src0)
|
||||
; AVX2-LABEL: test_gather_4i32_const_mask
|
||||
; AVX2: Found an estimated cost of 8 {{.*}}.gather
|
||||
|
||||
; SKL-LABEL: test_gather_4i32_const_mask
|
||||
; SKL: Found an estimated cost of 6 {{.*}}.gather
|
||||
|
||||
; KNL-LABEL: test_gather_4i32_const_mask
|
||||
; KNL: Found an estimated cost of 8 {{.*}}.gather
|
||||
|
||||
@ -119,6 +129,9 @@ define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind)
|
||||
; AVX2-LABEL: test_gather_16f32_const_mask
|
||||
; AVX2: Found an estimated cost of 30 {{.*}}.gather
|
||||
|
||||
; SKL-LABEL: test_gather_16f32_const_mask
|
||||
; SKL: Found an estimated cost of 24 {{.*}}.gather
|
||||
|
||||
; KNL-LABEL: test_gather_16f32_const_mask
|
||||
; KNL: Found an estimated cost of 18 {{.*}}.gather
|
||||
|
||||
@ -137,6 +150,9 @@ define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <
|
||||
; AVX2-LABEL: test_gather_16f32_var_mask
|
||||
; AVX2: Found an estimated cost of 62 {{.*}}.gather
|
||||
|
||||
; SKL-LABEL: test_gather_16f32_var_mask
|
||||
; SKL: Found an estimated cost of 24 {{.*}}.gather
|
||||
|
||||
; KNL-LABEL: test_gather_16f32_var_mask
|
||||
; KNL: Found an estimated cost of 18 {{.*}}.gather
|
||||
|
||||
@ -155,6 +171,9 @@ define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i3
|
||||
; AVX2-LABEL: test_gather_16f32_ra_var_mask
|
||||
; AVX2: Found an estimated cost of 62 {{.*}}.gather
|
||||
|
||||
; SKL-LABEL: test_gather_16f32_ra_var_mask
|
||||
; SKL: Found an estimated cost of 24 {{.*}}.gather
|
||||
|
||||
; KNL-LABEL: test_gather_16f32_ra_var_mask
|
||||
; KNL: Found an estimated cost of 20 {{.*}}.gather
|
||||
|
||||
@ -173,6 +192,9 @@ define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind
|
||||
; AVX2-LABEL: test_gather_16f32_const_mask2
|
||||
; AVX2: Found an estimated cost of 30 {{.*}}.gather
|
||||
|
||||
; SKL-LABEL: test_gather_16f32_const_mask2
|
||||
; SKL: Found an estimated cost of 24 {{.*}}.gather
|
||||
|
||||
; KNL-LABEL: test_gather_16f32_const_mask2
|
||||
; KNL: Found an estimated cost of 18 {{.*}}.gather
|
||||
|
||||
@ -193,6 +215,9 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3
|
||||
; AVX2-LABEL: test_scatter_16i32
|
||||
; AVX2: Found an estimated cost of 64 {{.*}}.scatter
|
||||
|
||||
; SKL-LABEL: test_scatter_16i32
|
||||
; SKL: Found an estimated cost of 64 {{.*}}.scatter
|
||||
|
||||
; KNL-LABEL: test_scatter_16i32
|
||||
; KNL: Found an estimated cost of 18 {{.*}}.scatter
|
||||
|
||||
@ -212,6 +237,9 @@ define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) {
|
||||
; AVX2-LABEL: test_scatter_8i32
|
||||
; AVX2: Found an estimated cost of 32 {{.*}}.scatter
|
||||
|
||||
; SKL-LABEL: test_scatter_8i32
|
||||
; SKL: Found an estimated cost of 32 {{.*}}.scatter
|
||||
|
||||
; KNL-LABEL: test_scatter_8i32
|
||||
; KNL: Found an estimated cost of 10 {{.*}}.scatter
|
||||
|
||||
@ -228,6 +256,9 @@ define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
|
||||
; AVX2-LABEL: test_scatter_4i32
|
||||
; AVX2: Found an estimated cost of 16 {{.*}}.scatter
|
||||
|
||||
; SKL-LABEL: test_scatter_4i32
|
||||
; SKL: Found an estimated cost of 16 {{.*}}.scatter
|
||||
|
||||
; KNL-LABEL: test_scatter_4i32
|
||||
; KNL: Found an estimated cost of 16 {{.*}}.scatter
|
||||
|
||||
@ -243,6 +274,9 @@ define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask)
|
||||
; AVX2-LABEL: test_gather_4f32
|
||||
; AVX2: Found an estimated cost of 15 {{.*}}.gather
|
||||
|
||||
; SKL-LABEL: test_gather_4f32
|
||||
; SKL: Found an estimated cost of 6 {{.*}}.gather
|
||||
|
||||
; KNL-LABEL: test_gather_4f32
|
||||
; KNL: Found an estimated cost of 15 {{.*}}.gather
|
||||
|
||||
@ -261,6 +295,9 @@ define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) {
|
||||
; AVX2-LABEL: test_gather_4f32_const_mask
|
||||
; AVX2: Found an estimated cost of 7 {{.*}}.gather
|
||||
|
||||
; SKL-LABEL: test_gather_4f32_const_mask
|
||||
; SKL: Found an estimated cost of 6 {{.*}}.gather
|
||||
|
||||
; KNL-LABEL: test_gather_4f32_const_mask
|
||||
; KNL: Found an estimated cost of 7 {{.*}}.gather
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user