mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
AVX512BW: Support llvm intrinsic masked vector load/store for i8/i16 element types on SKX
Differential Revision: http://reviews.llvm.org/D17913 llvm-svn: 262803
This commit is contained in:
parent
9703a437cc
commit
c376e5b7a2
@ -1687,6 +1687,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||||||
if (Subtarget.hasVLX())
|
if (Subtarget.hasVLX())
|
||||||
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
|
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
|
||||||
|
|
||||||
|
LegalizeAction Action = Subtarget.hasVLX() ? Legal : Custom;
|
||||||
|
for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
|
||||||
|
setOperationAction(ISD::MLOAD, VT, Action);
|
||||||
|
setOperationAction(ISD::MSTORE, VT, Action);
|
||||||
|
}
|
||||||
|
|
||||||
if (Subtarget.hasCDI()) {
|
if (Subtarget.hasCDI()) {
|
||||||
setOperationAction(ISD::CTLZ, MVT::v32i16, Custom);
|
setOperationAction(ISD::CTLZ, MVT::v32i16, Custom);
|
||||||
setOperationAction(ISD::CTLZ, MVT::v64i8, Custom);
|
setOperationAction(ISD::CTLZ, MVT::v64i8, Custom);
|
||||||
@ -1700,6 +1706,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||||||
setOperationAction(ISD::SRL, VT, Custom);
|
setOperationAction(ISD::SRL, VT, Custom);
|
||||||
setOperationAction(ISD::SHL, VT, Custom);
|
setOperationAction(ISD::SHL, VT, Custom);
|
||||||
setOperationAction(ISD::SRA, VT, Custom);
|
setOperationAction(ISD::SRA, VT, Custom);
|
||||||
|
setOperationAction(ISD::MLOAD, VT, Legal);
|
||||||
|
setOperationAction(ISD::MSTORE, VT, Legal);
|
||||||
|
|
||||||
setOperationAction(ISD::AND, VT, Promote);
|
setOperationAction(ISD::AND, VT, Promote);
|
||||||
AddPromotedToType (ISD::AND, VT, MVT::v8i64);
|
AddPromotedToType (ISD::AND, VT, MVT::v8i64);
|
||||||
@ -20786,31 +20794,36 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
|
|||||||
|
|
||||||
MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
|
MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
|
||||||
MVT VT = Op.getSimpleValueType();
|
MVT VT = Op.getSimpleValueType();
|
||||||
|
MVT ScalarVT = VT.getScalarType();
|
||||||
SDValue Mask = N->getMask();
|
SDValue Mask = N->getMask();
|
||||||
SDLoc dl(Op);
|
SDLoc dl(Op);
|
||||||
|
|
||||||
if (Subtarget.hasAVX512() && !Subtarget.hasVLX() &&
|
assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
|
||||||
!VT.is512BitVector() && Mask.getValueType() == MVT::v8i1) {
|
"Cannot lower masked load op.");
|
||||||
// This operation is legal for targets with VLX, but without
|
|
||||||
// VLX the vector should be widened to 512 bit
|
|
||||||
unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits();
|
|
||||||
MVT WideDataVT = MVT::getVectorVT(VT.getScalarType(), NumEltsInWideVec);
|
|
||||||
MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);
|
|
||||||
SDValue Src0 = N->getSrc0();
|
|
||||||
Src0 = ExtendToType(Src0, WideDataVT, DAG);
|
|
||||||
Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
|
|
||||||
SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(),
|
|
||||||
N->getBasePtr(), Mask, Src0,
|
|
||||||
N->getMemoryVT(), N->getMemOperand(),
|
|
||||||
N->getExtensionType());
|
|
||||||
|
|
||||||
SDValue Exract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
|
assert(((ScalarVT == MVT::i32 || ScalarVT == MVT::f32) ||
|
||||||
NewLoad.getValue(0),
|
(Subtarget.hasBWI() &&
|
||||||
DAG.getIntPtrConstant(0, dl));
|
(ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
|
||||||
SDValue RetOps[] = {Exract, NewLoad.getValue(1)};
|
"Unsupported masked load op.");
|
||||||
return DAG.getMergeValues(RetOps, dl);
|
|
||||||
}
|
// This operation is legal for targets with VLX, but without
|
||||||
return Op;
|
// VLX the vector should be widened to 512 bit
|
||||||
|
unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits();
|
||||||
|
MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec);
|
||||||
|
MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);
|
||||||
|
SDValue Src0 = N->getSrc0();
|
||||||
|
Src0 = ExtendToType(Src0, WideDataVT, DAG);
|
||||||
|
Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
|
||||||
|
SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(),
|
||||||
|
N->getBasePtr(), Mask, Src0,
|
||||||
|
N->getMemoryVT(), N->getMemOperand(),
|
||||||
|
N->getExtensionType());
|
||||||
|
|
||||||
|
SDValue Exract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
|
||||||
|
NewLoad.getValue(0),
|
||||||
|
DAG.getIntPtrConstant(0, dl));
|
||||||
|
SDValue RetOps[] = {Exract, NewLoad.getValue(1)};
|
||||||
|
return DAG.getMergeValues(RetOps, dl);
|
||||||
}
|
}
|
||||||
|
|
||||||
static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget,
|
static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget,
|
||||||
@ -20818,23 +20831,28 @@ static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget,
|
|||||||
MaskedStoreSDNode *N = cast<MaskedStoreSDNode>(Op.getNode());
|
MaskedStoreSDNode *N = cast<MaskedStoreSDNode>(Op.getNode());
|
||||||
SDValue DataToStore = N->getValue();
|
SDValue DataToStore = N->getValue();
|
||||||
MVT VT = DataToStore.getSimpleValueType();
|
MVT VT = DataToStore.getSimpleValueType();
|
||||||
|
MVT ScalarVT = VT.getScalarType();
|
||||||
SDValue Mask = N->getMask();
|
SDValue Mask = N->getMask();
|
||||||
SDLoc dl(Op);
|
SDLoc dl(Op);
|
||||||
|
|
||||||
if (Subtarget.hasAVX512() && !Subtarget.hasVLX() &&
|
assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
|
||||||
!VT.is512BitVector() && Mask.getValueType() == MVT::v8i1) {
|
"Cannot lower masked store op.");
|
||||||
// This operation is legal for targets with VLX, but without
|
|
||||||
// VLX the vector should be widened to 512 bit
|
assert(((ScalarVT == MVT::i32 || ScalarVT == MVT::f32) ||
|
||||||
unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits();
|
(Subtarget.hasBWI() &&
|
||||||
MVT WideDataVT = MVT::getVectorVT(VT.getScalarType(), NumEltsInWideVec);
|
(ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
|
||||||
MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);
|
"Unsupported masked store op.");
|
||||||
DataToStore = ExtendToType(DataToStore, WideDataVT, DAG);
|
|
||||||
Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
|
// This operation is legal for targets with VLX, but without
|
||||||
return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(),
|
// VLX the vector should be widened to 512 bit
|
||||||
Mask, N->getMemoryVT(), N->getMemOperand(),
|
unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits();
|
||||||
N->isTruncatingStore());
|
MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec);
|
||||||
}
|
MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);
|
||||||
return Op;
|
DataToStore = ExtendToType(DataToStore, WideDataVT, DAG);
|
||||||
|
Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
|
||||||
|
return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(),
|
||||||
|
Mask, N->getMemoryVT(), N->getMemOperand(),
|
||||||
|
N->isTruncatingStore());
|
||||||
}
|
}
|
||||||
|
|
||||||
static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
|
static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
|
||||||
|
@ -1438,7 +1438,8 @@ bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) {
|
|||||||
int DataWidth = isa<PointerType>(ScalarTy) ?
|
int DataWidth = isa<PointerType>(ScalarTy) ?
|
||||||
DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits();
|
DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits();
|
||||||
|
|
||||||
return (DataWidth >= 32 && ST->hasAVX());
|
return (DataWidth >= 32 && ST->hasAVX()) ||
|
||||||
|
(DataWidth >= 8 && ST->hasBWI());
|
||||||
}
|
}
|
||||||
|
|
||||||
bool X86TTIImpl::isLegalMaskedStore(Type *DataType) {
|
bool X86TTIImpl::isLegalMaskedStore(Type *DataType) {
|
||||||
|
@ -79,3 +79,115 @@ define <32 x i16> @test8(i8 * %addr, <32 x i16> %mask1) {
|
|||||||
%res = select <32 x i1> %mask, <32 x i16> %r, <32 x i16> zeroinitializer
|
%res = select <32 x i1> %mask, <32 x i16> %r, <32 x i16> zeroinitializer
|
||||||
ret <32 x i16>%res
|
ret <32 x i16>%res
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define <16 x i8> @test_mask_load_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
|
||||||
|
; CHECK-LABEL: test_mask_load_16xi8:
|
||||||
|
; CHECK: ## BB#0:
|
||||||
|
; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0
|
||||||
|
; CHECK-NEXT: vpmovb2m %zmm0, %k0
|
||||||
|
; CHECK-NEXT: kshiftlq $48, %k0, %k0
|
||||||
|
; CHECK-NEXT: kshiftrq $48, %k0, %k1
|
||||||
|
; CHECK-NEXT: vmovdqu8 (%rdi), %zmm0 {%k1} {z}
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
%res = call <16 x i8> @llvm.masked.load.v16i8(<16 x i8>* %addr, i32 4, <16 x i1>%mask, <16 x i8> undef)
|
||||||
|
ret <16 x i8> %res
|
||||||
|
}
|
||||||
|
declare <16 x i8> @llvm.masked.load.v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)
|
||||||
|
|
||||||
|
define <32 x i8> @test_mask_load_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
|
||||||
|
; CHECK-LABEL: test_mask_load_32xi8:
|
||||||
|
; CHECK: ## BB#0:
|
||||||
|
; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0
|
||||||
|
; CHECK-NEXT: vpmovb2m %zmm0, %k0
|
||||||
|
; CHECK-NEXT: kshiftlq $32, %k0, %k0
|
||||||
|
; CHECK-NEXT: kshiftrq $32, %k0, %k1
|
||||||
|
; CHECK-NEXT: vmovdqu8 (%rdi), %zmm0 {%k1} {z}
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
%res = call <32 x i8> @llvm.masked.load.v32i8(<32 x i8>* %addr, i32 4, <32 x i1>%mask, <32 x i8> zeroinitializer)
|
||||||
|
ret <32 x i8> %res
|
||||||
|
}
|
||||||
|
declare <32 x i8> @llvm.masked.load.v32i8(<32 x i8>*, i32, <32 x i1>, <32 x i8>)
|
||||||
|
|
||||||
|
define <8 x i16> @test_mask_load_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
|
||||||
|
; CHECK-LABEL: test_mask_load_8xi16:
|
||||||
|
; CHECK: ## BB#0:
|
||||||
|
; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0
|
||||||
|
; CHECK-NEXT: vpmovw2m %zmm0, %k0
|
||||||
|
; CHECK-NEXT: kshiftld $24, %k0, %k0
|
||||||
|
; CHECK-NEXT: kshiftrd $24, %k0, %k1
|
||||||
|
; CHECK-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1} {z}
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
%res = call <8 x i16> @llvm.masked.load.v8i16(<8 x i16>* %addr, i32 4, <8 x i1>%mask, <8 x i16> undef)
|
||||||
|
ret <8 x i16> %res
|
||||||
|
}
|
||||||
|
declare <8 x i16> @llvm.masked.load.v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)
|
||||||
|
|
||||||
|
define <16 x i16> @test_mask_load_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
|
||||||
|
; CHECK-LABEL: test_mask_load_16xi16:
|
||||||
|
; CHECK: ## BB#0:
|
||||||
|
; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0
|
||||||
|
; CHECK-NEXT: vpmovb2m %zmm0, %k0
|
||||||
|
; CHECK-NEXT: kshiftld $16, %k0, %k0
|
||||||
|
; CHECK-NEXT: kshiftrd $16, %k0, %k1
|
||||||
|
; CHECK-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1} {z}
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
%res = call <16 x i16> @llvm.masked.load.v16i16(<16 x i16>* %addr, i32 4, <16 x i1>%mask, <16 x i16> zeroinitializer)
|
||||||
|
ret <16 x i16> %res
|
||||||
|
}
|
||||||
|
declare <16 x i16> @llvm.masked.load.v16i16(<16 x i16>*, i32, <16 x i1>, <16 x i16>)
|
||||||
|
|
||||||
|
define void @test_mask_store_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
|
||||||
|
; CHECK-LABEL: test_mask_store_16xi8:
|
||||||
|
; CHECK: ## BB#0:
|
||||||
|
; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0
|
||||||
|
; CHECK-NEXT: vpmovb2m %zmm0, %k0
|
||||||
|
; CHECK-NEXT: kshiftlq $48, %k0, %k0
|
||||||
|
; CHECK-NEXT: kshiftrq $48, %k0, %k1
|
||||||
|
; CHECK-NEXT: vmovdqu8 %zmm1, (%rdi) {%k1}
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
call void @llvm.masked.store.v16i8(<16 x i8> %val, <16 x i8>* %addr, i32 4, <16 x i1>%mask)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
declare void @llvm.masked.store.v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)
|
||||||
|
|
||||||
|
define void @test_mask_store_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
|
||||||
|
; CHECK-LABEL: test_mask_store_32xi8:
|
||||||
|
; CHECK: ## BB#0:
|
||||||
|
; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0
|
||||||
|
; CHECK-NEXT: vpmovb2m %zmm0, %k0
|
||||||
|
; CHECK-NEXT: kshiftlq $32, %k0, %k0
|
||||||
|
; CHECK-NEXT: kshiftrq $32, %k0, %k1
|
||||||
|
; CHECK-NEXT: vmovdqu8 %zmm1, (%rdi) {%k1}
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
call void @llvm.masked.store.v32i8(<32 x i8> %val, <32 x i8>* %addr, i32 4, <32 x i1>%mask)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
declare void @llvm.masked.store.v32i8(<32 x i8>, <32 x i8>*, i32, <32 x i1>)
|
||||||
|
|
||||||
|
define void @test_mask_store_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
|
||||||
|
; CHECK-LABEL: test_mask_store_8xi16:
|
||||||
|
; CHECK: ## BB#0:
|
||||||
|
; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0
|
||||||
|
; CHECK-NEXT: vpmovw2m %zmm0, %k0
|
||||||
|
; CHECK-NEXT: kshiftld $24, %k0, %k0
|
||||||
|
; CHECK-NEXT: kshiftrd $24, %k0, %k1
|
||||||
|
; CHECK-NEXT: vmovdqu16 %zmm1, (%rdi) {%k1}
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
call void @llvm.masked.store.v8i16(<8 x i16> %val, <8 x i16>* %addr, i32 4, <8 x i1>%mask)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
declare void @llvm.masked.store.v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)
|
||||||
|
|
||||||
|
define void @test_mask_store_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
|
||||||
|
; CHECK-LABEL: test_mask_store_16xi16:
|
||||||
|
; CHECK: ## BB#0:
|
||||||
|
; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0
|
||||||
|
; CHECK-NEXT: vpmovb2m %zmm0, %k0
|
||||||
|
; CHECK-NEXT: kshiftld $16, %k0, %k0
|
||||||
|
; CHECK-NEXT: kshiftrd $16, %k0, %k1
|
||||||
|
; CHECK-NEXT: vmovdqu16 %zmm1, (%rdi) {%k1}
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
call void @llvm.masked.store.v16i16(<16 x i16> %val, <16 x i16>* %addr, i32 4, <16 x i1>%mask)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
declare void @llvm.masked.store.v16i16(<16 x i16>, <16 x i16>*, i32, <16 x i1>)
|
||||||
|
@ -2168,3 +2168,149 @@ define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32
|
|||||||
ret <32 x double> %res
|
ret <32 x double> %res
|
||||||
}
|
}
|
||||||
declare <32 x double> @llvm.masked.load.v32f64(<32 x double>* %ptrs, i32, <32 x i1> %mask, <32 x double> %src0)
|
declare <32 x double> @llvm.masked.load.v32f64(<32 x double>* %ptrs, i32, <32 x i1> %mask, <32 x double> %src0)
|
||||||
|
|
||||||
|
define <16 x i8> @test_mask_load_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
|
||||||
|
; SKX-LABEL: test_mask_load_16xi8:
|
||||||
|
; SKX: ## BB#0:
|
||||||
|
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
|
||||||
|
; SKX-NEXT: vpmovb2m %xmm0, %k1
|
||||||
|
; SKX-NEXT: vmovdqu8 (%rdi), %xmm0 {%k1} {z}
|
||||||
|
; SKX-NEXT: retq
|
||||||
|
%res = call <16 x i8> @llvm.masked.load.v16i8(<16 x i8>* %addr, i32 4, <16 x i1>%mask, <16 x i8> undef)
|
||||||
|
ret <16 x i8> %res
|
||||||
|
}
|
||||||
|
declare <16 x i8> @llvm.masked.load.v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)
|
||||||
|
|
||||||
|
define <32 x i8> @test_mask_load_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
|
||||||
|
; SKX-LABEL: test_mask_load_32xi8:
|
||||||
|
; SKX: ## BB#0:
|
||||||
|
; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
|
||||||
|
; SKX-NEXT: vpmovb2m %ymm0, %k1
|
||||||
|
; SKX-NEXT: vmovdqu8 (%rdi), %ymm0 {%k1} {z}
|
||||||
|
; SKX-NEXT: retq
|
||||||
|
%res = call <32 x i8> @llvm.masked.load.v32i8(<32 x i8>* %addr, i32 4, <32 x i1>%mask, <32 x i8> zeroinitializer)
|
||||||
|
ret <32 x i8> %res
|
||||||
|
}
|
||||||
|
declare <32 x i8> @llvm.masked.load.v32i8(<32 x i8>*, i32, <32 x i1>, <32 x i8>)
|
||||||
|
|
||||||
|
define <64 x i8> @test_mask_load_64xi8(<64 x i1> %mask, <64 x i8>* %addr, <64 x i8> %val) {
|
||||||
|
; SKX-LABEL: test_mask_load_64xi8:
|
||||||
|
; SKX: ## BB#0:
|
||||||
|
; SKX-NEXT: vpsllw $7, %zmm0, %zmm0
|
||||||
|
; SKX-NEXT: vpmovb2m %zmm0, %k1
|
||||||
|
; SKX-NEXT: vmovdqu8 (%rdi), %zmm1 {%k1}
|
||||||
|
; SKX-NEXT: vmovaps %zmm1, %zmm0
|
||||||
|
; SKX-NEXT: retq
|
||||||
|
%res = call <64 x i8> @llvm.masked.load.v64i8(<64 x i8>* %addr, i32 4, <64 x i1>%mask, <64 x i8> %val)
|
||||||
|
ret <64 x i8> %res
|
||||||
|
}
|
||||||
|
declare <64 x i8> @llvm.masked.load.v64i8(<64 x i8>*, i32, <64 x i1>, <64 x i8>)
|
||||||
|
|
||||||
|
define <8 x i16> @test_mask_load_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
|
||||||
|
; SKX-LABEL: test_mask_load_8xi16:
|
||||||
|
; SKX: ## BB#0:
|
||||||
|
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
|
||||||
|
; SKX-NEXT: vpmovw2m %xmm0, %k1
|
||||||
|
; SKX-NEXT: vmovdqu16 (%rdi), %xmm0 {%k1} {z}
|
||||||
|
; SKX-NEXT: retq
|
||||||
|
%res = call <8 x i16> @llvm.masked.load.v8i16(<8 x i16>* %addr, i32 4, <8 x i1>%mask, <8 x i16> undef)
|
||||||
|
ret <8 x i16> %res
|
||||||
|
}
|
||||||
|
declare <8 x i16> @llvm.masked.load.v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)
|
||||||
|
|
||||||
|
define <16 x i16> @test_mask_load_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
|
||||||
|
; SKX-LABEL: test_mask_load_16xi16:
|
||||||
|
; SKX: ## BB#0:
|
||||||
|
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
|
||||||
|
; SKX-NEXT: vpmovb2m %xmm0, %k1
|
||||||
|
; SKX-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1} {z}
|
||||||
|
; SKX-NEXT: retq
|
||||||
|
%res = call <16 x i16> @llvm.masked.load.v16i16(<16 x i16>* %addr, i32 4, <16 x i1>%mask, <16 x i16> zeroinitializer)
|
||||||
|
ret <16 x i16> %res
|
||||||
|
}
|
||||||
|
declare <16 x i16> @llvm.masked.load.v16i16(<16 x i16>*, i32, <16 x i1>, <16 x i16>)
|
||||||
|
|
||||||
|
define <32 x i16> @test_mask_load_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i16> %val) {
|
||||||
|
; SKX-LABEL: test_mask_load_32xi16:
|
||||||
|
; SKX: ## BB#0:
|
||||||
|
; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
|
||||||
|
; SKX-NEXT: vpmovb2m %ymm0, %k1
|
||||||
|
; SKX-NEXT: vmovdqu16 (%rdi), %zmm1 {%k1}
|
||||||
|
; SKX-NEXT: vmovaps %zmm1, %zmm0
|
||||||
|
; SKX-NEXT: retq
|
||||||
|
%res = call <32 x i16> @llvm.masked.load.v32i16(<32 x i16>* %addr, i32 4, <32 x i1>%mask, <32 x i16> %val)
|
||||||
|
ret <32 x i16> %res
|
||||||
|
}
|
||||||
|
declare <32 x i16> @llvm.masked.load.v32i16(<32 x i16>*, i32, <32 x i1>, <32 x i16>)
|
||||||
|
|
||||||
|
define void @test_mask_store_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
|
||||||
|
; SKX-LABEL: test_mask_store_16xi8:
|
||||||
|
; SKX: ## BB#0:
|
||||||
|
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
|
||||||
|
; SKX-NEXT: vpmovb2m %xmm0, %k1
|
||||||
|
; SKX-NEXT: vmovdqu8 %xmm1, (%rdi) {%k1}
|
||||||
|
; SKX-NEXT: retq
|
||||||
|
call void @llvm.masked.store.v16i8(<16 x i8> %val, <16 x i8>* %addr, i32 4, <16 x i1>%mask)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
declare void @llvm.masked.store.v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)
|
||||||
|
|
||||||
|
define void @test_mask_store_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
|
||||||
|
; SKX-LABEL: test_mask_store_32xi8:
|
||||||
|
; SKX: ## BB#0:
|
||||||
|
; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
|
||||||
|
; SKX-NEXT: vpmovb2m %ymm0, %k1
|
||||||
|
; SKX-NEXT: vmovdqu8 %ymm1, (%rdi) {%k1}
|
||||||
|
; SKX-NEXT: retq
|
||||||
|
call void @llvm.masked.store.v32i8(<32 x i8> %val, <32 x i8>* %addr, i32 4, <32 x i1>%mask)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
declare void @llvm.masked.store.v32i8(<32 x i8>, <32 x i8>*, i32, <32 x i1>)
|
||||||
|
|
||||||
|
define void @test_mask_store_64xi8(<64 x i1> %mask, <64 x i8>* %addr, <64 x i8> %val) {
|
||||||
|
; SKX-LABEL: test_mask_store_64xi8:
|
||||||
|
; SKX: ## BB#0:
|
||||||
|
; SKX-NEXT: vpsllw $7, %zmm0, %zmm0
|
||||||
|
; SKX-NEXT: vpmovb2m %zmm0, %k1
|
||||||
|
; SKX-NEXT: vmovdqu8 %zmm1, (%rdi) {%k1}
|
||||||
|
; SKX-NEXT: retq
|
||||||
|
call void @llvm.masked.store.v64i8(<64 x i8> %val, <64 x i8>* %addr, i32 4, <64 x i1>%mask)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
declare void @llvm.masked.store.v64i8(<64 x i8>, <64 x i8>*, i32, <64 x i1>)
|
||||||
|
|
||||||
|
define void @test_mask_store_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
|
||||||
|
; SKX-LABEL: test_mask_store_8xi16:
|
||||||
|
; SKX: ## BB#0:
|
||||||
|
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
|
||||||
|
; SKX-NEXT: vpmovw2m %xmm0, %k1
|
||||||
|
; SKX-NEXT: vmovdqu16 %xmm1, (%rdi) {%k1}
|
||||||
|
; SKX-NEXT: retq
|
||||||
|
call void @llvm.masked.store.v8i16(<8 x i16> %val, <8 x i16>* %addr, i32 4, <8 x i1>%mask)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
declare void @llvm.masked.store.v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)
|
||||||
|
|
||||||
|
define void @test_mask_store_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
|
||||||
|
; SKX-LABEL: test_mask_store_16xi16:
|
||||||
|
; SKX: ## BB#0:
|
||||||
|
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
|
||||||
|
; SKX-NEXT: vpmovb2m %xmm0, %k1
|
||||||
|
; SKX-NEXT: vmovdqu16 %ymm1, (%rdi) {%k1}
|
||||||
|
; SKX-NEXT: retq
|
||||||
|
call void @llvm.masked.store.v16i16(<16 x i16> %val, <16 x i16>* %addr, i32 4, <16 x i1>%mask)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
declare void @llvm.masked.store.v16i16(<16 x i16>, <16 x i16>*, i32, <16 x i1>)
|
||||||
|
|
||||||
|
define void @test_mask_store_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i16> %val) {
|
||||||
|
; SKX-LABEL: test_mask_store_32xi16:
|
||||||
|
; SKX: ## BB#0:
|
||||||
|
; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
|
||||||
|
; SKX-NEXT: vpmovb2m %ymm0, %k1
|
||||||
|
; SKX-NEXT: vmovdqu16 %zmm1, (%rdi) {%k1}
|
||||||
|
; SKX-NEXT: retq
|
||||||
|
call void @llvm.masked.store.v32i16(<32 x i16> %val, <32 x i16>* %addr, i32 4, <32 x i1>%mask)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
declare void @llvm.masked.store.v32i16(<32 x i16>, <32 x i16>*, i32, <32 x i1>)
|
Loading…
Reference in New Issue
Block a user