Mirror of https://github.com/RPCS3/llvm-mirror.git
[X86] Stop reduceMaskedLoadToScalarLoad/reduceMaskedStoreToScalarStore from creating scalar i64 load/stores in 32-bit mode
If we emit a scalar i64 load/store, it will get type legalized to two i32 loads/stores.

Differential Revision: https://reviews.llvm.org/D87862
commit aa3ccaf198
parent b826feba37
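Why this change matters: on a 32-bit x86 target, i64 is not a legal scalar type, so an 8-byte integer load or store is type legalized into two 4-byte operations. An f64 scalar is legal there (with SSE2), and a bitcast between i64 and f64 preserves the bit pattern, so routing the element through f64 keeps the access as a single 8-byte memory operation. A minimal sketch of the difference outside of LLVM (function names are mine; the exact instructions depend on compiler and flags):

    #include <cstdint>
    #include <cstring>

    // i386: a uint64_t lives in a 32-bit register pair (e.g. edx:eax), so
    // this copy is typically lowered to two 32-bit loads.
    uint64_t load_as_i64(const void *p) {
      uint64_t v;
      std::memcpy(&v, p, sizeof(v));
      return v;
    }

    // The same 8 bytes read as a double are typically one 8-byte
    // floating-point load (fldl, or movsd with SSE2) -- identical bits,
    // a single access.
    double load_as_f64(const void *p) {
      double v;
      std::memcpy(&v, p, sizeof(v));
      return v;
    }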
@@ -44499,7 +44499,8 @@ static bool getParamsForOneTrueMaskedElt(MaskedLoadStoreSDNode *MaskedOp,
 /// mask have already been optimized in IR, so we don't bother with those here.
 static SDValue
 reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG,
-                             TargetLowering::DAGCombinerInfo &DCI) {
+                             TargetLowering::DAGCombinerInfo &DCI,
+                             const X86Subtarget &Subtarget) {
   assert(ML->isUnindexed() && "Unexpected indexed masked load!");
   // TODO: This is not x86-specific, so it could be lifted to DAGCombiner.
   // However, some target hooks may need to be added to know when the transform
@@ -44516,14 +44517,25 @@ reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG,
   SDLoc DL(ML);
   EVT VT = ML->getValueType(0);
   EVT EltVT = VT.getVectorElementType();
+
+  EVT CastVT = VT;
+  if (EltVT == MVT::i64 && !Subtarget.is64Bit()) {
+    EltVT = MVT::f64;
+    CastVT =
+        EVT::getVectorVT(*DAG.getContext(), EltVT, VT.getVectorNumElements());
+  }
+
   SDValue Load =
       DAG.getLoad(EltVT, DL, ML->getChain(), Addr,
                   ML->getPointerInfo().getWithOffset(Offset),
                   Alignment, ML->getMemOperand()->getFlags());
+
+  SDValue PassThru = DAG.getBitcast(CastVT, ML->getPassThru());
 
   // Insert the loaded element into the appropriate place in the vector.
-  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
-                               ML->getPassThru(), Load, VecIndex);
+  SDValue Insert =
+      DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, CastVT, PassThru, Load, VecIndex);
+  Insert = DAG.getBitcast(VT, Insert);
   return DCI.CombineTo(ML, Insert, Load.getValue(1), true);
 }
 
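The combine now does the whole transform in the f64 domain when the element type is i64 on a 32-bit subtarget: bitcast the pass-through vector to vNf64, load the scalar as f64, insert it, then bitcast the result back to the original type. This is correct because a bitcast is defined to be bit-preserving. A self-contained sketch of that invariant (assumes C++20 for std::bit_cast):

    #include <bit>
    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t lane = 0x4010000000000000ULL;       // arbitrary 64-bit payload
      double as_f64 = std::bit_cast<double>(lane); // reinterpret, no conversion
      assert(std::bit_cast<uint64_t>(as_f64) == lane); // lossless round trip
      return 0;
    }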
@@ -44586,7 +44598,8 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   if (Mld->getExtensionType() == ISD::NON_EXTLOAD) {
-    if (SDValue ScalarLoad = reduceMaskedLoadToScalarLoad(Mld, DAG, DCI))
+    if (SDValue ScalarLoad =
+            reduceMaskedLoadToScalarLoad(Mld, DAG, DCI, Subtarget))
       return ScalarLoad;
 
     // TODO: Do some AVX512 subsets benefit from this transform?
@@ -44623,7 +44636,8 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
 /// Note: It is expected that the degenerate cases of an all-zeros or all-ones
 /// mask have already been optimized in IR, so we don't bother with those here.
 static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS,
-                                              SelectionDAG &DAG) {
+                                              SelectionDAG &DAG,
+                                              const X86Subtarget &Subtarget) {
   // TODO: This is not x86-specific, so it could be lifted to DAGCombiner.
   // However, some target hooks may need to be added to know when the transform
   // is profitable. Endianness would also have to be considered.
@@ -44636,10 +44650,17 @@ static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS,
 
   // Extract the one scalar element that is actually being stored.
   SDLoc DL(MS);
-  EVT VT = MS->getValue().getValueType();
+  SDValue Value = MS->getValue();
+  EVT VT = Value.getValueType();
   EVT EltVT = VT.getVectorElementType();
-  SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
-                                MS->getValue(), VecIndex);
+  if (EltVT == MVT::i64 && !Subtarget.is64Bit()) {
+    EltVT = MVT::f64;
+    EVT CastVT =
+        EVT::getVectorVT(*DAG.getContext(), EltVT, VT.getVectorNumElements());
+    Value = DAG.getBitcast(CastVT, Value);
+  }
+  SDValue Extract =
+      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Value, VecIndex);
 
   // Store that element at the appropriate offset from the base pointer.
   return DAG.getStore(MS->getChain(), DL, Extract, Addr,
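The store side mirrors the load side: when the element type is i64 on a 32-bit subtarget, the source vector is bitcast to f64 lanes before the one live element is extracted, so the resulting scalar store is an f64 store instead of an i64 store. The same idea outside of LLVM (illustrative only; actual codegen depends on flags):

    #include <cstdint>

    // i386: storing a uint64_t is legalized to two 32-bit stores (the value
    // lives in a register pair), while storing a double is a single 8-byte
    // floating-point store (fstpl, or movsd with SSE2).
    void store_i64(uint64_t *dst, uint64_t v) { *dst = v; }
    void store_f64(double *dst, double v) { *dst = v; }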
@@ -44661,7 +44682,7 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
   if (Mst->isTruncatingStore())
     return SDValue();
 
-  if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG))
+  if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG, Subtarget))
     return ScalarStore;
 
   // If the mask value has been legalized to a non-boolean vector, try to
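The remaining hunks update X86-AVX512 check lines in the masked-load tests and show the payoff: each vpinsrd pair (the two i32 loads left behind by legalizing a scalar i64 load) collapses into a single 8-byte vmovhps or vmovlps load of the f64-typed element.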
@@ -6504,8 +6504,7 @@ define <2 x i64> @mload_constmask_v2i64(<2 x i64>* %addr, <2 x i64> %dst) {
 ; X86-AVX512-LABEL: mload_constmask_v2i64:
 ; X86-AVX512:       ## %bb.0:
 ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-AVX512-NEXT:    vpinsrd $2, 8(%eax), %xmm0, %xmm0
-; X86-AVX512-NEXT:    vpinsrd $3, 12(%eax), %xmm0, %xmm0
+; X86-AVX512-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
 ; X86-AVX512-NEXT:    retl
   %res = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %addr, i32 4, <2 x i1> <i1 0, i1 1>, <2 x i64> %dst)
   ret <2 x i64> %res
@@ -7109,10 +7108,9 @@ define <4 x i64> @load_one_mask_bit_set3(<4 x i64>* %addr, <4 x i64> %val) {
 ; X86-AVX512-LABEL: load_one_mask_bit_set3:
 ; X86-AVX512:       ## %bb.0:
 ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X86-AVX512-NEXT:    vpinsrd $0, 16(%eax), %xmm1, %xmm1
-; X86-AVX512-NEXT:    vpinsrd $1, 20(%eax), %xmm1, %xmm1
-; X86-AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X86-AVX512-NEXT:    vmovlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
+; X86-AVX512-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; X86-AVX512-NEXT:    retl
   %res = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %addr, i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x i64> %val)
   ret <4 x i64> %res