mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[SVE][CodeGen] Legalisation of unpredicated load instructions
Summary: When splitting a load of a scalable type, the new address is calculated in SplitVecRes_LOAD using a vscale and an add instruction. This patch also adds a DAG combiner fold to visitADD for vscale: - Fold (add (add a, vscale(C0)), vscale(C1)) to (add a, vscale(C0 + C1)) Reviewers: sdesmalen, efriedma, david-arm Reviewed By: david-arm Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D82792
This commit is contained in:
parent
966a5c445d
commit
884e13dafb
@ -931,7 +931,8 @@ public:
|
||||
/// Return a VSCALE node of type \p VT whose single operand is the constant
/// multiplier \p MulImm.
///
/// \param DL     Source location attached to the created nodes.
/// \param VT     Result type of the node; also the type given to the constant.
/// \param MulImm Multiplier applied to vscale. It must be representable as a
///               signed value in VT's bit width (enforced by the assert).
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm) {
  assert(MulImm.getMinSignedBits() <= VT.getSizeInBits() &&
         "Immediate does not fit VT");
  // Resize the immediate to exactly VT's width before building the constant,
  // so callers may pass an APInt whose bit width differs from VT's.
  const APInt Imm = MulImm.sextOrTrunc(VT.getSizeInBits());
  return getNode(ISD::VSCALE, DL, VT, getConstant(Imm, DL, VT));
}
|
||||
|
||||
/// Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
|
||||
|
@ -2371,6 +2371,16 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
|
||||
return DAG.getVScale(DL, VT, C0 + C1);
|
||||
}
|
||||
|
||||
// fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
|
||||
if ((N0.getOpcode() == ISD::ADD) &&
|
||||
(N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
|
||||
(N1.getOpcode() == ISD::VSCALE)) {
|
||||
auto VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
|
||||
auto VS1 = N1->getConstantOperandAPInt(0);
|
||||
auto VS = DAG.getVScale(DL, VT, VS0 + VS1);
|
||||
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
@ -1537,11 +1537,22 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
|
||||
LD->getPointerInfo(), LoMemVT, LD->getOriginalAlign(),
|
||||
MMOFlags, AAInfo);
|
||||
|
||||
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
|
||||
Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
|
||||
Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
|
||||
LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT,
|
||||
LD->getOriginalAlign(), MMOFlags, AAInfo);
|
||||
unsigned IncrementSize = LoMemVT.getSizeInBits().getKnownMinSize() / 8;
|
||||
|
||||
MachinePointerInfo MPI;
|
||||
if (LoVT.isScalableVector()) {
|
||||
SDValue BytesIncrement = DAG.getVScale(
|
||||
dl, Ptr.getValueType(),
|
||||
APInt(Ptr.getValueSizeInBits().getFixedSize(), IncrementSize));
|
||||
MPI = MachinePointerInfo(LD->getPointerInfo().getAddrSpace());
|
||||
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, BytesIncrement);
|
||||
} else {
|
||||
MPI = LD->getPointerInfo().getWithOffset(IncrementSize);
|
||||
Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
|
||||
}
|
||||
|
||||
Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, MPI,
|
||||
HiMemVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
|
||||
|
||||
// Build a factor node to remember that this load is independent of the
|
||||
// other one.
|
||||
|
@ -4802,6 +4802,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
|
||||
if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)
|
||||
return getNode(ISD::FABS, DL, VT, Operand.getOperand(0));
|
||||
break;
|
||||
case ISD::VSCALE:
|
||||
assert(VT == Operand.getValueType() && "Unexpected VT!");
|
||||
break;
|
||||
}
|
||||
|
||||
SDNode *N;
|
||||
|
55
test/CodeGen/AArch64/sve-split-load.ll
Normal file
55
test/CodeGen/AArch64/sve-split-load.ll
Normal file
@ -0,0 +1,55 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
|
||||
|
||||
; LOAD
|
||||
|
||||
; Loads a <vscale x 4 x i16> through %a and returns it. The expected output is
; a single ld1h into .s-sized lanes under an all-true .s predicate.
; NOTE(review): the function name says 4i8 but the loaded element type is i16;
; confirm which was intended.
define <vscale x 4 x i16> @load_promote_4i8(<vscale x 4 x i16>* %a) {
|
||||
; CHECK-LABEL: load_promote_4i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
%load = load <vscale x 4 x i16>, <vscale x 4 x i16>* %a
|
||||
ret <vscale x 4 x i16> %load
|
||||
}
|
||||
|
||||
; Loads a <vscale x 16 x i16> through %a and returns it. The expected output is
; two ld1h loads into z0 and z1; the second uses the address [x0, #1, mul vl].
define <vscale x 16 x i16> @load_split_i16(<vscale x 16 x i16>* %a) {
|
||||
; CHECK-LABEL: load_split_i16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, #1, mul vl]
|
||||
; CHECK-NEXT: ret
|
||||
%load = load <vscale x 16 x i16>, <vscale x 16 x i16>* %a
|
||||
ret <vscale x 16 x i16> %load
|
||||
}
|
||||
|
||||
; Loads a <vscale x 32 x i16> through %a and returns it. The expected output is
; four ld1h loads into z0..z3, addressed at [x0] and then at
; [x0, #1..#3, mul vl].
define <vscale x 32 x i16> @load_split_32i16(<vscale x 32 x i16>* %a) {
|
||||
; CHECK-LABEL: load_split_32i16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
||||
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, #1, mul vl]
|
||||
; CHECK-NEXT: ld1h { z2.h }, p0/z, [x0, #2, mul vl]
|
||||
; CHECK-NEXT: ld1h { z3.h }, p0/z, [x0, #3, mul vl]
|
||||
; CHECK-NEXT: ret
|
||||
%load = load <vscale x 32 x i16>, <vscale x 32 x i16>* %a
|
||||
ret <vscale x 32 x i16> %load
|
||||
}
|
||||
|
||||
; Loads a <vscale x 16 x i64> through %a and returns it. The expected output is
; eight ld1d loads into z0..z7, addressed at [x0] and then at
; [x0, #1..#7, mul vl].
define <vscale x 16 x i64> @load_split_16i64(<vscale x 16 x i64>* %a) {
|
||||
; CHECK-LABEL: load_split_16i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #1, mul vl]
|
||||
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #2, mul vl]
|
||||
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x0, #3, mul vl]
|
||||
; CHECK-NEXT: ld1d { z4.d }, p0/z, [x0, #4, mul vl]
|
||||
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x0, #5, mul vl]
|
||||
; CHECK-NEXT: ld1d { z6.d }, p0/z, [x0, #6, mul vl]
|
||||
; CHECK-NEXT: ld1d { z7.d }, p0/z, [x0, #7, mul vl]
|
||||
; CHECK-NEXT: ret
|
||||
%load = load <vscale x 16 x i64>, <vscale x 16 x i64>* %a
|
||||
ret <vscale x 16 x i64> %load
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user