mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[AArch64][SVE] Allow llvm.aarch64.sve.st2/3/4 with vectors of pointers.
This isn't necessary for ACLE, but could be useful in other situations. And the change is simple. Differential Revision: https://reviews.llvm.org/D85251
This commit is contained in:
parent
67ae683e5b
commit
2be753c211
@@ -9462,16 +9462,17 @@ SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
|
||||
|
||||
/// Set the IntrinsicInfo for the `aarch64_sve_st<N>` intrinsics.
|
||||
template <unsigned NumVecs>
|
||||
static bool setInfoSVEStN(AArch64TargetLowering::IntrinsicInfo &Info,
|
||||
const CallInst &CI) {
|
||||
static bool
|
||||
setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL,
|
||||
AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) {
|
||||
Info.opc = ISD::INTRINSIC_VOID;
|
||||
// Retrieve EC from first vector argument.
|
||||
const EVT VT = EVT::getEVT(CI.getArgOperand(0)->getType());
|
||||
const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType());
|
||||
ElementCount EC = VT.getVectorElementCount();
|
||||
#ifndef NDEBUG
|
||||
// Check the assumption that all input vectors are the same type.
|
||||
for (unsigned I = 0; I < NumVecs; ++I)
|
||||
assert(VT == EVT::getEVT(CI.getArgOperand(I)->getType()) &&
|
||||
assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) &&
|
||||
"Invalid type.");
|
||||
#endif
|
||||
// memVT is `NumVecs * VT`.
|
||||
@@ -9494,11 +9495,11 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
||||
auto &DL = I.getModule()->getDataLayout();
|
||||
switch (Intrinsic) {
|
||||
case Intrinsic::aarch64_sve_st2:
|
||||
return setInfoSVEStN<2>(Info, I);
|
||||
return setInfoSVEStN<2>(*this, DL, Info, I);
|
||||
case Intrinsic::aarch64_sve_st3:
|
||||
return setInfoSVEStN<3>(Info, I);
|
||||
return setInfoSVEStN<3>(*this, DL, Info, I);
|
||||
case Intrinsic::aarch64_sve_st4:
|
||||
return setInfoSVEStN<4>(Info, I);
|
||||
return setInfoSVEStN<4>(*this, DL, Info, I);
|
||||
case Intrinsic::aarch64_neon_ld2:
|
||||
case Intrinsic::aarch64_neon_ld3:
|
||||
case Intrinsic::aarch64_neon_ld4:
|
||||
|
@@ -108,6 +108,17 @@ define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
|
||||
ret void
|
||||
}
|
||||
|
||||
; New test for D85251: llvm.aarch64.sve.st2 applied to vectors of pointers
; (<vscale x 2 x i8*>) must be accepted by getTgtMemIntrinsic and select the
; .d-element structure store, per the CHECK line below.
define void @st2d_ptr(<vscale x 2 x i8*> %v0, <vscale x 2 x i8*> %v1, <vscale x 2 x i1> %pred, i8** %addr) {
|
||||
; CHECK-LABEL: st2d_ptr:
|
||||
; CHECK: st2d { z0.d, z1.d }, p0, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
call void @llvm.aarch64.sve.st2.nxv2p0i8(<vscale x 2 x i8*> %v0,
|
||||
<vscale x 2 x i8*> %v1,
|
||||
<vscale x 2 x i1> %pred,
|
||||
i8** %addr)
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; ST3B
|
||||
;
|
||||
@@ -220,6 +231,18 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
|
||||
ret void
|
||||
}
|
||||
|
||||
; New test for D85251: three-vector variant — llvm.aarch64.sve.st3 with
; pointer-element vectors should select st3d (see CHECK line), mirroring
; the st2d_ptr test above.
define void @st3d_ptr(<vscale x 2 x i8*> %v0, <vscale x 2 x i8*> %v1, <vscale x 2 x i8*> %v2, <vscale x 2 x i1> %pred, i8** %addr) {
|
||||
; CHECK-LABEL: st3d_ptr:
|
||||
; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
call void @llvm.aarch64.sve.st3.nxv2p0i8(<vscale x 2 x i8*> %v0,
|
||||
<vscale x 2 x i8*> %v1,
|
||||
<vscale x 2 x i8*> %v2,
|
||||
<vscale x 2 x i1> %pred,
|
||||
i8** %addr)
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; ST4B
|
||||
;
|
||||
@@ -340,6 +363,18 @@ define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
|
||||
ret void
|
||||
}
|
||||
|
||||
; New test for D85251: four-vector variant — llvm.aarch64.sve.st4 with
; pointer-element vectors should select st4d (see CHECK line), completing
; the st2/st3/st4 pointer-vector coverage.
define void @st4d_ptr(<vscale x 2 x i8*> %v0, <vscale x 2 x i8*> %v1, <vscale x 2 x i8*> %v2, <vscale x 2 x i8*> %v3, <vscale x 2 x i1> %pred, i8** %addr) {
|
||||
; CHECK-LABEL: st4d_ptr:
|
||||
; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
call void @llvm.aarch64.sve.st4.nxv2p0i8(<vscale x 2 x i8*> %v0,
|
||||
<vscale x 2 x i8*> %v1,
|
||||
<vscale x 2 x i8*> %v2,
|
||||
<vscale x 2 x i8*> %v3,
|
||||
<vscale x 2 x i1> %pred,
|
||||
i8** %addr)
|
||||
ret void
|
||||
}
|
||||
;
|
||||
; STNT1B
|
||||
;
|
||||
@@ -508,6 +543,7 @@ declare void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x ha
|
||||
declare void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat*)
|
||||
declare void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
|
||||
declare void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)
|
||||
declare void @llvm.aarch64.sve.st2.nxv2p0i8(<vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i1>, i8** nocapture)
|
||||
|
||||
declare void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
|
||||
declare void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
|
||||
@@ -517,6 +553,7 @@ declare void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half>, <vscale x 8 x ha
|
||||
declare void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat*)
|
||||
declare void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
|
||||
declare void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)
|
||||
declare void @llvm.aarch64.sve.st3.nxv2p0i8(<vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i1>, i8** nocapture)
|
||||
|
||||
declare void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
|
||||
declare void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
|
||||
@@ -526,6 +563,7 @@ declare void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x ha
|
||||
declare void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat*)
|
||||
declare void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
|
||||
declare void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)
|
||||
declare void @llvm.aarch64.sve.st4.nxv2p0i8(<vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i1>, i8** nocapture)
|
||||
|
||||
declare void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
|
||||
declare void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
|
||||
|
Loading…
x
Reference in New Issue
Block a user