[SVE][CodeGen] Legalisation of masked loads and stores
Summary:
This patch modifies IncrementMemoryAddress to use a vscale when calculating
the new address if the data type is scalable. Also adds tablegen patterns
which match an extract_subvector of a legal predicate type with zip1/zip2
instructions.

Reviewers: sdesmalen, efriedma, david-arm

Reviewed By: efriedma, david-arm

Subscribers: tschuett, hiraditya, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D83137
This commit is contained in:
parent aa7315a8f1
commit 30eb603e95
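For readers skimming the diff below, here is a minimal sketch of the address-increment idea the patch introduces. It is illustrative only: the helper name computeIncrement is hypothetical, and it assumes LLVM's SelectionDAG types (SelectionDAG, SDValue, EVT) from the file the patch touches. For a scalable data type the per-part byte offset becomes vscale multiplied by the known-minimum store size; fixed-width types keep using a plain constant.

// Hypothetical helper (not part of the patch) sketching how the increment is chosen.
static SDValue computeIncrement(SelectionDAG &DAG, const SDLoc &DL,
                                EVT DataVT, EVT AddrVT) {
  if (DataVT.isScalableVector())
    // Scalable vectors: increment = vscale * known-minimum store size in bytes.
    return DAG.getVScale(DL, AddrVT,
                         APInt(AddrVT.getSizeInBits().getFixedSize(),
                               DataVT.getStoreSize().getKnownMinSize()));
  // Fixed-width vectors: the store size is a compile-time constant.
  return DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
}

IncrementMemoryAddress then adds this increment to the incoming address (an ISD::ADD node), as it already did for fixed-width types.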
@@ -7153,6 +7153,9 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
  assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() &&
         "Incompatible types of Data and Mask");
  if (IsCompressedMemory) {
    if (DataVT.isScalableVector())
      report_fatal_error(
          "Cannot currently handle compressed memory with scalable vectors");
    // Incrementing the pointer according to number of '1's in the mask.
    EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
    SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
@@ -7168,6 +7171,10 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
    SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
                                    AddrVT);
    Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
  } else if (DataVT.isScalableVector()) {
    Increment = DAG.getVScale(DL, AddrVT,
                              APInt(AddrVT.getSizeInBits().getFixedSize(),
                                    DataVT.getStoreSize().getKnownMinSize()));
  } else
    Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
@@ -1109,6 +1109,20 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
  defm TRN1_PPP : sve_int_perm_bin_perm_pp<0b100, "trn1", AArch64trn1>;
  defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2", AArch64trn2>;

  // Extract lo/hi halves of legal predicate types.
  def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 0))),
            (ZIP1_PPP_S PPR:$Ps, (PFALSE))>;
  def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 2))),
            (ZIP2_PPP_S PPR:$Ps, (PFALSE))>;
  def : Pat<(nxv4i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 0))),
            (ZIP1_PPP_H PPR:$Ps, (PFALSE))>;
  def : Pat<(nxv4i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 4))),
            (ZIP2_PPP_H PPR:$Ps, (PFALSE))>;
  def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))),
            (ZIP1_PPP_B PPR:$Ps, (PFALSE))>;
  def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
            (ZIP2_PPP_B PPR:$Ps, (PFALSE))>;

  defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", SETUGE, SETULE>;
  defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", SETUGT, SETULT>;
  defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", SETGE, SETLE>;
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

; LOAD
; UNPREDICATED

define <vscale x 4 x i16> @load_promote_4i8(<vscale x 4 x i16>* %a) {
; CHECK-LABEL: load_promote_4i8:
define <vscale x 4 x i16> @load_promote_4i16(<vscale x 4 x i16>* %a) {
; CHECK-LABEL: load_promote_4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
@@ -53,3 +53,82 @@ define <vscale x 16 x i64> @load_split_16i64(<vscale x 16 x i64>* %a) {
  %load = load <vscale x 16 x i64>, <vscale x 16 x i64>* %a
  ret <vscale x 16 x i64> %load
}

; MASKED

define <vscale x 2 x i32> @masked_load_promote_2i32(<vscale x 2 x i32> *%a, <vscale x 2 x i1> %pg) {
; CHECK-LABEL: masked_load_promote_2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32(<vscale x 2 x i32> *%a, i32 1, <vscale x 2 x i1> %pg, <vscale x 2 x i32> undef)
  ret <vscale x 2 x i32> %load
}

define <vscale x 32 x i8> @masked_load_split_32i8(<vscale x 32 x i8> *%a, <vscale x 32 x i1> %pg) {
; CHECK-LABEL: masked_load_split_32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ret
  %load = call <vscale x 32 x i8> @llvm.masked.load.nxv32i8(<vscale x 32 x i8> *%a, i32 1, <vscale x 32 x i1> %pg, <vscale x 32 x i8> undef)
  ret <vscale x 32 x i8> %load
}

define <vscale x 32 x i16> @masked_load_split_32i16(<vscale x 32 x i16> *%a, <vscale x 32 x i1> %pg) {
; CHECK-LABEL: masked_load_split_32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p2.b
; CHECK-NEXT: zip1 p3.b, p0.b, p2.b
; CHECK-NEXT: zip2 p0.b, p0.b, p2.b
; CHECK-NEXT: ld1h { z0.h }, p3/z, [x0]
; CHECK-NEXT: zip1 p3.b, p1.b, p2.b
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: zip2 p0.b, p1.b, p2.b
; CHECK-NEXT: ld1h { z2.h }, p3/z, [x0, #2, mul vl]
; CHECK-NEXT: ld1h { z3.h }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT: ret
  %load = call <vscale x 32 x i16> @llvm.masked.load.nxv32i16(<vscale x 32 x i16> *%a, i32 1, <vscale x 32 x i1> %pg, <vscale x 32 x i16> undef)
  ret <vscale x 32 x i16> %load
}

define <vscale x 8 x i32> @masked_load_split_8i32(<vscale x 8 x i32> *%a, <vscale x 8 x i1> %pg) {
; CHECK-LABEL: masked_load_split_8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: zip1 p2.h, p0.h, p1.h
; CHECK-NEXT: zip2 p0.h, p0.h, p1.h
; CHECK-NEXT: ld1w { z0.s }, p2/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ret
  %load = call <vscale x 8 x i32> @llvm.masked.load.nxv8i32(<vscale x 8 x i32> *%a, i32 1, <vscale x 8 x i1> %pg, <vscale x 8 x i32> undef)
  ret <vscale x 8 x i32> %load
}

define <vscale x 8 x i64> @masked_load_split_8i64(<vscale x 8 x i64> *%a, <vscale x 8 x i1> %pg) {
; CHECK-LABEL: masked_load_split_8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: zip1 p2.h, p0.h, p1.h
; CHECK-NEXT: zip2 p0.h, p0.h, p1.h
; CHECK-NEXT: zip1 p3.s, p2.s, p1.s
; CHECK-NEXT: zip2 p2.s, p2.s, p1.s
; CHECK-NEXT: ld1d { z0.d }, p3/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p2/z, [x0, #1, mul vl]
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #2, mul vl]
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT: ret
  %load = call <vscale x 8 x i64> @llvm.masked.load.nxv8i64(<vscale x 8 x i64> *%a, i32 1, <vscale x 8 x i1> %pg, <vscale x 8 x i64> undef)
  ret <vscale x 8 x i64> %load
}

declare <vscale x 32 x i8> @llvm.masked.load.nxv32i8(<vscale x 32 x i8>*, i32, <vscale x 32 x i1>, <vscale x 32 x i8>)

declare <vscale x 32 x i16> @llvm.masked.load.nxv32i16(<vscale x 32 x i16>*, i32, <vscale x 32 x i1>, <vscale x 32 x i16>)

declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32(<vscale x 2 x i32>*, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
declare <vscale x 8 x i32> @llvm.masked.load.nxv8i32(<vscale x 8 x i32>*, i32, <vscale x 8 x i1>, <vscale x 8 x i32>)

declare <vscale x 8 x i64> @llvm.masked.load.nxv8i64(<vscale x 8 x i64>*, i32, <vscale x 8 x i1>, <vscale x 8 x i64>)
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

; UNPREDICATED

define void @store_promote_4i8(<vscale x 4 x i8> %data, <vscale x 4 x i8>* %a) {
; CHECK-LABEL: store_promote_4i8:
; CHECK: // %bb.0:
@@ -51,3 +53,82 @@ define void @store_split_16i64(<vscale x 16 x i64> %data, <vscale x 16 x i64>* %
  store <vscale x 16 x i64> %data, <vscale x 16 x i64>* %a
  ret void
}

; MASKED

define void @masked_store_promote_2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8> *%a, <vscale x 2 x i1> %pg) {
; CHECK-LABEL: masked_store_promote_2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: st1b { z0.d }, p0, [x0]
; CHECK-NEXT: ret
  call void @llvm.masked.store.nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8> *%a, i32 1, <vscale x 2 x i1> %pg)
  ret void
}

define void @masked_store_split_32i8(<vscale x 32 x i8> %data, <vscale x 32 x i8> *%a, <vscale x 32 x i1> %pg) {
; CHECK-LABEL: masked_store_split_32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: st1b { z1.b }, p1, [x0, #1, mul vl]
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
  call void @llvm.masked.store.nxv32i8(<vscale x 32 x i8> %data, <vscale x 32 x i8> *%a, i32 1, <vscale x 32 x i1> %pg)
  ret void
}

define void @masked_store_split_32i16(<vscale x 32 x i16> %data, <vscale x 32 x i16> *%a, <vscale x 32 x i1> %pg) {
; CHECK-LABEL: masked_store_split_32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p2.b
; CHECK-NEXT: zip2 p3.b, p1.b, p2.b
; CHECK-NEXT: zip1 p1.b, p1.b, p2.b
; CHECK-NEXT: st1h { z3.h }, p3, [x0, #3, mul vl]
; CHECK-NEXT: zip2 p3.b, p0.b, p2.b
; CHECK-NEXT: zip1 p0.b, p0.b, p2.b
; CHECK-NEXT: st1h { z2.h }, p1, [x0, #2, mul vl]
; CHECK-NEXT: st1h { z1.h }, p3, [x0, #1, mul vl]
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
  call void @llvm.masked.store.nxv32i16(<vscale x 32 x i16> %data, <vscale x 32 x i16> *%a, i32 1, <vscale x 32 x i1> %pg)
  ret void
}

define void @masked_store_split_8i32(<vscale x 8 x i32> %data, <vscale x 8 x i32> *%a, <vscale x 8 x i1> %pg) {
; CHECK-LABEL: masked_store_split_8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: zip2 p2.h, p0.h, p1.h
; CHECK-NEXT: zip1 p0.h, p0.h, p1.h
; CHECK-NEXT: st1w { z1.s }, p2, [x0, #1, mul vl]
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
  call void @llvm.masked.store.nxv8i32(<vscale x 8 x i32> %data, <vscale x 8 x i32> *%a, i32 1, <vscale x 8 x i1> %pg)
  ret void
}

define void @masked_store_split_8i64(<vscale x 8 x i64> %data, <vscale x 8 x i64> *%a, <vscale x 8 x i1> %pg) {
; CHECK-LABEL: masked_store_split_8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: zip2 p2.h, p0.h, p1.h
; CHECK-NEXT: zip1 p0.h, p0.h, p1.h
; CHECK-NEXT: zip2 p3.s, p2.s, p1.s
; CHECK-NEXT: zip1 p2.s, p2.s, p1.s
; CHECK-NEXT: st1d { z2.d }, p2, [x0, #2, mul vl]
; CHECK-NEXT: zip2 p2.s, p0.s, p1.s
; CHECK-NEXT: zip1 p0.s, p0.s, p1.s
; CHECK-NEXT: st1d { z3.d }, p3, [x0, #3, mul vl]
; CHECK-NEXT: st1d { z1.d }, p2, [x0, #1, mul vl]
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
  call void @llvm.masked.store.nxv8i64(<vscale x 8 x i64> %data, <vscale x 8 x i64> *%a, i32 1, <vscale x 8 x i1> %pg)
  ret void
}

declare void @llvm.masked.store.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>*, i32, <vscale x 2 x i1>)
declare void @llvm.masked.store.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>*, i32, <vscale x 32 x i1>)

declare void @llvm.masked.store.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>*, i32, <vscale x 32 x i1>)

declare void @llvm.masked.store.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>*, i32, <vscale x 8 x i1>)

declare void @llvm.masked.store.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>*, i32, <vscale x 8 x i1>)