1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[SVE][CodeGen] Vector + immediate addressing mode for masked gather/scatter

This patch extends LowerMGATHER/MSCATTER to make use of the vector + reg/immediate
addressing modes for scalable masked gathers & scatters.

selectGatherScatterAddrMode checks if the base pointer is null, in which case
we can swap the base pointer and the index, e.g.
     getelementptr nullptr, <vscale x N x T> (splat(%offset)) + %indices)
  -> getelementptr %offset, <vscale x N x T> %indices

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D93132
This commit is contained in:
Kerry McLaughlin 2020-12-18 11:04:41 +00:00
parent 28cf5e1f81
commit a9d5f92f0d
8 changed files with 847 additions and 10 deletions

View File

@ -3812,6 +3812,8 @@ unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
return Opcode;
case AArch64ISD::GLD1_MERGE_ZERO:
return AArch64ISD::GLD1S_MERGE_ZERO;
case AArch64ISD::GLD1_IMM_MERGE_ZERO:
return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
@ -3843,6 +3845,60 @@ bool getGatherScatterIndexIsExtended(SDValue Index) {
return false;
}
// If the base pointer of a masked gather or scatter is null, we
// may be able to swap BasePtr & Index and use the vector + register
// or vector + immediate addressing mode, e.g.
// VECTOR + REGISTER:
// getelementptr nullptr, <vscale x N x T> (splat(%offset)) + %indices)
// -> getelementptr %offset, <vscale x N x T> %indices
// VECTOR + IMMEDIATE:
// getelementptr nullptr, <vscale x N x T> (splat(#x)) + %indices)
// -> getelementptr #x, <vscale x N x T> %indices
void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT,
unsigned &Opcode, bool IsGather,
SelectionDAG &DAG) {
if (!isNullConstant(BasePtr))
return;
ConstantSDNode *Offset = nullptr;
if (Index.getOpcode() == ISD::ADD)
if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) {
if (isa<ConstantSDNode>(SplatVal))
Offset = cast<ConstantSDNode>(SplatVal);
else {
BasePtr = SplatVal;
Index = Index->getOperand(0);
return;
}
}
unsigned NewOp =
IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;
if (!Offset) {
std::swap(BasePtr, Index);
Opcode = NewOp;
return;
}
uint64_t OffsetVal = Offset->getZExtValue();
unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8;
auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64);
if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) {
// Index is out of range for the immediate addressing mode
BasePtr = ConstOffset;
Index = Index->getOperand(0);
return;
}
// Immediate is in range
Opcode = NewOp;
BasePtr = Index->getOperand(0);
Index = ConstOffset;
return;
}
SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
@ -3892,6 +3948,9 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
Index = Index.getOperand(0);
unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
/*isGather=*/true, DAG);
if (ResNeedsSignExtend)
Opcode = getSignExtendedGatherOpcode(Opcode);
@ -3944,9 +4003,12 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
if (getGatherScatterIndexIsExtended(Index))
Index = Index.getOperand(0);
unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend);
selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
/*isGather=*/false, DAG);
SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
return DAG.getNode(getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend), DL,
VTs, Ops);
return DAG.getNode(Opcode, DL, VTs, Ops);
}
// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.

View File

@ -44,12 +44,29 @@ define <vscale x 2 x i64> @masked_sgather_zext(i8* %base, <vscale x 2 x i64> %of
; Tests that exercise various type legalisation scenarios for ISD::MGATHER.
; Code generate load of an illegal datatype via promotion.
define <vscale x 2 x i8> @masked_gather_nxv2i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i8:
; CHECK: ld1sb { z0.d }, p0/z, [z0.d]
; CHECK: ret
%data = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
ret <vscale x 2 x i8> %data
}
; Code generate load of an illegal datatype via promotion.
define <vscale x 2 x i16> @masked_gather_nxv2i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i16:
; CHECK: ld1sh { z0.d }, p0/z, [z0.d]
; CHECK: ret
%data = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
ret <vscale x 2 x i16> %data
}
; Code generate load of an illegal datatype via promotion.
define <vscale x 2 x i32> @masked_gather_nxv2i32(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i32:
; CHECK-DAG: mov x8, xzr
; CHECK-DAG: ld1sw { z0.d }, p0/z, [x8, z0.d]
; CHECK: ret
; CHECK: ld1sw { z0.d }, p0/z, [z0.d]
; CHECK: ret
%data = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
ret <vscale x 2 x i32> %data
}
@ -92,11 +109,10 @@ define <vscale x 32 x i32> @masked_gather_nxv32i32(i32* %base, <vscale x 32 x i3
define <vscale x 4 x i32> @masked_sgather_nxv4i8(<vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv4i8:
; CHECK: pfalse p1.b
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: zip2 p2.s, p0.s, p1.s
; CHECK-NEXT: zip1 p0.s, p0.s, p1.s
; CHECK-NEXT: ld1sb { z1.d }, p2/z, [x8, z1.d]
; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT: ld1sb { z1.d }, p2/z, [z1.d]
; CHECK-NEXT: ld1sb { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
@ -109,8 +125,6 @@ define <vscale x 4 x i32> @masked_sgather_nxv4i8(<vscale x 4 x i8*> %ptrs, <vsca
declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
declare <vscale x 4 x i8> @llvm.masked.gather.nxv4i8(<vscale x 4 x i8*>, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)
declare <vscale x 16 x i8> @llvm.masked.gather.nxv16i8(<vscale x 16 x i8*>, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare <vscale x 32 x i32> @llvm.masked.gather.nxv32i32(<vscale x 32 x i32*>, i32, <vscale x 32 x i1>, <vscale x 32 x i32>)

View File

@ -0,0 +1,186 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
define <vscale x 2 x i64> @masked_gather_nxv2i8(<vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d, #1]
; CHECK-NEXT: ret
%ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 1
%vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
%vals.zext = zext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
ret <vscale x 2 x i64> %vals.zext
}
define <vscale x 2 x i64> @masked_gather_nxv2i16(<vscale x 2 x i16*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d, #2]
; CHECK-NEXT: ret
%ptrs = getelementptr i16, <vscale x 2 x i16*> %bases, i32 1
%vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
%vals.zext = zext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
ret <vscale x 2 x i64> %vals.zext
}
define <vscale x 2 x i64> @masked_gather_nxv2i32(<vscale x 2 x i32*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d, #4]
; CHECK-NEXT: ret
%ptrs = getelementptr i32, <vscale x 2 x i32*> %bases, i32 1
%vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
%vals.zext = zext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
ret <vscale x 2 x i64> %vals.zext
}
define <vscale x 2 x i64> @masked_gather_nxv2i64(<vscale x 2 x i64*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d, #8]
; CHECK-NEXT: ret
%ptrs = getelementptr i64, <vscale x 2 x i64*> %bases, i32 1
%vals.zext = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
ret <vscale x 2 x i64> %vals.zext
}
define <vscale x 2 x half> @masked_gather_nxv2f16(<vscale x 2 x half*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d, #4]
; CHECK-NEXT: ret
%ptrs = getelementptr half, <vscale x 2 x half*> %bases, i32 2
%vals = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
ret <vscale x 2 x half> %vals
}
define <vscale x 2 x bfloat> @masked_gather_nxv2bf16(<vscale x 2 x bfloat*> %bases, <vscale x 2 x i1> %mask) #0 {
; CHECK-LABEL: masked_gather_nxv2bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d, #4]
; CHECK-NEXT: ret
%ptrs = getelementptr bfloat, <vscale x 2 x bfloat*> %bases, i32 2
%vals = call <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
ret <vscale x 2 x bfloat> %vals
}
define <vscale x 2 x float> @masked_gather_nxv2f32(<vscale x 2 x float*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d, #12]
; CHECK-NEXT: ret
%ptrs = getelementptr float, <vscale x 2 x float*> %bases, i32 3
%vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
ret <vscale x 2 x float> %vals
}
define <vscale x 2 x double> @masked_gather_nxv2f64(<vscale x 2 x double*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d, #32]
; CHECK-NEXT: ret
%ptrs = getelementptr double, <vscale x 2 x double*> %bases, i32 4
%vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
ret <vscale x 2 x double> %vals
}
define <vscale x 2 x i64> @masked_sgather_nxv2i8(<vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1sb { z0.d }, p0/z, [z0.d, #5]
; CHECK-NEXT: ret
%ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 5
%vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
%vals.sext = sext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
ret <vscale x 2 x i64> %vals.sext
}
define <vscale x 2 x i64> @masked_sgather_nxv2i16(<vscale x 2 x i16*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1sh { z0.d }, p0/z, [z0.d, #12]
; CHECK-NEXT: ret
%ptrs = getelementptr i16, <vscale x 2 x i16*> %bases, i32 6
%vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
%vals.sext = sext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
ret <vscale x 2 x i64> %vals.sext
}
define <vscale x 2 x i64> @masked_sgather_nxv2i32(<vscale x 2 x i32*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1sw { z0.d }, p0/z, [z0.d, #28]
; CHECK-NEXT: ret
%ptrs = getelementptr i32, <vscale x 2 x i32*> %bases, i32 7
%vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
%vals.sext = sext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
ret <vscale x 2 x i64> %vals.sext
}
; Tests where the immediate is out of range
define <vscale x 2 x i64> @masked_gather_nxv2i8_range(<vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i8_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #32
; CHECK-NEXT: ld1b { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT: ret
%ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 32
%vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
%vals.zext = zext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
ret <vscale x 2 x i64> %vals.zext
}
define <vscale x 2 x half> @masked_gather_nxv2f16_range(<vscale x 2 x half*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f16_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #64
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT: ret
%ptrs = getelementptr half, <vscale x 2 x half*> %bases, i32 32
%vals = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
ret <vscale x 2 x half> %vals
}
define <vscale x 2 x bfloat> @masked_gather_nxv2bf16_range(<vscale x 2 x bfloat*> %bases, <vscale x 2 x i1> %mask) #0 {
; CHECK-LABEL: masked_gather_nxv2bf16_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #64
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT: ret
%ptrs = getelementptr bfloat, <vscale x 2 x bfloat*> %bases, i32 32
%vals = call <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
ret <vscale x 2 x bfloat> %vals
}
define <vscale x 2 x float> @masked_gather_nxv2f32_range(<vscale x 2 x float*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f32_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #128
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT: ret
%ptrs = getelementptr float, <vscale x 2 x float*> %bases, i32 32
%vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
ret <vscale x 2 x float> %vals
}
define <vscale x 2 x double> @masked_gather_nxv2f64_range(<vscale x 2 x double*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f64_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #256
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT: ret
%ptrs = getelementptr double, <vscale x 2 x double*> %bases, i32 32
%vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
ret <vscale x 2 x double> %vals
}
declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*>, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
declare <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>, <vscale x 2 x bfloat>)
declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
attributes #0 = { "target-features"="+sve,+bf16" }

View File

@ -0,0 +1,137 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
define <vscale x 2 x i64> @masked_gather_nxv2i8(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
%ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
%vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
%vals.zext = zext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
ret <vscale x 2 x i64> %vals.zext
}
define <vscale x 2 x i64> @masked_gather_nxv2i16(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
%vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
%vals.zext = zext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
ret <vscale x 2 x i64> %vals.zext
}
define <vscale x 2 x i64> @masked_gather_nxv2i32(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i32*>
%vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
%vals.zext = zext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
ret <vscale x 2 x i64> %vals.zext
}
define <vscale x 2 x i64> @masked_gather_nxv2i64(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i64*>
%vals = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
ret <vscale x 2 x i64> %vals
}
define <vscale x 2 x half> @masked_gather_nxv2f16(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x half*>
%vals = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
ret <vscale x 2 x half> %vals
}
define <vscale x 2 x bfloat> @masked_gather_nxv2bf16(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) #0 {
; CHECK-LABEL: masked_gather_nxv2bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x bfloat*>
%vals = call <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
ret <vscale x 2 x bfloat> %vals
}
define <vscale x 2 x float> @masked_gather_nxv2f32(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x float*>
%vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
ret <vscale x 2 x float> %vals
}
define <vscale x 2 x double> @masked_gather_nxv2f64(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x double*>
%vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
ret <vscale x 2 x double> %vals
}
define <vscale x 2 x i64> @masked_sgather_nxv2i8(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
%ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
%vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
%vals.sext = sext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
ret <vscale x 2 x i64> %vals.sext
}
define <vscale x 2 x i64> @masked_sgather_nxv2i16(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
%vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
%vals.sext = sext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
ret <vscale x 2 x i64> %vals.sext
}
define <vscale x 2 x i64> @masked_sgather_nxv2i32(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i32*>
%vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
%vals.sext = sext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
ret <vscale x 2 x i64> %vals.sext
}
declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*>, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
declare <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>, <vscale x 2 x bfloat>)
declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
attributes #0 = { "target-features"="+sve,+bf16" }

View File

@ -0,0 +1,117 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
define <vscale x 2 x i64> @masked_gather_nxv2i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ret
%vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
%vals.zext = zext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
ret <vscale x 2 x i64> %vals.zext
}
define <vscale x 2 x i64> @masked_gather_nxv2i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ret
%vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
%vals.zext = zext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
ret <vscale x 2 x i64> %vals.zext
}
define <vscale x 2 x i64> @masked_gather_nxv2i32(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ret
%vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
%vals.zext = zext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
ret <vscale x 2 x i64> %vals.zext
}
define <vscale x 2 x i64> @masked_gather_nxv2i64(<vscale x 2 x i64*> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ret
%vals = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
ret <vscale x 2 x i64> %vals
}
define <vscale x 2 x half> @masked_gather_nxv2f16(<vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ret
%vals = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
ret <vscale x 2 x half> %vals
}
define <vscale x 2 x bfloat> @masked_gather_nxv2bf16(<vscale x 2 x bfloat*> %ptrs, <vscale x 2 x i1> %mask) #0 {
; CHECK-LABEL: masked_gather_nxv2bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ret
%vals = call <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
ret <vscale x 2 x bfloat> %vals
}
define <vscale x 2 x float> @masked_gather_nxv2f32(<vscale x 2 x float*> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ret
%vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
ret <vscale x 2 x float> %vals
}
define <vscale x 2 x double> @masked_gather_nxv2f64(<vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ret
%vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
ret <vscale x 2 x double> %vals
}
define <vscale x 2 x i64> @masked_sgather_nxv2i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1sb { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ret
%vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
%vals.sext = sext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
ret <vscale x 2 x i64> %vals.sext
}
define <vscale x 2 x i64> @masked_sgather_nxv2i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1sh { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ret
%vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
%vals.sext = sext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
ret <vscale x 2 x i64> %vals.sext
}
define <vscale x 2 x i64> @masked_sgather_nxv2i32(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1sw { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ret
%vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
%vals.sext = sext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
ret <vscale x 2 x i64> %vals.sext
}
declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*>, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
declare <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>, <vscale x 2 x bfloat>)
declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
attributes #0 = { "target-features"="+sve,+bf16" }

View File

@ -0,0 +1,138 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
define void @masked_scatter_nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: st1b { z0.d }, p0, [z1.d, #1]
; CHECK-NEXT: ret
%ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 1
call void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask)
ret void
}
define void @masked_scatter_nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [z1.d, #2]
; CHECK-NEXT: ret
%ptrs = getelementptr i16, <vscale x 2 x i16*> %bases, i32 1
call void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
ret void
}
define void @masked_scatter_nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: st1w { z0.d }, p0, [z1.d, #4]
; CHECK-NEXT: ret
%ptrs = getelementptr i32, <vscale x 2 x i32*> %bases, i32 1
call void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask)
ret void
}
define void @masked_scatter_nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: st1d { z0.d }, p0, [z1.d, #8]
; CHECK-NEXT: ret
%ptrs = getelementptr i64, <vscale x 2 x i64*> %bases, i32 1
call void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask)
ret void
}
define void @masked_scatter_nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [z1.d, #4]
; CHECK-NEXT: ret
%ptrs = getelementptr half, <vscale x 2 x half*> %bases, i32 2
call void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
ret void
}
define void @masked_scatter_nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %bases, <vscale x 2 x i1> %mask) #0 {
; CHECK-LABEL: masked_scatter_nxv2bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [z1.d, #4]
; CHECK-NEXT: ret
%ptrs = getelementptr bfloat, <vscale x 2 x bfloat*> %bases, i32 2
call void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
ret void
}
define void @masked_scatter_nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: st1w { z0.d }, p0, [z1.d, #12]
; CHECK-NEXT: ret
%ptrs = getelementptr float, <vscale x 2 x float*> %bases, i32 3
call void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask)
ret void
}
define void @masked_scatter_nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: st1d { z0.d }, p0, [z1.d, #32]
; CHECK-NEXT: ret
%ptrs = getelementptr double, <vscale x 2 x double*> %bases, i32 4
call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask)
ret void
}
; Test where the immediate is out of range
define void @masked_scatter_nxv2i8_range(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i8_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #32
; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
%ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 32
call void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask)
ret void
}
define void @masked_scatter_nxv2i16_range(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i16_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #64
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
%ptrs = getelementptr i16, <vscale x 2 x i16*> %bases, i32 32
call void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
ret void
}
define void @masked_scatter_nxv2i32_range(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i32_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #128
; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
%ptrs = getelementptr i32, <vscale x 2 x i32*> %bases, i32 32
call void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, i32 1, <vscale x 2 x i1> %mask)
ret void
}
define void @masked_scatter_nxv2f64_range(<vscale x 2 x double> %data, <vscale x 2 x double*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2f64_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #256
; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
%ptrs = getelementptr double, <vscale x 2 x double*> %bases, i32 32
call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask)
ret void
}
declare void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>)
attributes #0 = { "target-features"="+sve,+bf16" }

View File

@ -0,0 +1,99 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
define void @masked_scatter_nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: st1b { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
call void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask)
ret void
}
define void @masked_scatter_nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
call void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
ret void
}
define void @masked_scatter_nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: st1w { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i32*>
call void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask)
ret void
}
define void @masked_scatter_nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i64*>
call void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask)
ret void
}
define void @masked_scatter_nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x half*>
call void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
ret void
}
define void @masked_scatter_nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) #0 {
; CHECK-LABEL: masked_scatter_nxv2bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x bfloat*>
call void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
ret void
}
define void @masked_scatter_nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: st1w { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x float*>
call void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask)
ret void
}
define void @masked_scatter_nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x double*>
call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask)
ret void
}
declare void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>)
attributes #0 = { "target-features"="+sve,+bf16" }

View File

@ -0,0 +1,84 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
define void @masked_scatter_nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: st1b { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
call void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
ret void
}
define void @masked_scatter_nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
call void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
ret void
}
define void @masked_scatter_nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: st1w { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
call void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
ret void
}
define void @masked_scatter_nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: st1d { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
call void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
ret void
}
define void @masked_scatter_nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
call void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
ret void
}
define void @masked_scatter_nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %ptrs, <vscale x 2 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv2bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
call void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
ret void
}
define void @masked_scatter_nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: st1w { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
call void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
ret void
}
define void @masked_scatter_nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: st1d { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
ret void
}
declare void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)
attributes #0 = { "target-features"="+sve,+bf16" }