[SVE][CodeGen] Vector + immediate addressing mode for masked gather/scatter
This patch extends LowerMGATHER/MSCATTER to make use of the vector + reg/immediate addressing modes for scalable masked gathers & scatters.

selectGatherScatterAddrMode checks if the base pointer is null, in which case we can swap the base pointer and the index, e.g.

   getelementptr nullptr, <vscale x N x T> (splat(%offset) + %indices)
    -> getelementptr %offset, <vscale x N x T> %indices

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D93132
Parent: 28cf5e1f81
Commit: a9d5f92f0d
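Before the diff, a concrete instance of the swap the message describes. This simply restates one of the new tests added below (masked_gather_nxv2i8 in sve-masked-gather-vec-plus-imm.ll), so the only assumption is an SVE-enabled target, as in the tests' RUN lines:

    ; The pointer vector is %bases + splat(1): the base is null at ISel, so
    ; selectGatherScatterAddrMode swaps base and index, and the splatted
    ; offset becomes the immediate in [z0.d, #1].
    define <vscale x 2 x i64> @masked_gather_nxv2i8(<vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
      %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 1
      %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
      %vals.zext = zext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
      ret <vscale x 2 x i64> %vals.zext
    }
    ; expected codegen: ld1b { z0.d }, p0/z, [z0.d, #1]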
@@ -3812,6 +3812,8 @@ unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
     return Opcode;
   case AArch64ISD::GLD1_MERGE_ZERO:
     return AArch64ISD::GLD1S_MERGE_ZERO;
+  case AArch64ISD::GLD1_IMM_MERGE_ZERO:
+    return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
   case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
     return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
   case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
@@ -3843,6 +3845,60 @@ bool getGatherScatterIndexIsExtended(SDValue Index) {
   return false;
 }
 
+// If the base pointer of a masked gather or scatter is null, we
+// may be able to swap BasePtr & Index and use the vector + register
+// or vector + immediate addressing mode, e.g.
+// VECTOR + REGISTER:
+//    getelementptr nullptr, <vscale x N x T> (splat(%offset)) + %indices)
+//    -> getelementptr %offset, <vscale x N x T> %indices
+// VECTOR + IMMEDIATE:
+//    getelementptr nullptr, <vscale x N x T> (splat(#x)) + %indices)
+//    -> getelementptr #x, <vscale x N x T> %indices
+void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT,
+                                 unsigned &Opcode, bool IsGather,
+                                 SelectionDAG &DAG) {
+  if (!isNullConstant(BasePtr))
+    return;
+
+  ConstantSDNode *Offset = nullptr;
+  if (Index.getOpcode() == ISD::ADD)
+    if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) {
+      if (isa<ConstantSDNode>(SplatVal))
+        Offset = cast<ConstantSDNode>(SplatVal);
+      else {
+        BasePtr = SplatVal;
+        Index = Index->getOperand(0);
+        return;
+      }
+    }
+
+  unsigned NewOp =
+      IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;
+
+  if (!Offset) {
+    std::swap(BasePtr, Index);
+    Opcode = NewOp;
+    return;
+  }
+
+  uint64_t OffsetVal = Offset->getZExtValue();
+  unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8;
+  auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64);
+
+  if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) {
+    // Index is out of range for the immediate addressing mode
+    BasePtr = ConstOffset;
+    Index = Index->getOperand(0);
+    return;
+  }
+
+  // Immediate is in range
+  Opcode = NewOp;
+  BasePtr = Index->getOperand(0);
+  Index = ConstOffset;
+  return;
+}
+
 SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
                                             SelectionDAG &DAG) const {
   SDLoc DL(Op);
@@ -3892,6 +3948,9 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
     Index = Index.getOperand(0);
 
   unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
+  selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
+                              /*isGather=*/true, DAG);
+
   if (ResNeedsSignExtend)
     Opcode = getSignExtendedGatherOpcode(Opcode);
 
@@ -3944,9 +4003,12 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
   if (getGatherScatterIndexIsExtended(Index))
     Index = Index.getOperand(0);
 
+  unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend);
+  selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
+                              /*isGather=*/false, DAG);
+
   SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
-  return DAG.getNode(getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend), DL,
-                     VTs, Ops);
+  return DAG.getNode(Opcode, DL, VTs, Ops);
 }
 
 // Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
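One consequence of the range check in selectGatherScatterAddrMode above is worth calling out: the splatted offset is only encoded as an immediate when it is a multiple of the memory element size and the scaled index is at most 31; anything else falls back to materialising the offset in a scalar register and using the vector + register form. A sketch of the two outcomes, lifted from the new tests below (SVE target assumed):

    ; In range: i64 elements, byte offset 8 (8 % 8 == 0 and 8 / 8 = 1 <= 31)
    ;   ld1d { z0.d }, p0/z, [z0.d, #8]
    ; Out of range: double elements, byte offset 256 (256 / 8 = 32 > 31)
    ;   mov w8, #256
    ;   ld1d { z0.d }, p0/z, [x8, z0.d]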
@@ -44,12 +44,29 @@ define <vscale x 2 x i64> @masked_sgather_zext(i8* %base, <vscale x 2 x i64> %of
 
 ; Tests that exercise various type legalisation scenarios for ISD::MGATHER.
 
 ; Code generate load of an illegal datatype via promotion.
+define <vscale x 2 x i8> @masked_gather_nxv2i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i8:
+; CHECK: ld1sb { z0.d }, p0/z, [z0.d]
+; CHECK: ret
+  %data = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
+  ret <vscale x 2 x i8> %data
+}
+
+; Code generate load of an illegal datatype via promotion.
+define <vscale x 2 x i16> @masked_gather_nxv2i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i16:
+; CHECK: ld1sh { z0.d }, p0/z, [z0.d]
+; CHECK: ret
+  %data = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
+  ret <vscale x 2 x i16> %data
+}
+
+; Code generate load of an illegal datatype via promotion.
 define <vscale x 2 x i32> @masked_gather_nxv2i32(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: masked_gather_nxv2i32:
-; CHECK-DAG: mov x8, xzr
-; CHECK-DAG: ld1sw { z0.d }, p0/z, [x8, z0.d]
-; CHECK: ret
+; CHECK: ld1sw { z0.d }, p0/z, [z0.d]
+; CHECK: ret
   %data = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
   ret <vscale x 2 x i32> %data
 }
@@ -92,11 +109,10 @@ define <vscale x 32 x i32> @masked_gather_nxv32i32(i32* %base, <vscale x 32 x i3
 define <vscale x 4 x i32> @masked_sgather_nxv4i8(<vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: masked_sgather_nxv4i8:
 ; CHECK: pfalse p1.b
-; CHECK-NEXT: mov x8, xzr
 ; CHECK-NEXT: zip2 p2.s, p0.s, p1.s
 ; CHECK-NEXT: zip1 p0.s, p0.s, p1.s
-; CHECK-NEXT: ld1sb { z1.d }, p2/z, [x8, z1.d]
-; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT: ld1sb { z1.d }, p2/z, [z1.d]
+; CHECK-NEXT: ld1sb { z0.d }, p0/z, [z0.d]
 ; CHECK-NEXT: ptrue p0.s
 ; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
 ; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
@@ -109,8 +125,6 @@ define <vscale x 4 x i32> @masked_sgather_nxv4i8(<vscale x 4 x i8*> %ptrs, <vsca
 declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
 declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
 declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
-
 declare <vscale x 4 x i8> @llvm.masked.gather.nxv4i8(<vscale x 4 x i8*>, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)
-
 declare <vscale x 16 x i8> @llvm.masked.gather.nxv16i8(<vscale x 16 x i8*>, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
 declare <vscale x 32 x i32> @llvm.masked.gather.nxv32i32(<vscale x 32 x i32*>, i32, <vscale x 32 x i1>, <vscale x 32 x i32>)
test/CodeGen/AArch64/sve-masked-gather-vec-plus-imm.ll (new file, 186 lines)
@@ -0,0 +1,186 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s

define <vscale x 2 x i64> @masked_gather_nxv2i8(<vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d, #1]
; CHECK-NEXT: ret
  %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 1
  %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
  %vals.zext = zext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

define <vscale x 2 x i64> @masked_gather_nxv2i16(<vscale x 2 x i16*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d, #2]
; CHECK-NEXT: ret
  %ptrs = getelementptr i16, <vscale x 2 x i16*> %bases, i32 1
  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  %vals.zext = zext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

define <vscale x 2 x i64> @masked_gather_nxv2i32(<vscale x 2 x i32*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d, #4]
; CHECK-NEXT: ret
  %ptrs = getelementptr i32, <vscale x 2 x i32*> %bases, i32 1
  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
  %vals.zext = zext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

define <vscale x 2 x i64> @masked_gather_nxv2i64(<vscale x 2 x i64*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d, #8]
; CHECK-NEXT: ret
  %ptrs = getelementptr i64, <vscale x 2 x i64*> %bases, i32 1
  %vals.zext = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
  ret <vscale x 2 x i64> %vals.zext
}

define <vscale x 2 x half> @masked_gather_nxv2f16(<vscale x 2 x half*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d, #4]
; CHECK-NEXT: ret
  %ptrs = getelementptr half, <vscale x 2 x half*> %bases, i32 2
  %vals = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
  ret <vscale x 2 x half> %vals
}

define <vscale x 2 x bfloat> @masked_gather_nxv2bf16(<vscale x 2 x bfloat*> %bases, <vscale x 2 x i1> %mask) #0 {
; CHECK-LABEL: masked_gather_nxv2bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d, #4]
; CHECK-NEXT: ret
  %ptrs = getelementptr bfloat, <vscale x 2 x bfloat*> %bases, i32 2
  %vals = call <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
  ret <vscale x 2 x bfloat> %vals
}

define <vscale x 2 x float> @masked_gather_nxv2f32(<vscale x 2 x float*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d, #12]
; CHECK-NEXT: ret
  %ptrs = getelementptr float, <vscale x 2 x float*> %bases, i32 3
  %vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
  ret <vscale x 2 x float> %vals
}

define <vscale x 2 x double> @masked_gather_nxv2f64(<vscale x 2 x double*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d, #32]
; CHECK-NEXT: ret
  %ptrs = getelementptr double, <vscale x 2 x double*> %bases, i32 4
  %vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
  ret <vscale x 2 x double> %vals
}

define <vscale x 2 x i64> @masked_sgather_nxv2i8(<vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1sb { z0.d }, p0/z, [z0.d, #5]
; CHECK-NEXT: ret
  %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 5
  %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
  %vals.sext = sext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

define <vscale x 2 x i64> @masked_sgather_nxv2i16(<vscale x 2 x i16*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1sh { z0.d }, p0/z, [z0.d, #12]
; CHECK-NEXT: ret
  %ptrs = getelementptr i16, <vscale x 2 x i16*> %bases, i32 6
  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  %vals.sext = sext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

define <vscale x 2 x i64> @masked_sgather_nxv2i32(<vscale x 2 x i32*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1sw { z0.d }, p0/z, [z0.d, #28]
; CHECK-NEXT: ret
  %ptrs = getelementptr i32, <vscale x 2 x i32*> %bases, i32 7
  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
  %vals.sext = sext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

; Tests where the immediate is out of range

define <vscale x 2 x i64> @masked_gather_nxv2i8_range(<vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i8_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #32
; CHECK-NEXT: ld1b { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT: ret
  %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 32
  %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
  %vals.zext = zext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

define <vscale x 2 x half> @masked_gather_nxv2f16_range(<vscale x 2 x half*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f16_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #64
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT: ret
  %ptrs = getelementptr half, <vscale x 2 x half*> %bases, i32 32
  %vals = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
  ret <vscale x 2 x half> %vals
}

define <vscale x 2 x bfloat> @masked_gather_nxv2bf16_range(<vscale x 2 x bfloat*> %bases, <vscale x 2 x i1> %mask) #0 {
; CHECK-LABEL: masked_gather_nxv2bf16_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #64
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT: ret
  %ptrs = getelementptr bfloat, <vscale x 2 x bfloat*> %bases, i32 32
  %vals = call <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
  ret <vscale x 2 x bfloat> %vals
}

define <vscale x 2 x float> @masked_gather_nxv2f32_range(<vscale x 2 x float*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f32_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #128
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT: ret
  %ptrs = getelementptr float, <vscale x 2 x float*> %bases, i32 32
  %vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
  ret <vscale x 2 x float> %vals
}

define <vscale x 2 x double> @masked_gather_nxv2f64_range(<vscale x 2 x double*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f64_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #256
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT: ret
  %ptrs = getelementptr double, <vscale x 2 x double*> %bases, i32 32
  %vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
  ret <vscale x 2 x double> %vals
}

declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*>, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
declare <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>, <vscale x 2 x bfloat>)
declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
attributes #0 = { "target-features"="+sve,+bf16" }
test/CodeGen/AArch64/sve-masked-gather-vec-plus-reg.ll (new file, 137 lines)
@@ -0,0 +1,137 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s

define <vscale x 2 x i64> @masked_gather_nxv2i8(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
  %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
  %vals.zext = zext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

define <vscale x 2 x i64> @masked_gather_nxv2i16(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  %vals.zext = zext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

define <vscale x 2 x i64> @masked_gather_nxv2i32(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i32*>
  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
  %vals.zext = zext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

define <vscale x 2 x i64> @masked_gather_nxv2i64(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i64*>
  %vals = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
  ret <vscale x 2 x i64> %vals
}

define <vscale x 2 x half> @masked_gather_nxv2f16(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x half*>
  %vals = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
  ret <vscale x 2 x half> %vals
}

define <vscale x 2 x bfloat> @masked_gather_nxv2bf16(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) #0 {
; CHECK-LABEL: masked_gather_nxv2bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x bfloat*>
  %vals = call <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
  ret <vscale x 2 x bfloat> %vals
}

define <vscale x 2 x float> @masked_gather_nxv2f32(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x float*>
  %vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
  ret <vscale x 2 x float> %vals
}

define <vscale x 2 x double> @masked_gather_nxv2f64(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x double*>
  %vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
  ret <vscale x 2 x double> %vals
}

define <vscale x 2 x i64> @masked_sgather_nxv2i8(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
  %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
  %vals.sext = sext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

define <vscale x 2 x i64> @masked_sgather_nxv2i16(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  %vals.sext = sext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

define <vscale x 2 x i64> @masked_sgather_nxv2i32(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i32*>
  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
  %vals.sext = sext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*>, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
declare <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>, <vscale x 2 x bfloat>)
declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
attributes #0 = { "target-features"="+sve,+bf16" }
test/CodeGen/AArch64/sve-masked-gather.ll (new file, 117 lines)
@@ -0,0 +1,117 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s

define <vscale x 2 x i64> @masked_gather_nxv2i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ret
  %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
  %vals.zext = zext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

define <vscale x 2 x i64> @masked_gather_nxv2i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ret
  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  %vals.zext = zext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

define <vscale x 2 x i64> @masked_gather_nxv2i32(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ret
  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
  %vals.zext = zext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

define <vscale x 2 x i64> @masked_gather_nxv2i64(<vscale x 2 x i64*> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ret
  %vals = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
  ret <vscale x 2 x i64> %vals
}

define <vscale x 2 x half> @masked_gather_nxv2f16(<vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ret
  %vals = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
  ret <vscale x 2 x half> %vals
}

define <vscale x 2 x bfloat> @masked_gather_nxv2bf16(<vscale x 2 x bfloat*> %ptrs, <vscale x 2 x i1> %mask) #0 {
; CHECK-LABEL: masked_gather_nxv2bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ret
  %vals = call <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
  ret <vscale x 2 x bfloat> %vals
}

define <vscale x 2 x float> @masked_gather_nxv2f32(<vscale x 2 x float*> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ret
  %vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
  ret <vscale x 2 x float> %vals
}

define <vscale x 2 x double> @masked_gather_nxv2f64(<vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ret
  %vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
  ret <vscale x 2 x double> %vals
}

define <vscale x 2 x i64> @masked_sgather_nxv2i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1sb { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ret
  %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
  %vals.sext = sext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

define <vscale x 2 x i64> @masked_sgather_nxv2i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1sh { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ret
  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  %vals.sext = sext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

define <vscale x 2 x i64> @masked_sgather_nxv2i32(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1sw { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ret
  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
  %vals.sext = sext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*>, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
declare <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>, <vscale x 2 x bfloat>)
declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
attributes #0 = { "target-features"="+sve,+bf16" }
test/CodeGen/AArch64/sve-masked-scatter-vec-plus-imm.ll (new file, 138 lines)
@@ -0,0 +1,138 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s

define void @masked_scatter_nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: st1b { z0.d }, p0, [z1.d, #1]
; CHECK-NEXT: ret
  %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 1
  call void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask)
  ret void
}

define void @masked_scatter_nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [z1.d, #2]
; CHECK-NEXT: ret
  %ptrs = getelementptr i16, <vscale x 2 x i16*> %bases, i32 1
  call void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
  ret void
}

define void @masked_scatter_nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: st1w { z0.d }, p0, [z1.d, #4]
; CHECK-NEXT: ret
  %ptrs = getelementptr i32, <vscale x 2 x i32*> %bases, i32 1
  call void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask)
  ret void
}

define void @masked_scatter_nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: st1d { z0.d }, p0, [z1.d, #8]
; CHECK-NEXT: ret
  %ptrs = getelementptr i64, <vscale x 2 x i64*> %bases, i32 1
  call void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask)
  ret void
}

define void @masked_scatter_nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [z1.d, #4]
; CHECK-NEXT: ret
  %ptrs = getelementptr half, <vscale x 2 x half*> %bases, i32 2
  call void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
  ret void
}

define void @masked_scatter_nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %bases, <vscale x 2 x i1> %mask) #0 {
; CHECK-LABEL: masked_scatter_nxv2bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [z1.d, #4]
; CHECK-NEXT: ret
  %ptrs = getelementptr bfloat, <vscale x 2 x bfloat*> %bases, i32 2
  call void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
  ret void
}

define void @masked_scatter_nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: st1w { z0.d }, p0, [z1.d, #12]
; CHECK-NEXT: ret
  %ptrs = getelementptr float, <vscale x 2 x float*> %bases, i32 3
  call void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask)
  ret void
}

define void @masked_scatter_nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: st1d { z0.d }, p0, [z1.d, #32]
; CHECK-NEXT: ret
  %ptrs = getelementptr double, <vscale x 2 x double*> %bases, i32 4
  call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask)
  ret void
}

; Test where the immediate is out of range

define void @masked_scatter_nxv2i8_range(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i8_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #32
; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
  %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 32
  call void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask)
  ret void
}

define void @masked_scatter_nxv2i16_range(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i16_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #64
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
  %ptrs = getelementptr i16, <vscale x 2 x i16*> %bases, i32 32
  call void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
  ret void
}

define void @masked_scatter_nxv2i32_range(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i32_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #128
; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
  %ptrs = getelementptr i32, <vscale x 2 x i32*> %bases, i32 32
  call void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, i32 1, <vscale x 2 x i1> %mask)
  ret void
}

define void @masked_scatter_nxv2f64_range(<vscale x 2 x double> %data, <vscale x 2 x double*> %bases, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2f64_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #256
; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
  %ptrs = getelementptr double, <vscale x 2 x double*> %bases, i32 32
  call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask)
  ret void
}

declare void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>)
attributes #0 = { "target-features"="+sve,+bf16" }
test/CodeGen/AArch64/sve-masked-scatter-vec-plus-reg.ll (new file, 99 lines)
@@ -0,0 +1,99 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s

define void @masked_scatter_nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: st1b { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
  %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
  call void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask)
  ret void
}

define void @masked_scatter_nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
  call void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
  ret void
}

define void @masked_scatter_nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: st1w { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i32*>
  call void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask)
  ret void
}

define void @masked_scatter_nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i64*>
  call void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask)
  ret void
}

define void @masked_scatter_nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x half*>
  call void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
  ret void
}

define void @masked_scatter_nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) #0 {
; CHECK-LABEL: masked_scatter_nxv2bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x bfloat*>
  call void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
  ret void
}

define void @masked_scatter_nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: st1w { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x float*>
  call void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask)
  ret void
}

define void @masked_scatter_nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x double*>
  call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask)
  ret void
}

declare void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>)
attributes #0 = { "target-features"="+sve,+bf16" }
test/CodeGen/AArch64/sve-masked-scatter.ll (new file, 84 lines)
@@ -0,0 +1,84 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

define void @masked_scatter_nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: st1b { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
  call void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
  call void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: st1w { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
  call void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: st1d { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
  call void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
  call void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %ptrs, <vscale x 2 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv2bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
  call void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: st1w { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
  call void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: st1d { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
  call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

declare void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)
attributes #0 = { "target-features"="+sve,+bf16" }