[SVE][CodeGen] Improve codegen of scalable masked scatters
If the scatter store is able to perform the sign/zero extend of its index, this is folded into the instruction with refineIndexType(). Additionally, refineUniformBase() will return the base pointer and index from an add + splat_vector.

Reviewed By: sdesmalen

Differential Revision: https://reviews.llvm.org/D90942

This commit is contained in:
parent 69f353da9d
commit c26a89a1b4
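
As an illustration of the end-to-end effect, here is a sketch in the style of the updated tests below (not itself part of the commit). Before this patch the sign-extended 32-bit indices forced the scatter to be unpacked into two 64-bit-indexed stores; with the extend folded into the index type, it selects to a single 32-bit-index store using the sxtw addressing mode.

; Sketch only: mirrors masked_scatter_nxv4i32_sext from the tests below.
define void @scatter_sext_sketch(<vscale x 4 x i32> %data, i32* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) {
  %ext = sext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
  %ptrs = getelementptr i32, i32* %base, <vscale x 4 x i64> %ext
  call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %masks)
  ret void
}
declare void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32>, <vscale x 4 x i32*>, i32, <vscale x 4 x i1>)
; The body now selects to a single store:
;   st1w { z0.s }, p0, [x0, z1.s, sxtw #2]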
@@ -1318,6 +1318,10 @@ public:
        getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
  }

  // Returns true if VT is a legal index type for masked gathers/scatters
  // on this target
  virtual bool shouldRemoveExtendFromGSIndex(EVT VT) const { return false; }

  /// Return how the condition code should be treated: either it is legal, needs
  /// to be expanded to some other code sequence, or the target has a custom
  /// expander for it.
@@ -9399,16 +9399,74 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
                     TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}

bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
  if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
    return false;

  // For now we check only the LHS of the add.
  SDValue LHS = Index.getOperand(0);
  SDValue SplatVal = DAG.getSplatValue(LHS);
  if (!SplatVal)
    return false;

  BasePtr = SplatVal;
  Index = Index.getOperand(1);
  return true;
}
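
Some context on the shape this matches (an assumed lowering, not spelled out in the patch): a vector GEP from a scalar base reaches the DAG as a scatter whose base pointer is the constant zero and whose index is an add of a splat of the base with the offsets. refineUniformBase() peels the splat back out so the base can live in a scalar register, which is what removes the mov/mov/add preamble visible in the updated unscaled-offset tests below.

; Hypothetical operand sketch for a scatter fed by a vector GEP:
;   before: BasePtr = 0,     Index = add(splat_vector(%base), %offsets)
;   after:  BasePtr = %base, Index = %offsets
; Only a splat on the LHS of the add is recognised for now, as the
; comment in refineUniformBase() notes.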

// Fold sext/zext of index into index type.
bool refineIndexType(MaskedScatterSDNode *MSC, SDValue &Index, bool Scaled,
                     SelectionDAG &DAG) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Op = Index.getOperand(0);

  if (Index.getOpcode() == ISD::ZERO_EXTEND) {
    MSC->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
    if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
      Index = Op;
      return true;
    }
  }

  if (Index.getOpcode() == ISD::SIGN_EXTEND) {
    MSC->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
    if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
      Index = Op;
      return true;
    }
  }

  return false;
}
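
The zero-extend path records the unsigned index type and, where the target allows it, drops the extend entirely. A sketch of the zext case, mirroring masked_scatter_nxv4i16_zext from the tests below:

; Sketch only: mirrors masked_scatter_nxv4i16_zext from the tests below.
define void @scatter_zext_sketch(<vscale x 4 x i16> %data, i16* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) {
  %ext = zext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
  %ptrs = getelementptr i16, i16* %base, <vscale x 4 x i64> %ext
  call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %data, <vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> %masks)
  ret void
}
declare void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16>, <vscale x 4 x i16*>, i32, <vscale x 4 x i1>)
; Now selects to: st1h { z0.s }, p0, [x0, z1.s, uxtw #1]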

SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
  SDValue Mask = MSC->getMask();
  SDValue Chain = MSC->getChain();
  SDValue Index = MSC->getIndex();
  SDValue Scale = MSC->getScale();
  SDValue StoreVal = MSC->getValue();
  SDValue BasePtr = MSC->getBasePtr();
  SDLoc DL(N);

  // Zap scatters with a zero mask.
  if (ISD::isBuildVectorAllZeros(Mask.getNode()))
    return Chain;

  if (refineUniformBase(BasePtr, Index, DAG)) {
    SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
    return DAG.getMaskedScatter(
        DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops,
        MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
  }

  if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
    SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
    return DAG.getMaskedScatter(
        DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops,
        MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
  }

  return SDValue();
}
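
Unrelated to the index refinements, the combine also deletes scatters whose mask is known to be all false, replacing the node with its chain. A minimal hypothetical example (fixed-width, since ISD::isBuildVectorAllZeros inspects a BUILD_VECTOR):

; Hypothetical: the all-false mask means nothing is ever stored, so the
; scatter folds away to its chain operand.
define void @scatter_never_stores(<4 x i64> %data, <4 x i64*> %ptrs) {
  call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> %data, <4 x i64*> %ptrs, i32 8, <4 x i1> zeroinitializer)
  ret void
}
declare void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64>, <4 x i64*>, i32, <4 x i1>)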

@@ -3705,6 +3705,14 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
  }
}

bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
  if (VT.getVectorElementType() == MVT::i32 &&
      VT.getVectorElementCount().getKnownMinValue() >= 4)
    return true;

  return false;
}
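
In other words, AArch64 only asks for the extend to be removed when the index vector has i32 elements and a known-minimum length of at least four, the shapes that map onto the sxtw/uxtw-extended 32-bit scatter forms; smaller or wider index types keep their extend. Illustrative readings of the predicate (assumptions, not spelled out in the patch):

; VT       -> shouldRemoveExtendFromGSIndex(VT)
; nxv4i32  -> true   (i32 elements, known-min count 4)
; nxv2i32  -> false  (known-min count only 2)
; nxv2i64  -> false  (element type is not i32)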

bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
  return ExtVal.getValueType().isScalableVector();
}

@@ -3792,11 +3800,8 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
    InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
  }

  if (getScatterIndexIsExtended(Index)) {
    if (Index.getOpcode() == ISD::AND)
      IsSigned = false;
  if (getScatterIndexIsExtended(Index))
    Index = Index.getOperand(0);
  }

  SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
  return DAG.getNode(getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend), DL,

@@ -980,6 +980,7 @@ private:
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
@@ -166,15 +166,7 @@ define void @masked_scatter_nxv2f64_zext(<vscale x 2 x double> %data, double* %b
define void @masked_scatter_nxv4i16_sext(<vscale x 4 x i16> %data, i16* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i16_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: uunpkhi z2.d, z1.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1h { z3.d }, p2, [x0, z1.d, sxtw #1]
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z2.d, sxtw #1]
; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, sxtw #1]
; CHECK-NEXT: ret
%ext = sext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
%ptrs = getelementptr i16, i16* %base, <vscale x 4 x i64> %ext
@@ -185,15 +177,7 @@ define void @masked_scatter_nxv4i16_sext(<vscale x 4 x i16> %data, i16* %base, <
define void @masked_scatter_nxv4i32_sext(<vscale x 4 x i32> %data, i32* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i32_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: uunpkhi z2.d, z1.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1w { z3.d }, p2, [x0, z1.d, sxtw #2]
; CHECK-NEXT: st1w { z0.d }, p0, [x0, z2.d, sxtw #2]
; CHECK-NEXT: st1w { z0.s }, p0, [x0, z1.s, sxtw #2]
; CHECK-NEXT: ret
%ext = sext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
%ptrs = getelementptr i32, i32* %base, <vscale x 4 x i64> %ext
@@ -204,15 +188,7 @@ define void @masked_scatter_nxv4i32_sext(<vscale x 4 x i32> %data, i32* %base, <
define void @masked_scatter_nxv4f16_sext(<vscale x 4 x half> %data, half* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4f16_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: uunpkhi z2.d, z1.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1h { z3.d }, p2, [x0, z1.d, sxtw #1]
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z2.d, sxtw #1]
; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, sxtw #1]
; CHECK-NEXT: ret
%ext = sext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
%ptrs = getelementptr half, half* %base, <vscale x 4 x i64> %ext
@@ -223,15 +199,7 @@ define void @masked_scatter_nxv4f16_sext(<vscale x 4 x half> %data, half* %base,
define void @masked_scatter_nxv4bf16_sext(<vscale x 4 x bfloat> %data, bfloat* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv4bf16_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: uunpkhi z2.d, z1.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1h { z3.d }, p2, [x0, z1.d, sxtw #1]
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z2.d, sxtw #1]
; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, sxtw #1]
; CHECK-NEXT: ret
%ext = sext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
%ptrs = getelementptr bfloat, bfloat* %base, <vscale x 4 x i64> %ext
@@ -242,15 +210,7 @@ define void @masked_scatter_nxv4bf16_sext(<vscale x 4 x bfloat> %data, bfloat* %
define void @masked_scatter_nxv4f32_sext(<vscale x 4 x float> %data, float* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv4f32_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: uunpkhi z2.d, z1.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1w { z3.d }, p2, [x0, z1.d, sxtw #2]
; CHECK-NEXT: st1w { z0.d }, p0, [x0, z2.d, sxtw #2]
; CHECK-NEXT: st1w { z0.s }, p0, [x0, z1.s, sxtw #2]
; CHECK-NEXT: ret
%ext = sext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
%ptrs = getelementptr float, float* %base, <vscale x 4 x i64> %ext
@@ -261,15 +221,7 @@ define void @masked_scatter_nxv4f32_sext(<vscale x 4 x float> %data, float* %bas
define void @masked_scatter_nxv4i16_zext(<vscale x 4 x i16> %data, i16* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i16_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: uunpkhi z2.d, z1.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1h { z3.d }, p2, [x0, z1.d, uxtw #1]
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z2.d, uxtw #1]
; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, uxtw #1]
; CHECK-NEXT: ret
%ext = zext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
%ptrs = getelementptr i16, i16* %base, <vscale x 4 x i64> %ext
@@ -280,15 +232,7 @@ define void @masked_scatter_nxv4i16_zext(<vscale x 4 x i16> %data, i16* %base, <
define void @masked_scatter_nxv4i32_zext(<vscale x 4 x i32> %data, i32* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i32_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: uunpkhi z2.d, z1.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1w { z3.d }, p2, [x0, z1.d, uxtw #2]
; CHECK-NEXT: st1w { z0.d }, p0, [x0, z2.d, uxtw #2]
; CHECK-NEXT: st1w { z0.s }, p0, [x0, z1.s, uxtw #2]
; CHECK-NEXT: ret
%ext = zext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
%ptrs = getelementptr i32, i32* %base, <vscale x 4 x i64> %ext
@@ -299,15 +243,7 @@ define void @masked_scatter_nxv4i32_zext(<vscale x 4 x i32> %data, i32* %base, <
define void @masked_scatter_nxv4f16_zext(<vscale x 4 x half> %data, half* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4f16_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: uunpkhi z2.d, z1.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1h { z3.d }, p2, [x0, z1.d, uxtw #1]
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z2.d, uxtw #1]
; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, uxtw #1]
; CHECK-NEXT: ret
%ext = zext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
%ptrs = getelementptr half, half* %base, <vscale x 4 x i64> %ext
@@ -318,15 +254,7 @@ define void @masked_scatter_nxv4f16_zext(<vscale x 4 x half> %data, half* %base,
define void @masked_scatter_nxv4bf16_zext(<vscale x 4 x bfloat> %data, bfloat* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv4bf16_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: uunpkhi z2.d, z1.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1h { z3.d }, p2, [x0, z1.d, uxtw #1]
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z2.d, uxtw #1]
; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, uxtw #1]
; CHECK-NEXT: ret
%ext = zext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
%ptrs = getelementptr bfloat, bfloat* %base, <vscale x 4 x i64> %ext
@@ -337,15 +265,7 @@ define void @masked_scatter_nxv4bf16_zext(<vscale x 4 x bfloat> %data, bfloat* %
define void @masked_scatter_nxv4f32_zext(<vscale x 4 x float> %data, float* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv4f32_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: uunpkhi z2.d, z1.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1w { z3.d }, p2, [x0, z1.d, uxtw #2]
; CHECK-NEXT: st1w { z0.d }, p0, [x0, z2.d, uxtw #2]
; CHECK-NEXT: st1w { z0.s }, p0, [x0, z1.s, uxtw #2]
; CHECK-NEXT: ret
%ext = zext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
%ptrs = getelementptr float, float* %base, <vscale x 4 x i64> %ext

@@ -8,12 +8,7 @@
define void @masked_scatter_nxv2i8_sext_offsets(<vscale x 2 x i8> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i8_sext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: sxtw z1.d, p1/m, z1.d
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1b { z0.d }, p0, [x0, z1.d, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -25,12 +20,7 @@ define void @masked_scatter_nxv2i8_sext_offsets(<vscale x 2 x i8> %data, i8* %ba
define void @masked_scatter_nxv2i16_sext_offsets(<vscale x 2 x i16> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i16_sext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: sxtw z1.d, p1/m, z1.d
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -42,12 +32,7 @@ define void @masked_scatter_nxv2i16_sext_offsets(<vscale x 2 x i16> %data, i8* %
define void @masked_scatter_nxv2i32_sext_offsets(<vscale x 2 x i32> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i32_sext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: sxtw z1.d, p1/m, z1.d
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1w { z0.d }, p0, [x0, z1.d, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -59,12 +44,7 @@ define void @masked_scatter_nxv2i32_sext_offsets(<vscale x 2 x i32> %data, i8* %
define void @masked_scatter_nxv2i64_sext_offsets(<vscale x 2 x i64> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i64_sext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: sxtw z1.d, p1/m, z1.d
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -76,12 +56,7 @@ define void @masked_scatter_nxv2i64_sext_offsets(<vscale x 2 x i64> %data, i8* %
define void @masked_scatter_nxv2f16_sext_offsets(<vscale x 2 x half> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f16_sext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: sxtw z1.d, p1/m, z1.d
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -93,12 +68,7 @@ define void @masked_scatter_nxv2f16_sext_offsets(<vscale x 2 x half> %data, i8*
define void @masked_scatter_nxv2bf16_sext_offsets(<vscale x 2 x bfloat> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv2bf16_sext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: sxtw z1.d, p1/m, z1.d
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -110,12 +80,7 @@ define void @masked_scatter_nxv2bf16_sext_offsets(<vscale x 2 x bfloat> %data, i
define void @masked_scatter_nxv2f32_sext_offsets(<vscale x 2 x float> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f32_sext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: sxtw z1.d, p1/m, z1.d
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1w { z0.d }, p0, [x0, z1.d, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -127,12 +92,7 @@ define void @masked_scatter_nxv2f32_sext_offsets(<vscale x 2 x float> %data, i8*
define void @masked_scatter_nxv2f64_sext_offsets(<vscale x 2 x double> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f64_sext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: sxtw z1.d, p1/m, z1.d
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -144,11 +104,7 @@ define void @masked_scatter_nxv2f64_sext_offsets(<vscale x 2 x double> %data, i8
define void @masked_scatter_nxv2i8_zext_offsets(<vscale x 2 x i8> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i8_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1b { z0.d }, p0, [x0, z1.d, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -160,11 +116,7 @@ define void @masked_scatter_nxv2i8_zext_offsets(<vscale x 2 x i8> %data, i8* %ba
define void @masked_scatter_nxv2i16_zext_offsets(<vscale x 2 x i16> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i16_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -176,11 +128,7 @@ define void @masked_scatter_nxv2i16_zext_offsets(<vscale x 2 x i16> %data, i8* %
define void @masked_scatter_nxv2i32_zext_offsets(<vscale x 2 x i32> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i32_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1w { z0.d }, p0, [x0, z1.d, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -192,11 +140,7 @@ define void @masked_scatter_nxv2i32_zext_offsets(<vscale x 2 x i32> %data, i8* %
define void @masked_scatter_nxv2i64_zext_offsets(<vscale x 2 x i64> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i64_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -208,11 +152,7 @@ define void @masked_scatter_nxv2i64_zext_offsets(<vscale x 2 x i64> %data, i8* %
define void @masked_scatter_nxv2f16_zext_offsets(<vscale x 2 x half> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f16_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -224,11 +164,7 @@ define void @masked_scatter_nxv2f16_zext_offsets(<vscale x 2 x half> %data, i8*
define void @masked_scatter_nxv2bf16_zext_offsets(<vscale x 2 x bfloat> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv2bf16_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -240,11 +176,7 @@ define void @masked_scatter_nxv2bf16_zext_offsets(<vscale x 2 x bfloat> %data, i
define void @masked_scatter_nxv2f32_zext_offsets(<vscale x 2 x float> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f32_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1w { z0.d }, p0, [x0, z1.d, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -256,11 +188,7 @@ define void @masked_scatter_nxv2f32_zext_offsets(<vscale x 2 x float> %data, i8*
define void @masked_scatter_nxv2f64_zext_offsets(<vscale x 2 x double> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f64_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -275,19 +203,7 @@ define void @masked_scatter_nxv2f64_zext_offsets(<vscale x 2 x double> %data, i8
define void @masked_scatter_nxv4i8_sext_offsets(<vscale x 4 x i8> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i8_sext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: sunpklo z3.d, z1.s
; CHECK-NEXT: sunpkhi z1.d, z1.s
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: add z2.d, z2.d, z3.d
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1b { z3.d }, p2, [x8, z2.d]
; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1b { z0.s }, p0, [x0, z1.s, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -299,19 +215,7 @@ define void @masked_scatter_nxv4i8_sext_offsets(<vscale x 4 x i8> %data, i8* %ba
define void @masked_scatter_nxv4i16_sext_offsets(<vscale x 4 x i16> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i16_sext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: sunpklo z3.d, z1.s
; CHECK-NEXT: sunpkhi z1.d, z1.s
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: add z2.d, z2.d, z3.d
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1h { z3.d }, p2, [x8, z2.d]
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -323,19 +227,7 @@ define void @masked_scatter_nxv4i16_sext_offsets(<vscale x 4 x i16> %data, i8* %
define void @masked_scatter_nxv4i32_sext_offsets(<vscale x 4 x i32> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i32_sext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: sunpklo z3.d, z1.s
; CHECK-NEXT: sunpkhi z1.d, z1.s
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: add z2.d, z2.d, z3.d
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1w { z3.d }, p2, [x8, z2.d]
; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1w { z0.s }, p0, [x0, z1.s, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -347,19 +239,7 @@ define void @masked_scatter_nxv4i32_sext_offsets(<vscale x 4 x i32> %data, i8* %
define void @masked_scatter_nxv4f16_sext_offsets(<vscale x 4 x half> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4f16_sext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: sunpklo z3.d, z1.s
; CHECK-NEXT: sunpkhi z1.d, z1.s
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: add z2.d, z2.d, z3.d
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1h { z3.d }, p2, [x8, z2.d]
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -371,19 +251,7 @@ define void @masked_scatter_nxv4f16_sext_offsets(<vscale x 4 x half> %data, i8*
define void @masked_scatter_nxv4bf16_sext_offsets(<vscale x 4 x bfloat> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv4bf16_sext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: sunpklo z3.d, z1.s
; CHECK-NEXT: sunpkhi z1.d, z1.s
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: add z2.d, z2.d, z3.d
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1h { z3.d }, p2, [x8, z2.d]
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -395,19 +263,7 @@ define void @masked_scatter_nxv4bf16_sext_offsets(<vscale x 4 x bfloat> %data, i
define void @masked_scatter_nxv4f32_sext_offsets(<vscale x 4 x float> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv4f32_sext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: sunpklo z3.d, z1.s
; CHECK-NEXT: sunpkhi z1.d, z1.s
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: add z2.d, z2.d, z3.d
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1w { z3.d }, p2, [x8, z2.d]
; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1w { z0.s }, p0, [x0, z1.s, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -419,19 +275,7 @@ define void @masked_scatter_nxv4f32_sext_offsets(<vscale x 4 x float> %data, i8*
define void @masked_scatter_nxv4i8_zext_offsets(<vscale x 4 x i8> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i8_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: uunpklo z3.d, z1.s
; CHECK-NEXT: uunpkhi z1.d, z1.s
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: add z2.d, z2.d, z3.d
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1b { z3.d }, p2, [x8, z2.d]
; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1b { z0.s }, p0, [x0, z1.s, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -443,19 +287,7 @@ define void @masked_scatter_nxv4i8_zext_offsets(<vscale x 4 x i8> %data, i8* %ba
define void @masked_scatter_nxv4i16_zext_offsets(<vscale x 4 x i16> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i16_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: uunpklo z3.d, z1.s
; CHECK-NEXT: uunpkhi z1.d, z1.s
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: add z2.d, z2.d, z3.d
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1h { z3.d }, p2, [x8, z2.d]
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -467,19 +299,7 @@ define void @masked_scatter_nxv4i16_zext_offsets(<vscale x 4 x i16> %data, i8* %
define void @masked_scatter_nxv4i32_zext_offsets(<vscale x 4 x i32> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i32_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: uunpklo z3.d, z1.s
; CHECK-NEXT: uunpkhi z1.d, z1.s
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: add z2.d, z2.d, z3.d
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1w { z3.d }, p2, [x8, z2.d]
; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1w { z0.s }, p0, [x0, z1.s, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -491,19 +311,7 @@ define void @masked_scatter_nxv4i32_zext_offsets(<vscale x 4 x i32> %data, i8* %
define void @masked_scatter_nxv4f16_zext_offsets(<vscale x 4 x half> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4f16_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: uunpklo z3.d, z1.s
; CHECK-NEXT: uunpkhi z1.d, z1.s
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: add z2.d, z2.d, z3.d
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1h { z3.d }, p2, [x8, z2.d]
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -515,19 +323,7 @@ define void @masked_scatter_nxv4f16_zext_offsets(<vscale x 4 x half> %data, i8*
define void @masked_scatter_nxv4bf16_zext_offsets(<vscale x 4 x bfloat> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv4bf16_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: uunpklo z3.d, z1.s
; CHECK-NEXT: uunpkhi z1.d, z1.s
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: add z2.d, z2.d, z3.d
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1h { z3.d }, p2, [x8, z2.d]
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -539,19 +335,7 @@ define void @masked_scatter_nxv4bf16_zext_offsets(<vscale x 4 x bfloat> %data, i
define void @masked_scatter_nxv4f32_zext_offsets(<vscale x 4 x float> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv4f32_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: uunpklo z3.d, z1.s
; CHECK-NEXT: uunpkhi z1.d, z1.s
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: add z2.d, z2.d, z3.d
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1w { z3.d }, p2, [x8, z2.d]
; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1w { z0.s }, p0, [x0, z1.s, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets

@@ -8,10 +8,7 @@
define void @masked_scatter_nxv2i8_unscaled_64bit_offsets(<vscale x 2 x i8> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i8_unscaled_64bit_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1b { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i8*>
@@ -22,10 +19,7 @@ define void @masked_scatter_nxv2i8_unscaled_64bit_offsets(<vscale x 2 x i8> %dat
define void @masked_scatter_nxv2i16_unscaled_64bit_offsets(<vscale x 2 x i16> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i16_unscaled_64bit_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
@@ -36,10 +30,7 @@ define void @masked_scatter_nxv2i16_unscaled_64bit_offsets(<vscale x 2 x i16> %d
define void @masked_scatter_nxv2i32_unscaled_64bit_offsets(<vscale x 2 x i32> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i32_unscaled_64bit_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1w { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i32*>
@@ -50,10 +41,7 @@ define void @masked_scatter_nxv2i32_unscaled_64bit_offsets(<vscale x 2 x i32> %d
define void @masked_scatter_nxv2i64_unscaled_64bit_offsets(<vscale x 2 x i64> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i64_unscaled_64bit_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i64*>
@@ -64,10 +52,7 @@ define void @masked_scatter_nxv2i64_unscaled_64bit_offsets(<vscale x 2 x i64> %d
define void @masked_scatter_nxv2f16_unscaled_64bit_offsets(<vscale x 2 x half> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f16_unscaled_64bit_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x half*>
@@ -78,10 +63,7 @@ define void @masked_scatter_nxv2f16_unscaled_64bit_offsets(<vscale x 2 x half> %
define void @masked_scatter_nxv2bf16_unscaled_64bit_offsets(<vscale x 2 x bfloat> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv2bf16_unscaled_64bit_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x bfloat*>
@@ -92,10 +74,7 @@ define void @masked_scatter_nxv2bf16_unscaled_64bit_offsets(<vscale x 2 x bfloat
define void @masked_scatter_nxv2f32_unscaled_64bit_offsets(<vscale x 2 x float> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv2f32_unscaled_64bit_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1w { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x float*>
@@ -106,10 +85,7 @@ define void @masked_scatter_nxv2f32_unscaled_64bit_offsets(<vscale x 2 x float>
define void @masked_scatter_nxv2f64_unscaled_64bit_offsets(<vscale x 2 x double> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv2f64_unscaled_64bit_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x double*>