[RISCV] Support fixed-length INSERT_VECTOR_ELT

This patch enables support for lowering INSERT_VECTOR_ELT on fixed-length
vector types. The strategy follows that for scalable vector types.

This patch also includes a quick fix to prevent the compiler infinitely
looping between lowering BUILD_VECTOR as VECTOR_SHUFFLE and back again.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D97698

parent 5fcbc5f0f5
commit 97dc903cfa
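For orientation, the container strategy the message alludes to can be pictured with a standalone C++ simulation. This is a sketch under assumed semantics, not LLVM code: insertFixed and the fixed container width of 8 are illustrative only, and it assumes Idx is in range.

#include <algorithm>
#include <cstddef>
#include <vector>

// Copy the fixed vector into a wider "container" (standing in for the
// scalable type), run the scalable-path operation with VL capped at the
// fixed element count, then copy the live prefix back out.
static std::vector<int> insertFixed(std::vector<int> V, int Val,
                                    std::size_t Idx) {
  const std::size_t FixedLen = V.size(); // the VL used for container ops
  std::vector<int> Container(8, 0);      // assumes FixedLen <= 8
  std::copy(V.begin(), V.end(), Container.begin());
  Container[Idx] = Val;                  // the scalable INSERT_VECTOR_ELT
  V.assign(Container.begin(), Container.begin() + FixedLen);
  return V;
}

The real lowering never leaves vector registers; the copies here only stand in for the patch's convertToScalableVector and convertFromScalableVector steps.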
@@ -554,6 +554,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       }

       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

       setOperationAction(ISD::ADD, VT, Custom);
@@ -610,6 +611,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,

       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

       setOperationAction(ISD::LOAD, VT, Custom);
@@ -1021,6 +1023,19 @@ getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
   return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
 }

+// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
+// of either is (currently) supported. This can get us into an infinite loop
+// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
+// as a ..., etc.
+// Until either (or both) of these can reliably lower any node, reporting that
+// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
+// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
+// which is not desirable.
+bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
+    EVT VT, unsigned DefinedValues) const {
+  return false;
+}
+
 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
   MVT VT = Op.getSimpleValueType();
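shouldExpandBuildVectorWithShuffles is the TargetLowering hook consulted when expanding a BUILD_VECTOR via shuffles. The loop the comment describes can be pictured with a minimal standalone sketch; the control flow is assumed and heavily simplified, and Kind and lowerOnce are illustrative names, not LLVM API.

#include <cstdio>

enum class Kind { BuildVector, VectorShuffle, StackExpansion };

// Stand-in for the overridden hook; returning false is the fix.
static bool shouldExpandBuildVectorWithShuffles() { return false; }

static Kind lowerOnce(Kind K) {
  switch (K) {
  case Kind::BuildVector:
    // If the hook said true, an unsupported BUILD_VECTOR would be retried
    // as a VECTOR_SHUFFLE; with false it is expanded through the stack.
    return shouldExpandBuildVectorWithShuffles() ? Kind::VectorShuffle
                                                 : Kind::StackExpansion;
  case Kind::VectorShuffle:
    // Very few shuffles are supported yet, so shuffle lowering falls back
    // to a BUILD_VECTOR of extracted elements, closing the cycle.
    return Kind::BuildVector;
  case Kind::StackExpansion:
    return Kind::StackExpansion;
  }
  return Kind::StackExpansion;
}

int main() {
  Kind K = Kind::BuildVector;
  for (int Step = 0; Step < 8 && K != Kind::StackExpansion; ++Step)
    K = lowerOnce(K);
  std::puts(K == Kind::StackExpansion ? "lowering terminated"
                                      : "still looping");
}

With the override returning false, a BUILD_VECTOR goes straight to the slow but terminating stack expansion.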
@@ -2179,6 +2194,16 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
   SDValue Val = Op.getOperand(1);
   SDValue Idx = Op.getOperand(2);

+  MVT ContainerVT = VecVT;
+  // If the operand is a fixed-length vector, convert to a scalable one.
+  if (VecVT.isFixedLengthVector()) {
+    ContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
+    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+  }
+
+  SDValue Mask, VL;
+  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
+
   // Custom-legalize INSERT_VECTOR_ELT where XLEN>=SEW, so that the vector is
   // first slid down into position, the value is inserted into the first
   // position, and the vector is slid back up. We do this to simplify patterns.
@@ -2186,21 +2211,17 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
   if (Subtarget.is64Bit() || Val.getValueType() != MVT::i64) {
     if (isNullConstant(Idx))
       return Op;
-    SDValue Mask, VL;
-    std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
-    SDValue Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VecVT,
-                                    DAG.getUNDEF(VecVT), Vec, Idx, Mask, VL);
+    SDValue Slidedown =
+        DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
+                    DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
     SDValue InsertElt0 =
-        DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT, Slidedown, Val,
+        DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT, Slidedown, Val,
                     DAG.getConstant(0, DL, Subtarget.getXLenVT()));

-    return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VecVT, Vec, InsertElt0, Idx,
-                       Mask, VL);
+    return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec, InsertElt0,
+                       Idx, Mask, VL);
   }

+  if (!VecVT.isScalableVector())
+    return SDValue();
+
   // Custom-legalize INSERT_VECTOR_ELT where XLEN<SEW, as the SEW element type
   // is illegal (currently only vXi64 RV32).
   // Since there is no easy way of getting a single element into a vector when
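A standalone simulation can make the slide-based path above concrete. This is a sketch under assumed semantics of VSLIDEDOWN_VL and VSLIDEUP_VL, not LLVM code, and insertViaSlides is an illustrative name.

#include <cstddef>
#include <vector>

static std::vector<long long>
insertViaSlides(const std::vector<long long> &Vec, long long Val,
                std::size_t Idx) {
  const std::size_t N = Vec.size();
  // VSLIDEDOWN_VL by Idx: Down[I] = Vec[I + Idx]; the slid-past tail is
  // left as zero here for simplicity.
  std::vector<long long> Down(N, 0);
  for (std::size_t I = 0; I + Idx < N; ++I)
    Down[I] = Vec[I + Idx];
  // INSERT_VECTOR_ELT at index 0, the easy case the comment refers to.
  Down[0] = Val;
  // VSLIDEUP_VL by Idx over Vec: elements below Idx keep their old values;
  // elements at and above Idx come from Down.
  std::vector<long long> Up(Vec);
  for (std::size_t I = Idx; I < N; ++I)
    Up[I] = Down[I - Idx];
  return Up; // equal to Vec with Vec[Idx] replaced by Val
}

The slide-up leaves elements below Idx untouched and re-deposits the slid-down tail at its original positions, so the net effect is that only element Idx changes.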
@@ -2212,17 +2233,21 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
   // This essentially merges the original vector with the inserted element by
   // using a mask whose only set bit is that corresponding to the insert
   // index.
-  SDValue SplattedVal = DAG.getSplatVector(VecVT, DL, Val);
-  SDValue SplattedIdx = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Idx);
+  SDValue SplattedVal = DAG.getSplatVector(ContainerVT, DL, Val);
+  SDValue SplattedIdx =
+      DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, Idx, VL);

-  SDValue Mask, VL;
-  std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
-  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VecVT, Mask, VL);
+  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
   auto SetCCVT =
-      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VecVT);
-  SDValue SelectCond = DAG.getSetCC(DL, SetCCVT, VID, SplattedIdx, ISD::SETEQ);
-
-  return DAG.getNode(ISD::VSELECT, DL, VecVT, SelectCond, SplattedVal, Vec);
+      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ContainerVT);
+  SDValue SelectCond =
+      DAG.getNode(RISCVISD::SETCC_VL, DL, SetCCVT, VID, SplattedIdx,
+                  DAG.getCondCode(ISD::SETEQ), Mask, VL);
+  SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT,
+                               SelectCond, SplattedVal, Vec, VL);
+  if (!VecVT.isFixedLengthVector())
+    return Select;
+  return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
 }

 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
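The mask-and-merge path above admits the same kind of standalone check. Again a hedged sketch: insertViaMerge and the scalar loops are purely illustrative stand-ins for the vmv.v.x, vid.v, vmseq and vmerge.vvm sequence the tests below exhibit.

#include <cstddef>
#include <vector>

static std::vector<long long> insertViaMerge(std::vector<long long> Vec,
                                             long long Val, std::size_t Idx) {
  const std::size_t N = Vec.size();
  std::vector<long long> SplattedVal(N, Val); // splat of Val (vmv.v.x)
  std::vector<bool> SelectCond(N);
  for (std::size_t I = 0; I < N; ++I)
    SelectCond[I] = (I == Idx); // vid.v + vmseq: one set bit, at Idx
  for (std::size_t I = 0; I < N; ++I) // vmerge.vvm under that mask
    Vec[I] = SelectCond[I] ? SplattedVal[I] : Vec[I];
  return Vec;
}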
@@ -243,6 +243,9 @@ public:
                     bool ForCodeSize) const override;

   bool hasBitPreservingFPLogic(EVT VT) const override;
+  bool
+  shouldExpandBuildVectorWithShuffles(EVT VT,
+                                      unsigned DefinedValues) const override;

   // Provide custom lowering hooks for some operations.
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
@@ -8,88 +8,34 @@
 define void @insertelt_v4i64(<4 x i64>* %x, i64 %y) {
 ; RV32-LABEL: insertelt_v4i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -128
-; RV32-NEXT: .cfi_def_cfa_offset 128
-; RV32-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: .cfi_offset s0, -8
-; RV32-NEXT: addi s0, sp, 128
-; RV32-NEXT: .cfi_def_cfa s0, 0
-; RV32-NEXT: andi sp, sp, -32
-; RV32-NEXT: sw a2, 32(sp)
-; RV32-NEXT: sw a1, 64(sp)
-; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu
-; RV32-NEXT: vle32.v v26, (a0)
-; RV32-NEXT: vmv.x.s a1, v26
-; RV32-NEXT: sw a1, 0(sp)
-; RV32-NEXT: addi a1, sp, 32
-; RV32-NEXT: vle32.v v28, (a1)
-; RV32-NEXT: vmv.x.s a1, v28
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: addi a1, sp, 64
-; RV32-NEXT: vle32.v v28, (a1)
-; RV32-NEXT: vmv.x.s a1, v28
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu
-; RV32-NEXT: vslidedown.vi v28, v26, 5
-; RV32-NEXT: vmv.x.s a1, v28
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: vslidedown.vi v28, v26, 4
-; RV32-NEXT: vmv.x.s a1, v28
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: vslidedown.vi v28, v26, 3
-; RV32-NEXT: vmv.x.s a1, v28
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: vslidedown.vi v28, v26, 2
-; RV32-NEXT: vmv.x.s a1, v28
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: vslidedown.vi v26, v26, 1
-; RV32-NEXT: vmv.x.s a1, v26
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu
-; RV32-NEXT: vle32.v v26, (sp)
-; RV32-NEXT: vse32.v v26, (a0)
-; RV32-NEXT: addi sp, s0, -128
-; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 128
+; RV32-NEXT: vsetivli a3, 4, e64,m2,ta,mu
+; RV32-NEXT: vle64.v v26, (a0)
+; RV32-NEXT: vsetvli a3, zero, e64,m2,ta,mu
+; RV32-NEXT: vmv.v.x v28, a2
+; RV32-NEXT: addi a2, zero, 32
+; RV32-NEXT: vsll.vx v28, v28, a2
+; RV32-NEXT: vmv.v.x v30, a1
+; RV32-NEXT: vsll.vx v30, v30, a2
+; RV32-NEXT: vsrl.vx v30, v30, a2
+; RV32-NEXT: vor.vv v28, v30, v28
+; RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
+; RV32-NEXT: vid.v v30
+; RV32-NEXT: vmseq.vi v0, v30, 3
+; RV32-NEXT: vmerge.vvm v26, v26, v28, v0
+; RV32-NEXT: vse64.v v26, (a0)
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: insertelt_v4i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -96
-; RV64-NEXT: .cfi_def_cfa_offset 96
-; RV64-NEXT: sd ra, 88(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s0, 80(sp) # 8-byte Folded Spill
-; RV64-NEXT: .cfi_offset ra, -8
-; RV64-NEXT: .cfi_offset s0, -16
-; RV64-NEXT: addi s0, sp, 96
-; RV64-NEXT: .cfi_def_cfa s0, 0
-; RV64-NEXT: andi sp, sp, -32
 ; RV64-NEXT: vsetivli a2, 4, e64,m2,ta,mu
 ; RV64-NEXT: vle64.v v26, (a0)
-; RV64-NEXT: sd a1, 32(sp)
-; RV64-NEXT: vmv.x.s a1, v26
-; RV64-NEXT: sd a1, 0(sp)
-; RV64-NEXT: addi a1, sp, 32
-; RV64-NEXT: vle64.v v28, (a1)
-; RV64-NEXT: vmv.x.s a1, v28
-; RV64-NEXT: sd a1, 24(sp)
-; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu
-; RV64-NEXT: vslidedown.vi v28, v26, 2
-; RV64-NEXT: vmv.x.s a1, v28
-; RV64-NEXT: sd a1, 16(sp)
-; RV64-NEXT: vslidedown.vi v26, v26, 1
-; RV64-NEXT: vmv.x.s a1, v26
-; RV64-NEXT: sd a1, 8(sp)
+; RV64-NEXT: vslidedown.vi v28, v26, 3
+; RV64-NEXT: vsetvli a2, zero, e64,m2,ta,mu
+; RV64-NEXT: vmv.s.x v28, a1
+; RV64-NEXT: vsetivli a1, 4, e64,m2,tu,mu
+; RV64-NEXT: vslideup.vi v26, v28, 3
+; RV64-NEXT: vsetivli a1, 4, e64,m2,ta,mu
-; RV64-NEXT: vle64.v v26, (sp)
 ; RV64-NEXT: vse64.v v26, (a0)
-; RV64-NEXT: addi sp, s0, -96
-; RV64-NEXT: ld s0, 80(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld ra, 88(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 96
 ; RV64-NEXT: ret
   %a = load <4 x i64>, <4 x i64>* %x
   %b = insertelement <4 x i64> %a, i64 %y, i32 3
@@ -104,8 +50,39 @@ define void @insertelt_v4i64(<4 x i64>* %x, i64 %y) {
 define void @insertelt_v3i64(<3 x i64>* %x, i64 %y) {
 ; RV32-LABEL: insertelt_v3i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: addi a3, a0, 16
+; RV32-NEXT: vsetivli a4, 2, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v25, (a3)
+; RV32-NEXT: vse32.v v25, (sp)
+; RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu
+; RV32-NEXT: vle64.v v26, (a0)
+; RV32-NEXT: vsetivli a3, 8, e32,m2,ta,mu
+; RV32-NEXT: vmv.v.i v28, 0
+; RV32-NEXT: vsetivli a3, 2, e64,m2,tu,mu
+; RV32-NEXT: vslideup.vi v28, v26, 0
+; RV32-NEXT: vsetivli a3, 4, e32,m1,ta,mu
+; RV32-NEXT: vle32.v v26, (sp)
+; RV32-NEXT: vsetivli a3, 4, e64,m2,tu,mu
+; RV32-NEXT: vslideup.vi v28, v26, 2
+; RV32-NEXT: vsetvli a3, zero, e64,m2,ta,mu
+; RV32-NEXT: vmv.v.x v26, a2
+; RV32-NEXT: addi a3, zero, 32
+; RV32-NEXT: vsll.vx v26, v26, a3
+; RV32-NEXT: vmv.v.x v30, a1
+; RV32-NEXT: vsll.vx v30, v30, a3
+; RV32-NEXT: vsrl.vx v30, v30, a3
+; RV32-NEXT: vor.vv v26, v30, v26
+; RV32-NEXT: vsetivli a3, 4, e64,m2,ta,mu
+; RV32-NEXT: vid.v v30
+; RV32-NEXT: vmseq.vi v0, v30, 2
+; RV32-NEXT: vmerge.vvm v26, v28, v26, v0
+; RV32-NEXT: vsetivli a3, 2, e64,m1,ta,mu
+; RV32-NEXT: vse64.v v26, (a0)
 ; RV32-NEXT: sw a1, 16(a0)
 ; RV32-NEXT: sw a2, 20(a0)
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: insertelt_v3i64:
@@ -117,3 +94,103 @@ define void @insertelt_v3i64(<3 x i64>* %x, i64 %y) {
   store <3 x i64> %b, <3 x i64>* %x
   ret void
 }
+
+define void @insertelt_v16i8(<16 x i8>* %x, i8 %y) {
+; RV32-LABEL: insertelt_v16i8:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a2, 16, e8,m1,ta,mu
+; RV32-NEXT: vle8.v v25, (a0)
+; RV32-NEXT: vslidedown.vi v26, v25, 14
+; RV32-NEXT: vsetvli a2, zero, e8,m1,ta,mu
+; RV32-NEXT: vmv.s.x v26, a1
+; RV32-NEXT: vsetivli a1, 16, e8,m1,tu,mu
+; RV32-NEXT: vslideup.vi v25, v26, 14
+; RV32-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; RV32-NEXT: vse8.v v25, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: insertelt_v16i8:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a2, 16, e8,m1,ta,mu
+; RV64-NEXT: vle8.v v25, (a0)
+; RV64-NEXT: vslidedown.vi v26, v25, 14
+; RV64-NEXT: vsetvli a2, zero, e8,m1,ta,mu
+; RV64-NEXT: vmv.s.x v26, a1
+; RV64-NEXT: vsetivli a1, 16, e8,m1,tu,mu
+; RV64-NEXT: vslideup.vi v25, v26, 14
+; RV64-NEXT: vsetivli a1, 16, e8,m1,ta,mu
+; RV64-NEXT: vse8.v v25, (a0)
+; RV64-NEXT: ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = insertelement <16 x i8> %a, i8 %y, i32 14
+  store <16 x i8> %b, <16 x i8>* %x
+  ret void
+}
+
+define void @insertelt_v32i16(<32 x i16>* %x, i16 %y, i32 %idx) {
+; RV32-LABEL: insertelt_v32i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi a3, zero, 32
+; RV32-NEXT: vsetvli a4, a3, e16,m4,ta,mu
+; RV32-NEXT: vle16.v v28, (a0)
+; RV32-NEXT: vslidedown.vx v8, v28, a2
+; RV32-NEXT: vsetvli a4, zero, e16,m4,ta,mu
+; RV32-NEXT: vmv.s.x v8, a1
+; RV32-NEXT: vsetvli a1, a3, e16,m4,tu,mu
+; RV32-NEXT: vslideup.vx v28, v8, a2
+; RV32-NEXT: vsetvli a1, a3, e16,m4,ta,mu
+; RV32-NEXT: vse16.v v28, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: insertelt_v32i16:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a3, zero, 32
+; RV64-NEXT: vsetvli a4, a3, e16,m4,ta,mu
+; RV64-NEXT: vle16.v v28, (a0)
+; RV64-NEXT: sext.w a2, a2
+; RV64-NEXT: vslidedown.vx v8, v28, a2
+; RV64-NEXT: vsetvli a4, zero, e16,m4,ta,mu
+; RV64-NEXT: vmv.s.x v8, a1
+; RV64-NEXT: vsetvli a1, a3, e16,m4,tu,mu
+; RV64-NEXT: vslideup.vx v28, v8, a2
+; RV64-NEXT: vsetvli a1, a3, e16,m4,ta,mu
+; RV64-NEXT: vse16.v v28, (a0)
+; RV64-NEXT: ret
+  %a = load <32 x i16>, <32 x i16>* %x
+  %b = insertelement <32 x i16> %a, i16 %y, i32 %idx
+  store <32 x i16> %b, <32 x i16>* %x
+  ret void
+}
+
+define void @insertelt_v8f32(<8 x float>* %x, float %y, i32 %idx) {
+; RV32-LABEL: insertelt_v8f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli a2, 8, e32,m2,ta,mu
+; RV32-NEXT: vle32.v v26, (a0)
+; RV32-NEXT: vslidedown.vx v28, v26, a1
+; RV32-NEXT: vsetvli a2, zero, e32,m2,ta,mu
+; RV32-NEXT: vfmv.s.f v28, fa0
+; RV32-NEXT: vsetivli a2, 8, e32,m2,tu,mu
+; RV32-NEXT: vslideup.vx v26, v28, a1
+; RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; RV32-NEXT: vse32.v v26, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: insertelt_v8f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a2, 8, e32,m2,ta,mu
+; RV64-NEXT: vle32.v v26, (a0)
+; RV64-NEXT: sext.w a1, a1
+; RV64-NEXT: vslidedown.vx v28, v26, a1
+; RV64-NEXT: vsetvli a2, zero, e32,m2,ta,mu
+; RV64-NEXT: vfmv.s.f v28, fa0
+; RV64-NEXT: vsetivli a2, 8, e32,m2,tu,mu
+; RV64-NEXT: vslideup.vx v26, v28, a1
+; RV64-NEXT: vsetivli a1, 8, e32,m2,ta,mu
+; RV64-NEXT: vse32.v v26, (a0)
+; RV64-NEXT: ret
+  %a = load <8 x float>, <8 x float>* %x
+  %b = insertelement <8 x float> %a, float %y, i32 %idx
+  store <8 x float> %b, <8 x float>* %x
+  ret void
+}