1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[SelectionDAG] Simplify PromoteIntRes_INSERT_SUBVECTOR to only handle result

Let other parts of legalization handle the rest of the node; this allows
re-use of existing optimizations elsewhere.

Differential Revision: https://reviews.llvm.org/D105624
This commit is contained in:
Bradley Smith 2021-07-08 11:28:38 +00:00
parent d34192c128
commit d34a8e8671
2 changed files with 66 additions and 104 deletions

View File

@ -4741,38 +4741,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_SUBVECTOR(SDNode *N) {
SDValue SubVec = N->getOperand(1);
SDValue Idx = N->getOperand(2);
auto *ConstantIdx = cast<ConstantSDNode>(Idx);
unsigned IdxN = ConstantIdx->getZExtValue();
EVT VecVT = Vec.getValueType();
EVT SubVecVT = SubVec.getValueType();
EVT NSubVT =
EVT::getVectorVT(*DAG.getContext(), NOutVT.getVectorElementType(),
SubVecVT.getVectorElementCount());
// To insert SubVec into Vec, store the wider vector to memory, overwrite the
// appropriate bits with the narrower vector, and reload.
Align SmallestAlign = DAG.getReducedAlign(SubVecVT, /*UseABI=*/false);
Vec = GetPromotedInteger(Vec);
SubVec = DAG.getNode(ISD::ANY_EXTEND, dl, NSubVT, SubVec);
SDValue StackPtr =
DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
auto StackPtrVT = StackPtr->getValueType(0);
auto &MF = DAG.getMachineFunction();
auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
SmallestAlign);
SDValue ScaledIdx = Idx;
if (SubVecVT.isScalableVector() && IdxN != 0) {
APInt IdxAPInt = cast<ConstantSDNode>(Idx)->getAPIntValue();
ScaledIdx = DAG.getVScale(dl, StackPtrVT,
IdxAPInt.sextOrSelf(StackPtrVT.getSizeInBits()));
}
SDValue SubVecPtr =
TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVecVT, ScaledIdx);
Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, PtrInfo, SmallestAlign);
return DAG.getExtLoad(ISD::LoadExtType::EXTLOAD, dl, NOutVT, Store, StackPtr,
PtrInfo, OutVT, SmallestAlign);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NOutVT, Vec, SubVec, Idx);
}
SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_REVERSE(SDNode *N) {

View File

@ -8,17 +8,12 @@ target triple = "aarch64-unknown-linux-gnu"
define <vscale x 8 x i8> @vec_scalable_subvec_scalable_idx_zero_i8(<vscale x 8 x i8>* %a, <vscale x 4 x i8>* %b) #0 {
; CHECK-LABEL: vec_scalable_subvec_scalable_idx_zero_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT: ld1b { z1.s }, p1/z, [x1]
; CHECK-NEXT: st1b { z0.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1b { z1.s }, p1, [sp, #2, mul vl]
; CHECK-NEXT: ld1b { z0.h }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1b { z1.s }, p0/z, [x1]
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: ret
%vec = load <vscale x 8 x i8>, <vscale x 8 x i8>* %a
%subvec = load <vscale x 4 x i8>, <vscale x 4 x i8>* %b
@ -29,18 +24,12 @@ define <vscale x 8 x i8> @vec_scalable_subvec_scalable_idx_zero_i8(<vscale x 8 x
define <vscale x 8 x i8> @vec_scalable_subvec_scalable_idx_nonzero_i8(<vscale x 8 x i8>* %a, <vscale x 4 x i8>* %b) #0 {
; CHECK-LABEL: vec_scalable_subvec_scalable_idx_nonzero_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT: ld1b { z1.s }, p1/z, [x1]
; CHECK-NEXT: addpl x8, sp, #4
; CHECK-NEXT: st1b { z0.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1b { z1.s }, p1, [x8, #1, mul vl]
; CHECK-NEXT: ld1b { z0.h }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1b { z1.s }, p0/z, [x1]
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%vec = load <vscale x 8 x i8>, <vscale x 8 x i8>* %a
%subvec = load <vscale x 4 x i8>, <vscale x 4 x i8>* %b
@ -51,17 +40,12 @@ define <vscale x 8 x i8> @vec_scalable_subvec_scalable_idx_nonzero_i8(<vscale x
define <vscale x 4 x i16> @vec_scalable_subvec_scalable_idx_zero_i16(<vscale x 4 x i16>* %a, <vscale x 2 x i16>* %b) #0 {
; CHECK-LABEL: vec_scalable_subvec_scalable_idx_zero_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1h { z1.d }, p1/z, [x1]
; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1h { z1.d }, p1, [sp, #2, mul vl]
; CHECK-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1h { z1.d }, p0/z, [x1]
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
; CHECK-NEXT: ret
%vec = load <vscale x 4 x i16>, <vscale x 4 x i16>* %a
%subvec = load <vscale x 2 x i16>, <vscale x 2 x i16>* %b
@ -72,18 +56,12 @@ define <vscale x 4 x i16> @vec_scalable_subvec_scalable_idx_zero_i16(<vscale x 4
define <vscale x 4 x i16> @vec_scalable_subvec_scalable_idx_nonzero_i16(<vscale x 4 x i16>* %a, <vscale x 2 x i16>* %b) #0 {
; CHECK-LABEL: vec_scalable_subvec_scalable_idx_nonzero_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1h { z1.d }, p1/z, [x1]
; CHECK-NEXT: addpl x8, sp, #4
; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1h { z1.d }, p1, [x8, #1, mul vl]
; CHECK-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1h { z1.d }, p0/z, [x1]
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
%vec = load <vscale x 4 x i16>, <vscale x 4 x i16>* %a
%subvec = load <vscale x 2 x i16>, <vscale x 2 x i16>* %b
@ -101,10 +79,10 @@ define <vscale x 8 x i8> @vec_scalable_subvec_fixed_idx_zero_i8(<vscale x 8 x i8
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: st1b { z0.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addpl x8, sp, #4
; CHECK-NEXT: str d1, [x8]
; CHECK-NEXT: ld1b { z0.h }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: ushll v0.8h, v1.8b, #0
; CHECK-NEXT: str q0, [sp]
; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@ -120,17 +98,19 @@ define <vscale x 8 x i8> @vec_scalable_subvec_fixed_idx_nonzero_i8(<vscale x 8 x
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: cnth x9
; CHECK-NEXT: addpl x10, sp, #4
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: cnth x9
; CHECK-NEXT: sub x9, x9, #8 // =8
; CHECK-NEXT: mov w8, #8
; CHECK-NEXT: cmp x9, #8 // =8
; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: st1b { z0.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: str d1, [x10, x8]
; CHECK-NEXT: ld1b { z0.h }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: lsl x8, x8, #1
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: ushll v0.8h, v1.8b, #0
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: str q0, [x9, x8]
; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@ -148,10 +128,10 @@ define <vscale x 4 x i16> @vec_scalable_subvec_fixed_idx_zero_i16(<vscale x 4 x
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addpl x8, sp, #4
; CHECK-NEXT: str d1, [x8]
; CHECK-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: ushll v0.4s, v1.4h, #0
; CHECK-NEXT: str q0, [sp]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@ -167,18 +147,19 @@ define <vscale x 4 x i16> @vec_scalable_subvec_fixed_idx_nonzero_i16(<vscale x 4
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cntw x9
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: cntw x9
; CHECK-NEXT: sub x9, x9, #4 // =4
; CHECK-NEXT: mov w8, #4
; CHECK-NEXT: cmp x9, #4 // =4
; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: addpl x9, sp, #4
; CHECK-NEXT: lsl x8, x8, #1
; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: str d1, [x9, x8]
; CHECK-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: lsl x8, x8, #2
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: ushll v0.4s, v1.4h, #0
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: str q0, [x9, x8]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@ -196,10 +177,10 @@ define <vscale x 2 x i32> @vec_scalable_subvec_fixed_idx_zero_i32(<vscale x 2 x
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: st1w { z0.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addpl x8, sp, #4
; CHECK-NEXT: str d1, [x8]
; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: ushll v0.2d, v1.2s, #0
; CHECK-NEXT: str q0, [sp]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@ -215,18 +196,19 @@ define <vscale x 2 x i32> @vec_scalable_subvec_fixed_idx_nonzero_i32(<vscale x 2
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cntd x9
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: cntd x9
; CHECK-NEXT: sub x9, x9, #2 // =2
; CHECK-NEXT: mov w8, #2
; CHECK-NEXT: cmp x9, #2 // =2
; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: addpl x9, sp, #4
; CHECK-NEXT: lsl x8, x8, #2
; CHECK-NEXT: st1w { z0.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: str d1, [x9, x8]
; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: lsl x8, x8, #3
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: ushll v0.2d, v1.2s, #0
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: str q0, [x9, x8]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@ -236,25 +218,27 @@ define <vscale x 2 x i32> @vec_scalable_subvec_fixed_idx_nonzero_i32(<vscale x 2
ret <vscale x 2 x i32> %ins
}
define <vscale x 2 x i32> @vec_scalable_subvec_fixed_idx_nonzero_large_i32(<vscale x 2 x i32>* %a, <8 x i32>* %b) #0 {
define <vscale x 2 x i32> @vec_scalable_subvec_fixed_idx_nonzero_large_i32(<vscale x 2 x i32>* %a, <8 x i32>* %b) #1 {
; CHECK-LABEL: vec_scalable_subvec_fixed_idx_nonzero_large_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: cntd x8
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: subs x8, x8, #8 // =8
; CHECK-NEXT: ptrue p1.s, vl8
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT: ldp q1, q2, [x1]
; CHECK-NEXT: ld1w { z1.s }, p1/z, [x1]
; CHECK-NEXT: cntd x8
; CHECK-NEXT: subs x8, x8, #8 // =8
; CHECK-NEXT: csel x8, xzr, x8, lo
; CHECK-NEXT: mov w9, #8
; CHECK-NEXT: cmp x8, #8 // =8
; CHECK-NEXT: ptrue p1.d, vl8
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: uunpklo z0.d, z1.s
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: add x8, x9, x8, lsl #2
; CHECK-NEXT: st1w { z0.d }, p0, [sp]
; CHECK-NEXT: stp q1, q2, [x8]
; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp]
; CHECK-NEXT: st1d { z0.d }, p1, [x9, x8, lsl #3]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
@ -274,3 +258,4 @@ declare <vscale x 2 x i32> @llvm.experimental.vector.insert.nxv2i32.v2i32(<vscal
declare <vscale x 2 x i32> @llvm.experimental.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32>, <8 x i32>, i64)
attributes #0 = { nounwind "target-features"="+sve" }
attributes #1 = { nounwind "target-features"="+sve" vscale_range(4,4) }