[SVE][AArch64] Improve code generation for vector_splice for Imm > 0
This patch implements vector_splice in tablegen for all cases where the immediate is positive and lower than the known minimum number of elements of the scalable vector, so that vector_splice can be selected to the SVE EXT instruction. For instance:

@llvm.experimental.vector.splice(Vector_1, Vector_2, Imm)
@llvm.experimental.vector.splice(<A, B, C, D>, <E, F, G, H>, 1) ==> <B, C, D, E>

EXT Vector_1, Vector_2, Imm // Vector_1 = B, C, D + Vector_2 = E

Depends on D105633

Differential Revision: https://reviews.llvm.org/D106273
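To make the semantics concrete: vector_splice conceptually concatenates its two inputs and takes a window of one result-vector width starting at the immediate. A minimal fixed-width sketch in C++ (hypothetical helper, four elements standing in for one 128-bit granule; not the in-tree implementation):

#include <array>
#include <cassert>
#include <cstddef>

// Fixed-width stand-in for vector_splice: concatenate V1 and V2, then take
// N consecutive elements starting at index Imm (0 <= Imm < N).
template <typename T, std::size_t N>
std::array<T, N> splice(const std::array<T, N> &V1,
                        const std::array<T, N> &V2, std::size_t Imm) {
  assert(Imm < N && "immediate must be lower than the element count");
  std::array<T, N> Res{};
  for (std::size_t I = 0; I < N; ++I)
    Res[I] = (Imm + I < N) ? V1[Imm + I] : V2[Imm + I - N];
  return Res;
}

int main() {
  // splice(<A,B,C,D>, <E,F,G,H>, 1) ==> <B,C,D,E>, as in the example above.
  std::array<char, 4> V1{'A', 'B', 'C', 'D'}, V2{'E', 'F', 'G', 'H'};
  std::array<char, 4> R = splice(V1, V2, 1);
  assert(R == (std::array<char, 4>{'B', 'C', 'D', 'E'}));
}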
This commit is contained in:
parent fd6a38b569
commit 9af238dfc2
@@ -241,6 +241,22 @@ public:
     return false;
   }
 
+  template <signed Max, signed Scale>
+  bool SelectEXTImm(SDValue N, SDValue &Imm) {
+    if (!isa<ConstantSDNode>(N))
+      return false;
+
+    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
+
+    if (MulImm >= 0 && MulImm <= Max) {
+      MulImm *= Scale;
+      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
+      return true;
+    }
+
+    return false;
+  }
+
   /// Form sequences of consecutive 64/128-bit registers for use in NEON
   /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
   /// between 1 and 4 elements. If it contains a single element that is returned
@@ -7426,7 +7426,9 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
 SDValue AArch64TargetLowering::LowerVECTOR_SPLICE(SDValue Op,
                                                   SelectionDAG &DAG) const {
 
-  if (Op.getConstantOperandAPInt(2) == -1)
+  EVT Ty = Op.getValueType();
+  auto Idx = Op.getConstantOperandAPInt(2);
+  if (Idx.sge(-1) && Idx.slt(Ty.getVectorMinNumElements()))
     return Op;
   return SDValue();
 }
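The lowering above keeps a VECTOR_SPLICE node around only when the constant index is -1 (the case from D105633) or non-negative and below the known minimum element count; any other index returns an empty SDValue and falls back to the generic stack-based expansion visible in the test diffs below. A minimal sketch of that gate, with plain integers standing in for APInt:

#include <cassert>
#include <cstdint>

// Sketch of the legality gate: an index in [-1, MinNumElts) keeps the
// VECTOR_SPLICE node for instruction selection (EXT here, or the
// splice-at-(-1) lowering from D105633); anything else is expanded.
bool keepSpliceForIsel(int64_t Idx, uint64_t MinNumElts) {
  return Idx >= -1 && Idx < static_cast<int64_t>(MinNumElts);
}

int main() {
  assert(keepSpliceForIsel(-1, 16));  // trailing-element splice
  assert(keepSpliceForIsel(15, 16));  // largest in-range index for nxv16i8
  assert(!keepSpliceForIsel(16, 16)); // may exceed VL; expand and clamp
  assert(!keepSpliceForIsel(-2, 16)); // negative beyond -1; expand
}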
@@ -2398,6 +2398,16 @@ let Predicates = [HasSVE] in {
             (i32 (UMOVvi32 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>;
   def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), VectorIndexD:$index)),
             (i64 (UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index))>;
+
+  // Splice with lane bigger or equal to 0
+  def : Pat<(nxv16i8 (vector_splice (nxv16i8 ZPR:$Z1), (nxv16i8 ZPR:$Z2), (i64 (sve_ext_imm_0_15 i32:$index)))),
+            (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_15:$index)>;
+  def : Pat<(nxv8i16 (vector_splice (nxv8i16 ZPR:$Z1), (nxv8i16 ZPR:$Z2), (i64 (sve_ext_imm_0_7 i32:$index)))),
+            (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_7:$index)>;
+  def : Pat<(nxv4i32 (vector_splice (nxv4i32 ZPR:$Z1), (nxv4i32 ZPR:$Z2), (i64 (sve_ext_imm_0_3 i32:$index)))),
+            (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_3:$index)>;
+  def : Pat<(nxv2i64 (vector_splice (nxv2i64 ZPR:$Z1), (nxv2i64 ZPR:$Z2), (i64 (sve_ext_imm_0_1 i32:$index)))),
+            (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_1:$index)>;
 }
 
 // Extract first element from vector.
@@ -263,6 +263,12 @@ def sve_incdec_imm : Operand<i32>, TImmLeaf<i32, [{
 def sve_cnt_mul_imm : ComplexPattern<i32, 1, "SelectCntImm<1, 16, 1, false>">;
 def sve_cnt_shl_imm : ComplexPattern<i32, 1, "SelectCntImm<1, 16, 1, true>">;
 
+
+def sve_ext_imm_0_1 : ComplexPattern<i32, 1, "SelectEXTImm<1, 8>">;
+def sve_ext_imm_0_3 : ComplexPattern<i32, 1, "SelectEXTImm<3, 4>">;
+def sve_ext_imm_0_7 : ComplexPattern<i32, 1, "SelectEXTImm<7, 2>">;
+def sve_ext_imm_0_15 : ComplexPattern<i32, 1, "SelectEXTImm<15, 1>">;
+
 def int_aarch64_sve_cntp_oneuse : PatFrag<(ops node:$pred, node:$src2),
                                           (int_aarch64_sve_cntp node:$pred, node:$src2), [{
   return N->hasOneUse();
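For reference, each sve_ext_imm_0_N pattern pairs Max = (known minimum element count) - 1 with Scale = element size in bytes, since EXT's immediate counts bytes rather than elements. A hedged restatement of that mapping in C++, with the (Max, Scale) values taken from the definitions above (illustrative only, not an alternative implementation):

#include <cassert>
#include <cstdint>

// Mirror of SelectEXTImm<Max, Scale>: an element index in [0, Max] is
// scaled by the element size in bytes to form EXT's byte immediate.
bool extImmFromIndex(int64_t Index, int64_t Max, int64_t Scale,
                     int64_t &ByteImm) {
  if (Index < 0 || Index > Max)
    return false; // beyond the known minimum vector length; not selected
  ByteImm = Index * Scale;
  return true;
}

int main() {
  int64_t Imm;
  assert(extImmFromIndex(1, 1, 8, Imm) && Imm == 8);    // nxv2i64
  assert(extImmFromIndex(3, 3, 4, Imm) && Imm == 12);   // nxv4i32
  assert(extImmFromIndex(7, 7, 2, Imm) && Imm == 14);   // nxv8i16
  assert(extImmFromIndex(15, 15, 1, Imm) && Imm == 15); // nxv16i8
  assert(!extImmFromIndex(16, 15, 1, Imm));             // out of range
}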
@@ -10,15 +10,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define <vscale x 16 x i8> @splice_nxv16i8_first_idx(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
 ; CHECK-LABEL: splice_nxv16i8_first_idx:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: st1b { z0.b }, p0, [sp]
-; CHECK-NEXT: st1b { z1.b }, p0, [x8, #1, mul vl]
-; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp]
-; CHECK-NEXT: addvl sp, sp, #2
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ext z0.b, z0.b, z1.b, #0
 ; CHECK-NEXT: ret
   %res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 0)
   ret <vscale x 16 x i8> %res
@@ -27,16 +19,7 @@ define <vscale x 16 x i8> @splice_nxv16i8_first_idx(<vscale x 16 x i8> %a, <vsca
 define <vscale x 16 x i8> @splice_nxv16i8_last_idx(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
 ; CHECK-LABEL: splice_nxv16i8_last_idx:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: st1b { z0.b }, p0, [sp]
-; CHECK-NEXT: st1b { z1.b }, p0, [x8, #1, mul vl]
-; CHECK-NEXT: orr x8, x8, #0xf
-; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8]
-; CHECK-NEXT: addvl sp, sp, #2
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ext z0.b, z0.b, z1.b, #15
 ; CHECK-NEXT: ret
   %res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 15)
   ret <vscale x 16 x i8> %res
@@ -68,15 +51,7 @@ define <vscale x 16 x i8> @splice_nxv16i8_clamped_idx(<vscale x 16 x i8> %a, <vs
 define <vscale x 8 x i16> @splice_nxv8i16_first_idx(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
 ; CHECK-LABEL: splice_nxv8i16_first_idx:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: st1h { z0.h }, p0, [sp]
-; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp]
-; CHECK-NEXT: addvl sp, sp, #2
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ext z0.b, z0.b, z1.b, #0
 ; CHECK-NEXT: ret
   %res = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 0)
   ret <vscale x 8 x i16> %res
@@ -85,16 +60,7 @@ define <vscale x 8 x i16> @splice_nxv8i16_first_idx(<vscale x 8 x i16> %a, <vsca
 define <vscale x 8 x i16> @splice_nxv8i16_last_idx(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
 ; CHECK-LABEL: splice_nxv8i16_last_idx:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: st1h { z0.h }, p0, [sp]
-; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
-; CHECK-NEXT: orr x8, x8, #0xe
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
-; CHECK-NEXT: addvl sp, sp, #2
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ext z0.b, z0.b, z1.b, #14
 ; CHECK-NEXT: ret
   %res = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 7)
   ret <vscale x 8 x i16> %res
@@ -126,15 +92,7 @@ define <vscale x 8 x i16> @splice_nxv8i16_clamped_idx(<vscale x 8 x i16> %a, <vs
 define <vscale x 4 x i32> @splice_nxv4i32_first_idx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
 ; CHECK-LABEL: splice_nxv4i32_first_idx:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: st1w { z0.s }, p0, [sp]
-; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp]
-; CHECK-NEXT: addvl sp, sp, #2
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ext z0.b, z0.b, z1.b, #0
 ; CHECK-NEXT: ret
   %res = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 0)
   ret <vscale x 4 x i32> %res
@@ -143,16 +101,7 @@ define <vscale x 4 x i32> @splice_nxv4i32_first_idx(<vscale x 4 x i32> %a, <vsca
 define <vscale x 4 x i32> @splice_nxv4i32_last_idx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
 ; CHECK-LABEL: splice_nxv4i32_last_idx:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: st1w { z0.s }, p0, [sp]
-; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
-; CHECK-NEXT: orr x8, x8, #0xc
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
-; CHECK-NEXT: addvl sp, sp, #2
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ext z0.b, z0.b, z1.b, #12
 ; CHECK-NEXT: ret
   %res = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 3)
   ret <vscale x 4 x i32> %res
@@ -184,15 +133,7 @@ define <vscale x 4 x i32> @splice_nxv4i32_clamped_idx(<vscale x 4 x i32> %a, <vs
 define <vscale x 2 x i64> @splice_nxv2i64_first_idx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
 ; CHECK-LABEL: splice_nxv2i64_first_idx:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: st1d { z0.d }, p0, [sp]
-; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
-; CHECK-NEXT: addvl sp, sp, #2
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ext z0.b, z0.b, z1.b, #0
 ; CHECK-NEXT: ret
   %res = call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 0)
   ret <vscale x 2 x i64> %res
@@ -201,16 +142,7 @@ define <vscale x 2 x i64> @splice_nxv2i64_first_idx(<vscale x 2 x i64> %a, <vsca
 define <vscale x 2 x i64> @splice_nxv2i64_last_idx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
 ; CHECK-LABEL: splice_nxv2i64_last_idx:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: st1d { z0.d }, p0, [sp]
-; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
-; CHECK-NEXT: orr x8, x8, #0x8
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
-; CHECK-NEXT: addvl sp, sp, #2
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ext z0.b, z0.b, z1.b, #8
 ; CHECK-NEXT: ret
   %res = call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 1)
   ret <vscale x 2 x i64> %res
@@ -271,6 +203,49 @@ define <vscale x 2 x half> @splice_nxv2f16_neg2_idx(<vscale x 2 x half> %a, <vsc
   ret <vscale x 2 x half> %res
 }
 
+define <vscale x 2 x half> @splice_nxv2f16_first_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
+; CHECK-LABEL: splice_nxv2f16_first_idx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ext z0.b, z0.b, z1.b, #0
+; CHECK-NEXT: ret
+  %res = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 0)
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 2 x half> @splice_nxv2f16_1_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
+; CHECK-LABEL: splice_nxv2f16_1_idx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ext z0.b, z0.b, z1.b, #8
+; CHECK-NEXT: ret
+  %res = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 1)
+  ret <vscale x 2 x half> %res
+}
+
+; Ensure index is clamped when we cannot prove it's less than VL-1.
+define <vscale x 2 x half> @splice_nxv2f16_last_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
+; CHECK-LABEL: splice_nxv2f16_last_idx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: cntd x10
+; CHECK-NEXT: sub x10, x10, #1 // =1
+; CHECK-NEXT: mov w9, #2
+; CHECK-NEXT: cmp x10, #2 // =2
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: csel x9, x10, x9, lo
+; CHECK-NEXT: ptrue p1.b
+; CHECK-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
+; CHECK-NEXT: lsl x9, x9, #3
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+  %res = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 2)
+  ret <vscale x 2 x half> %res
+}
+
 define <vscale x 4 x half> @splice_nxv4f16_neg_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
 ; CHECK-LABEL: splice_nxv4f16_neg_idx:
 ; CHECK: // %bb.0:
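Note how the byte immediates come out in these nxv2f16 tests: for unpacked types the element stride is the 128-bit granule divided by the known minimum element count, not the element's own size, which is why splicing at index 1 becomes ext ..., #8. A small sketch of that arithmetic (hypothetical helper; 16 bytes per 128-bit granule):

// Byte immediate for EXT on a scalable type, packed or unpacked:
// stride = 16 bytes (one 128-bit granule) / minimum element count.
constexpr unsigned extByteImm(unsigned MinElts, unsigned Index) {
  return Index * (16 / MinElts);
}

static_assert(extByteImm(2, 1) == 8,  "nxv2f16, i32 1 -> ext #8");
static_assert(extByteImm(4, 3) == 12, "nxv4f16, i32 3 -> ext #12");
static_assert(extByteImm(8, 7) == 14, "nxv8f16, i32 7 -> ext #14");

int main() {}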
@@ -303,18 +278,54 @@ define <vscale x 4 x half> @splice_nxv4f16_neg3_idx(<vscale x 4 x half> %a, <vsc
   ret <vscale x 4 x half> %res
 }
 
-define <vscale x 8 x half> @splice_nxv8f16_first_idx(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: splice_nxv8f16_first_idx:
+define <vscale x 4 x half> @splice_nxv4f16_first_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
+; CHECK-LABEL: splice_nxv4f16_first_idx:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: ext z0.b, z0.b, z1.b, #0
+; CHECK-NEXT: ret
+  %res = call <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 0)
+  ret <vscale x 4 x half> %res
+}
+
+define <vscale x 4 x half> @splice_nxv4f16_3_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
+; CHECK-LABEL: splice_nxv4f16_3_idx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ext z0.b, z0.b, z1.b, #12
+; CHECK-NEXT: ret
+  %res = call <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 3)
+  ret <vscale x 4 x half> %res
+}
+
+; Ensure index is clamped when we cannot prove it's less than VL-1.
+define <vscale x 4 x half> @splice_nxv4f16_last_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
+; CHECK-LABEL: splice_nxv4f16_last_idx:
+; CHECK: // %bb.0:
 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: cntw x10
+; CHECK-NEXT: sub x10, x10, #1 // =1
+; CHECK-NEXT: mov w9, #4
+; CHECK-NEXT: cmp x10, #4 // =4
 ; CHECK-NEXT: ptrue p0.h
 ; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: csel x9, x10, x9, lo
+; CHECK-NEXT: ptrue p1.b
 ; CHECK-NEXT: st1h { z0.h }, p0, [sp]
 ; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK-NEXT: lsl x9, x9, #2
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
 ; CHECK-NEXT: addvl sp, sp, #2
 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
+  %res = call <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 4)
+  ret <vscale x 4 x half> %res
+}
+
+
+define <vscale x 8 x half> @splice_nxv8f16_first_idx(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: splice_nxv8f16_first_idx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ext z0.b, z0.b, z1.b, #0
+; CHECK-NEXT: ret
   %res = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 0)
   ret <vscale x 8 x half> %res
@@ -323,16 +334,7 @@ define <vscale x 8 x half> @splice_nxv8f16_first_idx(<vscale x 8 x half> %a, <vs
 define <vscale x 8 x half> @splice_nxv8f16_last_idx(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
 ; CHECK-LABEL: splice_nxv8f16_last_idx:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: st1h { z0.h }, p0, [sp]
-; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
-; CHECK-NEXT: orr x8, x8, #0xe
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
-; CHECK-NEXT: addvl sp, sp, #2
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ext z0.b, z0.b, z1.b, #14
 ; CHECK-NEXT: ret
   %res = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 7)
   ret <vscale x 8 x half> %res
@@ -393,18 +395,53 @@ define <vscale x 2 x float> @splice_nxv2f32_neg2_idx(<vscale x 2 x float> %a, <v
   ret <vscale x 2 x float> %res
 }
 
-define <vscale x 4 x float> @splice_nxv4f32_first_idx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: splice_nxv4f32_first_idx:
+define <vscale x 2 x float> @splice_nxv2f32_first_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
+; CHECK-LABEL: splice_nxv2f32_first_idx:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: ext z0.b, z0.b, z1.b, #0
+; CHECK-NEXT: ret
+  %res = call <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 0)
+  ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x float> @splice_nxv2f32_1_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
+; CHECK-LABEL: splice_nxv2f32_1_idx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ext z0.b, z0.b, z1.b, #8
+; CHECK-NEXT: ret
+  %res = call <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 1)
+  ret <vscale x 2 x float> %res
+}
+
+; Ensure index is clamped when we cannot prove it's less than VL-1.
+define <vscale x 2 x float> @splice_nxv2f32_last_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
+; CHECK-LABEL: splice_nxv2f32_last_idx:
+; CHECK: // %bb.0:
 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: cntd x10
+; CHECK-NEXT: sub x10, x10, #1 // =1
+; CHECK-NEXT: mov w9, #2
+; CHECK-NEXT: cmp x10, #2 // =2
 ; CHECK-NEXT: ptrue p0.s
 ; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: csel x9, x10, x9, lo
+; CHECK-NEXT: ptrue p1.b
 ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
 ; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp]
+; CHECK-NEXT: lsl x9, x9, #3
+; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
 ; CHECK-NEXT: addvl sp, sp, #2
 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
+  %res = call <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 2)
+  ret <vscale x 2 x float> %res
+}
+
+define <vscale x 4 x float> @splice_nxv4f32_first_idx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: splice_nxv4f32_first_idx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ext z0.b, z0.b, z1.b, #0
+; CHECK-NEXT: ret
   %res = call <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 0)
   ret <vscale x 4 x float> %res
@@ -413,16 +450,7 @@ define <vscale x 4 x float> @splice_nxv4f32_first_idx(<vscale x 4 x float> %a, <
 define <vscale x 4 x float> @splice_nxv4f32_last_idx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
 ; CHECK-LABEL: splice_nxv4f32_last_idx:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: st1w { z0.s }, p0, [sp]
-; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
-; CHECK-NEXT: orr x8, x8, #0xc
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
-; CHECK-NEXT: addvl sp, sp, #2
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ext z0.b, z0.b, z1.b, #12
 ; CHECK-NEXT: ret
   %res = call <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 3)
   ret <vscale x 4 x float> %res
@@ -454,15 +482,7 @@ define <vscale x 4 x float> @splice_nxv4f32_clamped_idx(<vscale x 4 x float> %a,
 define <vscale x 2 x double> @splice_nxv2f64_first_idx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
 ; CHECK-LABEL: splice_nxv2f64_first_idx:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: st1d { z0.d }, p0, [sp]
-; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
-; CHECK-NEXT: addvl sp, sp, #2
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ext z0.b, z0.b, z1.b, #0
 ; CHECK-NEXT: ret
   %res = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 0)
   ret <vscale x 2 x double> %res
@@ -471,16 +491,7 @@ define <vscale x 2 x double> @splice_nxv2f64_first_idx(<vscale x 2 x double> %a,
 define <vscale x 2 x double> @splice_nxv2f64_last_idx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
 ; CHECK-LABEL: splice_nxv2f64_last_idx:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: st1d { z0.d }, p0, [sp]
-; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
-; CHECK-NEXT: orr x8, x8, #0x8
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
-; CHECK-NEXT: addvl sp, sp, #2
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ext z0.b, z0.b, z1.b, #8
 ; CHECK-NEXT: ret
   %res = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 1)
   ret <vscale x 2 x double> %res
@@ -513,20 +524,12 @@ define <vscale x 2 x double> @splice_nxv2f64_clamped_idx(<vscale x 2 x double> %
 define <vscale x 2 x i1> @splice_nxv2i1_idx(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) #0 {
 ; CHECK-LABEL: splice_nxv2i1_idx:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1
+; CHECK-NEXT: mov z0.d, p1/z, #1 // =0x1
+; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1
+; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
+; CHECK-NEXT: and z1.d, z1.d, #0x1
 ; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z1.d, p1/z, #1 // =0x1
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: st1d { z0.d }, p0, [sp]
-; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
-; CHECK-NEXT: orr x8, x8, #0x8
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
-; CHECK-NEXT: and z0.d, z0.d, #0x1
-; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
-; CHECK-NEXT: addvl sp, sp, #2
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
 ; CHECK-NEXT: ret
   %res = call <vscale x 2 x i1> @llvm.experimental.vector.splice.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, i32 1)
   ret <vscale x 2 x i1> %res
@@ -536,20 +539,12 @@ define <vscale x 2 x i1> @splice_nxv2i1_idx(<vscale x 2 x i1> %a, <vscale x 2 x
 define <vscale x 4 x i1> @splice_nxv4i1_idx(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) #0 {
 ; CHECK-LABEL: splice_nxv4i1_idx:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
+; CHECK-NEXT: mov z0.s, p1/z, #1 // =0x1
+; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1
+; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
+; CHECK-NEXT: and z1.s, z1.s, #0x1
 ; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z1.s, p1/z, #1 // =0x1
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: st1w { z0.s }, p0, [sp]
-; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
-; CHECK-NEXT: orr x8, x8, #0x8
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
-; CHECK-NEXT: and z0.s, z0.s, #0x1
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: addvl sp, sp, #2
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
 ; CHECK-NEXT: ret
   %res = call <vscale x 4 x i1> @llvm.experimental.vector.splice.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, i32 2)
   ret <vscale x 4 x i1> %res
@@ -559,20 +554,12 @@ define <vscale x 4 x i1> @splice_nxv4i1_idx(<vscale x 4 x i1> %a, <vscale x 4 x
 define <vscale x 8 x i1> @splice_nxv8i1_idx(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) #0 {
 ; CHECK-LABEL: splice_nxv8i1_idx:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1
+; CHECK-NEXT: mov z0.h, p1/z, #1 // =0x1
+; CHECK-NEXT: mov z1.h, p0/z, #1 // =0x1
+; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
+; CHECK-NEXT: and z1.h, z1.h, #0x1
 ; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z1.h, p1/z, #1 // =0x1
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: st1h { z0.h }, p0, [sp]
-; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
-; CHECK-NEXT: orr x8, x8, #0x8
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
-; CHECK-NEXT: and z0.h, z0.h, #0x1
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: addvl sp, sp, #2
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, #0
 ; CHECK-NEXT: ret
   %res = call <vscale x 8 x i1> @llvm.experimental.vector.splice.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, i32 4)
   ret <vscale x 8 x i1> %res
@@ -582,20 +569,12 @@ define <vscale x 8 x i1> @splice_nxv8i1_idx(<vscale x 8 x i1> %a, <vscale x 8 x
 define <vscale x 16 x i1> @splice_nxv16i1_idx(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) #0 {
 ; CHECK-LABEL: splice_nxv16i1_idx:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
+; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1
+; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1
+; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
+; CHECK-NEXT: and z1.b, z1.b, #0x1
 ; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mov z1.b, p1/z, #1 // =0x1
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: st1b { z0.b }, p0, [sp]
-; CHECK-NEXT: st1b { z1.b }, p0, [x8, #1, mul vl]
-; CHECK-NEXT: orr x8, x8, #0x8
-; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8]
-; CHECK-NEXT: and z0.b, z0.b, #0x1
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: addvl sp, sp, #2
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0
 ; CHECK-NEXT: ret
   %res = call <vscale x 16 x i1> @llvm.experimental.vector.splice.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, i32 8)
   ret <vscale x 16 x i1> %res
@@ -605,16 +584,7 @@ define <vscale x 16 x i1> @splice_nxv16i1_idx(<vscale x 16 x i1> %a, <vscale x 1
 define <vscale x 2 x i8> @splice_nxv2i8_idx(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b) #0 {
 ; CHECK-LABEL: splice_nxv2i8_idx:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: st1d { z0.d }, p0, [sp]
-; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
-; CHECK-NEXT: orr x8, x8, #0x8
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
-; CHECK-NEXT: addvl sp, sp, #2
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ext z0.b, z0.b, z1.b, #8
 ; CHECK-NEXT: ret
   %res = call <vscale x 2 x i8> @llvm.experimental.vector.splice.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, i32 1)
   ret <vscale x 2 x i8> %res