[AArch64][SVE] Improve code generation for vector_splice for Imm == -1
This patch implements vector_splice in tablegen for:
  a) when the immediate is equal to -1 (Imm == -1), using: INSR + LASTB

For instance:
  @llvm.experimental.vector.splice(Vector_1, Vector_2, -1)
  @llvm.experimental.vector.splice(<A, B, C, D>, <E, F, G, H>, -1) ==> <D, E, F, G>
    LASTB RegLast, Vector_1            // RegLast = D
    INSR  Res, Vector_2, RegLast       // Res = <D, E, F, G>

Differential Revision: https://reviews.llvm.org/D105633
commit d9b910d9d2
parent 96fce2fe63
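For reference, here is a minimal sketch of the case this patch improves (the function name and the attribute line are illustrative, not part of the patch; the expected instructions are taken from the updated splice_nxv4i32_1 test below). A splice with an immediate of -1 is now selected to a LASTB of the first operand followed by an INSR into the second operand, instead of spilling both vectors to the stack and reloading at an offset:

; Splice two scalable <vscale x 4 x i32> vectors at index -1: the result is the
; last element of %a followed by the first (vscale*4 - 1) elements of %b.
define <vscale x 4 x i32> @splice_last_of_a_into_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
  %res = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 -1)
  ret <vscale x 4 x i32> %res
}
declare <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
attributes #0 = { nounwind "target-features"="+sve" }

; With this patch the call above lowers to roughly:
;   ptrue p0.s
;   lastb s0, p0, z0.s   // s0 = last element of %a
;   insr  z1.s, s0       // shift %b up by one element and insert s0 at element 0
;   mov   z0.d, z1.d
;   ret
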
@@ -895,6 +895,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::VECTOR_SPLICE);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::CONCAT_VECTORS);
@@ -1159,6 +1160,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MULHS, VT, Custom);
setOperationAction(ISD::MULHU, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SDIV, VT, Custom);
@@ -1282,6 +1284,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);

setOperationAction(ISD::SELECT_CC, VT, Expand);
}
@@ -1559,6 +1562,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setOperationAction(ISD::SMAX, VT, Custom);
setOperationAction(ISD::SMIN, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
@@ -4911,6 +4915,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
/*OverrideNEON=*/true);
case ISD::CTTZ:
return LowerCTTZ(Op, DAG);
case ISD::VECTOR_SPLICE:
return LowerVECTOR_SPLICE(Op, DAG);
}
}

@@ -7417,6 +7423,14 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
return CS1;
}

SDValue AArch64TargetLowering::LowerVECTOR_SPLICE(SDValue Op,
SelectionDAG &DAG) const {

if (Op.getConstantOperandAPInt(2) == -1)
return Op;
return SDValue();
}

SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
SelectionDAG &DAG) const {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
@@ -16487,6 +16501,28 @@ performInsertVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
return performPostLD1Combine(N, DCI, true);
}

SDValue performSVESpliceCombine(SDNode *N, SelectionDAG &DAG) {
EVT Ty = N->getValueType(0);
if (Ty.isInteger())
return SDValue();

EVT IntTy = Ty.changeVectorElementTypeToInteger();
EVT ExtIntTy = getPackedSVEVectorVT(IntTy.getVectorElementCount());
if (ExtIntTy.getVectorElementType().getScalarSizeInBits() <
IntTy.getVectorElementType().getScalarSizeInBits())
return SDValue();

SDLoc DL(N);
SDValue LHS = DAG.getAnyExtOrTrunc(DAG.getBitcast(IntTy, N->getOperand(0)),
DL, ExtIntTy);
SDValue RHS = DAG.getAnyExtOrTrunc(DAG.getBitcast(IntTy, N->getOperand(1)),
DL, ExtIntTy);
SDValue Idx = N->getOperand(2);
SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, ExtIntTy, LHS, RHS, Idx);
SDValue Trunc = DAG.getAnyExtOrTrunc(Splice, DL, IntTy);
return DAG.getBitcast(Ty, Trunc);
}

SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -16539,6 +16575,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
break;
case ISD::STORE:
return performSTORECombine(N, DCI, DAG, Subtarget);
case ISD::VECTOR_SPLICE:
return performSVESpliceCombine(N, DAG);
case AArch64ISD::BRCOND:
return performBRCONDCombine(N, DCI, DAG);
case AArch64ISD::TBNZ:

@@ -948,6 +948,7 @@ private:
SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
bool OverrideNEON = false) const;
SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;

@@ -1226,6 +1226,20 @@ let Predicates = [HasSVE] in {
def : Pat<(nxv8bf16 (concat_vectors nxv4bf16:$v1, nxv4bf16:$v2)),
(UZP1_ZZZ_H $v1, $v2)>;

// Splice with lane equal to -1
def : Pat<(nxv16i8 (vector_splice (nxv16i8 ZPR:$Z1), (nxv16i8 ZPR:$Z2), (i64 -1))),
(INSR_ZV_B ZPR:$Z2, (INSERT_SUBREG (IMPLICIT_DEF),
(LASTB_VPZ_B (PTRUE_B 31), ZPR:$Z1), bsub))>;
def : Pat<(nxv8i16 (vector_splice (nxv8i16 ZPR:$Z1), (nxv8i16 ZPR:$Z2), (i64 -1))),
(INSR_ZV_H ZPR:$Z2, (INSERT_SUBREG (IMPLICIT_DEF),
(LASTB_VPZ_H (PTRUE_H 31), ZPR:$Z1), hsub))>;
def : Pat<(nxv4i32 (vector_splice (nxv4i32 ZPR:$Z1), (nxv4i32 ZPR:$Z2), (i64 -1))),
(INSR_ZV_S ZPR:$Z2, (INSERT_SUBREG (IMPLICIT_DEF),
(LASTB_VPZ_S (PTRUE_S 31), ZPR:$Z1), ssub))>;
def : Pat<(nxv2i64 (vector_splice (nxv2i64 ZPR:$Z1), (nxv2i64 ZPR:$Z2), (i64 -1))),
(INSR_ZV_D ZPR:$Z2, (INSERT_SUBREG (IMPLICIT_DEF),
(LASTB_VPZ_D (PTRUE_D 31), ZPR:$Z1), dsub))>;

defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", SETUGE, SETULE>;
defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", SETUGT, SETULT>;
defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", SETGE, SETLE>;

@@ -239,6 +239,70 @@ define <vscale x 2 x i64> @splice_nxv2i64_clamped_idx(<vscale x 2 x i64> %a, <vs
ret <vscale x 2 x i64> %res
}

define <vscale x 2 x half> @splice_nxv2f16_neg_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
; CHECK-LABEL: splice_nxv2f16_neg_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: lastb d0, p0, z0.d
; CHECK-NEXT: insr z1.d, d0
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 -1)
ret <vscale x 2 x half> %res
}

define <vscale x 2 x half> @splice_nxv2f16_neg2_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
; CHECK-LABEL: splice_nxv2f16_neg2_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: mov x9, #-16
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%res = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 -2)
ret <vscale x 2 x half> %res
}

define <vscale x 4 x half> @splice_nxv4f16_neg_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
; CHECK-LABEL: splice_nxv4f16_neg_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: lastb s0, p0, z0.s
; CHECK-NEXT: insr z1.s, s0
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
%res = call <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 -1)
ret <vscale x 4 x half> %res
}

define <vscale x 4 x half> @splice_nxv4f16_neg3_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
; CHECK-LABEL: splice_nxv4f16_neg3_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: mov x9, #-12
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%res = call <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 -3)
ret <vscale x 4 x half> %res
}

define <vscale x 8 x half> @splice_nxv8f16_first_idx(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: splice_nxv8f16_first_idx:
; CHECK: // %bb.0:
@@ -297,6 +361,38 @@ define <vscale x 8 x half> @splice_nxv8f16_clamped_idx(<vscale x 8 x half> %a, <
ret <vscale x 8 x half> %res
}

define <vscale x 2 x float> @splice_nxv2f32_neg_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
; CHECK-LABEL: splice_nxv2f32_neg_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: lastb d0, p0, z0.d
; CHECK-NEXT: insr z1.d, d0
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 -1)
ret <vscale x 2 x float> %res
}

define <vscale x 2 x float> @splice_nxv2f32_neg2_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
; CHECK-LABEL: splice_nxv2f32_neg2_idx:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: mov x9, #-16
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%res = call <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 -2)
ret <vscale x 2 x float> %res
}

define <vscale x 4 x float> @splice_nxv4f32_first_idx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: splice_nxv4f32_first_idx:
; CHECK: // %bb.0:
@@ -605,17 +701,10 @@ define <vscale x 16 x i8> @splice_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x
define <vscale x 16 x i8> @splice_nxv16i8_1(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: splice_nxv16i8_1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: st1b { z0.b }, p0, [sp]
; CHECK-NEXT: st1b { z1.b }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: mov x9, #-1
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: lastb b0, p0, z0.b
; CHECK-NEXT: insr z1.b, b0
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
%res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -1)
ret <vscale x 16 x i8> %res
@@ -668,18 +757,10 @@ define <vscale x 8 x i16> @splice_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i
define <vscale x 8 x i16> @splice_nxv8i16_1(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: splice_nxv8i16_1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: mov x9, #-2
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: lastb h0, p0, z0.h
; CHECK-NEXT: insr z1.h, h0
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
%res = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 -1)
ret <vscale x 8 x i16> %res
@@ -732,18 +813,10 @@ define <vscale x 4 x i32> @splice_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i
define <vscale x 4 x i32> @splice_nxv4i32_1(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: splice_nxv4i32_1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: mov x9, #-4
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: lastb s0, p0, z0.s
; CHECK-NEXT: insr z1.s, s0
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
%res = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 -1)
ret <vscale x 4 x i32> %res
@@ -796,18 +869,10 @@ define <vscale x 2 x i64> @splice_nxv2i64(<vscale x 2 x i
define <vscale x 2 x i64> @splice_nxv2i64_1(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: splice_nxv2i64_1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: mov x9, #-8
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: lastb d0, p0, z0.d
; CHECK-NEXT: insr z1.d, d0
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 -1)
ret <vscale x 2 x i64> %res
@@ -860,18 +925,10 @@ define <vscale x 8 x half> @splice_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x
define <vscale x 8 x half> @splice_nxv8f16_1(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: splice_nxv8f16_1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: mov x9, #-2
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: lastb h0, p0, z0.h
; CHECK-NEXT: insr z1.h, h0
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
%res = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 -1)
ret <vscale x 8 x half> %res
@@ -924,18 +981,10 @@ define <vscale x 4 x float> @splice_nxv4f32(<vscale x 4 x float> %a, <vscale x 4
define <vscale x 4 x float> @splice_nxv4f32_1(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: splice_nxv4f32_1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: mov x9, #-4
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: lastb s0, p0, z0.s
; CHECK-NEXT: insr z1.s, s0
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
%res = call <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 -1)
ret <vscale x 4 x float> %res
@@ -988,18 +1037,10 @@ define <vscale x 2 x double> @splice_nxv2f64(<vscale x 2 x double> %a, <vscale x
define <vscale x 2 x double> @splice_nxv2f64_1(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: splice_nxv2f64_1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: mov x9, #-8
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: lastb d0, p0, z0.d
; CHECK-NEXT: insr z1.d, d0
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 -1)
ret <vscale x 2 x double> %res
@@ -1033,22 +1074,13 @@ define <vscale x 2 x double> @splice_nxv2f64_clamped(<vscale x 2 x double> %a, <
define <vscale x 2 x i1> @splice_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) #0 {
; CHECK-LABEL: splice_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: lastb d0, p0, z0.d
; CHECK-NEXT: mov z1.d, p1/z, #1 // =0x1
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: mov x9, #-8
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: and z0.d, z0.d, #0x1
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: insr z1.d, d0
; CHECK-NEXT: and z1.d, z1.d, #0x1
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
; CHECK-NEXT: ret
%res = call <vscale x 2 x i1> @llvm.experimental.vector.splice.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, i32 -1)
ret <vscale x 2 x i1> %res
@@ -1058,22 +1090,13 @@ define <vscale x 4 x i1> @splice_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1>
define <vscale x 4 x i1> @splice_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) #0 {
; CHECK-LABEL: splice_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: lastb s0, p0, z0.s
; CHECK-NEXT: mov z1.s, p1/z, #1 // =0x1
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: mov x9, #-4
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: and z0.s, z0.s, #0x1
; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: insr z1.s, s0
; CHECK-NEXT: and z1.s, z1.s, #0x1
; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
; CHECK-NEXT: ret
%res = call <vscale x 4 x i1> @llvm.experimental.vector.splice.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, i32 -1)
ret <vscale x 4 x i1> %res
@@ -1083,22 +1106,13 @@ define <vscale x 8 x i1> @splice_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1>
define <vscale x 8 x i1> @splice_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) #0 {
; CHECK-LABEL: splice_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: lastb h0, p0, z0.h
; CHECK-NEXT: mov z1.h, p1/z, #1 // =0x1
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: mov x9, #-2
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9]
; CHECK-NEXT: and z0.h, z0.h, #0x1
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: insr z1.h, h0
; CHECK-NEXT: and z1.h, z1.h, #0x1
; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, #0
; CHECK-NEXT: ret
%res = call <vscale x 8 x i1> @llvm.experimental.vector.splice.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, i32 -1)
ret <vscale x 8 x i1> %res
@@ -1108,21 +1122,13 @@ define <vscale x 16 x i1> @splice_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1>
define <vscale x 16 x i1> @splice_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) #0 {
; CHECK-LABEL: splice_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: lastb b0, p0, z0.b
; CHECK-NEXT: mov z1.b, p1/z, #1 // =0x1
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: st1b { z0.b }, p0, [sp]
; CHECK-NEXT: st1b { z1.b }, p0, [x8, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: mov x9, #-1
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8, x9]
; CHECK-NEXT: and z0.b, z0.b, #0x1
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: insr z1.b, b0
; CHECK-NEXT: and z1.b, z1.b, #0x1
; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0
; CHECK-NEXT: ret
%res = call <vscale x 16 x i1> @llvm.experimental.vector.splice.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, i32 -1)
ret <vscale x 16 x i1> %res
@@ -1217,7 +1223,10 @@ declare <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8
declare <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 8 x i32> @llvm.experimental.vector.splice.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, i32)
declare <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
declare <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, i32)
declare <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, i32)
declare <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
declare <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i32)
declare <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 16 x float> @llvm.experimental.vector.splice.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, i32)
declare <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)