mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 12:43:36 +01:00
[ARM][MVE] Add intrinsics for immediate shifts. (reland)
This adds the family of `vshlq_n` and `vshrq_n` ACLE intrinsics, which shift every lane of a vector left or right by a compile-time immediate. They mostly work by expanding to the IR `shl`, `lshr` and `ashr` operations, with their second operand being a vector splat of the immediate. There's a fiddly special case, though. ACLE specifies that the immediate in `vshrq_n` can take values up to //and including// the bit size of the vector lane. But LLVM IR thinks that shifting right by the full size of the lane is UB, and feels free to replace the `lshr` with an `undef` half way through the optimization pipeline. Hence, to keep this legal in source code, I have to detect it at codegen time. Logical (unsigned) right shifts by the element size are handled by simply emitting the zero vector; arithmetic ones are converted into a shift of one bit less, which will always give the same output. In order to do that check, I also had to enhance the tablegen MveEmitter so that it can cope with converting a builtin function's operand into a bare integer to pass to a code-generating subfunction. Previously the only bare integers it knew how to handle were flags generated from within `arm_mve.td`. Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard Reviewed By: dmgreen, MarkMurrayARM Subscribers: echristo, hokein, rdhindsa, kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D71065
This commit is contained in:
parent
874a0827bb
commit
4dbfeb87c3
@ -913,6 +913,14 @@ defm int_arm_mve_vstr_scatter_offset: MVEPredicated<
|
||||
[], [llvm_anyptr_ty, llvm_anyvector_ty, llvm_anyvector_ty,
|
||||
llvm_i32_ty, llvm_i32_ty], llvm_anyvector_ty, [IntrWriteMem]>;
|
||||
|
||||
def int_arm_mve_shl_imm_predicated: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>],
|
||||
[IntrNoMem]>;
|
||||
def int_arm_mve_shr_imm_predicated: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, // extra i32 is unsigned flag
|
||||
llvm_anyvector_ty, LLVMMatchType<0>],
|
||||
[IntrNoMem]>;
|
||||
|
||||
// MVE scalar shifts.
|
||||
class ARM_MVE_qrshift_single<list<LLVMType> value,
|
||||
list<LLVMType> saturate = []> :
|
||||
|
@ -2816,27 +2816,39 @@ def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm)> {
|
||||
let Inst{21} = 0b1;
|
||||
}
|
||||
|
||||
multiclass MVE_immediate_shift_patterns_inner<
|
||||
MVEVectorVTInfo VTI, Operand imm_operand_type, SDNode unpred_op,
|
||||
Intrinsic pred_int, Instruction inst, list<int> unsignedFlag = []> {
|
||||
|
||||
def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$src), imm_operand_type:$imm)),
|
||||
(VTI.Vec (inst (VTI.Vec MQPR:$src), imm_operand_type:$imm))>;
|
||||
|
||||
def : Pat<(VTI.Vec !con((pred_int (VTI.Vec MQPR:$src), imm_operand_type:$imm),
|
||||
!dag(pred_int, unsignedFlag, ?),
|
||||
(pred_int (VTI.Pred VCCR:$mask),
|
||||
(VTI.Vec MQPR:$inactive)))),
|
||||
(VTI.Vec (inst (VTI.Vec MQPR:$src), imm_operand_type:$imm,
|
||||
ARMVCCThen, (VTI.Pred VCCR:$mask),
|
||||
(VTI.Vec MQPR:$inactive)))>;
|
||||
}
|
||||
|
||||
multiclass MVE_immediate_shift_patterns<MVEVectorVTInfo VTI,
|
||||
Operand imm_operand_type> {
|
||||
defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
|
||||
ARMvshlImm, int_arm_mve_shl_imm_predicated,
|
||||
!cast<Instruction>("MVE_VSHL_immi" # VTI.BitsSuffix)>;
|
||||
defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
|
||||
ARMvshruImm, int_arm_mve_shr_imm_predicated,
|
||||
!cast<Instruction>("MVE_VSHR_immu" # VTI.BitsSuffix), [1]>;
|
||||
defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
|
||||
ARMvshrsImm, int_arm_mve_shr_imm_predicated,
|
||||
!cast<Instruction>("MVE_VSHR_imms" # VTI.BitsSuffix), [0]>;
|
||||
}
|
||||
|
||||
let Predicates = [HasMVEInt] in {
|
||||
def : Pat<(v4i32 (ARMvshlImm (v4i32 MQPR:$src), imm0_31:$imm)),
|
||||
(v4i32 (MVE_VSHL_immi32 (v4i32 MQPR:$src), imm0_31:$imm))>;
|
||||
def : Pat<(v8i16 (ARMvshlImm (v8i16 MQPR:$src), imm0_15:$imm)),
|
||||
(v8i16 (MVE_VSHL_immi16 (v8i16 MQPR:$src), imm0_15:$imm))>;
|
||||
def : Pat<(v16i8 (ARMvshlImm (v16i8 MQPR:$src), imm0_7:$imm)),
|
||||
(v16i8 (MVE_VSHL_immi8 (v16i8 MQPR:$src), imm0_7:$imm))>;
|
||||
|
||||
def : Pat<(v4i32 (ARMvshruImm (v4i32 MQPR:$src), imm0_31:$imm)),
|
||||
(v4i32 (MVE_VSHR_immu32 (v4i32 MQPR:$src), imm0_31:$imm))>;
|
||||
def : Pat<(v8i16 (ARMvshruImm (v8i16 MQPR:$src), imm0_15:$imm)),
|
||||
(v8i16 (MVE_VSHR_immu16 (v8i16 MQPR:$src), imm0_15:$imm))>;
|
||||
def : Pat<(v16i8 (ARMvshruImm (v16i8 MQPR:$src), imm0_7:$imm)),
|
||||
(v16i8 (MVE_VSHR_immu8 (v16i8 MQPR:$src), imm0_7:$imm))>;
|
||||
|
||||
def : Pat<(v4i32 (ARMvshrsImm (v4i32 MQPR:$src), imm0_31:$imm)),
|
||||
(v4i32 (MVE_VSHR_imms32 (v4i32 MQPR:$src), imm0_31:$imm))>;
|
||||
def : Pat<(v8i16 (ARMvshrsImm (v8i16 MQPR:$src), imm0_15:$imm)),
|
||||
(v8i16 (MVE_VSHR_imms16 (v8i16 MQPR:$src), imm0_15:$imm))>;
|
||||
def : Pat<(v16i8 (ARMvshrsImm (v16i8 MQPR:$src), imm0_7:$imm)),
|
||||
(v16i8 (MVE_VSHR_imms8 (v16i8 MQPR:$src), imm0_7:$imm))>;
|
||||
defm : MVE_immediate_shift_patterns<MVE_v16i8, imm0_7>;
|
||||
defm : MVE_immediate_shift_patterns<MVE_v8i16, imm0_15>;
|
||||
defm : MVE_immediate_shift_patterns<MVE_v4i32, imm0_31>;
|
||||
}
|
||||
|
||||
// end of mve_shift instructions
|
||||
|
398
test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll
Normal file
398
test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll
Normal file
@ -0,0 +1,398 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vshlq_n_s8(<16 x i8> %a) {
|
||||
; CHECK-LABEL: test_vshlq_n_s8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vshl.i8 q0, q0, #5
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = shl <16 x i8> %a, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
|
||||
ret <16 x i8> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vshlq_n_s16(<8 x i16> %a) {
|
||||
; CHECK-LABEL: test_vshlq_n_s16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vshl.i16 q0, q0, #5
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = shl <8 x i16> %a, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
|
||||
ret <8 x i16> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vshlq_n_s32(<4 x i32> %a) {
|
||||
; CHECK-LABEL: test_vshlq_n_s32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vshl.i32 q0, q0, #18
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = shl <4 x i32> %a, <i32 18, i32 18, i32 18, i32 18>
|
||||
ret <4 x i32> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) {
|
||||
; CHECK-LABEL: test_vshrq_n_s8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vshr.s8 q0, q0, #4
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = ashr <16 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
|
||||
ret <16 x i8> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) {
|
||||
; CHECK-LABEL: test_vshrq_n_s16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vshr.s16 q0, q0, #10
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = ashr <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
|
||||
ret <8 x i16> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) {
|
||||
; CHECK-LABEL: test_vshrq_n_s32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vshr.s32 q0, q0, #19
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = ashr <4 x i32> %a, <i32 19, i32 19, i32 19, i32 19>
|
||||
ret <4 x i32> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) {
|
||||
; CHECK-LABEL: test_vshrq_n_u8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vshr.u8 q0, q0, #1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = lshr <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
||||
ret <16 x i8> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) {
|
||||
; CHECK-LABEL: test_vshrq_n_u16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vshr.u16 q0, q0, #10
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = lshr <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
|
||||
ret <8 x i16> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) {
|
||||
; CHECK-LABEL: test_vshrq_n_u32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vshr.u32 q0, q0, #10
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = lshr <4 x i32> %a, <i32 10, i32 10, i32 10, i32 10>
|
||||
ret <4 x i32> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vshlq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshlq_m_n_s8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshlt.i8 q0, q1, #6
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
|
||||
%2 = tail call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 6, <16 x i1> %1, <16 x i8> %inactive)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vshlq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshlq_m_n_s16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshlt.i16 q0, q1, #13
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
||||
%2 = tail call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 13, <8 x i1> %1, <8 x i16> %inactive)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vshlq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshlq_m_n_s32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshlt.i32 q0, q1, #0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
||||
%2 = tail call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 0, <4 x i1> %1, <4 x i32> %inactive)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vshrq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshrq_m_n_s8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshrt.s8 q0, q1, #2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
|
||||
%2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 2, i32 0, <16 x i1> %1, <16 x i8> %inactive)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vshrq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshrq_m_n_s16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshrt.s16 q0, q1, #3
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
||||
%2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 3, i32 0, <8 x i1> %1, <8 x i16> %inactive)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vshrq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshrq_m_n_s32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshrt.s32 q0, q1, #13
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
||||
%2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 13, i32 0, <4 x i1> %1, <4 x i32> %inactive)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vshrq_m_n_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshrq_m_n_u8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshrt.u8 q0, q1, #4
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
|
||||
%2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, i32 1, <16 x i1> %1, <16 x i8> %inactive)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vshrq_m_n_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshrq_m_n_u16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshrt.u16 q0, q1, #14
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
||||
%2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 14, i32 1, <8 x i1> %1, <8 x i16> %inactive)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vshrq_m_n_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshrq_m_n_u32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshrt.u32 q0, q1, #21
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
||||
%2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 21, i32 1, <4 x i1> %1, <4 x i32> %inactive)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vshlq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshlq_x_n_s8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshlt.i8 q0, q0, #1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
|
||||
%2 = tail call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 1, <16 x i1> %1, <16 x i8> undef)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vshlq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshlq_x_n_s16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshlt.i16 q0, q0, #15
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
||||
%2 = tail call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 15, <8 x i1> %1, <8 x i16> undef)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vshlq_x_n_s32(<4 x i32> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshlq_x_n_s32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshlt.i32 q0, q0, #13
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
||||
%2 = tail call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 13, <4 x i1> %1, <4 x i32> undef)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vshlq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshlq_x_n_u8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshlt.i8 q0, q0, #4
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
|
||||
%2 = tail call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, <16 x i1> %1, <16 x i8> undef)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vshlq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshlq_x_n_u16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshlt.i16 q0, q0, #10
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
||||
%2 = tail call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 10, <8 x i1> %1, <8 x i16> undef)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vshlq_x_n_u32(<4 x i32> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshlq_x_n_u32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshlt.i32 q0, q0, #30
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
||||
%2 = tail call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 30, <4 x i1> %1, <4 x i32> undef)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vshrq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshrq_x_n_s8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshrt.s8 q0, q0, #4
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
|
||||
%2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, i32 0, <16 x i1> %1, <16 x i8> undef)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vshrq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshrq_x_n_s16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshrt.s16 q0, q0, #10
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
||||
%2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 10, i32 0, <8 x i1> %1, <8 x i16> undef)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vshrq_x_n_s32(<4 x i32> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshrq_x_n_s32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshrt.s32 q0, q0, #7
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
||||
%2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 7, i32 0, <4 x i1> %1, <4 x i32> undef)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vshrq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshrq_x_n_u8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshrt.u8 q0, q0, #7
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
|
||||
%2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 7, i32 1, <16 x i1> %1, <16 x i8> undef)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vshrq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshrq_x_n_u16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshrt.u16 q0, q0, #7
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
||||
%2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 7, i32 1, <8 x i1> %1, <8 x i16> undef)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vshrq_x_n_u32(<4 x i32> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshrq_x_n_u32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshrt.u32 q0, q0, #6
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
||||
%2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 6, i32 1, <4 x i1> %1, <4 x i32> undef)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
|
||||
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
|
||||
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
|
||||
|
||||
declare <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8>, i32, <16 x i1>, <16 x i8>)
|
||||
declare <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x i16>)
|
||||
declare <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>, <4 x i32>)
|
||||
|
||||
declare <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8>, i32, i32, <16 x i1>, <16 x i8>)
|
||||
declare <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>, <8 x i16>)
|
||||
declare <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32>, i32, i32, <4 x i1>, <4 x i32>)
|
Loading…
Reference in New Issue
Block a user