1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

[ARM,MVE] Intrinsics for partial-overwrite imm shifts.

This batch of intrinsics covers two sets of immediate shift
instructions, which have in common that they only overwrite part of
their output register and so they need an extra input giving its
previous value.

The VSLI and VSRI instructions shift each lane of the input vector
left or right just as if they were normal immediate VSHL/VSHR, but
then they only overwrite the output bits that correspond to actual
shifted bits of the input. So VSLI will leave the low n bits of each
output lane unchanged, and VSRI the same with the top n bits.

The V[Q][R]SHR[U]N family are all narrowing shifts: they take an input
vector of 2n-bit integers, shift each lane right by a constant, and
then narrow the shifted result to only n bits. So they only
overwrite half of the n-bit lanes in the output register, and the B/T
suffix indicates whether it's the bottom or top half of each 2n-bit
lane.

I've implemented the whole of the latter family using a single IR
intrinsic `vshrn`, which takes a lot of i32 parameters indicating
which instruction it expands to (by specifying signedness of the input
and output types, whether it saturates and/or rounds, etc).

Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard

Reviewed By: dmgreen

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D72328
This commit is contained in:
Simon Tatham 2020-01-08 13:36:25 +00:00
parent 21be0de34d
commit 5371ba4ab1
3 changed files with 1404 additions and 49 deletions

View File

@ -944,6 +944,17 @@ defm int_arm_mve_vshll_imm: MVEPredicatedM<[llvm_anyvector_ty],
[llvm_anyvector_ty, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/,
llvm_i32_ty /*top-half*/]>;
// VSLI / VSRI: immediate shift-and-insert intrinsics. The result and both
// vector operands share one vector type (LLVMMatchType<0>); the last operand
// is an i32. In the MVE instruction-selection patterns that consume these
// intrinsics the operands are matched, in order, as the previous value of the
// destination register, the vector being shifted, and the immediate shift
// count.
// NOTE(review): MVEPredicated is defined outside this view; the selection
// patterns reference both the plain name and a "_predicated" variant, so it
// presumably emits both -- confirm against its definition.
defm int_arm_mve_vsli: MVEPredicated<
[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty]>;
defm int_arm_mve_vsri: MVEPredicated<
[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty]>;
// Single intrinsic covering the narrowing right-shift family. The first
// operand has the same type as the result and is matched by the selection
// patterns as the previous value of the destination register; the second is
// the (independently typed) input vector. The remaining i32 operands are the
// shift count followed by flags, labelled inline below, that select which
// concrete instruction the call maps to.
defm int_arm_mve_vshrn: MVEPredicated<
[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty,
llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*saturate*/, llvm_i32_ty /*round*/,
llvm_i32_ty /*unsigned-out*/, llvm_i32_ty /*unsigned-in*/,
llvm_i32_ty /*top-half*/]>;
// MVE scalar shifts.
class ARM_MVE_qrshift_single<list<LLVMType> value,
list<LLVMType> saturate = []> :

View File

@ -2503,11 +2503,15 @@ foreach VTI = [MVE_v16s8, MVE_v8s16, MVE_v16u8, MVE_v8u16] in
foreach top = [0, 1] in
defm : MVE_VSHLL_patterns<VTI, top>;
// Common base for immediate-shift instructions that take the previous value
// of their destination as an extra input: the operand list is
// ($QdSrc, $Qm, $imm) and the string "$Qd = $QdSrc" is passed through to
// MVE_shift_imm (presumably as the tied-operand constraint -- MVE_shift_imm is
// defined outside this view).
//
// The immediate operand class is passed in as `imm` and also stored in the
// `immediateType` field, so that code holding only the instruction record
// (e.g. MVE_VSHRN_patterns) can refer to it as inst.immediateType.
class MVE_shift_imm_partial<Operand imm, string iname, string suffix>
: MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$QdSrc, MQPR:$Qm, imm:$imm),
iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc"> {
Operand immediateType = imm;
}
class MVE_VxSHRN<string iname, string suffix, bit bit_12, bit bit_28,
dag immops, list<dag> pattern=[]>
: MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
pattern> {
Operand imm, list<dag> pattern=[]>
: MVE_shift_imm_partial<imm, iname, suffix> {
bits<5> imm;
let Inst{28} = bit_28;
@ -2520,45 +2524,35 @@ class MVE_VxSHRN<string iname, string suffix, bit bit_12, bit bit_28,
let Inst{0} = 0b1;
}
def MVE_VRSHRNi16bh : MVE_VxSHRN<
"vrshrnb", "i16", 0b0, 0b1, (ins shr_imm8:$imm)> {
def MVE_VRSHRNi16bh : MVE_VxSHRN<"vrshrnb", "i16", 0b0, 0b1, shr_imm8> {
let Inst{20-19} = 0b01;
}
def MVE_VRSHRNi16th : MVE_VxSHRN<
"vrshrnt", "i16", 0b1, 0b1,(ins shr_imm8:$imm)> {
def MVE_VRSHRNi16th : MVE_VxSHRN<"vrshrnt", "i16", 0b1, 0b1, shr_imm8> {
let Inst{20-19} = 0b01;
}
def MVE_VRSHRNi32bh : MVE_VxSHRN<
"vrshrnb", "i32", 0b0, 0b1, (ins shr_imm16:$imm)> {
def MVE_VRSHRNi32bh : MVE_VxSHRN<"vrshrnb", "i32", 0b0, 0b1, shr_imm16> {
let Inst{20} = 0b1;
}
def MVE_VRSHRNi32th : MVE_VxSHRN<
"vrshrnt", "i32", 0b1, 0b1, (ins shr_imm16:$imm)> {
def MVE_VRSHRNi32th : MVE_VxSHRN<"vrshrnt", "i32", 0b1, 0b1, shr_imm16> {
let Inst{20} = 0b1;
}
def MVE_VSHRNi16bh : MVE_VxSHRN<
"vshrnb", "i16", 0b0, 0b0, (ins shr_imm8:$imm)> {
def MVE_VSHRNi16bh : MVE_VxSHRN<"vshrnb", "i16", 0b0, 0b0, shr_imm8> {
let Inst{20-19} = 0b01;
}
def MVE_VSHRNi16th : MVE_VxSHRN<
"vshrnt", "i16", 0b1, 0b0, (ins shr_imm8:$imm)> {
def MVE_VSHRNi16th : MVE_VxSHRN<"vshrnt", "i16", 0b1, 0b0, shr_imm8> {
let Inst{20-19} = 0b01;
}
def MVE_VSHRNi32bh : MVE_VxSHRN<
"vshrnb", "i32", 0b0, 0b0, (ins shr_imm16:$imm)> {
def MVE_VSHRNi32bh : MVE_VxSHRN<"vshrnb", "i32", 0b0, 0b0, shr_imm16> {
let Inst{20} = 0b1;
}
def MVE_VSHRNi32th : MVE_VxSHRN<
"vshrnt", "i32", 0b1, 0b0, (ins shr_imm16:$imm)> {
def MVE_VSHRNi32th : MVE_VxSHRN<"vshrnt", "i32", 0b1, 0b0, shr_imm16> {
let Inst{20} = 0b1;
}
class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12, dag immops,
list<dag> pattern=[]>
: MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
pattern> {
class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12,
Operand imm, list<dag> pattern=[]>
: MVE_shift_imm_partial<imm, iname, suffix> {
bits<5> imm;
let Inst{28} = bit_28;
@ -2572,44 +2566,42 @@ class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12, dag imm
}
def MVE_VQRSHRUNs16bh : MVE_VxQRSHRUN<
"vqrshrunb", "s16", 0b1, 0b0, (ins shr_imm8:$imm)> {
"vqrshrunb", "s16", 0b1, 0b0, shr_imm8> {
let Inst{20-19} = 0b01;
}
def MVE_VQRSHRUNs16th : MVE_VxQRSHRUN<
"vqrshrunt", "s16", 0b1, 0b1, (ins shr_imm8:$imm)> {
"vqrshrunt", "s16", 0b1, 0b1, shr_imm8> {
let Inst{20-19} = 0b01;
}
def MVE_VQRSHRUNs32bh : MVE_VxQRSHRUN<
"vqrshrunb", "s32", 0b1, 0b0, (ins shr_imm16:$imm)> {
"vqrshrunb", "s32", 0b1, 0b0, shr_imm16> {
let Inst{20} = 0b1;
}
def MVE_VQRSHRUNs32th : MVE_VxQRSHRUN<
"vqrshrunt", "s32", 0b1, 0b1, (ins shr_imm16:$imm)> {
"vqrshrunt", "s32", 0b1, 0b1, shr_imm16> {
let Inst{20} = 0b1;
}
def MVE_VQSHRUNs16bh : MVE_VxQRSHRUN<
"vqshrunb", "s16", 0b0, 0b0, (ins shr_imm8:$imm)> {
"vqshrunb", "s16", 0b0, 0b0, shr_imm8> {
let Inst{20-19} = 0b01;
}
def MVE_VQSHRUNs16th : MVE_VxQRSHRUN<
"vqshrunt", "s16", 0b0, 0b1, (ins shr_imm8:$imm)> {
"vqshrunt", "s16", 0b0, 0b1, shr_imm8> {
let Inst{20-19} = 0b01;
}
def MVE_VQSHRUNs32bh : MVE_VxQRSHRUN<
"vqshrunb", "s32", 0b0, 0b0, (ins shr_imm16:$imm)> {
"vqshrunb", "s32", 0b0, 0b0, shr_imm16> {
let Inst{20} = 0b1;
}
def MVE_VQSHRUNs32th : MVE_VxQRSHRUN<
"vqshrunt", "s32", 0b0, 0b1, (ins shr_imm16:$imm)> {
"vqshrunt", "s32", 0b0, 0b1, shr_imm16> {
let Inst{20} = 0b1;
}
class MVE_VxQRSHRN<string iname, string suffix, bit bit_0, bit bit_12,
dag immops, list<dag> pattern=[]>
: MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
pattern> {
Operand imm, list<dag> pattern=[]>
: MVE_shift_imm_partial<imm, iname, suffix> {
bits<5> imm;
let Inst{25-23} = 0b101;
@ -2622,19 +2614,19 @@ class MVE_VxQRSHRN<string iname, string suffix, bit bit_0, bit bit_12,
}
multiclass MVE_VxQRSHRN_types<string iname, bit bit_0, bit bit_12> {
def s16 : MVE_VxQRSHRN<iname, "s16", bit_0, bit_12, (ins shr_imm8:$imm)> {
def s16 : MVE_VxQRSHRN<iname, "s16", bit_0, bit_12, shr_imm8> {
let Inst{28} = 0b0;
let Inst{20-19} = 0b01;
}
def u16 : MVE_VxQRSHRN<iname, "u16", bit_0, bit_12, (ins shr_imm8:$imm)> {
def u16 : MVE_VxQRSHRN<iname, "u16", bit_0, bit_12, shr_imm8> {
let Inst{28} = 0b1;
let Inst{20-19} = 0b01;
}
def s32 : MVE_VxQRSHRN<iname, "s32", bit_0, bit_12, (ins shr_imm16:$imm)> {
def s32 : MVE_VxQRSHRN<iname, "s32", bit_0, bit_12, shr_imm16> {
let Inst{28} = 0b0;
let Inst{20} = 0b1;
}
def u32 : MVE_VxQRSHRN<iname, "u32", bit_0, bit_12, (ins shr_imm16:$imm)> {
def u32 : MVE_VxQRSHRN<iname, "u32", bit_0, bit_12, shr_imm16> {
let Inst{28} = 0b1;
let Inst{20} = 0b1;
}
@ -2645,6 +2637,64 @@ defm MVE_VQRSHRNth : MVE_VxQRSHRN_types<"vqrshrnt", 0b1, 0b1>;
defm MVE_VQSHRNbh : MVE_VxQRSHRN_types<"vqshrnb", 0b0, 0b0>;
defm MVE_VQSHRNth : MVE_VxQRSHRN_types<"vqshrnt", 0b0, 0b1>;
// Emits the two selection patterns (unpredicated and predicated) that map the
// int_arm_mve_vshrn intrinsic onto one concrete narrowing-shift instruction.
//
//   inst    - the instruction to select; must derive from
//             MVE_shift_imm_partial so that inst.immediateType is available.
//   OutVTI  - vector type info for the result / previous destination value.
//   InVTI   - vector type info for the input vector.
//   q, r    - values matched against the intrinsic's saturate and round
//             operands.
//   top     - value matched against the intrinsic's top-half operand.
//
// The unsigned-out / unsigned-in operands of the intrinsic are matched against
// OutVTI.Unsigned and InVTI.Unsigned respectively.
multiclass MVE_VSHRN_patterns<MVE_shift_imm_partial inst,
MVEVectorVTInfo OutVTI, MVEVectorVTInfo InVTI,
bit q, bit r, bit top> {
// Single-element foreach loops are used as local variable bindings.
// inparams is built with an unset operator (?) so the same operand list can
// be reused under either the plain or the predicated intrinsic.
foreach inparams = [(? (OutVTI.Vec MQPR:$QdSrc), (InVTI.Vec MQPR:$Qm),
(inst.immediateType:$imm), (i32 q), (i32 r),
(i32 OutVTI.Unsigned), (i32 InVTI.Unsigned),
(i32 top))] in
// NOTE(review): the output dag spells the immediate as imm:$imm while the
// input dag uses inst.immediateType:$imm; MVE_VSxI_patterns uses
// inst.immediateType on both sides. Presumably equivalent here -- confirm.
foreach outparams = [(inst (OutVTI.Vec MQPR:$QdSrc), (InVTI.Vec MQPR:$Qm),
(imm:$imm))] in {
// Unpredicated form: fill in the intrinsic as the operator of inparams.
def : Pat<(OutVTI.Vec !setop(inparams, int_arm_mve_vshrn)),
(OutVTI.Vec outparams)>;
// Predicated form: !con supplies the predicated intrinsic as operator and
// appends the predicate operand; on the output side the instruction gets
// ARMVCCThen and the predicate register appended.
def : Pat<(OutVTI.Vec !con(inparams, (int_arm_mve_vshrn_predicated
(InVTI.Pred VCCR:$pred)))),
(OutVTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred)))>;
}
}
// Instantiations of MVE_VSHRN_patterns. Arguments are: instruction, output
// vector type info, input vector type info, then q (saturate), r (round) and
// top (top-half) flag values.

// q=0, r=0: plain VSHRN. The same instruction is used for the signed and the
// unsigned type pairs.
defm : MVE_VSHRN_patterns<MVE_VSHRNi16bh, MVE_v16s8, MVE_v8s16, 0,0,0>;
defm : MVE_VSHRN_patterns<MVE_VSHRNi16th, MVE_v16s8, MVE_v8s16, 0,0,1>;
defm : MVE_VSHRN_patterns<MVE_VSHRNi32bh, MVE_v8s16, MVE_v4s32, 0,0,0>;
defm : MVE_VSHRN_patterns<MVE_VSHRNi32th, MVE_v8s16, MVE_v4s32, 0,0,1>;
defm : MVE_VSHRN_patterns<MVE_VSHRNi16bh, MVE_v16u8, MVE_v8u16, 0,0,0>;
defm : MVE_VSHRN_patterns<MVE_VSHRNi16th, MVE_v16u8, MVE_v8u16, 0,0,1>;
defm : MVE_VSHRN_patterns<MVE_VSHRNi32bh, MVE_v8u16, MVE_v4u32, 0,0,0>;
defm : MVE_VSHRN_patterns<MVE_VSHRNi32th, MVE_v8u16, MVE_v4u32, 0,0,1>;
// q=0, r=1: VRSHRN, again shared between signed and unsigned type pairs.
defm : MVE_VSHRN_patterns<MVE_VRSHRNi16bh, MVE_v16s8, MVE_v8s16, 0,1,0>;
defm : MVE_VSHRN_patterns<MVE_VRSHRNi16th, MVE_v16s8, MVE_v8s16, 0,1,1>;
defm : MVE_VSHRN_patterns<MVE_VRSHRNi32bh, MVE_v8s16, MVE_v4s32, 0,1,0>;
defm : MVE_VSHRN_patterns<MVE_VRSHRNi32th, MVE_v8s16, MVE_v4s32, 0,1,1>;
defm : MVE_VSHRN_patterns<MVE_VRSHRNi16bh, MVE_v16u8, MVE_v8u16, 0,1,0>;
defm : MVE_VSHRN_patterns<MVE_VRSHRNi16th, MVE_v16u8, MVE_v8u16, 0,1,1>;
defm : MVE_VSHRN_patterns<MVE_VRSHRNi32bh, MVE_v8u16, MVE_v4u32, 0,1,0>;
defm : MVE_VSHRN_patterns<MVE_VRSHRNi32th, MVE_v8u16, MVE_v4u32, 0,1,1>;
// q=1, r=0: VQSHRN, with separate signed (s16/s32) and unsigned (u16/u32)
// instructions.
defm : MVE_VSHRN_patterns<MVE_VQSHRNbhs16, MVE_v16s8, MVE_v8s16, 1,0,0>;
defm : MVE_VSHRN_patterns<MVE_VQSHRNths16, MVE_v16s8, MVE_v8s16, 1,0,1>;
defm : MVE_VSHRN_patterns<MVE_VQSHRNbhs32, MVE_v8s16, MVE_v4s32, 1,0,0>;
defm : MVE_VSHRN_patterns<MVE_VQSHRNths32, MVE_v8s16, MVE_v4s32, 1,0,1>;
defm : MVE_VSHRN_patterns<MVE_VQSHRNbhu16, MVE_v16u8, MVE_v8u16, 1,0,0>;
defm : MVE_VSHRN_patterns<MVE_VQSHRNthu16, MVE_v16u8, MVE_v8u16, 1,0,1>;
defm : MVE_VSHRN_patterns<MVE_VQSHRNbhu32, MVE_v8u16, MVE_v4u32, 1,0,0>;
defm : MVE_VSHRN_patterns<MVE_VQSHRNthu32, MVE_v8u16, MVE_v4u32, 1,0,1>;
// q=1, r=1: VQRSHRN, signed and unsigned instructions.
defm : MVE_VSHRN_patterns<MVE_VQRSHRNbhs16, MVE_v16s8, MVE_v8s16, 1,1,0>;
defm : MVE_VSHRN_patterns<MVE_VQRSHRNths16, MVE_v16s8, MVE_v8s16, 1,1,1>;
defm : MVE_VSHRN_patterns<MVE_VQRSHRNbhs32, MVE_v8s16, MVE_v4s32, 1,1,0>;
defm : MVE_VSHRN_patterns<MVE_VQRSHRNths32, MVE_v8s16, MVE_v4s32, 1,1,1>;
defm : MVE_VSHRN_patterns<MVE_VQRSHRNbhu16, MVE_v16u8, MVE_v8u16, 1,1,0>;
defm : MVE_VSHRN_patterns<MVE_VQRSHRNthu16, MVE_v16u8, MVE_v8u16, 1,1,1>;
defm : MVE_VSHRN_patterns<MVE_VQRSHRNbhu32, MVE_v8u16, MVE_v4u32, 1,1,0>;
defm : MVE_VSHRN_patterns<MVE_VQRSHRNthu32, MVE_v8u16, MVE_v4u32, 1,1,1>;
// VQSHRUN (q=1, r=0): unsigned output type paired with a signed input type.
defm : MVE_VSHRN_patterns<MVE_VQSHRUNs16bh, MVE_v16u8, MVE_v8s16, 1,0,0>;
defm : MVE_VSHRN_patterns<MVE_VQSHRUNs16th, MVE_v16u8, MVE_v8s16, 1,0,1>;
defm : MVE_VSHRN_patterns<MVE_VQSHRUNs32bh, MVE_v8u16, MVE_v4s32, 1,0,0>;
defm : MVE_VSHRN_patterns<MVE_VQSHRUNs32th, MVE_v8u16, MVE_v4s32, 1,0,1>;
// VQRSHRUN (q=1, r=1): unsigned output type paired with a signed input type.
defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs16bh, MVE_v16u8, MVE_v8s16, 1,1,0>;
defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs16th, MVE_v16u8, MVE_v8s16, 1,1,1>;
defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs32bh, MVE_v8u16, MVE_v4s32, 1,1,0>;
defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs32th, MVE_v8u16, MVE_v4s32, 1,1,1>;
// end of mve_imm_shift instructions
// start of mve_shift instructions
@ -2733,9 +2783,9 @@ class MVE_shift_with_imm<string iname, string suffix, dag oops, dag iops,
dag unsignedFlag = (?);
}
class MVE_VSxI_imm<string iname, string suffix, bit bit_8, dag imm>
class MVE_VSxI_imm<string iname, string suffix, bit bit_8, Operand immType>
: MVE_shift_with_imm<iname, suffix, (outs MQPR:$Qd),
!con((ins MQPR:$Qd_src, MQPR:$Qm), imm),
(ins MQPR:$Qd_src, MQPR:$Qm, immType:$imm),
"$Qd, $Qm, $imm", vpred_n, "$Qd = $Qd_src"> {
bits<6> imm;
let Inst{28} = 0b1;
@ -2744,32 +2794,56 @@ class MVE_VSxI_imm<string iname, string suffix, bit bit_8, dag imm>
let Inst{10-9} = 0b10;
let Inst{8} = bit_8;
let validForTailPredication = 1;
Operand immediateType = immType;
}
def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, (ins shr_imm8:$imm)> {
def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, shr_imm8> {
let Inst{21-19} = 0b001;
}
def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", "16", 0b0, (ins shr_imm16:$imm)> {
def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", "16", 0b0, shr_imm16> {
let Inst{21-20} = 0b01;
}
def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, (ins shr_imm32:$imm)> {
def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, shr_imm32> {
let Inst{21} = 0b1;
}
def MVE_VSLIimm8 : MVE_VSxI_imm<"vsli", "8", 0b1, (ins imm0_7:$imm)> {
def MVE_VSLIimm8 : MVE_VSxI_imm<"vsli", "8", 0b1, imm0_7> {
let Inst{21-19} = 0b001;
}
def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, (ins imm0_15:$imm)> {
def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, imm0_15> {
let Inst{21-20} = 0b01;
}
def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1,(ins imm0_31:$imm)> {
def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1,imm0_31> {
let Inst{21} = 0b1;
}
// Emits the unpredicated and predicated selection patterns for one VSLI/VSRI
// instruction.
//
//   inst - the instruction to select; its immediateType field gives the
//          operand class used for the shift count on both sides of the
//          pattern.
//   name - intrinsic base name; the intrinsics are looked up by name as
//          int_arm_mve_<name> and int_arm_mve_<name>_predicated.
//   VTI  - vector type info shared by the result and both vector operands.
multiclass MVE_VSxI_patterns<MVE_VSxI_imm inst, string name,
MVEVectorVTInfo VTI> {
// Single-element foreach loops are used as local variable bindings.
// inparams has an unset operator (?) so it can be reused under either
// intrinsic.
foreach inparams = [(? (VTI.Vec MQPR:$QdSrc), (VTI.Vec MQPR:$Qm),
(inst.immediateType:$imm))] in
foreach outparams = [(inst (VTI.Vec MQPR:$QdSrc), (VTI.Vec MQPR:$Qm),
(inst.immediateType:$imm))] in
foreach unpred_int = [!cast<Intrinsic>("int_arm_mve_" # name)] in
foreach pred_int = [!cast<Intrinsic>("int_arm_mve_" # name # "_predicated")] in {
// Unpredicated form: fill in the intrinsic as the operator of inparams.
def : Pat<(VTI.Vec !setop(inparams, unpred_int)),
(VTI.Vec outparams)>;
// Predicated form: the predicate operand is appended on the intrinsic side,
// and ARMVCCThen plus the predicate register on the instruction side.
def : Pat<(VTI.Vec !con(inparams, (pred_int (VTI.Pred VCCR:$pred)))),
(VTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred)))>;
}
}
// Instantiate the VSLI and VSRI patterns for each of the three lane sizes,
// pairing each instruction with the vector type info of matching lane width.
defm : MVE_VSxI_patterns<MVE_VSLIimm8, "vsli", MVE_v16i8>;
defm : MVE_VSxI_patterns<MVE_VSLIimm16, "vsli", MVE_v8i16>;
defm : MVE_VSxI_patterns<MVE_VSLIimm32, "vsli", MVE_v4i32>;
defm : MVE_VSxI_patterns<MVE_VSRIimm8, "vsri", MVE_v16i8>;
defm : MVE_VSxI_patterns<MVE_VSRIimm16, "vsri", MVE_v8i16>;
defm : MVE_VSxI_patterns<MVE_VSRIimm32, "vsri", MVE_v4i32>;
class MVE_VQSHL_imm<MVEVectorVTInfo VTI_, Operand immType>
: MVE_shift_with_imm<"vqshl", VTI_.Suffix, (outs MQPR:$Qd),
(ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",

File diff suppressed because it is too large Load Diff