
[ARM,MVE] Add intrinsics for scalar shifts.

This fills in the small family of MVE intrinsics that have nothing to
do with vectors: they implement bit-shift operations on 32- or 64-bit
values held in one or two general-purpose registers. Most of these
shift operations saturate if shifting left, and round to nearest if
shifting right, although LSLL and ASRL behave like ordinary shifts.
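
As a concrete (and deliberately simplified) illustration of the rounding
behaviour, here is a hypothetical C++ model of a rounding right shift in
the style of URSHR. The function name is made up, and this is a sketch of
the intent rather than the architectural pseudocode:

    #include <cstdint>

    // Round-to-nearest right shift, valid for 1 <= n <= 32: add half of
    // the last bit that will be shifted out, then shift.
    uint32_t urshr_model(uint32_t x, unsigned n) {
      uint64_t wide = (uint64_t)x + (1ULL << (n - 1));
      return (uint32_t)(wide >> n);
    }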

When these instructions take a variable shift count in a register,
they pay attention to its sign, so that (for example) LSLL or UQRSHLL
will shift left if given a positive number but right if given a
negative one. That makes even LSLL and ASRL different enough from
standard LLVM IR shift semantics that I couldn't see any better
alternative than to simply model the whole family as a set of
MVE-specific IR intrinsics.
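
For illustration, the register-count forms behave roughly like the
following C++ sketch (a minimal model of the sign convention only;
saturation, rounding and the instruction's exact treatment of
out-of-range counts are ignored, and the function name is made up):

    #include <cstdint>

    // Bidirectional shift count: a positive count shifts left,
    // a negative one shifts right.
    uint64_t lsll_model(uint64_t v, int32_t count) {
      int64_t n = count;            // widen so negating INT32_MIN is safe
      if (n >= 0)
        return n < 64 ? v << n : 0; // positive count: shift left
      n = -n;
      return n < 64 ? v >> n : 0;   // negative count: shift right
    }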

(The "immediate" forms of LSLL and ASRL, on the other hand, do
behave exactly like a standard IR shift of a 64-bit value. In fact,
those forms don't have ACLE intrinsics defined at all, because you can
just write an ordinary C shift operation if you want one of those.)
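
For example, an ordinary C shift like the following (a hypothetical
function, shown only to make the point) is all you need to write to
reach the immediate form of ASRL on an MVE target:

    #include <cstdint>

    // Can be selected directly as "asrl r0, r1, #3".
    int64_t shift_down(int64_t x) { return x >> 3; }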

The 64-bit shifts have to be instruction-selected in C++, because they
deliver two output values. But the 32-bit ones are simple enough that
I could write a DAG isel pattern directly into each Instruction
record.
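
To make the two-result contract concrete, here is a plain C++ sketch of
the shape being selected (long_shift and wrap64 are made-up names; the
real lowering is SelectMVE_LongShift in the diff below). The i64 is
split into two i32 halves, the intrinsic returns an { i32, i32 } pair,
and the caller reassembles it, exactly as the IR tests below do:

    #include <cstdint>
    #include <utility>

    // Stand-in for a two-result 64-bit shift intrinsic: two 32-bit
    // halves in, two 32-bit halves out. Simplified LSLL-style semantics.
    static std::pair<uint32_t, uint32_t>
    long_shift(uint32_t lo, uint32_t hi, int32_t count) {
      uint64_t v = ((uint64_t)hi << 32) | lo;
      int64_t n = count; // widen before negating
      v = n >= 0 ? (n < 64 ? v << n : 0) : (-n < 64 ? v >> -n : 0);
      return {(uint32_t)v, (uint32_t)(v >> 32)};
    }

    // Split, call, reassemble: the wrapping visible in the tests.
    uint64_t wrap64(uint64_t value, int32_t count) {
      auto [rlo, rhi] =
          long_shift((uint32_t)value, (uint32_t)(value >> 32), count);
      return ((uint64_t)rhi << 32) | rlo;
    }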

Reviewers: ostannard, MarkMurrayARM, dmgreen

Reviewed By: dmgreen

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D70319
Commit: 7935cf1c34 (parent: 3b2b582539)
Author: Simon Tatham
Date: 2019-11-19 14:47:07 +00:00
4 changed files with 324 additions and 15 deletions

llvm/include/llvm/IR/IntrinsicsARM.td

@@ -850,9 +850,25 @@ defm int_arm_mve_vstr_scatter_offset: MVEPredicated<
    [], [llvm_anyptr_ty, llvm_anyvector_ty, llvm_anyvector_ty,
         llvm_i32_ty, llvm_i32_ty], llvm_anyvector_ty, [IntrWriteMem]>;
 
-def int_arm_mve_urshrl: Intrinsic<
-   [llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-   [IntrNoMem]>;
+// MVE scalar shifts.
+class ARM_MVE_qrshift_single<list<LLVMType> value,
+                             list<LLVMType> saturate = []> :
+  Intrinsic<value, value # [llvm_i32_ty] # saturate, [IntrNoMem]>;
+multiclass ARM_MVE_qrshift<list<LLVMType> saturate = []> {
+  // Most of these shifts come in 32- and 64-bit versions. But only
+  // the 64-bit ones have the extra saturation argument (if any).
+  def "": ARM_MVE_qrshift_single<[llvm_i32_ty]>;
+  def l: ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty], saturate>;
+}
+defm int_arm_mve_urshr: ARM_MVE_qrshift;
+defm int_arm_mve_uqshl: ARM_MVE_qrshift;
+defm int_arm_mve_srshr: ARM_MVE_qrshift;
+defm int_arm_mve_sqshl: ARM_MVE_qrshift;
+defm int_arm_mve_uqrshl: ARM_MVE_qrshift<[llvm_i32_ty]>;
+defm int_arm_mve_sqrshr: ARM_MVE_qrshift<[llvm_i32_ty]>;
+// LSLL and ASRL only have 64-bit versions, not 32.
+def int_arm_mve_lsll: ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty]>;
+def int_arm_mve_asrl: ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty]>;
 
 def int_arm_mve_vadc: Intrinsic<
    [llvm_anyvector_ty, llvm_i32_ty],

llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp

@@ -226,7 +226,8 @@ private:
   void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);
 
   /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
-  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate);
+  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
+                           bool HasSaturationOperand);
 
   /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
   void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
@@ -2399,7 +2400,8 @@ void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
 }
 
 void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
-                                          bool Immediate) {
+                                          bool Immediate,
+                                          bool HasSaturationOperand) {
   SDLoc Loc(N);
   SmallVector<SDValue, 8> Ops;
 
@@ -2410,11 +2412,18 @@ void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
   // The shift count
   if (Immediate) {
     int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
-    Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset
+    Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
   } else {
     Ops.push_back(N->getOperand(3));
   }
 
+  // The immediate saturation operand, if any
+  if (HasSaturationOperand) {
+    int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
+    int SatBit = (SatOp == 64 ? 0 : 1);
+    Ops.push_back(getI32Imm(SatBit, Loc));
+  }
+
   // MVE scalar shifts are IT-predicable, so include the standard
   // predicate arguments.
   Ops.push_back(getAL(CurDAG, Loc));
@@ -4267,7 +4276,28 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
     break;
 
   case Intrinsic::arm_mve_urshrl:
-    SelectMVE_LongShift(N, ARM::MVE_URSHRL, true);
+    SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
+    return;
+  case Intrinsic::arm_mve_uqshll:
+    SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
+    return;
+  case Intrinsic::arm_mve_srshrl:
+    SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
+    return;
+  case Intrinsic::arm_mve_sqshll:
+    SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
+    return;
+  case Intrinsic::arm_mve_uqrshll:
+    SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
+    return;
+  case Intrinsic::arm_mve_sqrshrl:
+    SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
+    return;
+  case Intrinsic::arm_mve_lsll:
+    SelectMVE_LongShift(N, ARM::MVE_LSLLr, false, false);
+    return;
+  case Intrinsic::arm_mve_asrl:
+    SelectMVE_LongShift(N, ARM::MVE_ASRLr, false, false);
     return;
 
   case Intrinsic::arm_mve_vadc:

llvm/lib/Target/ARM/ARMInstrMVE.td

@@ -409,9 +409,12 @@ class MVE_ScalarShiftSingleReg<string iname, dag iops, string asm, string cstr,
   let Inst{19-16} = RdaDest{3-0};
 }
 
-class MVE_ScalarShiftSRegImm<string iname, bits<2> op5_4, list<dag> pattern=[]>
+class MVE_ScalarShiftSRegImm<string iname, bits<2> op5_4>
   : MVE_ScalarShiftSingleReg<iname, (ins rGPR:$RdaSrc, long_shift:$imm),
-                             "$RdaSrc, $imm", "$RdaDest = $RdaSrc", pattern> {
+                             "$RdaSrc, $imm", "$RdaDest = $RdaSrc",
+      [(set rGPR:$RdaDest,
+           (i32 (!cast<Intrinsic>("int_arm_mve_" # iname)
+                    (i32 rGPR:$RdaSrc), (i32 imm:$imm))))]> {
   bits<5> imm;
 
   let Inst{15} = 0b0;
@@ -427,9 +430,12 @@ def MVE_SRSHR : MVE_ScalarShiftSRegImm<"srshr", 0b10>;
 def MVE_UQSHL : MVE_ScalarShiftSRegImm<"uqshl", 0b00>;
 def MVE_URSHR : MVE_ScalarShiftSRegImm<"urshr", 0b01>;
 
-class MVE_ScalarShiftSRegReg<string iname, bits<2> op5_4, list<dag> pattern=[]>
+class MVE_ScalarShiftSRegReg<string iname, bits<2> op5_4>
   : MVE_ScalarShiftSingleReg<iname, (ins rGPR:$RdaSrc, rGPR:$Rm),
-                             "$RdaSrc, $Rm", "$RdaDest = $RdaSrc", pattern> {
+                             "$RdaSrc, $Rm", "$RdaDest = $RdaSrc",
+      [(set rGPR:$RdaDest,
+           (i32 (!cast<Intrinsic>("int_arm_mve_" # iname)
+                    (i32 rGPR:$RdaSrc), (i32 rGPR:$Rm))))]> {
   bits<4> Rm;
 
   let Inst{15-12} = Rm{3-0};

llvm/test/CodeGen/Thumb2/... (MVE scalar shift codegen test; full path not shown in this view)

@@ -1,7 +1,264 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s
 
-define arm_aapcs_vfpcc i64 @test_urshrl(i64 %value) {
+define i64 @test_asrl(i64 %value, i32 %shift) {
+; CHECK-LABEL: test_asrl:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    asrl r0, r1, r2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = lshr i64 %value, 32
+  %1 = trunc i64 %0 to i32
+  %2 = trunc i64 %value to i32
+  %3 = call { i32, i32 } @llvm.arm.mve.asrl(i32 %2, i32 %1, i32 %shift)
+  %4 = extractvalue { i32, i32 } %3, 1
+  %5 = zext i32 %4 to i64
+  %6 = shl i64 %5, 32
+  %7 = extractvalue { i32, i32 } %3, 0
+  %8 = zext i32 %7 to i64
+  %9 = or i64 %6, %8
+  ret i64 %9
+}
+
+declare { i32, i32 } @llvm.arm.mve.asrl(i32, i32, i32)
+
+define i64 @test_lsll(i64 %value, i32 %shift) {
+; CHECK-LABEL: test_lsll:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    lsll r0, r1, r2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = lshr i64 %value, 32
+  %1 = trunc i64 %0 to i32
+  %2 = trunc i64 %value to i32
+  %3 = call { i32, i32 } @llvm.arm.mve.lsll(i32 %2, i32 %1, i32 %shift)
+  %4 = extractvalue { i32, i32 } %3, 1
+  %5 = zext i32 %4 to i64
+  %6 = shl i64 %5, 32
+  %7 = extractvalue { i32, i32 } %3, 0
+  %8 = zext i32 %7 to i64
+  %9 = or i64 %6, %8
+  ret i64 %9
+}
+
+declare { i32, i32 } @llvm.arm.mve.lsll(i32, i32, i32)
+
+define i32 @test_sqrshr(i32 %value, i32 %shift) {
+; CHECK-LABEL: test_sqrshr:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    sqrshr r0, r1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = call i32 @llvm.arm.mve.sqrshr(i32 %value, i32 %shift)
+  ret i32 %0
+}
+
+declare i32 @llvm.arm.mve.sqrshr(i32, i32)
+
+define i64 @test_sqrshrl(i64 %value, i32 %shift) {
+; CHECK-LABEL: test_sqrshrl:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    sqrshrl r0, r1, #64, r2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = lshr i64 %value, 32
+  %1 = trunc i64 %0 to i32
+  %2 = trunc i64 %value to i32
+  %3 = call { i32, i32 } @llvm.arm.mve.sqrshrl(i32 %2, i32 %1, i32 %shift, i32 64)
+  %4 = extractvalue { i32, i32 } %3, 1
+  %5 = zext i32 %4 to i64
+  %6 = shl i64 %5, 32
+  %7 = extractvalue { i32, i32 } %3, 0
+  %8 = zext i32 %7 to i64
+  %9 = or i64 %6, %8
+  ret i64 %9
+}
+
+declare { i32, i32 } @llvm.arm.mve.sqrshrl(i32, i32, i32, i32)
+
+define i64 @test_sqrshrl_sat48(i64 %value, i32 %shift) {
+; CHECK-LABEL: test_sqrshrl_sat48:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    sqrshrl r0, r1, #48, r2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = lshr i64 %value, 32
+  %1 = trunc i64 %0 to i32
+  %2 = trunc i64 %value to i32
+  %3 = call { i32, i32 } @llvm.arm.mve.sqrshrl(i32 %2, i32 %1, i32 %shift, i32 48)
+  %4 = extractvalue { i32, i32 } %3, 1
+  %5 = zext i32 %4 to i64
+  %6 = shl i64 %5, 32
+  %7 = extractvalue { i32, i32 } %3, 0
+  %8 = zext i32 %7 to i64
+  %9 = or i64 %6, %8
+  ret i64 %9
+}
+
+define i32 @test_sqshl(i32 %value) {
+; CHECK-LABEL: test_sqshl:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    sqshl r0, #2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = call i32 @llvm.arm.mve.sqshl(i32 %value, i32 2)
+  ret i32 %0
+}
+
+declare i32 @llvm.arm.mve.sqshl(i32, i32)
+
+define i64 @test_sqshll(i64 %value) {
+; CHECK-LABEL: test_sqshll:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    sqshll r0, r1, #17
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = lshr i64 %value, 32
+  %1 = trunc i64 %0 to i32
+  %2 = trunc i64 %value to i32
+  %3 = call { i32, i32 } @llvm.arm.mve.sqshll(i32 %2, i32 %1, i32 17)
+  %4 = extractvalue { i32, i32 } %3, 1
+  %5 = zext i32 %4 to i64
+  %6 = shl i64 %5, 32
+  %7 = extractvalue { i32, i32 } %3, 0
+  %8 = zext i32 %7 to i64
+  %9 = or i64 %6, %8
+  ret i64 %9
+}
+
+declare { i32, i32 } @llvm.arm.mve.sqshll(i32, i32, i32)
+
+define i32 @test_srshr(i32 %value) {
+; CHECK-LABEL: test_srshr:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    srshr r0, #6
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = call i32 @llvm.arm.mve.srshr(i32 %value, i32 6)
+  ret i32 %0
+}
+
+declare i32 @llvm.arm.mve.srshr(i32, i32)
+
+define i64 @test_srshrl(i64 %value) {
+; CHECK-LABEL: test_srshrl:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    srshrl r0, r1, #26
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = lshr i64 %value, 32
+  %1 = trunc i64 %0 to i32
+  %2 = trunc i64 %value to i32
+  %3 = call { i32, i32 } @llvm.arm.mve.srshrl(i32 %2, i32 %1, i32 26)
+  %4 = extractvalue { i32, i32 } %3, 1
+  %5 = zext i32 %4 to i64
+  %6 = shl i64 %5, 32
+  %7 = extractvalue { i32, i32 } %3, 0
+  %8 = zext i32 %7 to i64
+  %9 = or i64 %6, %8
+  ret i64 %9
+}
+
+declare { i32, i32 } @llvm.arm.mve.srshrl(i32, i32, i32)
+
+define i32 @test_uqrshl(i32 %value, i32 %shift) {
+; CHECK-LABEL: test_uqrshl:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    uqrshl r0, r1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = call i32 @llvm.arm.mve.uqrshl(i32 %value, i32 %shift)
+  ret i32 %0
+}
+
+declare i32 @llvm.arm.mve.uqrshl(i32, i32)
+
+define i64 @test_uqrshll(i64 %value, i32 %shift) {
+; CHECK-LABEL: test_uqrshll:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    uqrshll r0, r1, #64, r2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = lshr i64 %value, 32
+  %1 = trunc i64 %0 to i32
+  %2 = trunc i64 %value to i32
+  %3 = call { i32, i32 } @llvm.arm.mve.uqrshll(i32 %2, i32 %1, i32 %shift, i32 64)
+  %4 = extractvalue { i32, i32 } %3, 1
+  %5 = zext i32 %4 to i64
+  %6 = shl i64 %5, 32
+  %7 = extractvalue { i32, i32 } %3, 0
+  %8 = zext i32 %7 to i64
+  %9 = or i64 %6, %8
+  ret i64 %9
+}
+
+declare { i32, i32 } @llvm.arm.mve.uqrshll(i32, i32, i32, i32)
+
+define i64 @test_uqrshll_sat48(i64 %value, i32 %shift) {
+; CHECK-LABEL: test_uqrshll_sat48:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    uqrshll r0, r1, #48, r2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = lshr i64 %value, 32
+  %1 = trunc i64 %0 to i32
+  %2 = trunc i64 %value to i32
+  %3 = call { i32, i32 } @llvm.arm.mve.uqrshll(i32 %2, i32 %1, i32 %shift, i32 48)
+  %4 = extractvalue { i32, i32 } %3, 1
+  %5 = zext i32 %4 to i64
+  %6 = shl i64 %5, 32
+  %7 = extractvalue { i32, i32 } %3, 0
+  %8 = zext i32 %7 to i64
+  %9 = or i64 %6, %8
+  ret i64 %9
+}
+
+define i32 @test_uqshl(i32 %value) {
+; CHECK-LABEL: test_uqshl:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    uqshl r0, #21
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = call i32 @llvm.arm.mve.uqshl(i32 %value, i32 21)
+  ret i32 %0
+}
+
+declare i32 @llvm.arm.mve.uqshl(i32, i32)
+
+define i64 @test_uqshll(i64 %value) {
+; CHECK-LABEL: test_uqshll:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    uqshll r0, r1, #16
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = lshr i64 %value, 32
+  %1 = trunc i64 %0 to i32
+  %2 = trunc i64 %value to i32
+  %3 = call { i32, i32 } @llvm.arm.mve.uqshll(i32 %2, i32 %1, i32 16)
+  %4 = extractvalue { i32, i32 } %3, 1
+  %5 = zext i32 %4 to i64
+  %6 = shl i64 %5, 32
+  %7 = extractvalue { i32, i32 } %3, 0
+  %8 = zext i32 %7 to i64
+  %9 = or i64 %6, %8
+  ret i64 %9
+}
+
+declare { i32, i32 } @llvm.arm.mve.uqshll(i32, i32, i32)
+
+define i32 @test_urshr(i32 %value) {
+; CHECK-LABEL: test_urshr:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    urshr r0, #22
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = call i32 @llvm.arm.mve.urshr(i32 %value, i32 22)
+  ret i32 %0
+}
+
+declare i32 @llvm.arm.mve.urshr(i32, i32)
+
+define i64 @test_urshrl(i64 %value) {
 ; CHECK-LABEL: test_urshrl:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    urshrl r0, r1, #6
@@ -10,10 +267,10 @@ entry:
   %0 = lshr i64 %value, 32
   %1 = trunc i64 %0 to i32
   %2 = trunc i64 %value to i32
-  %3 = tail call { i32, i32 } @llvm.arm.mve.urshrl(i32 %2, i32 %1, i32 6)
+  %3 = call { i32, i32 } @llvm.arm.mve.urshrl(i32 %2, i32 %1, i32 6)
   %4 = extractvalue { i32, i32 } %3, 1
   %5 = zext i32 %4 to i64
-  %6 = shl nuw i64 %5, 32
+  %6 = shl i64 %5, 32
   %7 = extractvalue { i32, i32 } %3, 0
   %8 = zext i32 %7 to i64
   %9 = or i64 %6, %8