mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
[DAG][ARM][MIPS][RISCV] Improve funnel shift promotion to use 'double shift' patterns
Based on a discussion on D88783, if we're promoting a funnel shift to a width at least twice the size of the original type, then we can use the 'double shift' patterns (shifting the concatenated sources). Differential Revision: https://reviews.llvm.org/D89139
This commit is contained in:
parent
6940914260
commit
132f72d148
@ -1129,27 +1129,44 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
|
||||
SDValue Lo = GetPromotedInteger(N->getOperand(1));
|
||||
SDValue Amount = GetPromotedInteger(N->getOperand(2));
|
||||
|
||||
unsigned OldBits = N->getOperand(0).getScalarValueSizeInBits();
|
||||
unsigned NewBits = Hi.getScalarValueSizeInBits();
|
||||
|
||||
// Shift Lo up to occupy the upper bits of the promoted type.
|
||||
SDLoc DL(N);
|
||||
EVT OldVT = N->getOperand(0).getValueType();
|
||||
EVT VT = Lo.getValueType();
|
||||
Lo = DAG.getNode(ISD::SHL, DL, VT, Lo,
|
||||
DAG.getConstant(NewBits - OldBits, DL, VT));
|
||||
unsigned Opcode = N->getOpcode();
|
||||
bool IsFSHR = Opcode == ISD::FSHR;
|
||||
unsigned OldBits = OldVT.getScalarSizeInBits();
|
||||
unsigned NewBits = VT.getScalarSizeInBits();
|
||||
|
||||
// Amount has to be interpreted modulo the old bit width.
|
||||
Amount =
|
||||
DAG.getNode(ISD::UREM, DL, VT, Amount, DAG.getConstant(OldBits, DL, VT));
|
||||
|
||||
unsigned Opcode = N->getOpcode();
|
||||
if (Opcode == ISD::FSHR) {
|
||||
// Increase Amount to shift the result into the lower bits of the promoted
|
||||
// type.
|
||||
Amount = DAG.getNode(ISD::ADD, DL, VT, Amount,
|
||||
DAG.getConstant(NewBits - OldBits, DL, VT));
|
||||
// If the promoted type is twice the size (or more), then we use the
|
||||
// traditional funnel 'double' shift codegen. This isn't necessary if the
|
||||
// shift amount is constant.
|
||||
// fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z % bw)) >> bw.
|
||||
// fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z % bw)).
|
||||
if (NewBits >= (2 * OldBits) && !isa<ConstantSDNode>(Amount) &&
|
||||
!TLI.isOperationLegalOrCustom(Opcode, VT)) {
|
||||
SDValue HiShift = DAG.getConstant(OldBits, DL, VT);
|
||||
Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, HiShift);
|
||||
Lo = DAG.getZeroExtendInReg(Lo, DL, OldVT);
|
||||
SDValue Res = DAG.getNode(ISD::OR, DL, VT, Hi, Lo);
|
||||
Res = DAG.getNode(IsFSHR ? ISD::SRL : ISD::SHL, DL, VT, Res, Amount);
|
||||
if (!IsFSHR)
|
||||
Res = DAG.getNode(ISD::SRL, DL, VT, Res, HiShift);
|
||||
return Res;
|
||||
}
|
||||
|
||||
// Shift Lo up to occupy the upper bits of the promoted type.
|
||||
SDValue ShiftOffset = DAG.getConstant(NewBits - OldBits, DL, VT);
|
||||
Lo = DAG.getNode(ISD::SHL, DL, VT, Lo, ShiftOffset);
|
||||
|
||||
// Increase Amount to shift the result into the lower bits of the promoted
|
||||
// type.
|
||||
if (IsFSHR)
|
||||
Amount = DAG.getNode(ISD::ADD, DL, VT, Amount, ShiftOffset);
|
||||
|
||||
return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amount);
|
||||
}
|
||||
|
||||
|
@ -19,13 +19,10 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
|
||||
define i16 @fshl_i16(i16 %x, i16 %y, i16 %z) {
|
||||
; CHECK-LABEL: fshl_i16:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: and r2, r2, #15
|
||||
; CHECK-NEXT: mov r3, #31
|
||||
; CHECK-NEXT: lsl r1, r1, #16
|
||||
; CHECK-NEXT: bic r3, r3, r2
|
||||
; CHECK-NEXT: lsl r0, r0, r2
|
||||
; CHECK-NEXT: lsr r1, r1, #1
|
||||
; CHECK-NEXT: orr r0, r0, r1, lsr r3
|
||||
; CHECK-NEXT: pkhbt r0, r1, r0, lsl #16
|
||||
; CHECK-NEXT: and r1, r2, #15
|
||||
; CHECK-NEXT: lsl r0, r0, r1
|
||||
; CHECK-NEXT: lsr r0, r0, #16
|
||||
; CHECK-NEXT: bx lr
|
||||
%f = call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 %z)
|
||||
ret i16 %f
|
||||
@ -188,15 +185,9 @@ define i8 @fshl_i8_const_fold() {
|
||||
define i16 @fshr_i16(i16 %x, i16 %y, i16 %z) {
|
||||
; CHECK-LABEL: fshr_i16:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: mov r3, #1
|
||||
; CHECK-NEXT: lsl r0, r0, #1
|
||||
; CHECK-NEXT: bfi r2, r3, #4, #28
|
||||
; CHECK-NEXT: mov r3, #31
|
||||
; CHECK-NEXT: bic r3, r3, r2
|
||||
; CHECK-NEXT: and r2, r2, #31
|
||||
; CHECK-NEXT: lsl r1, r1, #16
|
||||
; CHECK-NEXT: lsl r0, r0, r3
|
||||
; CHECK-NEXT: orr r0, r0, r1, lsr r2
|
||||
; CHECK-NEXT: pkhbt r0, r1, r0, lsl #16
|
||||
; CHECK-NEXT: and r1, r2, #15
|
||||
; CHECK-NEXT: lsr r0, r0, r1
|
||||
; CHECK-NEXT: bx lr
|
||||
%f = call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 %z)
|
||||
ret i16 %f
|
||||
|
@ -19,15 +19,13 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
|
||||
define i16 @fshl_i16(i16 %x, i16 %y, i16 %z) {
|
||||
; CHECK-LABEL: fshl_i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: andi $1, $6, 15
|
||||
; CHECK-NEXT: sllv $2, $4, $1
|
||||
; CHECK-NEXT: sll $3, $5, 16
|
||||
; CHECK-NEXT: srl $3, $3, 1
|
||||
; CHECK-NEXT: not $1, $1
|
||||
; CHECK-NEXT: andi $1, $1, 31
|
||||
; CHECK-NEXT: srlv $1, $3, $1
|
||||
; CHECK-NEXT: andi $1, $5, 65535
|
||||
; CHECK-NEXT: sll $2, $4, 16
|
||||
; CHECK-NEXT: or $1, $2, $1
|
||||
; CHECK-NEXT: andi $2, $6, 15
|
||||
; CHECK-NEXT: sllv $1, $1, $2
|
||||
; CHECK-NEXT: jr $ra
|
||||
; CHECK-NEXT: or $2, $2, $1
|
||||
; CHECK-NEXT: srl $2, $1, 16
|
||||
%f = call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 %z)
|
||||
ret i16 %f
|
||||
}
|
||||
@ -288,15 +286,12 @@ define i8 @fshl_i8_const_fold() {
|
||||
define i16 @fshr_i16(i16 %x, i16 %y, i16 %z) {
|
||||
; CHECK-LABEL: fshr_i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: sll $1, $5, 16
|
||||
; CHECK-NEXT: andi $1, $5, 65535
|
||||
; CHECK-NEXT: sll $2, $4, 16
|
||||
; CHECK-NEXT: or $1, $2, $1
|
||||
; CHECK-NEXT: andi $2, $6, 15
|
||||
; CHECK-NEXT: ori $3, $2, 16
|
||||
; CHECK-NEXT: srlv $1, $1, $3
|
||||
; CHECK-NEXT: sll $3, $4, 1
|
||||
; CHECK-NEXT: xori $2, $2, 15
|
||||
; CHECK-NEXT: sllv $2, $3, $2
|
||||
; CHECK-NEXT: jr $ra
|
||||
; CHECK-NEXT: or $2, $2, $1
|
||||
; CHECK-NEXT: srlv $2, $1, $2
|
||||
%f = call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 %z)
|
||||
ret i16 %f
|
||||
}
|
||||
|
@ -109,14 +109,13 @@ declare i32 @llvm.fshl.i32(i32, i32, i32)
|
||||
define signext i32 @fshl_i32(i32 signext %a, i32 signext %b, i32 signext %c) nounwind {
|
||||
; RV64I-LABEL: fshl_i32:
|
||||
; RV64I: # %bb.0:
|
||||
; RV64I-NEXT: andi a2, a2, 31
|
||||
; RV64I-NEXT: sll a0, a0, a2
|
||||
; RV64I-NEXT: not a2, a2
|
||||
; RV64I-NEXT: slli a0, a0, 32
|
||||
; RV64I-NEXT: slli a1, a1, 32
|
||||
; RV64I-NEXT: srli a1, a1, 1
|
||||
; RV64I-NEXT: srl a1, a1, a2
|
||||
; RV64I-NEXT: srli a1, a1, 32
|
||||
; RV64I-NEXT: or a0, a0, a1
|
||||
; RV64I-NEXT: sext.w a0, a0
|
||||
; RV64I-NEXT: andi a1, a2, 31
|
||||
; RV64I-NEXT: sll a0, a0, a1
|
||||
; RV64I-NEXT: srai a0, a0, 32
|
||||
; RV64I-NEXT: ret
|
||||
;
|
||||
; RV64IB-LABEL: fshl_i32:
|
||||
@ -162,14 +161,12 @@ declare i32 @llvm.fshr.i32(i32, i32, i32)
|
||||
define signext i32 @fshr_i32(i32 signext %a, i32 signext %b, i32 signext %c) nounwind {
|
||||
; RV64I-LABEL: fshr_i32:
|
||||
; RV64I: # %bb.0:
|
||||
; RV64I-NEXT: slli a0, a0, 32
|
||||
; RV64I-NEXT: slli a1, a1, 32
|
||||
; RV64I-NEXT: andi a2, a2, 31
|
||||
; RV64I-NEXT: ori a3, a2, 32
|
||||
; RV64I-NEXT: srl a1, a1, a3
|
||||
; RV64I-NEXT: slli a0, a0, 1
|
||||
; RV64I-NEXT: xori a2, a2, 31
|
||||
; RV64I-NEXT: sll a0, a0, a2
|
||||
; RV64I-NEXT: srli a1, a1, 32
|
||||
; RV64I-NEXT: or a0, a0, a1
|
||||
; RV64I-NEXT: andi a1, a2, 31
|
||||
; RV64I-NEXT: srl a0, a0, a1
|
||||
; RV64I-NEXT: sext.w a0, a0
|
||||
; RV64I-NEXT: ret
|
||||
;
|
||||
|
Loading…
Reference in New Issue
Block a user