1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

[ARM] Optimize immediate selection

Optimize the selection of some specific immediates by materializing them with
mvn/sub instructions instead of loading them from the constant pool.

Patch by Ben Shi, powerman1st@163.com.

Differential Revision: https://reviews.llvm.org/D83745
This commit is contained in:
Sjoerd Meijer 2020-07-29 13:13:04 +01:00
parent e11e91bb11
commit 8b22bd7d24
6 changed files with 110 additions and 10 deletions

View File

@ -5513,6 +5513,8 @@ unsigned llvm::ConstantMaterializationCost(unsigned Val,
return ForCodesize ? 4 : 1;
if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs
return ForCodesize ? 8 : 2;
if (ARM_AM::isSOImmTwoPartValNeg(Val)) // two instrs
return ForCodesize ? 8 : 2;
}
if (Subtarget->useMovt()) // MOVW + MOVT
return ForCodesize ? 8 : 2;

View File

@ -873,16 +873,27 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
// FIXME Windows CE supports older ARM CPUs
assert(!STI->isTargetWindows() && "Windows on ARM requires ARMv7+");
// Expand into a movi + orr.
LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg);
assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
unsigned ImmVal = (unsigned)MO.getImm();
unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
unsigned SOImmValV1 = 0, SOImmValV2 = 0;
if (ARM_AM::isSOImmTwoPartVal(ImmVal)) { // Expand into a movi + orr.
LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg);
SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
} else { // Expand into a mvn + sub.
LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi), DstReg);
HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg);
SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(-ImmVal);
SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(-ImmVal);
SOImmValV1 = ~(-SOImmValV1);
}
unsigned MIFlags = MI.getFlags();
LO16 = LO16.addImm(SOImmValV1);
HI16 = HI16.addImm(SOImmValV2);

View File

@ -824,7 +824,9 @@ def mod_imm_neg : Operand<i32>, PatLeaf<(imm), [{
def arm_i32imm : IntImmLeaf<i32, [{
if (Subtarget->useMovt())
return true;
return ARM_AM::isSOImmTwoPartVal(Imm.getZExtValue());
if (ARM_AM::isSOImmTwoPartVal(Imm.getZExtValue()))
return true;
return ARM_AM::isSOImmTwoPartValNeg(Imm.getZExtValue());
}]>;
/// imm0_1 predicate - Immediate in the range [0,1].

View File

@ -205,6 +205,20 @@ namespace ARM_AM {
return V;
}
/// isSOImmTwoPartValNeg - Return true if the negation of the specified value
/// can be formed from two SOImmVals, i.e. -V = First + Second, and ~(-First)
/// is itself a SOImmVal.
/// "R+V" can then be optimized to (sub (sub R, First), Second).
/// "R=V" can then be optimized to (sub (mvn R, ~(-First)), Second).
inline bool isSOImmTwoPartValNeg(unsigned V) {
  const unsigned NegV = -V;
  if (isSOImmTwoPartVal(NegV)) {
    // The "R=V" form feeds ~(-First) to mvn, so that operand has to be
    // checked as well: reject V when ~(-First) is not a SOImmVal.
    unsigned MvnOperand = getSOImmTwoPartFirst(NegV);
    MvnOperand = ~(-MvnOperand);
    const unsigned RotatedMask = rotr32(~255U, getSOImmValRotate(MvnOperand));
    return (RotatedMask & MvnOperand) == 0;
  }
  return false;
}
/// getThumbImmValShift - Try to handle Imm with a 8-bit immediate followed
/// by a left shift. Returns the shift amount to use.
inline unsigned getThumbImmValShift(unsigned Imm) {

View File

@ -0,0 +1,70 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s --check-prefix=CHECK
;; Check how immediates are handled in add/sub.
;; Subtracting 23 is expected to select a single sub with an immediate operand.
define i32 @sub0(i32 %0) {
; CHECK-LABEL: sub0:
; CHECK: @ %bb.0:
; CHECK-NEXT: sub r0, r0, #23
; CHECK-NEXT: mov pc, lr
%2 = sub i32 %0, 23
ret i32 %2
}
;; Subtracting 131071 is expected to become an add of 0xfffe0001, with that
;; constant loaded from the constant pool.
define i32 @sub1(i32 %0) {
; CHECK-LABEL: sub1:
; CHECK: @ %bb.0:
; CHECK-NEXT: ldr r1, .LCPI1_0
; CHECK-NEXT: add r0, r0, r1
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI1_0:
; CHECK-NEXT: .long 4294836225 @ 0xfffe0001
%2 = sub i32 %0, 131071
ret i32 %2
}
;; Subtracting 8995 is expected to be split into two sub instructions
;; (#35 followed by #8960) with no constant pool load.
define i32 @sub2(i32 %0) {
; CHECK-LABEL: sub2:
; CHECK: @ %bb.0:
; CHECK-NEXT: sub r0, r0, #35
; CHECK-NEXT: sub r0, r0, #8960
; CHECK-NEXT: mov pc, lr
%2 = sub i32 %0, 8995
ret i32 %2
}
;; Adding 23 is expected to select a single add with an immediate operand.
define i32 @add0(i32 %0) {
; CHECK-LABEL: add0:
; CHECK: @ %bb.0:
; CHECK-NEXT: add r0, r0, #23
; CHECK-NEXT: mov pc, lr
%2 = add i32 %0, 23
ret i32 %2
}
;; Adding 131071 (0x1ffff) is expected to load the constant from the constant
;; pool and add it with a register operand.
define i32 @add1(i32 %0) {
; CHECK-LABEL: add1:
; CHECK: @ %bb.0:
; CHECK-NEXT: ldr r1, .LCPI4_0
; CHECK-NEXT: add r0, r0, r1
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI4_0:
; CHECK-NEXT: .long 131071 @ 0x1ffff
%2 = add i32 %0, 131071
ret i32 %2
}
;; Adding 2302720 is expected to be split into two add instructions
;; (#8960 followed by #2293760) with no constant pool load.
define i32 @add2(i32 %0) {
; CHECK-LABEL: add2:
; CHECK: @ %bb.0:
; CHECK-NEXT: add r0, r0, #8960
; CHECK-NEXT: add r0, r0, #2293760
; CHECK-NEXT: mov pc, lr
%2 = add i32 %0, 2302720
ret i32 %2
}

View File

@ -85,7 +85,8 @@ entry:
define i32 @t4(i32 %a, i32 %b, i32 %x) nounwind {
entry:
; ARM-LABEL: t4:
; ARM: ldr
; ARM: mvn [[R0:r[0-9]+]], #170
; ARM: sub [[R0:r[0-9]+]], [[R0:r[0-9]+]], #11141120
; ARM: mov{{lt|ge}}
; ARMT2-LABEL: t4: