mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[ARM] Optimize immediate selection
Optimize the selection of certain immediates by materializing them with an mvn/sub instruction pair instead of loading them from the constant pool. Patch by Ben Shi, powerman1st@163.com. Differential Revision: https://reviews.llvm.org/D83745
This commit is contained in:
parent
e11e91bb11
commit
8b22bd7d24
@ -5513,6 +5513,8 @@ unsigned llvm::ConstantMaterializationCost(unsigned Val,
|
||||
return ForCodesize ? 4 : 1;
|
||||
if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs
|
||||
return ForCodesize ? 8 : 2;
|
||||
if (ARM_AM::isSOImmTwoPartValNeg(Val)) // two instrs
|
||||
return ForCodesize ? 8 : 2;
|
||||
}
|
||||
if (Subtarget->useMovt()) // MOVW + MOVT
|
||||
return ForCodesize ? 8 : 2;
|
||||
|
@ -873,16 +873,27 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
|
||||
// FIXME Windows CE supports older ARM CPUs
|
||||
assert(!STI->isTargetWindows() && "Windows on ARM requires ARMv7+");
|
||||
|
||||
// Expand into a movi + orr.
|
||||
LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
|
||||
HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
|
||||
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
|
||||
.addReg(DstReg);
|
||||
|
||||
assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
|
||||
unsigned ImmVal = (unsigned)MO.getImm();
|
||||
unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
|
||||
unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
|
||||
unsigned SOImmValV1 = 0, SOImmValV2 = 0;
|
||||
|
||||
if (ARM_AM::isSOImmTwoPartVal(ImmVal)) { // Expand into a movi + orr.
|
||||
LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
|
||||
HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
|
||||
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
|
||||
.addReg(DstReg);
|
||||
SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
|
||||
SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
|
||||
} else { // Expand into a mvn + sub.
|
||||
LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi), DstReg);
|
||||
HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri))
|
||||
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
|
||||
.addReg(DstReg);
|
||||
SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(-ImmVal);
|
||||
SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(-ImmVal);
|
||||
SOImmValV1 = ~(-SOImmValV1);
|
||||
}
|
||||
|
||||
unsigned MIFlags = MI.getFlags();
|
||||
LO16 = LO16.addImm(SOImmValV1);
|
||||
HI16 = HI16.addImm(SOImmValV2);
|
||||
|
@ -824,7 +824,9 @@ def mod_imm_neg : Operand<i32>, PatLeaf<(imm), [{
|
||||
def arm_i32imm : IntImmLeaf<i32, [{
|
||||
if (Subtarget->useMovt())
|
||||
return true;
|
||||
return ARM_AM::isSOImmTwoPartVal(Imm.getZExtValue());
|
||||
if (ARM_AM::isSOImmTwoPartVal(Imm.getZExtValue()))
|
||||
return true;
|
||||
return ARM_AM::isSOImmTwoPartValNeg(Imm.getZExtValue());
|
||||
}]>;
|
||||
|
||||
/// imm0_1 predicate - Immediate in the range [0,1].
|
||||
|
@ -205,6 +205,20 @@ namespace ARM_AM {
|
||||
return V;
|
||||
}
|
||||
|
||||
/// isSOImmTwoPartValNeg - Return true if the specified value can be obtained
|
||||
/// by two SOImmVal, that -V = First + Second.
|
||||
/// "R+V" can be optimized to (sub (sub R, First), Second).
|
||||
/// "R=V" can be optimized to (sub (mvn R, ~(-First)), Second).
|
||||
inline bool isSOImmTwoPartValNeg(unsigned V) {
|
||||
unsigned First;
|
||||
if (!isSOImmTwoPartVal(-V))
|
||||
return false;
|
||||
// Return false if ~(-First) is not a SoImmval.
|
||||
First = getSOImmTwoPartFirst(-V);
|
||||
First = ~(-First);
|
||||
return !(rotr32(~255U, getSOImmValRotate(First)) & First);
|
||||
}
|
||||
|
||||
/// getThumbImmValShift - Try to handle Imm with a 8-bit immediate followed
|
||||
/// by a left shift. Returns the shift amount to use.
|
||||
inline unsigned getThumbImmValShift(unsigned Imm) {
|
||||
|
70
test/CodeGen/ARM/add-sub-imm.ll
Normal file
70
test/CodeGen/ARM/add-sub-imm.ll
Normal file
@ -0,0 +1,70 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s --check-prefix=CHECK
|
||||
|
||||
;; Check how immediates are handled in add/sub.
|
||||
|
||||
define i32 @sub0(i32 %0) {
|
||||
; CHECK-LABEL: sub0:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: sub r0, r0, #23
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
%2 = sub i32 %0, 23
|
||||
ret i32 %2
|
||||
}
|
||||
|
||||
define i32 @sub1(i32 %0) {
|
||||
; CHECK-LABEL: sub1:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: ldr r1, .LCPI1_0
|
||||
; CHECK-NEXT: add r0, r0, r1
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
; CHECK-NEXT: .p2align 2
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI1_0:
|
||||
; CHECK-NEXT: .long 4294836225 @ 0xfffe0001
|
||||
%2 = sub i32 %0, 131071
|
||||
ret i32 %2
|
||||
}
|
||||
|
||||
define i32 @sub2(i32 %0) {
|
||||
; CHECK-LABEL: sub2:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: sub r0, r0, #35
|
||||
; CHECK-NEXT: sub r0, r0, #8960
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
%2 = sub i32 %0, 8995
|
||||
ret i32 %2
|
||||
}
|
||||
|
||||
define i32 @add0(i32 %0) {
|
||||
; CHECK-LABEL: add0:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: add r0, r0, #23
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
%2 = add i32 %0, 23
|
||||
ret i32 %2
|
||||
}
|
||||
|
||||
define i32 @add1(i32 %0) {
|
||||
; CHECK-LABEL: add1:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: ldr r1, .LCPI4_0
|
||||
; CHECK-NEXT: add r0, r0, r1
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
; CHECK-NEXT: .p2align 2
|
||||
; CHECK-NEXT: @ %bb.1:
|
||||
; CHECK-NEXT: .LCPI4_0:
|
||||
; CHECK-NEXT: .long 131071 @ 0x1ffff
|
||||
%2 = add i32 %0, 131071
|
||||
ret i32 %2
|
||||
}
|
||||
|
||||
define i32 @add2(i32 %0) {
|
||||
; CHECK-LABEL: add2:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: add r0, r0, #8960
|
||||
; CHECK-NEXT: add r0, r0, #2293760
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
%2 = add i32 %0, 2302720
|
||||
ret i32 %2
|
||||
}
|
@ -85,7 +85,8 @@ entry:
|
||||
define i32 @t4(i32 %a, i32 %b, i32 %x) nounwind {
|
||||
entry:
|
||||
; ARM-LABEL: t4:
|
||||
; ARM: ldr
|
||||
; ARM: mvn [[R0:r[0-9]+]], #170
|
||||
; ARM: sub [[R0:r[0-9]+]], [[R0:r[0-9]+]], #11141120
|
||||
; ARM: mov{{lt|ge}}
|
||||
|
||||
; ARMT2-LABEL: t4:
|
||||
|
Loading…
x
Reference in New Issue
Block a user