mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
[VE] Optimize address calculation
Optimize address calculations using LEA/LEASL instructions. Also update the comments in VEISelLowering.cpp and an existing regression test whose output is improved by this change.

Reviewed By: simoll

Differential Revision: https://reviews.llvm.org/D90878
This commit is contained in:
parent
9bff67230d
commit
93a65c706c
@ -940,23 +940,19 @@ SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
|
||||
if (isa<ConstantPoolSDNode>(Op) ||
|
||||
(GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
|
||||
// Create following instructions for local linkage PIC code.
|
||||
// lea %s35, %gotoff_lo(.LCPI0_0)
|
||||
// and %s35, %s35, (32)0
|
||||
// lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35)
|
||||
// adds.l %s35, %s15, %s35 ; %s15 is GOT
|
||||
// FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15)
|
||||
// lea %reg, label@gotoff_lo
|
||||
// and %reg, %reg, (32)0
|
||||
// lea.sl %reg, label@gotoff_hi(%reg, %got)
|
||||
SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
|
||||
VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
|
||||
SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
|
||||
return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
|
||||
}
|
||||
// Create following instructions for non-local linkage PIC code.
|
||||
// lea %s35, %got_lo(.LCPI0_0)
|
||||
// and %s35, %s35, (32)0
|
||||
// lea.sl %s35, %got_hi(.LCPI0_0)(%s35)
|
||||
// adds.l %s35, %s15, %s35 ; %s15 is GOT
|
||||
// ld %s35, (,%s35)
|
||||
// FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15)
|
||||
// lea %reg, label@got_lo
|
||||
// and %reg, %reg, (32)0
|
||||
// lea.sl %reg, label@got_hi(%reg)
|
||||
// ld %reg, (%reg, %got)
|
||||
SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
|
||||
VEMCExpr::VK_VE_GOT_LO32, DAG);
|
||||
SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
|
||||
|
@ -944,23 +944,13 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1,
|
||||
let cx = 1 in defm LEASL : RMm<"lea.sl", 0x06, I64>;
|
||||
}
|
||||
|
||||
// LEA basic patterns.
|
||||
// Need to be defined here to prioritize LEA over ADX.
|
||||
def : Pat<(iPTR ADDRrri:$addr), (LEArri MEMrri:$addr)>;
|
||||
def : Pat<(iPTR ADDRrii:$addr), (LEArii MEMrii:$addr)>;
|
||||
def : Pat<(add I64:$base, simm32:$disp), (LEArii $base, 0, (LO32 $disp))>;
|
||||
def : Pat<(add I64:$base, lozero:$disp), (LEASLrii $base, 0, (HI32 $disp))>;
|
||||
|
||||
def lea_add : PatFrags<(ops node:$base, node:$idx, node:$disp),
|
||||
[(add (add node:$base, node:$idx), node:$disp),
|
||||
(add (add node:$base, node:$disp), node:$idx)]>;
|
||||
def : Pat<(lea_add I64:$base, simm7:$idx, simm32:$disp),
|
||||
(LEArii $base, (LO7 $idx), (LO32 $disp))>;
|
||||
def : Pat<(lea_add I64:$base, I64:$idx, simm32:$disp),
|
||||
(LEArri $base, $idx, (LO32 $disp))>;
|
||||
def : Pat<(lea_add I64:$base, simm7:$idx, lozero:$disp),
|
||||
(LEASLrii $base, (LO7 $idx), (HI32 $disp))>;
|
||||
def : Pat<(lea_add I64:$base, I64:$idx, lozero:$disp),
|
||||
(LEASLrri $base, $idx, (HI32 $disp))>;
|
||||
|
||||
// Multiclass for load instructions.
|
||||
let mayLoad = 1, hasSideEffects = 0 in
|
||||
multiclass LOADm<string opcStr, bits<8> opc, RegisterClass RC, ValueType Ty,
|
||||
@ -1566,6 +1556,50 @@ def : Pat<(i64 imm:$val),
|
||||
(LEASLrii (ANDrm (LEAzii 0, 0, (LO32 imm:$val)), !add(32, 64)), 0,
|
||||
(HI32 imm:$val))>;
|
||||
|
||||
// LEA patterns
|
||||
def lea_add : PatFrags<(ops node:$base, node:$idx, node:$disp),
|
||||
[(add (add node:$base, node:$idx), node:$disp),
|
||||
(add (add node:$base, node:$disp), node:$idx),
|
||||
(add node:$base, (add $idx, $disp))]>;
|
||||
def : Pat<(lea_add I64:$base, simm7:$idx, simm32:$disp),
|
||||
(LEArii $base, (LO7 $idx), (LO32 $disp))>;
|
||||
def : Pat<(lea_add I64:$base, I64:$idx, simm32:$disp),
|
||||
(LEArri $base, $idx, (LO32 $disp))>;
|
||||
def : Pat<(lea_add I64:$base, simm7:$idx, lozero:$disp),
|
||||
(LEASLrii $base, (LO7 $idx), (HI32 $disp))>;
|
||||
def : Pat<(lea_add I64:$base, I64:$idx, lozero:$disp),
|
||||
(LEASLrri $base, $idx, (HI32 $disp))>;
|
||||
|
||||
// Address calculation patterns and optimizations
|
||||
//
|
||||
// Generate following instructions:
|
||||
// 1. LEA %reg, label@LO32
|
||||
// AND %reg, %reg, (32)0
|
||||
// 2. LEASL %reg, label@HI32
|
||||
// 3. (LEA %reg, label@LO32)
|
||||
// (AND %reg, %reg, (32)0)
|
||||
// LEASL %reg, label@HI32(, %reg)
|
||||
// 4. (LEA %reg, label@LO32)
|
||||
// (AND %reg, %reg, (32)0)
|
||||
// LEASL %reg, label@HI32(%reg, %got)
|
||||
//
|
||||
def velo_only : OutPatFrag<(ops node:$lo),
|
||||
(ANDrm (LEAzii 0, 0, $lo), !add(32, 64))>;
|
||||
def vehi_only : OutPatFrag<(ops node:$hi),
|
||||
(LEASLzii 0, 0, $hi)>;
|
||||
def vehi_lo : OutPatFrag<(ops node:$hi, node:$lo),
|
||||
(LEASLrii $lo, 0, $hi)>;
|
||||
def vehi_baselo : OutPatFrag<(ops node:$base, node:$hi, node:$lo),
|
||||
(LEASLrri $base, $lo, $hi)>;
|
||||
foreach type = [ "tblockaddress", "tconstpool", "texternalsym", "tglobaladdr",
|
||||
"tglobaltlsaddr" ] in {
|
||||
def : Pat<(VElo !cast<SDNode>(type):$lo), (velo_only $lo)>;
|
||||
def : Pat<(VEhi !cast<SDNode>(type):$hi), (vehi_only $hi)>;
|
||||
def : Pat<(add (VEhi !cast<SDNode>(type):$hi), I64:$lo), (vehi_lo $hi, $lo)>;
|
||||
def : Pat<(add I64:$base, (add (VEhi !cast<SDNode>(type):$hi), I64:$lo)),
|
||||
(vehi_baselo $base, $hi, $lo)>;
|
||||
}
|
||||
|
||||
// floating point
|
||||
def : Pat<(f32 fpimm:$val),
|
||||
(EXTRACT_SUBREG (LEASLzii 0, 0, (HIFP32 $val)), sub_f32)>;
|
||||
@ -1813,46 +1847,6 @@ defm : TRATMSTm<atomic_store_8, i32, ST1Brri, ST1Brii, ST1Bzri, ST1Bzii>;
|
||||
defm : TRATMSTm<atomic_store_16, i32, ST2Brri, ST2Brii, ST2Bzri, ST2Bzii>;
|
||||
defm : TRATMSTm<atomic_store_32, i32, STLrri, STLrii, STLzri, STLzii>;
|
||||
|
||||
// Address calculation and its optimization
|
||||
def : Pat<(VEhi tconstpool:$in), (LEASLzii 0, 0, tconstpool:$in)>;
|
||||
def : Pat<(VElo tconstpool:$in),
|
||||
(ANDrm (LEAzii 0, 0, tconstpool:$in), !add(32, 64))>;
|
||||
def : Pat<(add (VEhi tconstpool:$in1), (VElo tconstpool:$in2)),
|
||||
(LEASLrii (ANDrm (LEAzii 0, 0, tconstpool:$in2), !add(32, 64)), 0,
|
||||
(tconstpool:$in1))>;
|
||||
|
||||
// Address calculation and its optimization
|
||||
def : Pat<(VEhi tglobaladdr:$in), (LEASLzii 0, 0, tglobaladdr:$in)>;
|
||||
def : Pat<(VElo tglobaladdr:$in),
|
||||
(ANDrm (LEAzii 0, 0, tglobaladdr:$in), !add(32, 64))>;
|
||||
def : Pat<(add (VEhi tglobaladdr:$in1), (VElo tglobaladdr:$in2)),
|
||||
(LEASLrii (ANDrm (LEAzii 0, 0, tglobaladdr:$in2), !add(32, 64)), 0,
|
||||
(tglobaladdr:$in1))>;
|
||||
|
||||
// Address calculation and its optimization
|
||||
def : Pat<(VEhi tblockaddress:$in), (LEASLzii 0, 0, tblockaddress:$in)>;
|
||||
def : Pat<(VElo tblockaddress:$in),
|
||||
(ANDrm (LEAzii 0, 0, tblockaddress:$in), !add(32, 64))>;
|
||||
def : Pat<(add (VEhi tblockaddress:$in1), (VElo tblockaddress:$in2)),
|
||||
(LEASLrii (ANDrm (LEAzii 0, 0, tblockaddress:$in2), !add(32, 64)), 0,
|
||||
(tblockaddress:$in1))>;
|
||||
|
||||
// GlobalTLS address calculation and its optimization
|
||||
def : Pat<(VEhi tglobaltlsaddr:$in), (LEASLzii 0, 0, tglobaltlsaddr:$in)>;
|
||||
def : Pat<(VElo tglobaltlsaddr:$in),
|
||||
(ANDrm (LEAzii 0, 0, tglobaltlsaddr:$in), !add(32, 64))>;
|
||||
def : Pat<(add (VEhi tglobaltlsaddr:$in1), (VElo tglobaltlsaddr:$in2)),
|
||||
(LEASLrii (ANDrm (LEAzii 0, 0, tglobaltlsaddr:$in2), !add(32, 64)), 0,
|
||||
(tglobaltlsaddr:$in1))>;
|
||||
|
||||
// Address calculation and its optimization
|
||||
def : Pat<(VEhi texternalsym:$in), (LEASLzii 0, 0, texternalsym:$in)>;
|
||||
def : Pat<(VElo texternalsym:$in),
|
||||
(ANDrm (LEAzii 0, 0, texternalsym:$in), !add(32, 64))>;
|
||||
def : Pat<(add (VEhi texternalsym:$in1), (VElo texternalsym:$in2)),
|
||||
(LEASLrii (ANDrm (LEAzii 0, 0, texternalsym:$in2), !add(32, 64)), 0,
|
||||
(texternalsym:$in1))>;
|
||||
|
||||
// Branches
|
||||
def : Pat<(br bb:$addr), (BRCFLa bb:$addr)>;
|
||||
|
||||
|
@ -54,8 +54,7 @@ define i32 @main() {
|
||||
; CHECK-NEXT: st %s1, 184(, %s11)
|
||||
; CHECK-NEXT: lea %s0, .L.str@gotoff_lo
|
||||
; CHECK-NEXT: and %s0, %s0, (32)0
|
||||
; CHECK-NEXT: lea.sl %s0, .L.str@gotoff_hi(, %s0)
|
||||
; CHECK-NEXT: adds.l %s0, %s15, %s0
|
||||
; CHECK-NEXT: lea.sl %s0, .L.str@gotoff_hi(%s0, %s15)
|
||||
; CHECK-NEXT: lea %s12, printf@plt_lo(-24)
|
||||
; CHECK-NEXT: and %s12, %s12, (32)0
|
||||
; CHECK-NEXT: sic %s16
|
||||
|
Loading…
Reference in New Issue
Block a user