1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 20:23:11 +01:00

[VE] Optimize address calculation

Optimize address calculations using LEA/LEASL instructions.
Also update the comments in VEISelLowering.cpp and an
existing regression test whose output is improved by this change.

Reviewed By: simoll

Differential Revision: https://reviews.llvm.org/D90878
This commit is contained in:
Kazushi (Jam) Marukawa 2020-11-01 10:59:28 +09:00
parent 9bff67230d
commit 93a65c706c
3 changed files with 54 additions and 65 deletions

View File

@ -940,23 +940,19 @@ SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
if (isa<ConstantPoolSDNode>(Op) ||
(GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
// Create the following instructions for local-linkage PIC code.
// lea %s35, %gotoff_lo(.LCPI0_0)
// and %s35, %s35, (32)0
// lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35)
// adds.l %s35, %s15, %s35 ; %s15 is GOT
// FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15)
// lea %reg, label@gotoff_lo
// and %reg, %reg, (32)0
// lea.sl %reg, label@gotoff_hi(%reg, %got)
SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
}
// Create the following instructions for non-local-linkage PIC code.
// lea %s35, %got_lo(.LCPI0_0)
// and %s35, %s35, (32)0
// lea.sl %s35, %got_hi(.LCPI0_0)(%s35)
// adds.l %s35, %s15, %s35 ; %s15 is GOT
// ld %s35, (,%s35)
// FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15)
// lea %reg, label@got_lo
// and %reg, %reg, (32)0
// lea.sl %reg, label@got_hi(%reg)
// ld %reg, (%reg, %got)
SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
VEMCExpr::VK_VE_GOT_LO32, DAG);
SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);

View File

@ -944,23 +944,13 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1,
let cx = 1 in defm LEASL : RMm<"lea.sl", 0x06, I64>;
}
// LEA basic patterns.
// Need to be defined here to prioritize LEA over ADX.
def : Pat<(iPTR ADDRrri:$addr), (LEArri MEMrri:$addr)>;
def : Pat<(iPTR ADDRrii:$addr), (LEArii MEMrii:$addr)>;
def : Pat<(add I64:$base, simm32:$disp), (LEArii $base, 0, (LO32 $disp))>;
def : Pat<(add I64:$base, lozero:$disp), (LEASLrii $base, 0, (HI32 $disp))>;
def lea_add : PatFrags<(ops node:$base, node:$idx, node:$disp),
[(add (add node:$base, node:$idx), node:$disp),
(add (add node:$base, node:$disp), node:$idx)]>;
def : Pat<(lea_add I64:$base, simm7:$idx, simm32:$disp),
(LEArii $base, (LO7 $idx), (LO32 $disp))>;
def : Pat<(lea_add I64:$base, I64:$idx, simm32:$disp),
(LEArri $base, $idx, (LO32 $disp))>;
def : Pat<(lea_add I64:$base, simm7:$idx, lozero:$disp),
(LEASLrii $base, (LO7 $idx), (HI32 $disp))>;
def : Pat<(lea_add I64:$base, I64:$idx, lozero:$disp),
(LEASLrri $base, $idx, (HI32 $disp))>;
// Multiclass for load instructions.
let mayLoad = 1, hasSideEffects = 0 in
multiclass LOADm<string opcStr, bits<8> opc, RegisterClass RC, ValueType Ty,
@ -1566,6 +1556,50 @@ def : Pat<(i64 imm:$val),
(LEASLrii (ANDrm (LEAzii 0, 0, (LO32 imm:$val)), !add(32, 64)), 0,
(HI32 imm:$val))>;
// LEA patterns
//
// lea_add matches a three-operand sum base + idx + disp in any of the three
// association orders listed in its fragment list, so each selection pattern
// below only has to be written once per operand-kind combination.
// NOTE(review): the third alternative names $idx/$disp without the `node:`
// prefix used by the first two -- confirm this is intentional.
def lea_add : PatFrags<(ops node:$base, node:$idx, node:$disp),
[(add (add node:$base, node:$idx), node:$disp),
(add (add node:$base, node:$disp), node:$idx),
(add node:$base, (add $idx, $disp))]>;
// simm7 index + simm32 displacement -> LEArii.
def : Pat<(lea_add I64:$base, simm7:$idx, simm32:$disp),
(LEArii $base, (LO7 $idx), (LO32 $disp))>;
// Register (I64) index + simm32 displacement -> LEArri.
def : Pat<(lea_add I64:$base, I64:$idx, simm32:$disp),
(LEArri $base, $idx, (LO32 $disp))>;
// simm7 index + lozero displacement -> LEASLrii, passing (HI32 $disp).
// (lozero is presumably an immediate whose low 32 bits are zero -- confirm
// against its definition.)
def : Pat<(lea_add I64:$base, simm7:$idx, lozero:$disp),
(LEASLrii $base, (LO7 $idx), (HI32 $disp))>;
// Register (I64) index + lozero displacement -> LEASLrri, passing (HI32 $disp).
def : Pat<(lea_add I64:$base, I64:$idx, lozero:$disp),
(LEASLrri $base, $idx, (HI32 $disp))>;
// Address calculation patterns and optimizations
//
// Generate the following instructions:
// 1. LEA %reg, label@LO32
// AND %reg, %reg, (32)0
// 2. LEASL %reg, label@HI32
// 3. (LEA %reg, label@LO32)
// (AND %reg, %reg, (32)0)
// LEASL %reg, label@HI32(, %reg)
// 4. (LEA %reg, label@LO32)
// (AND %reg, %reg, (32)0)
// LEASL %reg, label@HI32(%reg, %got)
//
// Output fragment for a lone low half: LEA with zero base/index and $lo as the
// displacement, followed by an AND with the mask operand !add(32, 64)
// (presumably the encoding of the "(32)0" mask shown in the listing -- confirm).
def velo_only : OutPatFrag<(ops node:$lo),
(ANDrm (LEAzii 0, 0, $lo), !add(32, 64))>;
// Output fragment for a lone high half: LEASL with zero base/index and $hi as
// the displacement.
def vehi_only : OutPatFrag<(ops node:$hi),
(LEASLzii 0, 0, $hi)>;
// Output fragment for hi + lo: LEASL taking $lo as its first operand, 0 as
// the second, and $hi as the displacement.
def vehi_lo : OutPatFrag<(ops node:$hi, node:$lo),
(LEASLrii $lo, 0, $hi)>;
// Output fragment for base + hi + lo: a single LEASL taking $base and $lo
// as its two register operands and $hi as the displacement.
def vehi_baselo : OutPatFrag<(ops node:$base, node:$hi, node:$lo),
(LEASLrri $base, $lo, $hi)>;
// Instantiate the same four selection patterns for every target address node
// kind named in the list, instead of repeating them once per kind.
foreach type = [ "tblockaddress", "tconstpool", "texternalsym", "tglobaladdr",
"tglobaltlsaddr" ] in {
// VElo alone and VEhi alone.
def : Pat<(VElo !cast<SDNode>(type):$lo), (velo_only $lo)>;
def : Pat<(VEhi !cast<SDNode>(type):$hi), (vehi_only $hi)>;
// VEhi added to an arbitrary I64 value: fold the add into the LEASL.
def : Pat<(add (VEhi !cast<SDNode>(type):$hi), I64:$lo), (vehi_lo $hi, $lo)>;
// base + (VEhi + value): fold both adds into one LEASL.
def : Pat<(add I64:$base, (add (VEhi !cast<SDNode>(type):$hi), I64:$lo)),
(vehi_baselo $base, $hi, $lo)>;
}
// floating point
def : Pat<(f32 fpimm:$val),
(EXTRACT_SUBREG (LEASLzii 0, 0, (HIFP32 $val)), sub_f32)>;
@ -1813,46 +1847,6 @@ defm : TRATMSTm<atomic_store_8, i32, ST1Brri, ST1Brii, ST1Bzri, ST1Bzii>;
defm : TRATMSTm<atomic_store_16, i32, ST2Brri, ST2Brii, ST2Bzri, ST2Bzii>;
defm : TRATMSTm<atomic_store_32, i32, STLrri, STLrii, STLzri, STLzii>;
// Address calculation and its optimization
def : Pat<(VEhi tconstpool:$in), (LEASLzii 0, 0, tconstpool:$in)>;
def : Pat<(VElo tconstpool:$in),
(ANDrm (LEAzii 0, 0, tconstpool:$in), !add(32, 64))>;
def : Pat<(add (VEhi tconstpool:$in1), (VElo tconstpool:$in2)),
(LEASLrii (ANDrm (LEAzii 0, 0, tconstpool:$in2), !add(32, 64)), 0,
(tconstpool:$in1))>;
// Address calculation and its optimization
def : Pat<(VEhi tglobaladdr:$in), (LEASLzii 0, 0, tglobaladdr:$in)>;
def : Pat<(VElo tglobaladdr:$in),
(ANDrm (LEAzii 0, 0, tglobaladdr:$in), !add(32, 64))>;
def : Pat<(add (VEhi tglobaladdr:$in1), (VElo tglobaladdr:$in2)),
(LEASLrii (ANDrm (LEAzii 0, 0, tglobaladdr:$in2), !add(32, 64)), 0,
(tglobaladdr:$in1))>;
// Address calculation and its optimization
def : Pat<(VEhi tblockaddress:$in), (LEASLzii 0, 0, tblockaddress:$in)>;
def : Pat<(VElo tblockaddress:$in),
(ANDrm (LEAzii 0, 0, tblockaddress:$in), !add(32, 64))>;
def : Pat<(add (VEhi tblockaddress:$in1), (VElo tblockaddress:$in2)),
(LEASLrii (ANDrm (LEAzii 0, 0, tblockaddress:$in2), !add(32, 64)), 0,
(tblockaddress:$in1))>;
// GlobalTLS address calculation and its optimization
def : Pat<(VEhi tglobaltlsaddr:$in), (LEASLzii 0, 0, tglobaltlsaddr:$in)>;
def : Pat<(VElo tglobaltlsaddr:$in),
(ANDrm (LEAzii 0, 0, tglobaltlsaddr:$in), !add(32, 64))>;
def : Pat<(add (VEhi tglobaltlsaddr:$in1), (VElo tglobaltlsaddr:$in2)),
(LEASLrii (ANDrm (LEAzii 0, 0, tglobaltlsaddr:$in2), !add(32, 64)), 0,
(tglobaltlsaddr:$in1))>;
// Address calculation and its optimization
def : Pat<(VEhi texternalsym:$in), (LEASLzii 0, 0, texternalsym:$in)>;
def : Pat<(VElo texternalsym:$in),
(ANDrm (LEAzii 0, 0, texternalsym:$in), !add(32, 64))>;
def : Pat<(add (VEhi texternalsym:$in1), (VElo texternalsym:$in2)),
(LEASLrii (ANDrm (LEAzii 0, 0, texternalsym:$in2), !add(32, 64)), 0,
(texternalsym:$in1))>;
// Branches
def : Pat<(br bb:$addr), (BRCFLa bb:$addr)>;

View File

@ -54,8 +54,7 @@ define i32 @main() {
; CHECK-NEXT: st %s1, 184(, %s11)
; CHECK-NEXT: lea %s0, .L.str@gotoff_lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s0, .L.str@gotoff_hi(, %s0)
; CHECK-NEXT: adds.l %s0, %s15, %s0
; CHECK-NEXT: lea.sl %s0, .L.str@gotoff_hi(%s0, %s15)
; CHECK-NEXT: lea %s12, printf@plt_lo(-24)
; CHECK-NEXT: and %s12, %s12, (32)0
; CHECK-NEXT: sic %s16