mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[GlobalISel][Localizer] Allow localization of G_INTTOPTR and chains of instructions.
G_INTTOPTR can prevent the localizer from moving G_CONSTANTs, but since it's essentially a side effect free cast instruction we can remat both instructions. This patch changes the localizer to enable localization of the chains by iterating over the entry block instructions in reverse order. That way, uses will be localized first, and then the defs are free to be localized as well. This also changes the previous SmallPtrSet of localized instructions to use a SetVector instead. We're dealing with pointers and need deterministic iteration order. Overall, this change improves ARM64 -O0 CTMark code size by around 0.7% geomean. Differential Revision: https://reviews.llvm.org/D63630 llvm-svn: 364001
This commit is contained in:
parent
9d712950b5
commit
5647d9b15a
@ -21,6 +21,7 @@
|
||||
#ifndef LLVM_CODEGEN_GLOBALISEL_LOCALIZER_H
|
||||
#define LLVM_CODEGEN_GLOBALISEL_LOCALIZER_H
|
||||
|
||||
#include "llvm/ADT/SetVector.h"
|
||||
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
|
||||
@ -60,12 +61,14 @@ private:
|
||||
/// Initialize the field members using \p MF.
|
||||
void init(MachineFunction &MF);
|
||||
|
||||
typedef SmallSetVector<MachineInstr *, 32> LocalizedSetVecT;
|
||||
|
||||
/// Do inter-block localization from the entry block.
|
||||
bool localizeInterBlock(MachineFunction &MF,
|
||||
SmallPtrSetImpl<MachineInstr *> &LocalizedInstrs);
|
||||
LocalizedSetVecT &LocalizedInstrs);
|
||||
|
||||
/// Do intra-block localization of already localized instructions.
|
||||
bool localizeIntraBlock(SmallPtrSetImpl<MachineInstr *> &LocalizedInstrs);
|
||||
bool localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs);
|
||||
|
||||
public:
|
||||
Localizer();
|
||||
|
@ -12,7 +12,6 @@
|
||||
#include "llvm/CodeGen/GlobalISel/Localizer.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
|
||||
@ -76,6 +75,7 @@ bool Localizer::shouldLocalize(const MachineInstr &MI) {
|
||||
case TargetOpcode::G_CONSTANT:
|
||||
case TargetOpcode::G_FCONSTANT:
|
||||
case TargetOpcode::G_FRAME_INDEX:
|
||||
case TargetOpcode::G_INTTOPTR:
|
||||
return true;
|
||||
case TargetOpcode::G_GLOBAL_VALUE: {
|
||||
unsigned RematCost = TTI->getGISelRematGlobalCost();
|
||||
@ -104,8 +104,8 @@ bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def,
|
||||
return InsertMBB == Def.getParent();
|
||||
}
|
||||
|
||||
bool Localizer::localizeInterBlock(
|
||||
MachineFunction &MF, SmallPtrSetImpl<MachineInstr *> &LocalizedInstrs) {
|
||||
bool Localizer::localizeInterBlock(MachineFunction &MF,
|
||||
LocalizedSetVecT &LocalizedInstrs) {
|
||||
bool Changed = false;
|
||||
DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef;
|
||||
|
||||
@ -114,7 +114,8 @@ bool Localizer::localizeInterBlock(
|
||||
// we only localize instructions in the entry block here. This might change if
|
||||
// we start doing CSE across blocks.
|
||||
auto &MBB = MF.front();
|
||||
for (MachineInstr &MI : MBB) {
|
||||
for (auto RI = MBB.rbegin(), RE = MBB.rend(); RI != RE; ++RI) {
|
||||
MachineInstr &MI = *RI;
|
||||
if (!shouldLocalize(MI))
|
||||
continue;
|
||||
LLVM_DEBUG(dbgs() << "Should localize: " << MI);
|
||||
@ -166,8 +167,7 @@ bool Localizer::localizeInterBlock(
|
||||
return Changed;
|
||||
}
|
||||
|
||||
bool Localizer::localizeIntraBlock(
|
||||
SmallPtrSetImpl<MachineInstr *> &LocalizedInstrs) {
|
||||
bool Localizer::localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs) {
|
||||
bool Changed = false;
|
||||
|
||||
// For each already-localized instruction which has multiple users, then we
|
||||
@ -179,15 +179,16 @@ bool Localizer::localizeIntraBlock(
|
||||
for (MachineInstr *MI : LocalizedInstrs) {
|
||||
unsigned Reg = MI->getOperand(0).getReg();
|
||||
MachineBasicBlock &MBB = *MI->getParent();
|
||||
// If the instruction has a single use, we would have already moved it right
|
||||
// before its user in localizeInterBlock().
|
||||
if (MRI->hasOneUse(Reg))
|
||||
continue;
|
||||
|
||||
// All of the user MIs of this reg.
|
||||
SmallPtrSet<MachineInstr *, 32> Users;
|
||||
for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg))
|
||||
Users.insert(&UseMI);
|
||||
for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
|
||||
if (!UseMI.isPHI())
|
||||
Users.insert(&UseMI);
|
||||
}
|
||||
// If all the users were PHIs then they're not going to be in our block,
|
||||
// don't try to move this instruction.
|
||||
if (Users.empty())
|
||||
continue;
|
||||
|
||||
MachineBasicBlock::iterator II(MI);
|
||||
++II;
|
||||
@ -216,7 +217,7 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {
|
||||
|
||||
// Keep track of the instructions we localized. We'll do a second pass of
|
||||
// intra-block localization to further reduce live ranges.
|
||||
SmallPtrSet<MachineInstr *, 32> LocalizedInstrs;
|
||||
LocalizedSetVecT LocalizedInstrs;
|
||||
|
||||
bool Changed = localizeInterBlock(MF, LocalizedInstrs);
|
||||
return Changed |= localizeIntraBlock(LocalizedInstrs);
|
||||
|
@ -38,6 +38,8 @@
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define void @test_inttoptr() { ret void }
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
@ -350,8 +352,8 @@ body: |
|
||||
; CHECK: [[GV3:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2
|
||||
; CHECK: [[C4:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2
|
||||
; CHECK: G_STORE [[C4]](s32), [[GV3]](p0) :: (store 4 into @var2)
|
||||
; CHECK: [[GV4:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1
|
||||
; CHECK: [[C5:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3
|
||||
; CHECK: [[GV4:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1
|
||||
; CHECK: G_STORE [[C5]](s32), [[GV4]](p0) :: (store 4 into @var1)
|
||||
; CHECK: [[GV5:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3
|
||||
; CHECK: G_STORE [[C4]](s32), [[GV5]](p0) :: (store 4 into @var3)
|
||||
@ -388,3 +390,63 @@ body: |
|
||||
RET_ReallyLR implicit $w0
|
||||
|
||||
...
|
||||
---
|
||||
name: test_inttoptr
|
||||
alignment: 2
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: test_inttoptr
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: liveins: $w0, $x1
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:gpr(p0) = COPY $x1
|
||||
; CHECK: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 128
|
||||
; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
|
||||
; CHECK: [[C2:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 0
|
||||
; CHECK: [[INTTOPTR:%[0-9]+]]:gpr(p0) = G_INTTOPTR [[C2]](s64)
|
||||
; CHECK: [[INTTOPTR1:%[0-9]+]]:gpr(p0) = G_INTTOPTR [[C]](s64)
|
||||
; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C1]]
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32)
|
||||
; CHECK: G_BRCOND [[TRUNC]](s1), %bb.1
|
||||
; CHECK: G_BR %bb.2
|
||||
; CHECK: bb.1:
|
||||
; CHECK: [[ADD:%[0-9]+]]:gpr(s32) = G_ADD [[COPY]], [[COPY]]
|
||||
; CHECK: G_STORE [[ADD]](s32), [[COPY1]](p0) :: (store 4)
|
||||
; CHECK: [[C3:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 128
|
||||
; CHECK: [[INTTOPTR2:%[0-9]+]]:gpr(p0) = G_INTTOPTR [[C3]](s64)
|
||||
; CHECK: $x0 = COPY [[INTTOPTR2]](p0)
|
||||
; CHECK: RET_ReallyLR implicit $x0
|
||||
; CHECK: bb.2:
|
||||
; CHECK: [[C4:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 0
|
||||
; CHECK: [[INTTOPTR3:%[0-9]+]]:gpr(p0) = G_INTTOPTR [[C4]](s64)
|
||||
; CHECK: $x0 = COPY [[INTTOPTR3]](p0)
|
||||
; CHECK: RET_ReallyLR implicit $x0
|
||||
bb.1:
|
||||
liveins: $w0, $x1
|
||||
|
||||
%0:gpr(s32) = COPY $w0
|
||||
%1:gpr(p0) = COPY $x1
|
||||
%2:gpr(s64) = G_CONSTANT i64 128
|
||||
%4:gpr(s32) = G_CONSTANT i32 0
|
||||
%7:gpr(s64) = G_CONSTANT i64 0
|
||||
%6:gpr(p0) = G_INTTOPTR %7(s64)
|
||||
%3:gpr(p0) = G_INTTOPTR %2(s64)
|
||||
%9:gpr(s32) = G_ICMP intpred(eq), %0(s32), %4
|
||||
%5:gpr(s1) = G_TRUNC %9(s32)
|
||||
G_BRCOND %5(s1), %bb.2
|
||||
G_BR %bb.3
|
||||
|
||||
bb.2:
|
||||
%8:gpr(s32) = G_ADD %0, %0
|
||||
G_STORE %8(s32), %1(p0) :: (store 4)
|
||||
$x0 = COPY %3(p0)
|
||||
RET_ReallyLR implicit $x0
|
||||
|
||||
bb.3:
|
||||
$x0 = COPY %6(p0)
|
||||
RET_ReallyLR implicit $x0
|
||||
|
||||
...
|
||||
|
Loading…
Reference in New Issue
Block a user