1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 02:33:06 +01:00

[Power9] Spill gprs to vector registers rather than stack

This patch updates register allocation to enable spilling GPRs to
volatile vector registers rather than the stack. It can be enabled
for Power9 with the option -ppc-enable-gpr-to-vsr-spills.

Differential Revision: https://reviews.llvm.org/D34815

llvm-svn: 313886
This commit is contained in:
Zaara Syeda 2017-09-21 16:12:33 +00:00
parent c90e12840a
commit 465ce59a0d
5 changed files with 145 additions and 1 deletions

View File

@ -46,6 +46,12 @@ using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
#include "PPCGenInstrInfo.inc"
// Incremented by expandPostRAPseudo each time a SPILLTOVSR_ST / SPILLTOVSR_STX
// pseudo whose register is in VSFRC is rewritten to DFSTOREf64 / STXSDX.
STATISTIC(NumStoreSPILLVSRRCAsVec,
"Number of spillvsrrc spilled to stack as vec");
// Incremented by expandPostRAPseudo each time a SPILLTOVSR_ST / SPILLTOVSR_STX
// pseudo whose register is not in VSFRC is rewritten to STD / STDX.
STATISTIC(NumStoreSPILLVSRRCAsGpr,
"Number of spillvsrrc spilled to stack as gpr");
// Incremented by copyPhysReg for every G8RC -> VSFRC copy it emits as MTVSRD.
STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc");
// Hidden command-line switch "disable-ppc-ctrloop-analysis".
static cl::opt<bool>
    DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
                       cl::desc("Disable analysis for CTR loops"));
@ -280,6 +286,7 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
case PPC::QVLFSXs:
case PPC::QVLFDXb:
case PPC::RESTORE_VRSAVE:
case PPC::SPILLTOVSR_LD:
// Check for the operands added by addFrameReference (the immediate is the
// offset which defaults to 0).
if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
@ -333,6 +340,7 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
case PPC::QVSTFSXs:
case PPC::QVSTFDXb:
case PPC::SPILL_VRSAVE:
case PPC::SPILLTOVSR_ST:
// Check for the operands added by addFrameReference (the immediate is the
// offset which defaults to 0).
if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
@ -917,7 +925,18 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(SrcReg);
getKillRegState(KillSrc);
return;
}
} else if (PPC::G8RCRegClass.contains(SrcReg) &&
PPC::VSFRCRegClass.contains(DestReg)) {
BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg);
NumGPRtoVSRSpill++;
getKillRegState(KillSrc);
return;
} else if (PPC::VSFRCRegClass.contains(SrcReg) &&
PPC::G8RCRegClass.contains(DestReg)) {
BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg);
getKillRegState(KillSrc);
return;
}
unsigned Opc;
if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
@ -1061,6 +1080,11 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
NonRI = true;
} else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILLTOVSR_ST))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
} else {
llvm_unreachable("Unknown regclass!");
}
@ -1182,6 +1206,9 @@ bool PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFDXb), DestReg),
FrameIdx));
NonRI = true;
} else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILLTOVSR_LD),
DestReg), FrameIdx));
} else {
llvm_unreachable("Unknown regclass!");
}
@ -1995,6 +2022,48 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.setDesc(get(Opcode));
return true;
}
case PPC::SPILLTOVSR_LD: {
  // Pick the real load from the register that was assigned to operand 0.
  const unsigned LoadedReg = MI.getOperand(0).getReg();
  if (!PPC::VSFRCRegClass.contains(LoadedReg)) {
    // Not a VSFRC register: re-describe the pseudo as LD and finish.
    MI.setDesc(get(PPC::LD));
    return true;
  }
  // VSFRC register: re-describe as DFLOADf64, then run the expansion again on
  // the re-described instruction.
  MI.setDesc(get(PPC::DFLOADf64));
  return expandPostRAPseudo(MI);
}
case PPC::SPILLTOVSR_ST: {
  // Pick the real store from the register that was assigned to operand 0,
  // and record which flavour was chosen.
  const unsigned StoredReg = MI.getOperand(0).getReg();
  if (!PPC::VSFRCRegClass.contains(StoredReg)) {
    // Not a VSFRC register: re-describe the pseudo as STD and finish.
    NumStoreSPILLVSRRCAsGpr++;
    MI.setDesc(get(PPC::STD));
    return true;
  }
  // VSFRC register: re-describe as DFSTOREf64, then run the expansion again
  // on the re-described instruction.
  NumStoreSPILLVSRRCAsVec++;
  MI.setDesc(get(PPC::DFSTOREf64));
  return expandPostRAPseudo(MI);
}
case PPC::SPILLTOVSR_LDX: {
  // Indexed-form reload: LXSDX when operand 0 is a VSFRC register, LDX
  // otherwise.
  const unsigned LoadedReg = MI.getOperand(0).getReg();
  const bool InVSFRC = PPC::VSFRCRegClass.contains(LoadedReg);
  MI.setDesc(get(InVSFRC ? PPC::LXSDX : PPC::LDX));
  return true;
}
case PPC::SPILLTOVSR_STX: {
  // Indexed-form spill: STXSDX when operand 0 is a VSFRC register, STDX
  // otherwise. The matching statistic is bumped in either case.
  const unsigned StoredReg = MI.getOperand(0).getReg();
  if (!PPC::VSFRCRegClass.contains(StoredReg)) {
    NumStoreSPILLVSRRCAsGpr++;
    MI.setDesc(get(PPC::STDX));
    return true;
  }
  NumStoreSPILLVSRRCAsVec++;
  MI.setDesc(get(PPC::STXSDX));
  return true;
}
case PPC::CFENCE8: {
auto Val = MI.getOperand(0).getReg();
BuildMI(MBB, MI, DL, get(PPC::CMPD), PPC::CR7).addReg(Val).addReg(Val);

View File

@ -47,6 +47,13 @@ def vssrc : RegisterOperand<VSSRC> {
let ParserMatchClass = PPCRegVSSRCAsmOperand;
}
// Assembly-parser operand class for SPILLTOVSRRC operands; its predicate
// method is isVSRegNumber.
def PPCRegSPILLTOVSRRCAsmOperand : AsmOperandClass {
  let Name = "RegSPILLTOVSRRC";
  let PredicateMethod = "isVSRegNumber";
}
// Register operand drawn from the SPILLTOVSRRC class, matched by the parser
// through PPCRegSPILLTOVSRRCAsmOperand.
def spilltovsrrc : RegisterOperand<SPILLTOVSRRC> {
  let ParserMatchClass = PPCRegSPILLTOVSRRCAsmOperand;
}
// Little-endian-specific nodes.
def SDT_PPClxvd2x : SDTypeProfile<1, 1, [
SDTCisVT<0, v2f64>, SDTCisPtrTy<1>
@ -2863,6 +2870,23 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(f32 (DFLOADf32 ixaddr:$src))>;
} // end HasP9Vector, AddedComplexity
// Spill/reload pseudo-instructions over the spilltovsrrc operand, available
// only with HasP9Vector. The *_ST / *_LD forms take a memrix address, the
// *_STX / *_LDX forms take a memrr address.
let Predicates = [HasP9Vector], isPseudo = 1 in {
  // Stores: no outputs, the spilled register is the first input.
  let mayStore = 1 in {
    def SPILLTOVSR_STX : Pseudo<(outs),
                                (ins spilltovsrrc:$XT, memrr:$dst),
                                "#SPILLTOVSR_STX", []>;
    def SPILLTOVSR_ST : Pseudo<(outs),
                               (ins spilltovsrrc:$XT, memrix:$dst),
                               "#SPILLTOVSR_ST", []>;
  }
  // Loads: the reloaded register is the only output.
  let mayLoad = 1 in {
    def SPILLTOVSR_LDX : Pseudo<(outs spilltovsrrc:$XT),
                                (ins memrr:$src),
                                "#SPILLTOVSR_LDX", []>;
    def SPILLTOVSR_LD : Pseudo<(outs spilltovsrrc:$XT),
                               (ins memrix:$src),
                               "#SPILLTOVSR_LD", []>;
  }
}
// Integer extend helper dags 32 -> 64
def AnyExts {
dag A = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32);

View File

@ -21,6 +21,7 @@
#include "PPCTargetMachine.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@ -49,6 +50,9 @@ using namespace llvm;
#define GET_REGINFO_TARGET_DESC
#include "PPCGenRegisterInfo.inc"
// Incremented in getLargestLegalSuperClass when, under the ELFv2 ABI with
// -ppc-enable-gpr-to-vsr-spills set, the queried class is GPRC (the class
// itself is not changed on that path).
STATISTIC(InflateGPRC, "Number of gprc inputs for getLargestLegalClass");
// Incremented in getLargestLegalSuperClass when a G8RC query is answered with
// SPILLTOVSRRC (ELFv2 ABI, P9 vector support, option enabled).
STATISTIC(InflateGP8RC, "Number of g8rc inputs for getLargestLegalClass");
// Hidden command-line switch "ppc-use-base-pointer"; initialised to true.
static cl::opt<bool> EnableBasePointer(
    "ppc-use-base-pointer", cl::Hidden, cl::init(true),
    cl::desc("Enable use of a base pointer for complex stack frames"));
@ -57,6 +61,10 @@ static cl::opt<bool>
AlwaysBasePointer("ppc-always-use-base-pointer", cl::Hidden, cl::init(false),
cl::desc("Force the use of a base pointer in every function"));
// Hidden command-line switch "ppc-enable-gpr-to-vsr-spills"; initialised to
// false. getLargestLegalSuperClass reads it before widening G8RC to
// SPILLTOVSRRC.
static cl::opt<bool> EnableGPRToVecSpills(
    "ppc-enable-gpr-to-vsr-spills", cl::Hidden, cl::init(false),
    cl::desc("Enable spills from gpr to vsr rather than stack"));
PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
: PPCGenRegisterInfo(TM.isPPC64() ? PPC::LR8 : PPC::LR,
TM.isPPC64() ? 0 : 1,
@ -82,6 +90,8 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
// VSX
ImmToIdxMap[PPC::DFLOADf32] = PPC::LXSSPX;
ImmToIdxMap[PPC::DFLOADf64] = PPC::LXSDX;
ImmToIdxMap[PPC::SPILLTOVSR_LD] = PPC::SPILLTOVSR_LDX;
ImmToIdxMap[PPC::SPILLTOVSR_ST] = PPC::SPILLTOVSR_STX;
ImmToIdxMap[PPC::DFSTOREf32] = PPC::STXSSPX;
ImmToIdxMap[PPC::DFSTOREf64] = PPC::STXSDX;
ImmToIdxMap[PPC::LXV] = PPC::LXVX;
@ -328,6 +338,18 @@ PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
// With VSX, we can inflate various sub-register classes to the full VSX
// register set.
// For Power9 we allow the user to enable GPR to vector spills.
// FIXME: Currently limited to spilling G8RC. A follow-on patch will add
// support to spill GPRC.
if (TM.isELFv2ABI()) {
if (Subtarget.hasP9Vector() && EnableGPRToVecSpills &&
RC == &PPC::G8RCRegClass) {
InflateGP8RC++;
return &PPC::SPILLTOVSRRCRegClass;
}
if (RC == &PPC::GPRCRegClass && EnableGPRToVecSpills)
InflateGPRC++;
}
if (RC == &PPC::F8RCRegClass)
return &PPC::VSFRCRegClass;
else if (RC == &PPC::VRRCRegClass)

View File

@ -305,6 +305,11 @@ def VFRC : RegisterClass<"PPC", [f64], 64,
VF22, VF21, VF20)>;
// f64 scalar register class: the registers of F8RC followed by those of VFRC.
def VSFRC : RegisterClass<"PPC", [f64], 64,
                          (add F8RC, VFRC)>;
// Allow spilling GPRs into caller-saved VSRs: the class is all of G8RC
// followed by VSFRC with VF31..VF20 and F31..F14 removed.
def SPILLTOVSRRC : RegisterClass<"PPC", [i64, f64], 64,
                                 (add G8RC,
                                      (sub VSFRC,
                                           (sequence "VF%u", 31, 20),
                                           (sequence "F%u", 31, 14)))>;
// Single-precision (f32) scalars in VSX registers; same register list as
// VSFRC.
def VSSRC : RegisterClass<"PPC", [f32], 32,
                          (add VSFRC)>;

View File

@ -0,0 +1,24 @@
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-enable-gpr-to-vsr-spills < %s | FileCheck %s
; Test body: when %a < %b, execute an inline-asm "add" of %a and %b whose
; clobber list names r0 through r29, then return (%b + %a) + (asm result * %a).
; On the other path the result is undef.
define signext i32 @foo(i32 signext %a, i32 signext %b) {
entry:
%cmp = icmp slt i32 %a, %b
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
%0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29}"(i32 %a, i32 %b)
%mul = mul nsw i32 %0, %a
%add = add i32 %b, %a
%tmp = add i32 %add, %mul
br label %if.end
if.end: ; preds = %if.then, %entry
%e.0 = phi i32 [ %tmp, %if.then ], [ undef, %entry ]
ret i32 %e.0
; Expected output, in order: register 3 is copied with mr; register 4 is moved
; into a vector-scalar register with mtvsrd; that register is read back with
; mffprd ahead of each of the two add instructions that follow.
; CHECK: @foo
; CHECK: mr [[NEWREG:[0-9]+]], 3
; CHECK: mtvsrd [[NEWREG2:[0-9]+]], 4
; CHECK: mffprd [[REG1:[0-9]+]], [[NEWREG2]]
; CHECK: add {{[0-9]+}}, [[NEWREG]], [[REG1]]
; CHECK: mffprd [[REG2:[0-9]+]], [[NEWREG2]]
; CHECK: add {{[0-9]+}}, [[REG2]], [[NEWREG]]
}