mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[PPC64LE] More vector swap optimization TLC
This makes one substantive change and a few stylistic changes to the VSX swap optimization pass.

The substantive change is to permit LXSDX and LXSSPX instructions to participate in swap optimization computations. The previous change to insert a swap following a SUBREG_TO_REG widening operation makes this almost trivial.

I experimented with also permitting STXSDX and STXSSPX instructions. This can be done using similar techniques: we could insert a swap prior to a narrowing COPY operation, and then permit these stores to participate. I prototyped this, but discovered that the pattern of a narrowing COPY followed by an STXSDX does not occur in any of our test-suite code. So instead, I added commentary indicating that this could be done.

Other TLC:
- I changed SH_COPYSCALAR to SH_COPYWIDEN to more clearly indicate the direction of the copy.
- I factored the insertion of swap instructions into a separate function.

Finally, I added a new test case to check that the scalar-to-vector loads are working properly with swap optimization.

llvm-svn: 242838
This commit is contained in:
parent
be5ba86160
commit
76e220a5ef
@ -94,7 +94,7 @@ enum SHValues {
|
||||
SH_NOSWAP_ST,
|
||||
SH_SPLAT,
|
||||
SH_XXPERMDI,
|
||||
SH_COPYSCALAR
|
||||
SH_COPYWIDEN
|
||||
};
|
||||
|
||||
struct PPCVSXSwapRemoval : public MachineFunctionPass {
|
||||
@ -149,6 +149,11 @@ private:
|
||||
// handling. Return true iff any changes are made.
|
||||
bool removeSwaps();
|
||||
|
||||
// Insert a swap instruction from SrcReg to DstReg at the given
|
||||
// InsertPoint.
|
||||
void insertSwap(MachineInstr *MI, MachineBasicBlock::iterator InsertPoint,
|
||||
unsigned DstReg, unsigned SrcReg);
|
||||
|
||||
// Update instructions requiring special handling.
|
||||
void handleSpecialSwappables(int EntryIdx);
|
||||
|
||||
@ -340,6 +345,15 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
|
||||
SwapVector[VecIdx].IsLoad = 1;
|
||||
SwapVector[VecIdx].IsSwap = 1;
|
||||
break;
|
||||
case PPC::LXSDX:
|
||||
case PPC::LXSSPX:
|
||||
// A load of a floating-point value into the high-order half of
|
||||
// a vector register is safe, provided that we introduce a swap
|
||||
// following the load, which will be done by the SUBREG_TO_REG
|
||||
// support. So just mark these as safe.
|
||||
SwapVector[VecIdx].IsLoad = 1;
|
||||
SwapVector[VecIdx].IsSwappable = 1;
|
||||
break;
|
||||
case PPC::STVX:
|
||||
// Non-permuting stores are currently unsafe. We can use special
|
||||
// handling for this in the future. By not marking these as
|
||||
@ -382,7 +396,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
|
||||
else if (isVecReg(MI.getOperand(0).getReg()) &&
|
||||
isScalarVecReg(MI.getOperand(2).getReg())) {
|
||||
SwapVector[VecIdx].IsSwappable = 1;
|
||||
SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYSCALAR;
|
||||
SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYWIDEN;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -417,7 +431,14 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
|
||||
case PPC::STVEHX:
|
||||
case PPC::STVEWX:
|
||||
case PPC::STVXL:
|
||||
// We can handle STXSDX and STXSSPX similarly to LXSDX and LXSSPX,
|
||||
// by adding special handling for narrowing copies as well as
|
||||
// widening ones. However, I've experimented with this, and in
|
||||
// practice we currently do not appear to use STXSDX fed by
|
||||
// a narrowing copy from a full vector register. Since I can't
|
||||
// generate any useful test cases, I've left this alone for now.
|
||||
case PPC::STXSDX:
|
||||
case PPC::STXSSPX:
|
||||
case PPC::VCIPHER:
|
||||
case PPC::VCIPHERLAST:
|
||||
case PPC::VMRGHB:
|
||||
@ -540,7 +561,8 @@ unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg,
|
||||
}
|
||||
|
||||
if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) {
|
||||
SwapVector[VecIdx].MentionsPhysVR = 1;
|
||||
if (!isScalarVecReg(CopySrcReg))
|
||||
SwapVector[VecIdx].MentionsPhysVR = 1;
|
||||
return CopySrcReg;
|
||||
}
|
||||
|
||||
@ -626,8 +648,8 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
|
||||
SwapVector[Repr].WebRejected = 1;
|
||||
|
||||
DEBUG(dbgs() <<
|
||||
format("Web %d rejected for physreg, partial reg, or not swap[pable]\n",
|
||||
Repr));
|
||||
format("Web %d rejected for physreg, partial reg, or not "
|
||||
"swap[pable]\n", Repr));
|
||||
DEBUG(dbgs() << " in " << EntryIdx << ": ");
|
||||
DEBUG(SwapVector[EntryIdx].VSEMI->dump());
|
||||
DEBUG(dbgs() << "\n");
|
||||
@ -740,6 +762,21 @@ void PPCVSXSwapRemoval::markSwapsForRemoval() {
|
||||
}
|
||||
}
|
||||
|
||||
// Create an xxswapd instruction and insert it prior to the given point.
|
||||
// MI is used to determine basic block and debug loc information.
// The new instruction is an XXPERMDI that defines DstReg and reads SrcReg
// for both of its register source operands.
|
||||
// FIXME: When inserting a swap, we should check whether SrcReg is
|
||||
// defined by another swap: SrcReg = XXPERMDI Reg, Reg, 2; If so,
|
||||
// then instead we should generate a copy from Reg to DstReg.
|
||||
void PPCVSXSwapRemoval::insertSwap(MachineInstr *MI,
|
||||
MachineBasicBlock::iterator InsertPoint,
|
||||
unsigned DstReg, unsigned SrcReg) {
|
||||
// Build into MI's parent block, ahead of InsertPoint, reusing MI's debug loc.
BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
|
||||
TII->get(PPC::XXPERMDI), DstReg)
|
||||
.addReg(SrcReg)
|
||||
.addReg(SrcReg)
|
||||
.addImm(2); // Same operand shape as the swap described in the FIXME above.
|
||||
}
|
||||
|
||||
// The identified swap entry requires special handling to allow its
|
||||
// containing computation to be optimized. Perform that handling
|
||||
// here.
|
||||
@ -808,7 +845,7 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
|
||||
// For a copy from a scalar floating-point register to a vector
|
||||
// register, removing swaps will leave the copied value in the
|
||||
// wrong lane. Insert a swap following the copy to fix this.
|
||||
case SHValues::SH_COPYSCALAR: {
|
||||
case SHValues::SH_COPYWIDEN: {
|
||||
MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
|
||||
|
||||
DEBUG(dbgs() << "Changing SUBREG_TO_REG: ");
|
||||
@ -829,7 +866,6 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
|
||||
// assignment problem. In this case we must copy from VRRC to VSRC
|
||||
// prior to the swap, and from VSRC to VRRC following the swap.
|
||||
// Coalescing will usually remove all this mess.
|
||||
|
||||
if (DstRC == &PPC::VRRCRegClass) {
|
||||
unsigned VSRCTmp1 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
|
||||
unsigned VSRCTmp2 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
|
||||
@ -839,11 +875,7 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
|
||||
.addReg(NewVReg);
|
||||
DEBUG(MI->getNextNode()->dump());
|
||||
|
||||
BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
|
||||
TII->get(PPC::XXPERMDI), VSRCTmp2)
|
||||
.addReg(VSRCTmp1)
|
||||
.addReg(VSRCTmp1)
|
||||
.addImm(2);
|
||||
insertSwap(MI, InsertPoint, VSRCTmp2, VSRCTmp1);
|
||||
DEBUG(MI->getNextNode()->getNextNode()->dump());
|
||||
|
||||
BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
|
||||
@ -852,13 +884,7 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
|
||||
DEBUG(MI->getNextNode()->getNextNode()->getNextNode()->dump());
|
||||
|
||||
} else {
|
||||
|
||||
BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
|
||||
TII->get(PPC::XXPERMDI), DstReg)
|
||||
.addReg(NewVReg)
|
||||
.addReg(NewVReg)
|
||||
.addImm(2);
|
||||
|
||||
insertSwap(MI, InsertPoint, DstReg, NewVReg);
|
||||
DEBUG(MI->getNextNode()->dump());
|
||||
}
|
||||
break;
|
||||
@ -944,8 +970,8 @@ void PPCVSXSwapRemoval::dumpSwapVector() {
|
||||
case SH_XXPERMDI:
|
||||
DEBUG(dbgs() << "special:xxpermdi ");
|
||||
break;
|
||||
case SH_COPYSCALAR:
|
||||
DEBUG(dbgs() << "special:copyscalar ");
|
||||
case SH_COPYWIDEN:
|
||||
DEBUG(dbgs() << "special:copywiden ");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
44
test/CodeGen/PowerPC/swaps-le-6.ll
Normal file
44
test/CodeGen/PowerPC/swaps-le-6.ll
Normal file
@ -0,0 +1,44 @@
|
||||
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s
|
||||
|
||||
; These tests verify that VSX swap optimization works when loading a scalar
|
||||
; into a vector register.
|
||||
|
||||
|
||||
@x = global <2 x double> <double 9.970000e+01, double -1.032220e+02>, align 16
|
||||
@z = global <2 x double> <double 2.332000e+01, double 3.111111e+01>, align 16
|
||||
@y = global double 1.780000e+00, align 8
|
||||
|
||||
; bar0: load the vector @x and the scalar @y, insert the scalar into
; element 0 of the vector, and store the resulting vector to @z.
define void @bar0() {
|
||||
entry:
|
||||
%0 = load <2 x double>, <2 x double>* @x, align 16
|
||||
%1 = load double, double* @y, align 8
|
||||
%vecins = insertelement <2 x double> %0, double %1, i32 0
|
||||
store <2 x double> %vecins, <2 x double>* @z, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @bar0
|
||||
; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
|
||||
; CHECK-DAG: lxsdx [[REG2:[0-9]+]]
|
||||
; CHECK: xxswapd [[REG3:[0-9]+]], [[REG2]]
|
||||
; CHECK: xxspltd [[REG4:[0-9]+]], [[REG3]], 1
|
||||
; CHECK: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1
|
||||
; CHECK: stxvd2x [[REG5]]
|
||||
|
||||
; bar1: same as bar0, except that the scalar loaded from @y is inserted
; into element 1 of the vector loaded from @x before the store to @z.
define void @bar1() {
|
||||
entry:
|
||||
%0 = load <2 x double>, <2 x double>* @x, align 16
|
||||
%1 = load double, double* @y, align 8
|
||||
%vecins = insertelement <2 x double> %0, double %1, i32 1
|
||||
store <2 x double> %vecins, <2 x double>* @z, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @bar1
|
||||
; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
|
||||
; CHECK-DAG: lxsdx [[REG2:[0-9]+]]
|
||||
; CHECK: xxswapd [[REG3:[0-9]+]], [[REG2]]
|
||||
; CHECK: xxspltd [[REG4:[0-9]+]], [[REG3]], 1
|
||||
; CHECK: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]]
|
||||
; CHECK: stxvd2x [[REG5]]
|
||||
|
Loading…
x
Reference in New Issue
Block a user