1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[X86] Remove CustomInserter for FMA3 instructions. Looks like since we got full commuting support for FMAs after this was added, the coalescer can now get this right on its own.

Differential Revision: https://reviews.llvm.org/D22799

llvm-svn: 276987
This commit is contained in:
Craig Topper 2016-07-28 15:28:56 +00:00
parent e4df16d417
commit 9332f50e72
3 changed files with 2 additions and 196 deletions

View File

@ -24236,164 +24236,6 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
return BB;
}
// Replace 213-type (isel default) FMA3 instructions with 231-type for
// accumulator loops. Writing back to the accumulator allows the coalescer
// to remove extra copies in the loop.
// FIXME: Do this on AVX512. We don't support 231 variants yet (PR23937).
MachineBasicBlock *
X86TargetLowering::emitFMA3Instr(MachineInstr &MI,
MachineBasicBlock *MBB) const {
MachineOperand &AddendOp = MI.getOperand(3);
// Bail out early if the addend isn't a register - we can't switch these.
if (!AddendOp.isReg())
return MBB;
MachineFunction &MF = *MBB->getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
// Check whether the addend is defined by a PHI:
assert(MRI.hasOneDef(AddendOp.getReg()) && "Multiple defs in SSA?");
MachineInstr &AddendDef = *MRI.def_instr_begin(AddendOp.getReg());
if (!AddendDef.isPHI())
return MBB;
// Look for the following pattern:
// loop:
// %addend = phi [%entry, 0], [%loop, %result]
// ...
// %result<tied1> = FMA213 %m2<tied0>, %m1, %addend
// Replace with:
// loop:
// %addend = phi [%entry, 0], [%loop, %result]
// ...
// %result<tied1> = FMA231 %addend<tied0>, %m1, %m2
for (unsigned i = 1, e = AddendDef.getNumOperands(); i < e; i += 2) {
assert(AddendDef.getOperand(i).isReg());
MachineOperand PHISrcOp = AddendDef.getOperand(i);
MachineInstr &PHISrcInst = *MRI.def_instr_begin(PHISrcOp.getReg());
if (&PHISrcInst == &MI) {
// Found a matching instruction.
unsigned NewFMAOpc = 0;
switch (MI.getOpcode()) {
case X86::VFMADD213PDr:
NewFMAOpc = X86::VFMADD231PDr;
break;
case X86::VFMADD213PSr:
NewFMAOpc = X86::VFMADD231PSr;
break;
case X86::VFMADD213SDr:
NewFMAOpc = X86::VFMADD231SDr;
break;
case X86::VFMADD213SSr:
NewFMAOpc = X86::VFMADD231SSr;
break;
case X86::VFMSUB213PDr:
NewFMAOpc = X86::VFMSUB231PDr;
break;
case X86::VFMSUB213PSr:
NewFMAOpc = X86::VFMSUB231PSr;
break;
case X86::VFMSUB213SDr:
NewFMAOpc = X86::VFMSUB231SDr;
break;
case X86::VFMSUB213SSr:
NewFMAOpc = X86::VFMSUB231SSr;
break;
case X86::VFNMADD213PDr:
NewFMAOpc = X86::VFNMADD231PDr;
break;
case X86::VFNMADD213PSr:
NewFMAOpc = X86::VFNMADD231PSr;
break;
case X86::VFNMADD213SDr:
NewFMAOpc = X86::VFNMADD231SDr;
break;
case X86::VFNMADD213SSr:
NewFMAOpc = X86::VFNMADD231SSr;
break;
case X86::VFNMSUB213PDr:
NewFMAOpc = X86::VFNMSUB231PDr;
break;
case X86::VFNMSUB213PSr:
NewFMAOpc = X86::VFNMSUB231PSr;
break;
case X86::VFNMSUB213SDr:
NewFMAOpc = X86::VFNMSUB231SDr;
break;
case X86::VFNMSUB213SSr:
NewFMAOpc = X86::VFNMSUB231SSr;
break;
case X86::VFMADDSUB213PDr:
NewFMAOpc = X86::VFMADDSUB231PDr;
break;
case X86::VFMADDSUB213PSr:
NewFMAOpc = X86::VFMADDSUB231PSr;
break;
case X86::VFMSUBADD213PDr:
NewFMAOpc = X86::VFMSUBADD231PDr;
break;
case X86::VFMSUBADD213PSr:
NewFMAOpc = X86::VFMSUBADD231PSr;
break;
case X86::VFMADD213PDYr:
NewFMAOpc = X86::VFMADD231PDYr;
break;
case X86::VFMADD213PSYr:
NewFMAOpc = X86::VFMADD231PSYr;
break;
case X86::VFMSUB213PDYr:
NewFMAOpc = X86::VFMSUB231PDYr;
break;
case X86::VFMSUB213PSYr:
NewFMAOpc = X86::VFMSUB231PSYr;
break;
case X86::VFNMADD213PDYr:
NewFMAOpc = X86::VFNMADD231PDYr;
break;
case X86::VFNMADD213PSYr:
NewFMAOpc = X86::VFNMADD231PSYr;
break;
case X86::VFNMSUB213PDYr:
NewFMAOpc = X86::VFNMSUB231PDYr;
break;
case X86::VFNMSUB213PSYr:
NewFMAOpc = X86::VFNMSUB231PSYr;
break;
case X86::VFMADDSUB213PDYr:
NewFMAOpc = X86::VFMADDSUB231PDYr;
break;
case X86::VFMADDSUB213PSYr:
NewFMAOpc = X86::VFMADDSUB231PSYr;
break;
case X86::VFMSUBADD213PDYr:
NewFMAOpc = X86::VFMSUBADD231PDYr;
break;
case X86::VFMSUBADD213PSYr:
NewFMAOpc = X86::VFMSUBADD231PSYr;
break;
default:
llvm_unreachable("Unrecognized FMA variant.");
}
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
MachineInstrBuilder MIB =
BuildMI(MF, MI.getDebugLoc(), TII.get(NewFMAOpc))
.addOperand(MI.getOperand(0))
.addOperand(MI.getOperand(3))
.addOperand(MI.getOperand(2))
.addOperand(MI.getOperand(1));
MBB->insert(MachineBasicBlock::iterator(MI), MIB);
MI.eraseFromParent();
}
}
return MBB;
}
MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
@ -24616,39 +24458,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, BB);
case X86::VFMADD213PDr:
case X86::VFMADD213PSr:
case X86::VFMADD213SDr:
case X86::VFMADD213SSr:
case X86::VFMSUB213PDr:
case X86::VFMSUB213PSr:
case X86::VFMSUB213SDr:
case X86::VFMSUB213SSr:
case X86::VFNMADD213PDr:
case X86::VFNMADD213PSr:
case X86::VFNMADD213SDr:
case X86::VFNMADD213SSr:
case X86::VFNMSUB213PDr:
case X86::VFNMSUB213PSr:
case X86::VFNMSUB213SDr:
case X86::VFNMSUB213SSr:
case X86::VFMADDSUB213PDr:
case X86::VFMADDSUB213PSr:
case X86::VFMSUBADD213PDr:
case X86::VFMSUBADD213PSr:
case X86::VFMADD213PDYr:
case X86::VFMADD213PSYr:
case X86::VFMSUB213PDYr:
case X86::VFMSUB213PSYr:
case X86::VFNMADD213PDYr:
case X86::VFNMADD213PSYr:
case X86::VFNMSUB213PDYr:
case X86::VFNMSUB213PSYr:
case X86::VFMADDSUB213PDYr:
case X86::VFMADDSUB213PSYr:
case X86::VFMSUBADD213PDYr:
case X86::VFMSUBADD213PSYr:
return emitFMA3Instr(MI, BB);
case X86::LCMPXCHG8B_SAVE_EBX:
case X86::LCMPXCHG16B_SAVE_RBX: {
unsigned BasePtr =

View File

@ -39,7 +39,6 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
PatFrag MemFrag128, PatFrag MemFrag256,
ValueType OpVT128, ValueType OpVT256,
SDPatternOperator Op = null_frag> {
let usesCustomInserter = 1 in
def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@ -55,7 +54,6 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1,
(MemFrag128 addr:$src3))))]>;
let usesCustomInserter = 1 in
def Yr : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
@ -144,7 +142,6 @@ let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
multiclass fma3s_rm<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, RegisterClass RC,
SDPatternOperator OpNode = null_frag> {
let usesCustomInserter = 1 in
def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,

View File

@ -9,9 +9,9 @@ target triple = "x86_64-apple-macosx"
; CHECK-NOT: {{.*}}, %xmm0
; %addr lives in rdi.
; %addr2 lives in rsi.
; CHECK: vmovss (%rsi), [[ADDR2:%xmm[0-9]+]]
; CHECK: vmovss (%rdi), [[ADDR:%xmm[0-9]+]]
; The assembly syntax is in the reverse order.
; CHECK: vfmadd231ss (%rdi), [[ADDR2]], %xmm0
; CHECK: vfmadd231ss (%rsi), [[ADDR]], %xmm0
define void @test1(float* %addr, float* %addr2, float %arg) {
entry:
br label %loop