mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[mips] Use MTHC1 when it is available (MIPS32r2 and later) for both FP32 and FP64
Summary: To make this work for both the AFGR64 and FGR64 register sets, the instruction definitions had to be made consistent with the white lie (that the instruction reads the lower 32 bits of the register) that is told when they are generated by expandBuildPairF64(). Also corrected the definitions of hasMips32r2() and hasMips64r2() to include MIPS32r6 and MIPS64r6. Depends on D3956. Reviewers: jkolek, zoran.jovanovic, vmedic. Reviewed By: vmedic. Differential Revision: http://reviews.llvm.org/D3957 llvm-svn: 210771
This commit is contained in:
parent
44e85bdbef
commit
2cb9c461b4
@ -153,6 +153,15 @@ class MTC1_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC,
|
||||
InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"),
|
||||
[(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR, opstr>;
|
||||
|
||||
// Instruction class used by the MTHC1_D32 / MTHC1_D64 definitions.
//   opstr - assembly mnemonic; also passed as the last InstSE argument.
//   DstRC - register operand class of the result $fs (and of the tied $fs_in).
//   SrcRC - register operand class of the source $rt.
//   Itin  - instruction itinerary class.
// The selection pattern list is empty, so instances of this class are only
// created explicitly (see expandBuildPairF64).
class MTC1_64_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC,
|
||||
InstrItinClass Itin> :
|
||||
InstSE<(outs DstRC:$fs), (ins DstRC:$fs_in, SrcRC:$rt),
|
||||
!strconcat(opstr, "\t$rt, $fs"), [], Itin, FrmFR, opstr> {
|
||||
// $fs_in is part of a white lie to work around a widespread bug in the FPU
|
||||
// implementation. See expandBuildPairF64 for details.
|
||||
// The constraint below ties the extra input to the output register.
let Constraints = "$fs = $fs_in";
|
||||
}
|
||||
|
||||
class LW_FT<string opstr, RegisterOperand RC, InstrItinClass Itin,
|
||||
SDPatternOperator OpNode= null_frag> :
|
||||
InstSE<(outs RC:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
|
||||
@ -355,8 +364,12 @@ def MTC1 : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, II_MTC1,
|
||||
bitconvert>, MFC1_FM<4>;
|
||||
def MFHC1 : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, FGRH32Opnd, II_MFHC1>,
|
||||
MFC1_FM<3>, ISA_MIPS32R2;
|
||||
def MTHC1 : MMRel, MTC1_FT<"mthc1", FGRH32Opnd, GPR32Opnd, II_MTHC1>,
|
||||
MFC1_FM<7>, ISA_MIPS32R2;
|
||||
// mthc1 variants that take the whole 64-bit FPR as a tied input/output (see
// MTC1_64_FT). expandBuildPairF64 selects MTHC1_D64 when FP64 is set and
// MTHC1_D32 otherwise, so the register operand class of each definition has
// to agree with its predicate:
//   MTHC1_D32 - NotFP64bit: 64-bit values use the AFGR64 register set.
//   MTHC1_D64 - IsFP64bit:  64-bit values use the FGR64 register set.
def MTHC1_D32 : MMRel, MTC1_64_FT<"mthc1", AFGR64Opnd, GPR32Opnd, II_MTHC1>,
                MFC1_FM<7>, ISA_MIPS32R2, AdditionalRequires<[NotFP64bit]>;
def MTHC1_D64 : MTC1_64_FT<"mthc1", FGR64Opnd, GPR32Opnd, II_MTHC1>,
                MFC1_FM<7>, ISA_MIPS32R2, AdditionalRequires<[IsFP64bit]> {
  // Same encoding as MTHC1_D32; a separate decoder namespace keeps the two
  // definitions apart.
  let DecoderNamespace = "Mips64";
}
|
||||
def DMFC1 : MFC1_FT<"dmfc1", GPR64Opnd, FGR64Opnd, II_DMFC1,
|
||||
bitconvert>, MFC1_FM<1>, ISA_MIPS3;
|
||||
def DMTC1 : MTC1_FT<"dmtc1", FGR64Opnd, GPR64Opnd, II_DMTC1,
|
||||
|
@ -542,20 +542,31 @@ void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB,
|
||||
const MCInstrDesc& Mtc1Tdd = get(Mips::MTC1);
|
||||
DebugLoc dl = I->getDebugLoc();
|
||||
const TargetRegisterInfo &TRI = getRegisterInfo();
|
||||
bool HasMTHC1 = TM.getSubtarget<MipsSubtarget>().hasMips32r2() ||
|
||||
TM.getSubtarget<MipsSubtarget>().hasMips32r6();
|
||||
|
||||
// For FP32 mode:
|
||||
// mtc1 Lo, $fp
|
||||
// mtc1 Hi, $fp + 1
|
||||
// For FP64 mode:
|
||||
// When mthc1 is available, use:
|
||||
// mtc1 Lo, $fp
|
||||
// mthc1 Hi, $fp
|
||||
//
|
||||
// Otherwise, for FP64:
|
||||
// spill + reload via ldc1
|
||||
// This has not been implemented since FP64 on MIPS32 and earlier is not
|
||||
// supported.
|
||||
//
|
||||
// Otherwise, for FP32:
|
||||
// mtc1 Lo, $fp
|
||||
// mtc1 Hi, $fp + 1
|
||||
|
||||
BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_lo))
|
||||
.addReg(LoReg);
|
||||
|
||||
if (FP64) {
|
||||
// FIXME: The .addReg(DstReg, RegState::Implicit) is a white lie used to
|
||||
// temporarily work around a widespread bug in the -mfp64 support.
|
||||
if (HasMTHC1 || FP64) {
|
||||
assert(TM.getSubtarget<MipsSubtarget>().hasMips32r2() &&
|
||||
"MTHC1 requires MIPS32r2");
|
||||
|
||||
// FIXME: The .addReg(DstReg) is a white lie used to temporarily work
|
||||
// around a widespread bug in the -mfp64 support.
|
||||
// The problem is that none of the 32-bit fpu ops mention the fact
|
||||
// that they clobber the upper 32-bits of the 64-bit FPR. Fixing that
|
||||
// requires a major overhaul of the FPU implementation which can't
|
||||
@ -565,9 +576,9 @@ void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB,
|
||||
// We therefore pretend that it reads the bottom 32-bits to
|
||||
// artificially create a dependency and prevent the scheduler
|
||||
// changing the behaviour of the code.
|
||||
BuildMI(MBB, I, dl, get(Mips::MTHC1), TRI.getSubReg(DstReg, Mips::sub_hi))
|
||||
.addReg(HiReg)
|
||||
.addReg(DstReg, RegState::Implicit);
|
||||
BuildMI(MBB, I, dl, get(FP64 ? Mips::MTHC1_D64 : Mips::MTHC1_D32), DstReg)
|
||||
.addReg(DstReg)
|
||||
.addReg(HiReg);
|
||||
} else
|
||||
BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_hi))
|
||||
.addReg(HiReg);
|
||||
|
@ -159,12 +159,17 @@ public:
|
||||
bool hasMips4_32() const { return HasMips4_32; }
|
||||
bool hasMips4_32r2() const { return HasMips4_32r2; }
|
||||
bool hasMips32() const { return MipsArchVersion >= Mips32; }
|
||||
bool hasMips32r2() const { return MipsArchVersion == Mips32r2 ||
|
||||
MipsArchVersion == Mips64r2; }
|
||||
bool hasMips32r6() const { return MipsArchVersion == Mips32r6 ||
|
||||
MipsArchVersion == Mips64r6; }
|
||||
bool hasMips32r2() const {
|
||||
return MipsArchVersion == Mips32r2 || MipsArchVersion == Mips32r6 ||
|
||||
MipsArchVersion == Mips64r2;
|
||||
}
|
||||
bool hasMips32r6() const {
|
||||
return MipsArchVersion == Mips32r6 || MipsArchVersion == Mips64r6;
|
||||
}
|
||||
bool hasMips64() const { return MipsArchVersion >= Mips64; }
|
||||
bool hasMips64r2() const { return MipsArchVersion == Mips64r2; }
|
||||
bool hasMips64r2() const {
|
||||
return MipsArchVersion == Mips64r2 || MipsArchVersion == Mips64r6;
|
||||
}
|
||||
bool hasMips64r6() const { return MipsArchVersion == Mips64r6; }
|
||||
|
||||
bool hasCnMips() const { return HasCnMips; }
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=mips -mattr=-fp64 < %s | FileCheck -check-prefix=CHECK-FP32 %s
|
||||
; RUN: llc -march=mips -mattr=+fp64 < %s | FileCheck -check-prefix=CHECK-FP64 %s
|
||||
; RUN: llc -march=mips -mcpu=mips32r2 -mattr=+fp64 < %s | FileCheck -check-prefix=CHECK-FP64 %s
|
||||
|
||||
; This test case is a simplified version of an llvm-stress generated test with
|
||||
; seed=3718491962.
|
||||
|
@ -28,9 +28,9 @@ entry:
|
||||
; CHECK: ori $[[REG2a:[0-9]+]], $[[REG1a]], 49353
|
||||
; CHECK: lui $[[REG1b:[0-9]+]], 21403
|
||||
; CHECK: ori $[[REG2b:[0-9]+]], $[[REG1b]], 34951
|
||||
; CHECK: mtc1 $[[REG2b]], $f[[REG3b:[0-9]+]]
|
||||
; CHECK: mtc1 $[[REG2a]], $f[[REG3a:[0-9]+]]
|
||||
; CHECK: sdc1 $f[[REG3b]], 0(${{[0-9]+}})
|
||||
; CHECK: mtc1 $[[REG2b]], $f[[REG3:[0-9]+]]
|
||||
; CHECK: mthc1 $[[REG2a]], $f[[REG3]]
|
||||
; CHECK: sdc1 $f[[REG3]], 0(${{[0-9]+}})
|
||||
; CHECK: .end d1
|
||||
ret void
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=FP32 -check-prefix=CHECK
|
||||
; RUN: llc -march=mips < %s | FileCheck %s -check-prefix=FP32 -check-prefix=CHECK
|
||||
; RUN: llc -march=mipsel -mattr=+fp64 < %s | FileCheck %s -check-prefix=FP64 -check-prefix=CHECK
|
||||
; RUN: llc -march=mips -mattr=+fp64 < %s | FileCheck %s -check-prefix=FP64 -check-prefix=CHECK
|
||||
; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=+fp64 < %s | FileCheck %s -check-prefix=FP64 -check-prefix=CHECK
|
||||
; RUN: llc -march=mips -mcpu=mips32r2 -mattr=+fp64 < %s | FileCheck %s -check-prefix=FP64 -check-prefix=CHECK
|
||||
|
||||
@a = external global i32
|
||||
|
||||
|
@ -17,7 +17,7 @@ entry:
|
||||
|
||||
; 32R2: ext $[[EXT:[0-9]+]], ${{[0-9]+}}, 31, 1
|
||||
; 32R2: ins $[[INS:[0-9]+]], $[[EXT]], 31, 1
|
||||
; 32R2: mtc1 $[[INS]], $f1
|
||||
; 32R2: mthc1 $[[INS]], $f0
|
||||
|
||||
; 64: daddiu $[[T0:[0-9]+]], $zero, 1
|
||||
; 64: dsll $[[MSK1:[0-9]+]], $[[T0]], 63
|
||||
|
@ -196,8 +196,8 @@ entry:
|
||||
|
||||
; 32R2: ldc1 $[[T0:f[0-9]+]], 16($sp)
|
||||
; 32R2: madd.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
|
||||
; 32R2: mtc1 $zero, $[[T2:f[02468]+]]
|
||||
; 32R2: mtc1 $zero, ${{f[13579]+}}
|
||||
; 32R2: mtc1 $zero, $[[T2:f[0-9]+]]
|
||||
; 32R2: mthc1 $zero, $[[T2]]
|
||||
; 32R2: add.d $f0, $[[T1]], $[[T2]]
|
||||
|
||||
; 32R6-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp)
|
||||
@ -238,8 +238,8 @@ entry:
|
||||
|
||||
; 32R2: ldc1 $[[T0:f[0-9]+]], 16($sp)
|
||||
; 32R2: msub.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
|
||||
; 32R2: mtc1 $zero, $[[T2:f[02468]+]]
|
||||
; 32R2: mtc1 $zero, ${{f[13579]+}}
|
||||
; 32R2: mtc1 $zero, $[[T2:f[0-9]+]]
|
||||
; 32R2: mthc1 $zero, $[[T2]]
|
||||
; 32R2: add.d $f0, $[[T1]], $[[T2]]
|
||||
|
||||
; 32R6-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp)
|
||||
@ -283,8 +283,8 @@ entry:
|
||||
|
||||
; 32R2-NAN: ldc1 $[[T0:f[0-9]+]], 16($sp)
|
||||
; 32R2-NAN: madd.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
|
||||
; 32R2-NAN: mtc1 $zero, $[[T2:f[02468]+]]
|
||||
; 32R2-NAN: mtc1 $zero, ${{f[13579]+}}
|
||||
; 32R2-NAN: mtc1 $zero, $[[T2:f[0-9]+]]
|
||||
; 32R2-NAN: mthc1 $zero, $[[T2]]
|
||||
; 32R2-NAN: sub.d $f0, $[[T2]], $[[T1]]
|
||||
|
||||
; 32R6-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp)
|
||||
@ -330,8 +330,8 @@ entry:
|
||||
|
||||
; 32R2-NAN: ldc1 $[[T0:f[0-9]+]], 16($sp)
|
||||
; 32R2-NAN: msub.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
|
||||
; 32R2-NAN: mtc1 $zero, $[[T2:f[02468]+]]
|
||||
; 32R2-NAN: mtc1 $zero, ${{f[13579]+}}
|
||||
; 32R2-NAN: mtc1 $zero, $[[T2:f[0-9]+]]
|
||||
; 32R2-NAN: mthc1 $zero, $[[T2]]
|
||||
; 32R2-NAN: sub.d $f0, $[[T2]], $[[T1]]
|
||||
|
||||
; 32R6-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp)
|
||||
|
@ -13,7 +13,7 @@
|
||||
; LE-PIC-DAG: lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
|
||||
; LE-PIC-DAG: lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
|
||||
; LE-PIC-DAG: mtc1 $[[R0]], $f0
|
||||
; LE-PIC-DAG: mtc1 $[[R1]], $f1
|
||||
; LE-PIC-DAG: mthc1 $[[R1]], $f0
|
||||
; LE-STATIC-LABEL: test_ldc1:
|
||||
; LE-STATIC-DAG: lui $[[R0:[0-9]+]], %hi(g0)
|
||||
; LE-STATIC-DAG: lw $[[R1:[0-9]+]], %lo(g0)($[[R0]])
|
||||
@ -66,7 +66,7 @@ entry:
|
||||
; LE-PIC-DAG: lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
|
||||
; LE-PIC-DAG: lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
|
||||
; LE-PIC-DAG: mtc1 $[[R0]], $f0
|
||||
; LE-PIC-DAG: mtc1 $[[R1]], $f1
|
||||
; LE-PIC-DAG: mthc1 $[[R1]], $f0
|
||||
; CHECK-LDC1-SDC1-LABEL: test_ldxc1:
|
||||
; CHECK-LDC1-SDC1: ldxc1 $f{{[0-9]+}}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user