mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
Fix for PR 1505 (and 1489). Rewrite X87 register
model to include f32 variants. Some factoring improvements forthcoming. llvm-svn: 37847
This commit is contained in:
parent
f85629081b
commit
7af19491d3
@ -3016,11 +3016,33 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
|
||||
}
|
||||
break;
|
||||
|
||||
case ISD::FP_ROUND:
|
||||
if (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)) ==
|
||||
TargetLowering::Expand) {
|
||||
// The only way we can lower this is to turn it into a TRUNCSTORE,
|
||||
// EXTLOAD pair, targeting a temporary location (a stack slot).
|
||||
|
||||
// NOTE: there is a choice here between constantly creating new stack
|
||||
// slots and always reusing the same one. We currently always create
|
||||
// new ones, as reuse may inhibit scheduling.
|
||||
MVT::ValueType VT = Op.getValueType(); // 32
|
||||
const Type *Ty = MVT::getTypeForValueType(VT);
|
||||
uint64_t TySize = TLI.getTargetData()->getTypeSize(Ty);
|
||||
unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty);
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
int SSFI =
|
||||
MF.getFrameInfo()->CreateStackObject(TySize, Align);
|
||||
SDOperand StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
|
||||
Result = DAG.getTruncStore(DAG.getEntryNode(), Node->getOperand(0),
|
||||
StackSlot, NULL, 0, VT);
|
||||
Result = DAG.getLoad(VT, Result, StackSlot, NULL, 0, VT);
|
||||
break;
|
||||
}
|
||||
// FALL THROUGH
|
||||
case ISD::ANY_EXTEND:
|
||||
case ISD::ZERO_EXTEND:
|
||||
case ISD::SIGN_EXTEND:
|
||||
case ISD::FP_EXTEND:
|
||||
case ISD::FP_ROUND:
|
||||
switch (getTypeAction(Node->getOperand(0).getValueType())) {
|
||||
case Expand: assert(0 && "Shouldn't need to expand other operators here!");
|
||||
case Legal:
|
||||
|
@ -3918,15 +3918,7 @@ TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy,
|
||||
Op = DAG.getNode(ExtOp, getTypeToTransformTo(VT), Op);
|
||||
} else {
|
||||
assert(MVT::isFloatingPoint(VT) && "Not int or FP?");
|
||||
// A true promotion would change the size of the argument.
|
||||
// Instead, pretend this is an int. If FP objects are not
|
||||
// passed the same as ints, the original type should be Legal
|
||||
// and we should not get here.
|
||||
Op = DAG.getNode(ISD::BIT_CONVERT,
|
||||
VT==MVT::f32 ? MVT::i32 :
|
||||
(VT==MVT::f64 ? MVT::i64 :
|
||||
MVT::Other),
|
||||
Op);
|
||||
Op = DAG.getNode(ISD::FP_EXTEND, getTypeToTransformTo(VT), Op);
|
||||
}
|
||||
Ops.push_back(Op);
|
||||
Ops.push_back(DAG.getConstant(Flags, MVT::i32));
|
||||
|
@ -324,61 +324,101 @@ static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) {
|
||||
// concrete X86 instruction which uses the register stack.
|
||||
//
|
||||
static const TableEntry OpcodeTable[] = {
|
||||
{ X86::FpABS , X86::FABS },
|
||||
{ X86::FpABS32 , X86::FABS },
|
||||
{ X86::FpABS64 , X86::FABS },
|
||||
{ X86::FpADD32m , X86::FADD32m },
|
||||
{ X86::FpADD64m , X86::FADD64m },
|
||||
{ X86::FpCHS , X86::FCHS },
|
||||
{ X86::FpCMOVB , X86::FCMOVB },
|
||||
{ X86::FpCMOVBE , X86::FCMOVBE },
|
||||
{ X86::FpCMOVE , X86::FCMOVE },
|
||||
{ X86::FpCMOVNB , X86::FCMOVNB },
|
||||
{ X86::FpCMOVNBE , X86::FCMOVNBE },
|
||||
{ X86::FpCMOVNE , X86::FCMOVNE },
|
||||
{ X86::FpCMOVNP , X86::FCMOVNP },
|
||||
{ X86::FpCMOVP , X86::FCMOVP },
|
||||
{ X86::FpCOS , X86::FCOS },
|
||||
{ X86::FpCHS32 , X86::FCHS },
|
||||
{ X86::FpCHS64 , X86::FCHS },
|
||||
{ X86::FpCMOVB32 , X86::FCMOVB },
|
||||
{ X86::FpCMOVB64 , X86::FCMOVB },
|
||||
{ X86::FpCMOVBE32 , X86::FCMOVBE },
|
||||
{ X86::FpCMOVBE64 , X86::FCMOVBE },
|
||||
{ X86::FpCMOVE32 , X86::FCMOVE },
|
||||
{ X86::FpCMOVE64 , X86::FCMOVE },
|
||||
{ X86::FpCMOVNB32 , X86::FCMOVNB },
|
||||
{ X86::FpCMOVNB64 , X86::FCMOVNB },
|
||||
{ X86::FpCMOVNBE32 , X86::FCMOVNBE },
|
||||
{ X86::FpCMOVNBE64 , X86::FCMOVNBE },
|
||||
{ X86::FpCMOVNE32 , X86::FCMOVNE },
|
||||
{ X86::FpCMOVNE64 , X86::FCMOVNE },
|
||||
{ X86::FpCMOVNP32 , X86::FCMOVNP },
|
||||
{ X86::FpCMOVNP64 , X86::FCMOVNP },
|
||||
{ X86::FpCMOVP32 , X86::FCMOVP },
|
||||
{ X86::FpCMOVP64 , X86::FCMOVP },
|
||||
{ X86::FpCOS32 , X86::FCOS },
|
||||
{ X86::FpCOS64 , X86::FCOS },
|
||||
{ X86::FpDIV32m , X86::FDIV32m },
|
||||
{ X86::FpDIV64m , X86::FDIV64m },
|
||||
{ X86::FpDIVR32m , X86::FDIVR32m },
|
||||
{ X86::FpDIVR64m , X86::FDIVR64m },
|
||||
{ X86::FpIADD16m , X86::FIADD16m },
|
||||
{ X86::FpIADD32m , X86::FIADD32m },
|
||||
{ X86::FpIDIV16m , X86::FIDIV16m },
|
||||
{ X86::FpIDIV32m , X86::FIDIV32m },
|
||||
{ X86::FpIDIVR16m, X86::FIDIVR16m},
|
||||
{ X86::FpIDIVR32m, X86::FIDIVR32m},
|
||||
{ X86::FpILD16m , X86::FILD16m },
|
||||
{ X86::FpILD32m , X86::FILD32m },
|
||||
{ X86::FpILD64m , X86::FILD64m },
|
||||
{ X86::FpIMUL16m , X86::FIMUL16m },
|
||||
{ X86::FpIMUL32m , X86::FIMUL32m },
|
||||
{ X86::FpIST16m , X86::FIST16m },
|
||||
{ X86::FpIST32m , X86::FIST32m },
|
||||
{ X86::FpIST64m , X86::FISTP64m },
|
||||
{ X86::FpISTT16m , X86::FISTTP16m},
|
||||
{ X86::FpISTT32m , X86::FISTTP32m},
|
||||
{ X86::FpISTT64m , X86::FISTTP64m},
|
||||
{ X86::FpISUB16m , X86::FISUB16m },
|
||||
{ X86::FpISUB32m , X86::FISUB32m },
|
||||
{ X86::FpISUBR16m, X86::FISUBR16m},
|
||||
{ X86::FpISUBR32m, X86::FISUBR32m},
|
||||
{ X86::FpLD0 , X86::FLD0 },
|
||||
{ X86::FpLD1 , X86::FLD1 },
|
||||
{ X86::FpIADD16m32 , X86::FIADD16m },
|
||||
{ X86::FpIADD16m64 , X86::FIADD16m },
|
||||
{ X86::FpIADD32m32 , X86::FIADD32m },
|
||||
{ X86::FpIADD32m64 , X86::FIADD32m },
|
||||
{ X86::FpIDIV16m32 , X86::FIDIV16m },
|
||||
{ X86::FpIDIV16m64 , X86::FIDIV16m },
|
||||
{ X86::FpIDIV32m32 , X86::FIDIV32m },
|
||||
{ X86::FpIDIV32m64 , X86::FIDIV32m },
|
||||
{ X86::FpIDIVR16m32, X86::FIDIVR16m},
|
||||
{ X86::FpIDIVR16m64, X86::FIDIVR16m},
|
||||
{ X86::FpIDIVR32m32, X86::FIDIVR32m},
|
||||
{ X86::FpIDIVR32m64, X86::FIDIVR32m},
|
||||
{ X86::FpILD16m32 , X86::FILD16m },
|
||||
{ X86::FpILD16m64 , X86::FILD16m },
|
||||
{ X86::FpILD32m32 , X86::FILD32m },
|
||||
{ X86::FpILD32m64 , X86::FILD32m },
|
||||
{ X86::FpILD64m32 , X86::FILD64m },
|
||||
{ X86::FpILD64m64 , X86::FILD64m },
|
||||
{ X86::FpIMUL16m32 , X86::FIMUL16m },
|
||||
{ X86::FpIMUL16m64 , X86::FIMUL16m },
|
||||
{ X86::FpIMUL32m32 , X86::FIMUL32m },
|
||||
{ X86::FpIMUL32m64 , X86::FIMUL32m },
|
||||
{ X86::FpIST16m32 , X86::FIST16m },
|
||||
{ X86::FpIST16m64 , X86::FIST16m },
|
||||
{ X86::FpIST32m32 , X86::FIST32m },
|
||||
{ X86::FpIST32m64 , X86::FIST32m },
|
||||
{ X86::FpIST64m32 , X86::FISTP64m },
|
||||
{ X86::FpIST64m64 , X86::FISTP64m },
|
||||
{ X86::FpISTT16m32 , X86::FISTTP16m},
|
||||
{ X86::FpISTT16m64 , X86::FISTTP16m},
|
||||
{ X86::FpISTT32m32 , X86::FISTTP32m},
|
||||
{ X86::FpISTT32m64 , X86::FISTTP32m},
|
||||
{ X86::FpISTT64m32 , X86::FISTTP64m},
|
||||
{ X86::FpISTT64m64 , X86::FISTTP64m},
|
||||
{ X86::FpISUB16m32 , X86::FISUB16m },
|
||||
{ X86::FpISUB16m64 , X86::FISUB16m },
|
||||
{ X86::FpISUB32m32 , X86::FISUB32m },
|
||||
{ X86::FpISUB32m64 , X86::FISUB32m },
|
||||
{ X86::FpISUBR16m32, X86::FISUBR16m},
|
||||
{ X86::FpISUBR16m64, X86::FISUBR16m},
|
||||
{ X86::FpISUBR32m32, X86::FISUBR32m},
|
||||
{ X86::FpISUBR32m64, X86::FISUBR32m},
|
||||
{ X86::FpLD032 , X86::FLD0 },
|
||||
{ X86::FpLD064 , X86::FLD0 },
|
||||
{ X86::FpLD132 , X86::FLD1 },
|
||||
{ X86::FpLD164 , X86::FLD1 },
|
||||
{ X86::FpLD32m , X86::FLD32m },
|
||||
{ X86::FpLD64m , X86::FLD64m },
|
||||
{ X86::FpMUL32m , X86::FMUL32m },
|
||||
{ X86::FpMUL64m , X86::FMUL64m },
|
||||
{ X86::FpSIN , X86::FSIN },
|
||||
{ X86::FpSQRT , X86::FSQRT },
|
||||
{ X86::FpSIN32 , X86::FSIN },
|
||||
{ X86::FpSIN64 , X86::FSIN },
|
||||
{ X86::FpSQRT32 , X86::FSQRT },
|
||||
{ X86::FpSQRT64 , X86::FSQRT },
|
||||
{ X86::FpST32m , X86::FST32m },
|
||||
{ X86::FpST64m , X86::FST64m },
|
||||
{ X86::FpST64m32 , X86::FST32m },
|
||||
{ X86::FpSUB32m , X86::FSUB32m },
|
||||
{ X86::FpSUB64m , X86::FSUB64m },
|
||||
{ X86::FpSUBR32m , X86::FSUBR32m },
|
||||
{ X86::FpSUBR64m , X86::FSUBR64m },
|
||||
{ X86::FpTST , X86::FTST },
|
||||
{ X86::FpUCOMIr , X86::FUCOMIr },
|
||||
{ X86::FpUCOMr , X86::FUCOMr },
|
||||
{ X86::FpTST32 , X86::FTST },
|
||||
{ X86::FpTST64 , X86::FTST },
|
||||
{ X86::FpUCOMIr32 , X86::FUCOMIr },
|
||||
{ X86::FpUCOMIr64 , X86::FUCOMIr },
|
||||
{ X86::FpUCOMr32 , X86::FUCOMr },
|
||||
{ X86::FpUCOMr64 , X86::FUCOMr },
|
||||
};
|
||||
|
||||
static unsigned getConcreteOpcode(unsigned Opcode) {
|
||||
@ -510,10 +550,14 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
|
||||
// Ditto FISTTP16m, FISTTP32m, FISTTP64m.
|
||||
//
|
||||
if (!KillsSrc &&
|
||||
(MI->getOpcode() == X86::FpIST64m ||
|
||||
MI->getOpcode() == X86::FpISTT16m ||
|
||||
MI->getOpcode() == X86::FpISTT32m ||
|
||||
MI->getOpcode() == X86::FpISTT64m)) {
|
||||
(MI->getOpcode() == X86::FpIST64m32 ||
|
||||
MI->getOpcode() == X86::FpISTT16m32 ||
|
||||
MI->getOpcode() == X86::FpISTT32m32 ||
|
||||
MI->getOpcode() == X86::FpISTT64m32 ||
|
||||
MI->getOpcode() == X86::FpIST64m64 ||
|
||||
MI->getOpcode() == X86::FpISTT16m64 ||
|
||||
MI->getOpcode() == X86::FpISTT32m64 ||
|
||||
MI->getOpcode() == X86::FpISTT64m64)) {
|
||||
duplicateToTop(Reg, 7 /*temp register*/, I);
|
||||
} else {
|
||||
moveToTop(Reg, I); // Move to the top of the stack...
|
||||
@ -578,34 +622,50 @@ void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
|
||||
|
||||
// ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i)
|
||||
static const TableEntry ForwardST0Table[] = {
|
||||
{ X86::FpADD , X86::FADDST0r },
|
||||
{ X86::FpDIV , X86::FDIVST0r },
|
||||
{ X86::FpMUL , X86::FMULST0r },
|
||||
{ X86::FpSUB , X86::FSUBST0r },
|
||||
{ X86::FpADD32 , X86::FADDST0r },
|
||||
{ X86::FpADD64 , X86::FADDST0r },
|
||||
{ X86::FpDIV32 , X86::FDIVST0r },
|
||||
{ X86::FpDIV64 , X86::FDIVST0r },
|
||||
{ X86::FpMUL32 , X86::FMULST0r },
|
||||
{ X86::FpMUL64 , X86::FMULST0r },
|
||||
{ X86::FpSUB32 , X86::FSUBST0r },
|
||||
{ X86::FpSUB64 , X86::FSUBST0r },
|
||||
};
|
||||
|
||||
// ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0)
|
||||
static const TableEntry ReverseST0Table[] = {
|
||||
{ X86::FpADD , X86::FADDST0r }, // commutative
|
||||
{ X86::FpDIV , X86::FDIVRST0r },
|
||||
{ X86::FpMUL , X86::FMULST0r }, // commutative
|
||||
{ X86::FpSUB , X86::FSUBRST0r },
|
||||
{ X86::FpADD32 , X86::FADDST0r }, // commutative
|
||||
{ X86::FpADD64 , X86::FADDST0r }, // commutative
|
||||
{ X86::FpDIV32 , X86::FDIVRST0r },
|
||||
{ X86::FpDIV64 , X86::FDIVRST0r },
|
||||
{ X86::FpMUL32 , X86::FMULST0r }, // commutative
|
||||
{ X86::FpMUL64 , X86::FMULST0r }, // commutative
|
||||
{ X86::FpSUB32 , X86::FSUBRST0r },
|
||||
{ X86::FpSUB64 , X86::FSUBRST0r },
|
||||
};
|
||||
|
||||
// ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i)
|
||||
static const TableEntry ForwardSTiTable[] = {
|
||||
{ X86::FpADD , X86::FADDrST0 }, // commutative
|
||||
{ X86::FpDIV , X86::FDIVRrST0 },
|
||||
{ X86::FpMUL , X86::FMULrST0 }, // commutative
|
||||
{ X86::FpSUB , X86::FSUBRrST0 },
|
||||
{ X86::FpADD32 , X86::FADDrST0 }, // commutative
|
||||
{ X86::FpADD64 , X86::FADDrST0 }, // commutative
|
||||
{ X86::FpDIV32 , X86::FDIVRrST0 },
|
||||
{ X86::FpDIV64 , X86::FDIVRrST0 },
|
||||
{ X86::FpMUL32 , X86::FMULrST0 }, // commutative
|
||||
{ X86::FpMUL64 , X86::FMULrST0 }, // commutative
|
||||
{ X86::FpSUB32 , X86::FSUBRrST0 },
|
||||
{ X86::FpSUB64 , X86::FSUBRrST0 },
|
||||
};
|
||||
|
||||
// ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0)
|
||||
static const TableEntry ReverseSTiTable[] = {
|
||||
{ X86::FpADD , X86::FADDrST0 },
|
||||
{ X86::FpDIV , X86::FDIVrST0 },
|
||||
{ X86::FpMUL , X86::FMULrST0 },
|
||||
{ X86::FpSUB , X86::FSUBrST0 },
|
||||
{ X86::FpADD32 , X86::FADDrST0 },
|
||||
{ X86::FpADD64 , X86::FADDrST0 },
|
||||
{ X86::FpDIV32 , X86::FDIVrST0 },
|
||||
{ X86::FpDIV64 , X86::FDIVrST0 },
|
||||
{ X86::FpMUL32 , X86::FMULrST0 },
|
||||
{ X86::FpMUL64 , X86::FMULrST0 },
|
||||
{ X86::FpSUB32 , X86::FSUBrST0 },
|
||||
{ X86::FpSUB64 , X86::FSUBrST0 },
|
||||
};
|
||||
|
||||
|
||||
@ -777,15 +837,20 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
|
||||
MachineInstr *MI = I;
|
||||
switch (MI->getOpcode()) {
|
||||
default: assert(0 && "Unknown SpecialFP instruction!");
|
||||
case X86::FpGETRESULT: // Appears immediately after a call returning FP type!
|
||||
case X86::FpGETRESULT32: // Appears immediately after a call returning FP type!
|
||||
case X86::FpGETRESULT64: // Appears immediately after a call returning FP type!
|
||||
assert(StackTop == 0 && "Stack should be empty after a call!");
|
||||
pushReg(getFPReg(MI->getOperand(0)));
|
||||
break;
|
||||
case X86::FpSETRESULT:
|
||||
case X86::FpSETRESULT32:
|
||||
case X86::FpSETRESULT64:
|
||||
assert(StackTop == 1 && "Stack should have one element on it to return!");
|
||||
--StackTop; // "Forget" we have something on the top of stack!
|
||||
break;
|
||||
case X86::FpMOV: {
|
||||
case X86::FpMOV3232:
|
||||
case X86::FpMOV3264:
|
||||
case X86::FpMOV6432:
|
||||
case X86::FpMOV6464: {
|
||||
unsigned SrcReg = getFPReg(MI->getOperand(1));
|
||||
unsigned DestReg = getFPReg(MI->getOperand(0));
|
||||
|
||||
|
@ -492,11 +492,13 @@ void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
|
||||
for (MachineBasicBlock::iterator I = MBBI->begin(), E = MBBI->end();
|
||||
!ContainsFPCode && I != E; ++I) {
|
||||
if (I->getNumOperands() != 0 && I->getOperand(0).isRegister()) {
|
||||
const TargetRegisterClass *clas;
|
||||
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
|
||||
if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() &&
|
||||
MRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
|
||||
RegMap->getRegClass(I->getOperand(0).getReg()) ==
|
||||
X86::RFPRegisterClass) {
|
||||
((clas = RegMap->getRegClass(I->getOperand(0).getReg())) ==
|
||||
X86::RFP32RegisterClass ||
|
||||
clas == X86::RFP64RegisterClass)) {
|
||||
ContainsFPCode = true;
|
||||
break;
|
||||
}
|
||||
|
@ -289,11 +289,14 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
|
||||
addLegalFPImmediate(+0.0); // xorps / xorpd
|
||||
} else {
|
||||
// Set up the FP register classes.
|
||||
addRegisterClass(MVT::f64, X86::RFPRegisterClass);
|
||||
addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
|
||||
addRegisterClass(MVT::f32, X86::RFP32RegisterClass);
|
||||
|
||||
setOperationAction(ISD::UNDEF, MVT::f64, Expand);
|
||||
setOperationAction(ISD::UNDEF, MVT::f32, Expand);
|
||||
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
|
||||
setOperationAction(ISD::FP_ROUND, MVT::f32, Expand);
|
||||
|
||||
if (!UnsafeFPMath) {
|
||||
setOperationAction(ISD::FSIN , MVT::f64 , Expand);
|
||||
@ -301,6 +304,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
|
||||
}
|
||||
|
||||
setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
|
||||
setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
|
||||
addLegalFPImmediate(+0.0); // FLD0
|
||||
addLegalFPImmediate(+1.0); // FLD1
|
||||
addLegalFPImmediate(-0.0); // FLD0/FCHS
|
||||
@ -553,7 +557,7 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
|
||||
MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
|
||||
Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
|
||||
}
|
||||
SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other);
|
||||
SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other);
|
||||
SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
|
||||
Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
|
||||
Chain = Value.getValue(1);
|
||||
@ -604,7 +608,7 @@ LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
|
||||
// before the fp stackifier runs.
|
||||
|
||||
// Copy ST0 into an RFP register with FP_GET_RESULT.
|
||||
SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
|
||||
SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag);
|
||||
SDOperand GROps[] = { Chain, InFlag };
|
||||
SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
|
||||
Chain = RetVal.getValue(1);
|
||||
@ -626,11 +630,6 @@ LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
|
||||
RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
|
||||
Chain = RetVal.getValue(1);
|
||||
}
|
||||
|
||||
if (RVLocs[0].getValVT() == MVT::f32 && !X86ScalarSSE)
|
||||
// FIXME: we would really like to remember that this FP_ROUND
|
||||
// operation is okay to eliminate if we allow excess FP precision.
|
||||
RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
|
||||
ResultVals.push_back(RetVal);
|
||||
}
|
||||
|
||||
@ -3252,7 +3251,7 @@ SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
|
||||
if (X86ScalarSSE)
|
||||
Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
|
||||
else
|
||||
Tys = DAG.getVTList(MVT::f64, MVT::Other);
|
||||
Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
|
||||
SmallVector<SDOperand, 8> Ops;
|
||||
Ops.push_back(Chain);
|
||||
Ops.push_back(StackSlot);
|
||||
@ -3307,7 +3306,7 @@ SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
|
||||
if (X86ScalarSSE) {
|
||||
assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
|
||||
Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0);
|
||||
SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other);
|
||||
SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
|
||||
SDOperand Ops[] = {
|
||||
Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
|
||||
};
|
||||
@ -4437,9 +4436,12 @@ X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
|
||||
return BB;
|
||||
}
|
||||
|
||||
case X86::FP_TO_INT16_IN_MEM:
|
||||
case X86::FP_TO_INT32_IN_MEM:
|
||||
case X86::FP_TO_INT64_IN_MEM: {
|
||||
case X86::FP32_TO_INT16_IN_MEM:
|
||||
case X86::FP32_TO_INT32_IN_MEM:
|
||||
case X86::FP32_TO_INT64_IN_MEM:
|
||||
case X86::FP64_TO_INT16_IN_MEM:
|
||||
case X86::FP64_TO_INT32_IN_MEM:
|
||||
case X86::FP64_TO_INT64_IN_MEM: {
|
||||
// Change the floating point control register to use "round towards zero"
|
||||
// mode when truncating to an integer value.
|
||||
MachineFunction *F = BB->getParent();
|
||||
@ -4466,9 +4468,12 @@ X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
|
||||
unsigned Opc;
|
||||
switch (MI->getOpcode()) {
|
||||
default: assert(0 && "illegal opcode!");
|
||||
case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
|
||||
case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
|
||||
case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
|
||||
case X86::FP32_TO_INT16_IN_MEM: Opc = X86::FpIST16m32; break;
|
||||
case X86::FP32_TO_INT32_IN_MEM: Opc = X86::FpIST32m32; break;
|
||||
case X86::FP32_TO_INT64_IN_MEM: Opc = X86::FpIST64m32; break;
|
||||
case X86::FP64_TO_INT16_IN_MEM: Opc = X86::FpIST16m64; break;
|
||||
case X86::FP64_TO_INT32_IN_MEM: Opc = X86::FpIST32m64; break;
|
||||
case X86::FP64_TO_INT64_IN_MEM: Opc = X86::FpIST64m64; break;
|
||||
}
|
||||
|
||||
X86AddressMode AM;
|
||||
|
@ -17,13 +17,13 @@
|
||||
// FPStack specific DAG Nodes.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def SDTX86FpGet : SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>;
|
||||
def SDTX86FpGet : SDTypeProfile<1, 0, [SDTCisFP<0>]>;
|
||||
def SDTX86FpSet : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
|
||||
def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisVT<0, f64>,
|
||||
def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>,
|
||||
SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>;
|
||||
def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>,
|
||||
SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>;
|
||||
def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisPtrTy<1>,
|
||||
def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>,
|
||||
SDTCisVT<2, OtherVT>]>;
|
||||
def SDTX86FpToIMem: SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
|
||||
|
||||
@ -50,19 +50,19 @@ def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem,
|
||||
// FPStack pattern fragments
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def fp64imm0 : PatLeaf<(f64 fpimm), [{
|
||||
def fpimm0 : PatLeaf<(fpimm), [{
|
||||
return N->isExactlyValue(+0.0);
|
||||
}]>;
|
||||
|
||||
def fp64immneg0 : PatLeaf<(f64 fpimm), [{
|
||||
def fpimmneg0 : PatLeaf<(fpimm), [{
|
||||
return N->isExactlyValue(-0.0);
|
||||
}]>;
|
||||
|
||||
def fp64imm1 : PatLeaf<(f64 fpimm), [{
|
||||
def fpimm1 : PatLeaf<(fpimm), [{
|
||||
return N->isExactlyValue(+1.0);
|
||||
}]>;
|
||||
|
||||
def fp64immneg1 : PatLeaf<(f64 fpimm), [{
|
||||
def fpimmneg1 : PatLeaf<(fpimm), [{
|
||||
return N->isExactlyValue(-1.0);
|
||||
}]>;
|
||||
|
||||
@ -70,18 +70,30 @@ def extloadf64f32 : PatFrag<(ops node:$ptr), (f64 (extloadf32 node:$ptr))>;
|
||||
|
||||
// Some 'special' instructions
|
||||
let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
|
||||
def FP_TO_INT16_IN_MEM : I<0, Pseudo,
|
||||
(ops i16mem:$dst, RFP:$src),
|
||||
"#FP_TO_INT16_IN_MEM PSEUDO!",
|
||||
[(X86fp_to_i16mem RFP:$src, addr:$dst)]>;
|
||||
def FP_TO_INT32_IN_MEM : I<0, Pseudo,
|
||||
(ops i32mem:$dst, RFP:$src),
|
||||
"#FP_TO_INT32_IN_MEM PSEUDO!",
|
||||
[(X86fp_to_i32mem RFP:$src, addr:$dst)]>;
|
||||
def FP_TO_INT64_IN_MEM : I<0, Pseudo,
|
||||
(ops i64mem:$dst, RFP:$src),
|
||||
"#FP_TO_INT64_IN_MEM PSEUDO!",
|
||||
[(X86fp_to_i64mem RFP:$src, addr:$dst)]>;
|
||||
def FP32_TO_INT16_IN_MEM : I<0, Pseudo,
|
||||
(ops i16mem:$dst, RFP32:$src),
|
||||
"#FP32_TO_INT16_IN_MEM PSEUDO!",
|
||||
[(X86fp_to_i16mem RFP32:$src, addr:$dst)]>;
|
||||
def FP32_TO_INT32_IN_MEM : I<0, Pseudo,
|
||||
(ops i32mem:$dst, RFP32:$src),
|
||||
"#FP32_TO_INT32_IN_MEM PSEUDO!",
|
||||
[(X86fp_to_i32mem RFP32:$src, addr:$dst)]>;
|
||||
def FP32_TO_INT64_IN_MEM : I<0, Pseudo,
|
||||
(ops i64mem:$dst, RFP32:$src),
|
||||
"#FP32_TO_INT64_IN_MEM PSEUDO!",
|
||||
[(X86fp_to_i64mem RFP32:$src, addr:$dst)]>;
|
||||
def FP64_TO_INT16_IN_MEM : I<0, Pseudo,
|
||||
(ops i16mem:$dst, RFP64:$src),
|
||||
"#FP64_TO_INT16_IN_MEM PSEUDO!",
|
||||
[(X86fp_to_i16mem RFP64:$src, addr:$dst)]>;
|
||||
def FP64_TO_INT32_IN_MEM : I<0, Pseudo,
|
||||
(ops i32mem:$dst, RFP64:$src),
|
||||
"#FP64_TO_INT32_IN_MEM PSEUDO!",
|
||||
[(X86fp_to_i32mem RFP64:$src, addr:$dst)]>;
|
||||
def FP64_TO_INT64_IN_MEM : I<0, Pseudo,
|
||||
(ops i64mem:$dst, RFP64:$src),
|
||||
"#FP64_TO_INT64_IN_MEM PSEUDO!",
|
||||
[(X86fp_to_i64mem RFP64:$src, addr:$dst)]>;
|
||||
}
|
||||
|
||||
let isTerminator = 1 in
|
||||
@ -111,30 +123,47 @@ class FpI_<dag ops, FPFormat fp, list<dag> pattern>
|
||||
}
|
||||
|
||||
// Random Pseudo Instructions.
|
||||
def FpGETRESULT : FpI_<(ops RFP:$dst), SpecialFP,
|
||||
[(set RFP:$dst, X86fpget)]>; // FPR = ST(0)
|
||||
def FpGETRESULT32 : FpI_<(ops RFP32:$dst), SpecialFP,
|
||||
[(set RFP32:$dst, X86fpget)]>; // FPR = ST(0)
|
||||
|
||||
let noResults = 1 in
|
||||
def FpSETRESULT : FpI_<(ops RFP:$src), SpecialFP,
|
||||
[(X86fpset RFP:$src)]>, Imp<[], [ST0]>; // ST(0) = FPR
|
||||
def FpGETRESULT64 : FpI_<(ops RFP64:$dst), SpecialFP,
|
||||
[(set RFP64:$dst, X86fpget)]>; // FPR = ST(0)
|
||||
|
||||
let noResults = 1 in {
|
||||
def FpSETRESULT32 : FpI_<(ops RFP32:$src), SpecialFP,
|
||||
[(X86fpset RFP32:$src)]>, Imp<[], [ST0]>; // ST(0) = FPR
|
||||
|
||||
def FpSETRESULT64 : FpI_<(ops RFP64:$src), SpecialFP,
|
||||
[(X86fpset RFP64:$src)]>, Imp<[], [ST0]>; // ST(0) = FPR
|
||||
}
|
||||
// FpI - Floating Point Pseudo Instruction template. Predicated on FPStack.
|
||||
class FpI<dag ops, FPFormat fp, list<dag> pattern> :
|
||||
FpI_<ops, fp, pattern>, Requires<[FPStack]>;
|
||||
|
||||
|
||||
def FpMOV : FpI<(ops RFP:$dst, RFP:$src), SpecialFP, []>; // f1 = fmov f2
|
||||
// Register copies. Just copies, the 64->32 version does not truncate.
|
||||
def FpMOV3232 : FpI<(ops RFP32:$dst, RFP32:$src), SpecialFP, []>; // f1 = fmov f2
|
||||
def FpMOV3264 : FpI<(ops RFP64:$dst, RFP32:$src), SpecialFP, []>; // f1 = fmov f2
|
||||
def FpMOV6432 : FpI<(ops RFP32:$dst, RFP64:$src), SpecialFP, []>; // f1 = fmov f2
|
||||
def FpMOV6464 : FpI<(ops RFP64:$dst, RFP64:$src), SpecialFP, []>; // f1 = fmov f2
|
||||
|
||||
// Arithmetic
|
||||
// Add, Sub, Mul, Div.
|
||||
def FpADD : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP,
|
||||
[(set RFP:$dst, (fadd RFP:$src1, RFP:$src2))]>;
|
||||
def FpSUB : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP,
|
||||
[(set RFP:$dst, (fsub RFP:$src1, RFP:$src2))]>;
|
||||
def FpMUL : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP,
|
||||
[(set RFP:$dst, (fmul RFP:$src1, RFP:$src2))]>;
|
||||
def FpDIV : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), TwoArgFP,
|
||||
[(set RFP:$dst, (fdiv RFP:$src1, RFP:$src2))]>;
|
||||
def FpADD32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), TwoArgFP,
|
||||
[(set RFP32:$dst, (fadd RFP32:$src1, RFP32:$src2))]>;
|
||||
def FpSUB32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), TwoArgFP,
|
||||
[(set RFP32:$dst, (fsub RFP32:$src1, RFP32:$src2))]>;
|
||||
def FpMUL32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), TwoArgFP,
|
||||
[(set RFP32:$dst, (fmul RFP32:$src1, RFP32:$src2))]>;
|
||||
def FpDIV32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), TwoArgFP,
|
||||
[(set RFP32:$dst, (fdiv RFP32:$src1, RFP32:$src2))]>;
|
||||
def FpADD64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), TwoArgFP,
|
||||
[(set RFP64:$dst, (fadd RFP64:$src1, RFP64:$src2))]>;
|
||||
def FpSUB64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), TwoArgFP,
|
||||
[(set RFP64:$dst, (fsub RFP64:$src1, RFP64:$src2))]>;
|
||||
def FpMUL64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), TwoArgFP,
|
||||
[(set RFP64:$dst, (fmul RFP64:$src1, RFP64:$src2))]>;
|
||||
def FpDIV64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), TwoArgFP,
|
||||
[(set RFP64:$dst, (fdiv RFP64:$src1, RFP64:$src2))]>;
|
||||
|
||||
class FPST0rInst<bits<8> o, string asm>
|
||||
: FPI<o, AddRegFrm, (ops RST:$op), asm>, D8;
|
||||
@ -144,47 +173,41 @@ class FPrST0PInst<bits<8> o, string asm>
|
||||
: FPI<o, AddRegFrm, (ops RST:$op), asm>, DE;
|
||||
|
||||
// Binary Ops with a memory source.
|
||||
def FpADD32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
|
||||
[(set RFP:$dst, (fadd RFP:$src1,
|
||||
(extloadf64f32 addr:$src2)))]>;
|
||||
def FpADD32m : FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW,
|
||||
[(set RFP32:$dst, (fadd RFP32:$src1, (loadf32 addr:$src2)))]>;
|
||||
// ST(0) = ST(0) + [mem32]
|
||||
def FpADD64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
|
||||
[(set RFP:$dst, (fadd RFP:$src1, (loadf64 addr:$src2)))]>;
|
||||
def FpADD64m : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW,
|
||||
[(set RFP64:$dst, (fadd RFP64:$src1, (loadf64 addr:$src2)))]>;
|
||||
// ST(0) = ST(0) + [mem64]
|
||||
def FpMUL32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
|
||||
[(set RFP:$dst, (fmul RFP:$src1,
|
||||
(extloadf64f32 addr:$src2)))]>;
|
||||
def FpMUL32m : FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW,
|
||||
[(set RFP32:$dst, (fmul RFP32:$src1, (loadf32 addr:$src2)))]>;
|
||||
// ST(0) = ST(0) * [mem32]
|
||||
def FpMUL64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
|
||||
[(set RFP:$dst, (fmul RFP:$src1, (loadf64 addr:$src2)))]>;
|
||||
def FpMUL64m : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW,
|
||||
[(set RFP64:$dst, (fmul RFP64:$src1, (loadf64 addr:$src2)))]>;
|
||||
// ST(0) = ST(0) * [mem64]
|
||||
def FpSUB32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
|
||||
[(set RFP:$dst, (fsub RFP:$src1,
|
||||
(extloadf64f32 addr:$src2)))]>;
|
||||
def FpSUB32m : FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW,
|
||||
[(set RFP32:$dst, (fsub RFP32:$src1, (loadf32 addr:$src2)))]>;
|
||||
// ST(0) = ST(0) - [mem32]
|
||||
def FpSUB64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
|
||||
[(set RFP:$dst, (fsub RFP:$src1, (loadf64 addr:$src2)))]>;
|
||||
def FpSUB64m : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW,
|
||||
[(set RFP64:$dst, (fsub RFP64:$src1, (loadf64 addr:$src2)))]>;
|
||||
// ST(0) = ST(0) - [mem64]
|
||||
def FpSUBR32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
|
||||
[(set RFP:$dst, (fsub (extloadf64f32 addr:$src2),
|
||||
RFP:$src1))]>;
|
||||
def FpSUBR32m : FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW,
|
||||
[(set RFP32:$dst, (fsub (loadf32 addr:$src2), RFP32:$src1))]>;
|
||||
// ST(0) = [mem32] - ST(0)
|
||||
def FpSUBR64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
|
||||
[(set RFP:$dst, (fsub (loadf64 addr:$src2), RFP:$src1))]>;
|
||||
def FpSUBR64m : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW,
|
||||
[(set RFP64:$dst, (fsub (loadf64 addr:$src2), RFP64:$src1))]>;
|
||||
// ST(0) = [mem64] - ST(0)
|
||||
def FpDIV32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
|
||||
[(set RFP:$dst, (fdiv RFP:$src1,
|
||||
(extloadf64f32 addr:$src2)))]>;
|
||||
def FpDIV32m : FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW,
|
||||
[(set RFP32:$dst, (fdiv RFP32:$src1, (loadf32 addr:$src2)))]>;
|
||||
// ST(0) = ST(0) / [mem32]
|
||||
def FpDIV64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
|
||||
[(set RFP:$dst, (fdiv RFP:$src1, (loadf64 addr:$src2)))]>;
|
||||
def FpDIV64m : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW,
|
||||
[(set RFP64:$dst, (fdiv RFP64:$src1, (loadf64 addr:$src2)))]>;
|
||||
// ST(0) = ST(0) / [mem64]
|
||||
def FpDIVR32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
|
||||
[(set RFP:$dst, (fdiv (extloadf64f32 addr:$src2),
|
||||
RFP:$src1))]>;
|
||||
def FpDIVR32m : FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW,
|
||||
[(set RFP32:$dst, (fdiv (loadf32 addr:$src2), RFP32:$src1))]>;
|
||||
// ST(0) = [mem32] / ST(0)
|
||||
def FpDIVR64m : FpI<(ops RFP:$dst, RFP:$src1, f64mem:$src2), OneArgFPRW,
|
||||
[(set RFP:$dst, (fdiv (loadf64 addr:$src2), RFP:$src1))]>;
|
||||
def FpDIVR64m : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW,
|
||||
[(set RFP64:$dst, (fdiv (loadf64 addr:$src2), RFP64:$src1))]>;
|
||||
// ST(0) = [mem64] / ST(0)
|
||||
|
||||
|
||||
@ -201,53 +224,102 @@ def FDIV64m : FPI<0xDC, MRM6m, (ops f64mem:$src), "fdiv{l} $src">;
|
||||
def FDIVR32m : FPI<0xD8, MRM7m, (ops f32mem:$src), "fdivr{s} $src">;
|
||||
def FDIVR64m : FPI<0xDC, MRM7m, (ops f64mem:$src), "fdivr{l} $src">;
|
||||
|
||||
def FpIADD16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
|
||||
[(set RFP:$dst, (fadd RFP:$src1,
|
||||
def FpIADD16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW,
|
||||
[(set RFP32:$dst, (fadd RFP32:$src1,
|
||||
(X86fild addr:$src2, i16)))]>;
|
||||
// ST(0) = ST(0) + [mem16int]
|
||||
def FpIADD32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
|
||||
[(set RFP:$dst, (fadd RFP:$src1,
|
||||
def FpIADD32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW,
|
||||
[(set RFP32:$dst, (fadd RFP32:$src1,
|
||||
(X86fild addr:$src2, i32)))]>;
|
||||
// ST(0) = ST(0) + [mem32int]
|
||||
def FpIMUL16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
|
||||
[(set RFP:$dst, (fmul RFP:$src1,
|
||||
def FpIMUL16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW,
|
||||
[(set RFP32:$dst, (fmul RFP32:$src1,
|
||||
(X86fild addr:$src2, i16)))]>;
|
||||
// ST(0) = ST(0) * [mem16int]
|
||||
def FpIMUL32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
|
||||
[(set RFP:$dst, (fmul RFP:$src1,
|
||||
def FpIMUL32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW,
|
||||
[(set RFP32:$dst, (fmul RFP32:$src1,
|
||||
(X86fild addr:$src2, i32)))]>;
|
||||
// ST(0) = ST(0) * [mem32int]
|
||||
def FpISUB16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
|
||||
[(set RFP:$dst, (fsub RFP:$src1,
|
||||
def FpISUB16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW,
|
||||
[(set RFP32:$dst, (fsub RFP32:$src1,
|
||||
(X86fild addr:$src2, i16)))]>;
|
||||
// ST(0) = ST(0) - [mem16int]
|
||||
def FpISUB32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
|
||||
[(set RFP:$dst, (fsub RFP:$src1,
|
||||
def FpISUB32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW,
|
||||
[(set RFP32:$dst, (fsub RFP32:$src1,
|
||||
(X86fild addr:$src2, i32)))]>;
|
||||
// ST(0) = ST(0) - [mem32int]
|
||||
def FpISUBR16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
|
||||
[(set RFP:$dst, (fsub (X86fild addr:$src2, i16),
|
||||
RFP:$src1))]>;
|
||||
def FpISUBR16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW,
|
||||
[(set RFP32:$dst, (fsub (X86fild addr:$src2, i16),
|
||||
RFP32:$src1))]>;
|
||||
// ST(0) = [mem16int] - ST(0)
|
||||
def FpISUBR32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
|
||||
[(set RFP:$dst, (fsub (X86fild addr:$src2, i32),
|
||||
RFP:$src1))]>;
|
||||
def FpISUBR32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW,
|
||||
[(set RFP32:$dst, (fsub (X86fild addr:$src2, i32),
|
||||
RFP32:$src1))]>;
|
||||
// ST(0) = [mem32int] - ST(0)
|
||||
def FpIDIV16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
|
||||
[(set RFP:$dst, (fdiv RFP:$src1,
|
||||
def FpIDIV16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW,
|
||||
[(set RFP32:$dst, (fdiv RFP32:$src1,
|
||||
(X86fild addr:$src2, i16)))]>;
|
||||
// ST(0) = ST(0) / [mem16int]
|
||||
def FpIDIV32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
|
||||
[(set RFP:$dst, (fdiv RFP:$src1,
|
||||
def FpIDIV32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW,
|
||||
[(set RFP32:$dst, (fdiv RFP32:$src1,
|
||||
(X86fild addr:$src2, i32)))]>;
|
||||
// ST(0) = ST(0) / [mem32int]
|
||||
def FpIDIVR16m : FpI<(ops RFP:$dst, RFP:$src1, i16mem:$src2), OneArgFPRW,
|
||||
[(set RFP:$dst, (fdiv (X86fild addr:$src2, i16),
|
||||
RFP:$src1))]>;
|
||||
def FpIDIVR16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW,
|
||||
[(set RFP32:$dst, (fdiv (X86fild addr:$src2, i16),
|
||||
RFP32:$src1))]>;
|
||||
// ST(0) = [mem16int] / ST(0)
|
||||
def FpIDIVR32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
|
||||
[(set RFP:$dst, (fdiv (X86fild addr:$src2, i32),
|
||||
RFP:$src1))]>;
|
||||
def FpIDIVR32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW,
|
||||
[(set RFP32:$dst, (fdiv (X86fild addr:$src2, i32),
|
||||
RFP32:$src1))]>;
|
||||
// ST(0) = [mem32int] / ST(0)
|
||||
|
||||
def FpIADD16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW,
|
||||
[(set RFP64:$dst, (fadd RFP64:$src1,
|
||||
(X86fild addr:$src2, i16)))]>;
|
||||
// ST(0) = ST(0) + [mem16int]
|
||||
def FpIADD32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW,
|
||||
[(set RFP64:$dst, (fadd RFP64:$src1,
|
||||
(X86fild addr:$src2, i32)))]>;
|
||||
// ST(0) = ST(0) + [mem32int]
|
||||
def FpIMUL16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW,
|
||||
[(set RFP64:$dst, (fmul RFP64:$src1,
|
||||
(X86fild addr:$src2, i16)))]>;
|
||||
// ST(0) = ST(0) * [mem16int]
|
||||
def FpIMUL32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW,
|
||||
[(set RFP64:$dst, (fmul RFP64:$src1,
|
||||
(X86fild addr:$src2, i32)))]>;
|
||||
// ST(0) = ST(0) * [mem32int]
|
||||
def FpISUB16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW,
|
||||
[(set RFP64:$dst, (fsub RFP64:$src1,
|
||||
(X86fild addr:$src2, i16)))]>;
|
||||
// ST(0) = ST(0) - [mem16int]
|
||||
def FpISUB32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW,
|
||||
[(set RFP64:$dst, (fsub RFP64:$src1,
|
||||
(X86fild addr:$src2, i32)))]>;
|
||||
// ST(0) = ST(0) - [mem32int]
|
||||
def FpISUBR16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW,
|
||||
[(set RFP64:$dst, (fsub (X86fild addr:$src2, i16),
|
||||
RFP64:$src1))]>;
|
||||
// ST(0) = [mem16int] - ST(0)
|
||||
def FpISUBR32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW,
|
||||
[(set RFP64:$dst, (fsub (X86fild addr:$src2, i32),
|
||||
RFP64:$src1))]>;
|
||||
// ST(0) = [mem32int] - ST(0)
|
||||
def FpIDIV16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW,
|
||||
[(set RFP64:$dst, (fdiv RFP64:$src1,
|
||||
(X86fild addr:$src2, i16)))]>;
|
||||
// ST(0) = ST(0) / [mem16int]
|
||||
def FpIDIV32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW,
|
||||
[(set RFP64:$dst, (fdiv RFP64:$src1,
|
||||
(X86fild addr:$src2, i32)))]>;
|
||||
// ST(0) = ST(0) / [mem32int]
|
||||
def FpIDIVR16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW,
|
||||
[(set RFP64:$dst, (fdiv (X86fild addr:$src2, i16),
|
||||
RFP64:$src1))]>;
|
||||
// ST(0) = [mem16int] / ST(0)
|
||||
def FpIDIVR32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW,
|
||||
[(set RFP64:$dst, (fdiv (X86fild addr:$src2, i32),
|
||||
RFP64:$src1))]>;
|
||||
// ST(0) = [mem32int] / ST(0)
|
||||
|
||||
def FIADD16m : FPI<0xDE, MRM0m, (ops i16mem:$src), "fiadd{s} $src">;
|
||||
@ -285,19 +357,31 @@ def FDIVST0r : FPST0rInst <0xF0, "fdiv $op">;
|
||||
def FDIVRrST0 : FPrST0Inst <0xF0, "fdiv{|r} {%st(0), $op|$op, %ST(0)}">;
|
||||
def FDIVRPrST0 : FPrST0PInst<0xF0, "fdiv{|r}p $op">;
|
||||
|
||||
|
||||
// Unary operations.
|
||||
def FpCHS : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
|
||||
[(set RFP:$dst, (fneg RFP:$src))]>;
|
||||
def FpABS : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
|
||||
[(set RFP:$dst, (fabs RFP:$src))]>;
|
||||
def FpSQRT : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
|
||||
[(set RFP:$dst, (fsqrt RFP:$src))]>;
|
||||
def FpSIN : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
|
||||
[(set RFP:$dst, (fsin RFP:$src))]>;
|
||||
def FpCOS : FpI<(ops RFP:$dst, RFP:$src), OneArgFPRW,
|
||||
[(set RFP:$dst, (fcos RFP:$src))]>;
|
||||
def FpTST : FpI<(ops RFP:$src), OneArgFP,
|
||||
def FpCHS32 : FpI<(ops RFP32:$dst, RFP32:$src), OneArgFPRW,
|
||||
[(set RFP32:$dst, (fneg RFP32:$src))]>;
|
||||
def FpABS32 : FpI<(ops RFP32:$dst, RFP32:$src), OneArgFPRW,
|
||||
[(set RFP32:$dst, (fabs RFP32:$src))]>;
|
||||
def FpSQRT32 : FpI<(ops RFP32:$dst, RFP32:$src), OneArgFPRW,
|
||||
[(set RFP32:$dst, (fsqrt RFP32:$src))]>;
|
||||
def FpSIN32 : FpI<(ops RFP32:$dst, RFP32:$src), OneArgFPRW,
|
||||
[(set RFP32:$dst, (fsin RFP32:$src))]>;
|
||||
def FpCOS32 : FpI<(ops RFP32:$dst, RFP32:$src), OneArgFPRW,
|
||||
[(set RFP32:$dst, (fcos RFP32:$src))]>;
|
||||
def FpTST32 : FpI<(ops RFP32:$src), OneArgFP,
|
||||
[]>;
|
||||
|
||||
def FpCHS64 : FpI<(ops RFP64:$dst, RFP64:$src), OneArgFPRW,
|
||||
[(set RFP64:$dst, (fneg RFP64:$src))]>;
|
||||
def FpABS64 : FpI<(ops RFP64:$dst, RFP64:$src), OneArgFPRW,
|
||||
[(set RFP64:$dst, (fabs RFP64:$src))]>;
|
||||
def FpSQRT64 : FpI<(ops RFP64:$dst, RFP64:$src), OneArgFPRW,
|
||||
[(set RFP64:$dst, (fsqrt RFP64:$src))]>;
|
||||
def FpSIN64 : FpI<(ops RFP64:$dst, RFP64:$src), OneArgFPRW,
|
||||
[(set RFP64:$dst, (fsin RFP64:$src))]>;
|
||||
def FpCOS64 : FpI<(ops RFP64:$dst, RFP64:$src), OneArgFPRW,
|
||||
[(set RFP64:$dst, (fcos RFP64:$src))]>;
|
||||
def FpTST64 : FpI<(ops RFP64:$src), OneArgFP,
|
||||
[]>;
|
||||
|
||||
def FCHS : FPI<0xE0, RawFrm, (ops), "fchs">, D9;
|
||||
@ -310,29 +394,54 @@ def FTST : FPI<0xE4, RawFrm, (ops), "ftst">, D9;
|
||||
|
||||
// Floating point cmovs.
|
||||
let isTwoAddress = 1 in {
|
||||
def FpCMOVB : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
|
||||
[(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
|
||||
def FpCMOVB32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
|
||||
[(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
|
||||
X86_COND_B))]>;
|
||||
def FpCMOVBE : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
|
||||
[(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
|
||||
def FpCMOVBE32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
|
||||
[(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
|
||||
X86_COND_BE))]>;
|
||||
def FpCMOVE : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
|
||||
[(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
|
||||
def FpCMOVE32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
|
||||
[(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
|
||||
X86_COND_E))]>;
|
||||
def FpCMOVP : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
|
||||
[(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
|
||||
def FpCMOVP32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
|
||||
[(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
|
||||
X86_COND_P))]>;
|
||||
def FpCMOVNB : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
|
||||
[(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
|
||||
def FpCMOVNB32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
|
||||
[(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
|
||||
X86_COND_AE))]>;
|
||||
def FpCMOVNBE: FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
|
||||
[(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
|
||||
def FpCMOVNBE32: FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
|
||||
[(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
|
||||
X86_COND_A))]>;
|
||||
def FpCMOVNE : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
|
||||
[(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
|
||||
def FpCMOVNE32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
|
||||
[(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
|
||||
X86_COND_NE))]>;
|
||||
def FpCMOVNP : FpI<(ops RFP:$dst, RFP:$src1, RFP:$src2), CondMovFP,
|
||||
[(set RFP:$dst, (X86cmov RFP:$src1, RFP:$src2,
|
||||
def FpCMOVNP32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
|
||||
[(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
|
||||
X86_COND_NP))]>;
|
||||
|
||||
def FpCMOVB64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
|
||||
[(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
|
||||
X86_COND_B))]>;
|
||||
def FpCMOVBE64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
|
||||
[(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
|
||||
X86_COND_BE))]>;
|
||||
def FpCMOVE64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
|
||||
[(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
|
||||
X86_COND_E))]>;
|
||||
def FpCMOVP64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
|
||||
[(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
|
||||
X86_COND_P))]>;
|
||||
def FpCMOVNB64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
|
||||
[(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
|
||||
X86_COND_AE))]>;
|
||||
def FpCMOVNBE64: FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
|
||||
[(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
|
||||
X86_COND_A))]>;
|
||||
def FpCMOVNE64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
|
||||
[(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
|
||||
X86_COND_NE))]>;
|
||||
def FpCMOVNP64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
|
||||
[(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
|
||||
X86_COND_NP))]>;
|
||||
}
|
||||
|
||||
@ -354,27 +463,39 @@ def FCMOVNP : FPI<0xD8, AddRegFrm, (ops RST:$op),
|
||||
"fcmovnu {$op, %st(0)|%ST(0), $op}">, DB;
|
||||
|
||||
// Floating point loads & stores.
|
||||
def FpLD32m : FpI<(ops RFP:$dst, f32mem:$src), ZeroArgFP,
|
||||
[(set RFP:$dst, (extloadf64f32 addr:$src))]>;
|
||||
def FpLD64m : FpI<(ops RFP:$dst, f64mem:$src), ZeroArgFP,
|
||||
[(set RFP:$dst, (loadf64 addr:$src))]>;
|
||||
def FpILD16m : FpI<(ops RFP:$dst, i16mem:$src), ZeroArgFP,
|
||||
[(set RFP:$dst, (X86fild addr:$src, i16))]>;
|
||||
def FpILD32m : FpI<(ops RFP:$dst, i32mem:$src), ZeroArgFP,
|
||||
[(set RFP:$dst, (X86fild addr:$src, i32))]>;
|
||||
def FpILD64m : FpI<(ops RFP:$dst, i64mem:$src), ZeroArgFP,
|
||||
[(set RFP:$dst, (X86fild addr:$src, i64))]>;
|
||||
def FpLD32m : FpI<(ops RFP32:$dst, f32mem:$src), ZeroArgFP,
|
||||
[(set RFP32:$dst, (loadf32 addr:$src))]>;
|
||||
def FpLD64m : FpI<(ops RFP64:$dst, f64mem:$src), ZeroArgFP,
|
||||
[(set RFP64:$dst, (loadf64 addr:$src))]>;
|
||||
def FpILD16m32 : FpI<(ops RFP32:$dst, i16mem:$src), ZeroArgFP,
|
||||
[(set RFP32:$dst, (X86fild addr:$src, i16))]>;
|
||||
def FpILD32m32 : FpI<(ops RFP32:$dst, i32mem:$src), ZeroArgFP,
|
||||
[(set RFP32:$dst, (X86fild addr:$src, i32))]>;
|
||||
def FpILD64m32 : FpI<(ops RFP32:$dst, i64mem:$src), ZeroArgFP,
|
||||
[(set RFP32:$dst, (X86fild addr:$src, i64))]>;
|
||||
def FpILD16m64 : FpI<(ops RFP64:$dst, i16mem:$src), ZeroArgFP,
|
||||
[(set RFP64:$dst, (X86fild addr:$src, i16))]>;
|
||||
def FpILD32m64 : FpI<(ops RFP64:$dst, i32mem:$src), ZeroArgFP,
|
||||
[(set RFP64:$dst, (X86fild addr:$src, i32))]>;
|
||||
def FpILD64m64 : FpI<(ops RFP64:$dst, i64mem:$src), ZeroArgFP,
|
||||
[(set RFP64:$dst, (X86fild addr:$src, i64))]>;
|
||||
|
||||
def FpST32m : FpI<(ops f32mem:$op, RFP:$src), OneArgFP,
|
||||
[(truncstoref32 RFP:$src, addr:$op)]>;
|
||||
def FpST64m : FpI<(ops f64mem:$op, RFP:$src), OneArgFP,
|
||||
[(store RFP:$src, addr:$op)]>;
|
||||
def FpST32m : FpI<(ops f32mem:$op, RFP32:$src), OneArgFP,
|
||||
[(store RFP32:$src, addr:$op)]>;
|
||||
def FpST64m32 : FpI<(ops f32mem:$op, RFP64:$src), OneArgFP,
|
||||
[(truncstoref32 RFP64:$src, addr:$op)]>;
|
||||
def FpST64m : FpI<(ops f64mem:$op, RFP64:$src), OneArgFP,
|
||||
[(store RFP64:$src, addr:$op)]>;
|
||||
|
||||
def FpSTP32m : FpI<(ops f32mem:$op, RFP:$src), OneArgFP, []>;
|
||||
def FpSTP64m : FpI<(ops f64mem:$op, RFP:$src), OneArgFP, []>;
|
||||
def FpIST16m : FpI<(ops i16mem:$op, RFP:$src), OneArgFP, []>;
|
||||
def FpIST32m : FpI<(ops i32mem:$op, RFP:$src), OneArgFP, []>;
|
||||
def FpIST64m : FpI<(ops i64mem:$op, RFP:$src), OneArgFP, []>;
|
||||
def FpSTP32m : FpI<(ops f32mem:$op, RFP32:$src), OneArgFP, []>;
|
||||
def FpSTP64m32 : FpI<(ops f32mem:$op, RFP64:$src), OneArgFP, []>;
|
||||
def FpSTP64m : FpI<(ops f64mem:$op, RFP64:$src), OneArgFP, []>;
|
||||
def FpIST16m32 : FpI<(ops i16mem:$op, RFP32:$src), OneArgFP, []>;
|
||||
def FpIST32m32 : FpI<(ops i32mem:$op, RFP32:$src), OneArgFP, []>;
|
||||
def FpIST64m32 : FpI<(ops i64mem:$op, RFP32:$src), OneArgFP, []>;
|
||||
def FpIST16m64 : FpI<(ops i16mem:$op, RFP64:$src), OneArgFP, []>;
|
||||
def FpIST32m64 : FpI<(ops i32mem:$op, RFP64:$src), OneArgFP, []>;
|
||||
def FpIST64m64 : FpI<(ops i64mem:$op, RFP64:$src), OneArgFP, []>;
|
||||
|
||||
def FLD32m : FPI<0xD9, MRM0m, (ops f32mem:$src), "fld{s} $src">;
|
||||
def FLD64m : FPI<0xDD, MRM0m, (ops f64mem:$src), "fld{l} $src">;
|
||||
@ -392,14 +513,23 @@ def FISTP32m : FPI<0xDB, MRM3m, (ops i32mem:$dst), "fistp{l} $dst">;
|
||||
def FISTP64m : FPI<0xDF, MRM7m, (ops i64mem:$dst), "fistp{ll} $dst">;
|
||||
|
||||
// FISTTP requires SSE3 even though it's a FPStack op.
|
||||
def FpISTT16m : FpI_<(ops i16mem:$op, RFP:$src), OneArgFP,
|
||||
[(X86fp_to_i16mem RFP:$src, addr:$op)]>,
|
||||
def FpISTT16m32 : FpI_<(ops i16mem:$op, RFP32:$src), OneArgFP,
|
||||
[(X86fp_to_i16mem RFP32:$src, addr:$op)]>,
|
||||
Requires<[HasSSE3]>;
|
||||
def FpISTT32m : FpI_<(ops i32mem:$op, RFP:$src), OneArgFP,
|
||||
[(X86fp_to_i32mem RFP:$src, addr:$op)]>,
|
||||
def FpISTT32m32 : FpI_<(ops i32mem:$op, RFP32:$src), OneArgFP,
|
||||
[(X86fp_to_i32mem RFP32:$src, addr:$op)]>,
|
||||
Requires<[HasSSE3]>;
|
||||
def FpISTT64m : FpI_<(ops i64mem:$op, RFP:$src), OneArgFP,
|
||||
[(X86fp_to_i64mem RFP:$src, addr:$op)]>,
|
||||
def FpISTT64m32 : FpI_<(ops i64mem:$op, RFP32:$src), OneArgFP,
|
||||
[(X86fp_to_i64mem RFP32:$src, addr:$op)]>,
|
||||
Requires<[HasSSE3]>;
|
||||
def FpISTT16m64 : FpI_<(ops i16mem:$op, RFP64:$src), OneArgFP,
|
||||
[(X86fp_to_i16mem RFP64:$src, addr:$op)]>,
|
||||
Requires<[HasSSE3]>;
|
||||
def FpISTT32m64 : FpI_<(ops i32mem:$op, RFP64:$src), OneArgFP,
|
||||
[(X86fp_to_i32mem RFP64:$src, addr:$op)]>,
|
||||
Requires<[HasSSE3]>;
|
||||
def FpISTT64m64 : FpI_<(ops i64mem:$op, RFP64:$src), OneArgFP,
|
||||
[(X86fp_to_i64mem RFP64:$src, addr:$op)]>,
|
||||
Requires<[HasSSE3]>;
|
||||
|
||||
def FISTTP16m : FPI<0xDF, MRM1m, (ops i16mem:$dst), "fisttp{s} $dst">;
|
||||
@ -414,10 +544,14 @@ def FXCH : FPI<0xC8, AddRegFrm, (ops RST:$op), "fxch $op">, D9;
|
||||
|
||||
// Floating point constant loads.
|
||||
let isReMaterializable = 1 in {
|
||||
def FpLD0 : FpI<(ops RFP:$dst), ZeroArgFP,
|
||||
[(set RFP:$dst, fp64imm0)]>;
|
||||
def FpLD1 : FpI<(ops RFP:$dst), ZeroArgFP,
|
||||
[(set RFP:$dst, fp64imm1)]>;
|
||||
def FpLD032 : FpI<(ops RFP32:$dst), ZeroArgFP,
|
||||
[(set RFP32:$dst, fpimm0)]>;
|
||||
def FpLD132 : FpI<(ops RFP32:$dst), ZeroArgFP,
|
||||
[(set RFP32:$dst, fpimm1)]>;
|
||||
def FpLD064 : FpI<(ops RFP64:$dst), ZeroArgFP,
|
||||
[(set RFP64:$dst, fpimm0)]>;
|
||||
def FpLD164 : FpI<(ops RFP64:$dst), ZeroArgFP,
|
||||
[(set RFP64:$dst, fpimm1)]>;
|
||||
}
|
||||
|
||||
def FLD0 : FPI<0xEE, RawFrm, (ops), "fldz">, D9;
|
||||
@ -425,10 +559,14 @@ def FLD1 : FPI<0xE8, RawFrm, (ops), "fld1">, D9;
|
||||
|
||||
|
||||
// Floating point compares.
|
||||
def FpUCOMr : FpI<(ops RFP:$lhs, RFP:$rhs), CompareFP,
|
||||
def FpUCOMr32 : FpI<(ops RFP32:$lhs, RFP32:$rhs), CompareFP,
|
||||
[]>; // FPSW = cmp ST(0) with ST(i)
|
||||
def FpUCOMIr : FpI<(ops RFP:$lhs, RFP:$rhs), CompareFP,
|
||||
[(X86cmp RFP:$lhs, RFP:$rhs)]>; // CC = cmp ST(0) with ST(i)
|
||||
def FpUCOMIr32 : FpI<(ops RFP32:$lhs, RFP32:$rhs), CompareFP,
|
||||
[(X86cmp RFP32:$lhs, RFP32:$rhs)]>; // CC = cmp ST(0) with ST(i)
|
||||
def FpUCOMr64 : FpI<(ops RFP64:$lhs, RFP64:$rhs), CompareFP,
|
||||
[]>; // FPSW = cmp ST(0) with ST(i)
|
||||
def FpUCOMIr64 : FpI<(ops RFP64:$lhs, RFP64:$rhs), CompareFP,
|
||||
[(X86cmp RFP64:$lhs, RFP64:$rhs)]>; // CC = cmp ST(0) with ST(i)
|
||||
|
||||
def FUCOMr : FPI<0xE0, AddRegFrm, // FPSW = cmp ST(0) with ST(i)
|
||||
(ops RST:$reg),
|
||||
@ -447,7 +585,6 @@ def FUCOMIPr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i), pop
|
||||
(ops RST:$reg),
|
||||
"fucomip {$reg, %st(0)|%ST(0), $reg}">, DF, Imp<[ST0],[]>;
|
||||
|
||||
|
||||
// Floating point flag ops.
|
||||
def FNSTSW8r : I<0xE0, RawFrm, // AX = fp flags
|
||||
(ops), "fnstsw", []>, DF, Imp<[],[AX]>;
|
||||
@ -466,12 +603,18 @@ def : Pat<(X86fld addr:$src, f32), (FpLD32m addr:$src)>;
|
||||
def : Pat<(X86fld addr:$src, f64), (FpLD64m addr:$src)>;
|
||||
|
||||
// Required for CALL which return f32 / f64 values.
|
||||
def : Pat<(X86fst RFP:$src, addr:$op, f32), (FpST32m addr:$op, RFP:$src)>;
|
||||
def : Pat<(X86fst RFP:$src, addr:$op, f64), (FpST64m addr:$op, RFP:$src)>;
|
||||
def : Pat<(X86fst RFP32:$src, addr:$op, f32), (FpST32m addr:$op, RFP32:$src)>;
|
||||
def : Pat<(X86fst RFP64:$src, addr:$op, f32), (FpST64m32 addr:$op, RFP64:$src)>;
|
||||
def : Pat<(X86fst RFP64:$src, addr:$op, f64), (FpST64m addr:$op, RFP64:$src)>;
|
||||
|
||||
// Floating point constant -0.0 and -1.0
|
||||
def : Pat<(f64 fp64immneg0), (FpCHS (FpLD0))>, Requires<[FPStack]>;
|
||||
def : Pat<(f64 fp64immneg1), (FpCHS (FpLD1))>, Requires<[FPStack]>;
|
||||
def : Pat<(f32 fpimmneg0), (FpCHS32 (FpLD032))>, Requires<[FPStack]>;
|
||||
def : Pat<(f32 fpimmneg1), (FpCHS32 (FpLD132))>, Requires<[FPStack]>;
|
||||
def : Pat<(f64 fpimmneg0), (FpCHS64 (FpLD064))>, Requires<[FPStack]>;
|
||||
def : Pat<(f64 fpimmneg1), (FpCHS64 (FpLD164))>, Requires<[FPStack]>;
|
||||
|
||||
// Used to conv. i64 to f64 since there isn't a SSE version.
|
||||
def : Pat<(X86fildflag addr:$src, i64), (FpILD64m addr:$src)>;
|
||||
def : Pat<(X86fildflag addr:$src, i64), (FpILD64m64 addr:$src)>;
|
||||
|
||||
def : Pat<(extloadf32 addr:$src), (FpMOV3264 (FpLD32m addr:$src))>, Requires<[FPStack]>;
|
||||
def : Pat<(fextend RFP32:$src), (FpMOV3264 RFP32:$src)>, Requires<[FPStack]>;
|
||||
|
@ -33,7 +33,8 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
|
||||
if (oc == X86::MOV8rr || oc == X86::MOV16rr ||
|
||||
oc == X86::MOV32rr || oc == X86::MOV64rr ||
|
||||
oc == X86::MOV16to16_ || oc == X86::MOV32to32_ ||
|
||||
oc == X86::FpMOV || oc == X86::MOVSSrr || oc == X86::MOVSDrr ||
|
||||
oc == X86::FpMOV3232 || oc == X86::MOVSSrr || oc == X86::MOVSDrr ||
|
||||
oc == X86::FpMOV3264 || oc == X86::FpMOV6432 || oc == X86::FpMOV6464 ||
|
||||
oc == X86::FsMOVAPSrr || oc == X86::FsMOVAPDrr ||
|
||||
oc == X86::MOVAPSrr || oc == X86::MOVAPDrr ||
|
||||
oc == X86::MOVSS2PSrr || oc == X86::MOVSD2PDrr ||
|
||||
|
@ -964,7 +964,7 @@ def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (ops VR128:$src1, f128mem:$src2),
|
||||
|
||||
// Alias instructions that map fld0 to pxor for sse.
|
||||
def FsFLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
|
||||
"pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
|
||||
"pxor $dst, $dst", [(set FR64:$dst, fpimm0)]>,
|
||||
Requires<[HasSSE2]>, TB, OpSize;
|
||||
|
||||
// Alias instruction to do FR64 reg-to-reg copy using movapd. Upper bits are
|
||||
|
@ -82,8 +82,10 @@ void X86RegisterInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
|
||||
Opc = X86::MOV32_mr;
|
||||
} else if (RC == &X86::GR16_RegClass) {
|
||||
Opc = X86::MOV16_mr;
|
||||
} else if (RC == &X86::RFPRegClass || RC == &X86::RSTRegClass) {
|
||||
} else if (RC == &X86::RFP64RegClass || RC == &X86::RSTRegClass) {
|
||||
Opc = X86::FpST64m;
|
||||
} else if (RC == &X86::RFP32RegClass) {
|
||||
Opc = X86::FpST32m;
|
||||
} else if (RC == &X86::FR32RegClass) {
|
||||
Opc = X86::MOVSSmr;
|
||||
} else if (RC == &X86::FR64RegClass) {
|
||||
@ -117,8 +119,10 @@ void X86RegisterInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
|
||||
Opc = X86::MOV32_rm;
|
||||
} else if (RC == &X86::GR16_RegClass) {
|
||||
Opc = X86::MOV16_rm;
|
||||
} else if (RC == &X86::RFPRegClass || RC == &X86::RSTRegClass) {
|
||||
} else if (RC == &X86::RFP64RegClass || RC == &X86::RSTRegClass) {
|
||||
Opc = X86::FpLD64m;
|
||||
} else if (RC == &X86::RFP32RegClass) {
|
||||
Opc = X86::FpLD32m;
|
||||
} else if (RC == &X86::FR32RegClass) {
|
||||
Opc = X86::MOVSSrm;
|
||||
} else if (RC == &X86::FR64RegClass) {
|
||||
@ -151,8 +155,10 @@ void X86RegisterInfo::copyRegToReg(MachineBasicBlock &MBB,
|
||||
Opc = X86::MOV32_rr;
|
||||
} else if (RC == &X86::GR16_RegClass) {
|
||||
Opc = X86::MOV16_rr;
|
||||
} else if (RC == &X86::RFPRegClass || RC == &X86::RSTRegClass) {
|
||||
Opc = X86::FpMOV;
|
||||
} else if (RC == &X86::RFP32RegClass) {
|
||||
Opc = X86::FpMOV3232;
|
||||
} else if (RC == &X86::RFP64RegClass || RC == &X86::RSTRegClass) {
|
||||
Opc = X86::FpMOV6464;
|
||||
} else if (RC == &X86::FR32RegClass) {
|
||||
Opc = X86::FsMOVAPSrr;
|
||||
} else if (RC == &X86::FR64RegClass) {
|
||||
|
@ -425,7 +425,8 @@ def FR64 : RegisterClass<"X86", [f64], 64,
|
||||
// faster on common hardware. In reality, this should be controlled by a
|
||||
// command line option or something.
|
||||
|
||||
def RFP : RegisterClass<"X86", [f64], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
|
||||
def RFP32 : RegisterClass<"X86", [f32], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
|
||||
def RFP64 : RegisterClass<"X86", [f64], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
|
||||
|
||||
// Floating point stack registers (these are not allocatable by the
|
||||
// register allocator - the floating point stackifier is responsible
|
||||
|
Loading…
x
Reference in New Issue
Block a user