mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[FastISel][X86] Add MachineMemOperand to load/store instructions.
This commit adds MachineMemOperands to load and store instructions. This allows the peephole optimizer to fold load instructions. Unfortunately the peephole optimizer currently doesn't run at -O0. llvm-svn: 210858
This commit is contained in:
parent
286dd39af0
commit
9dda2c5782
@ -377,6 +377,9 @@ protected:
|
|||||||
/// Test whether the given value has exactly one use.
|
/// Test whether the given value has exactly one use.
|
||||||
bool hasTrivialKill(const Value *V) const;
|
bool hasTrivialKill(const Value *V) const;
|
||||||
|
|
||||||
|
/// \brief Create a machine mem operand from the given instruction.
|
||||||
|
MachineMemOperand *createMachineMemOperandFor(const Instruction *I) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool SelectBinaryOp(const User *I, unsigned ISDOpcode);
|
bool SelectBinaryOp(const User *I, unsigned ISDOpcode);
|
||||||
|
|
||||||
|
@ -1737,3 +1737,47 @@ bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) {
|
|||||||
return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1));
|
return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// \brief Build the MachineMemOperand describing the memory access made by
/// \p I.
///
/// Only LoadInst and StoreInst are handled; nullptr is returned for every
/// other kind of instruction. The operand records the accessed pointer, the
/// store size of the accessed type, the alignment, the load/store, volatile,
/// non-temporal and invariant flags, and any TBAA / range metadata attached to
/// the instruction.
MachineMemOperand *
FastISel::createMachineMemOperandFor(const Instruction *I) const {
  const LoadInst *Load = dyn_cast<LoadInst>(I);
  const StoreInst *Store = Load ? nullptr : dyn_cast<StoreInst>(I);

  // Anything that is neither a load nor a store has no memory operand here.
  if (!Load && !Store)
    return nullptr;

  // Gather the properties that are spelled differently for loads and stores.
  const Value *Addr =
      Load ? Load->getPointerOperand() : Store->getPointerOperand();
  Type *AccessTy =
      Load ? Load->getType() : Store->getValueOperand()->getType();
  unsigned Align = Load ? Load->getAlignment() : Store->getAlignment();
  unsigned MMOFlags =
      Load ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore;

  if (Load ? Load->isVolatile() : Store->isVolatile())
    MMOFlags |= MachineMemOperand::MOVolatile;
  if (I->getMetadata("nontemporal") != nullptr)
    MMOFlags |= MachineMemOperand::MONonTemporal;
  if (I->getMetadata("invariant.load") != nullptr)
    MMOFlags |= MachineMemOperand::MOInvariant;

  // Ensure that codegen never sees alignment 0: fall back to the ABI
  // alignment of the accessed type.
  if (Align == 0)
    Align = DL.getABITypeAlignment(AccessTy);

  unsigned StoreSize = TM.getDataLayout()->getTypeStoreSize(AccessTy);

  const MDNode *TBAATag = I->getMetadata(LLVMContext::MD_tbaa);
  const MDNode *RangeMD = I->getMetadata(LLVMContext::MD_range);

  return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Addr), MMOFlags,
                                           StoreSize, Align, TBAATag, RangeMD);
}
|
||||||
|
@ -78,12 +78,14 @@ public:
|
|||||||
private:
|
private:
|
||||||
bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);
|
bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);
|
||||||
|
|
||||||
bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
|
bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, MachineMemOperand *MMO,
|
||||||
|
unsigned &ResultReg);
|
||||||
|
|
||||||
bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
|
bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
|
||||||
bool Aligned = false);
|
MachineMemOperand *MMO = nullptr, bool Aligned = false);
|
||||||
bool X86FastEmitStore(EVT VT, unsigned ValReg, const X86AddressMode &AM,
|
bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
|
||||||
bool Aligned = false);
|
const X86AddressMode &AM,
|
||||||
|
MachineMemOperand *MMO = nullptr, bool Aligned = false);
|
||||||
|
|
||||||
bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
|
bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
|
||||||
unsigned &ResultReg);
|
unsigned &ResultReg);
|
||||||
@ -180,7 +182,7 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
|
|||||||
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
|
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
|
||||||
/// Return true and the result register by reference if it is possible.
|
/// Return true and the result register by reference if it is possible.
|
||||||
bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
|
bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
|
||||||
unsigned &ResultReg) {
|
MachineMemOperand *MMO, unsigned &ResultReg) {
|
||||||
// Get opcode and regclass of the output for the given load instruction.
|
// Get opcode and regclass of the output for the given load instruction.
|
||||||
unsigned Opc = 0;
|
unsigned Opc = 0;
|
||||||
const TargetRegisterClass *RC = nullptr;
|
const TargetRegisterClass *RC = nullptr;
|
||||||
@ -228,8 +230,11 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
|
|||||||
}
|
}
|
||||||
|
|
||||||
ResultReg = createResultReg(RC);
|
ResultReg = createResultReg(RC);
|
||||||
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
|
MachineInstrBuilder MIB =
|
||||||
DbgLoc, TII.get(Opc), ResultReg), AM);
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
|
||||||
|
addFullAddress(MIB, AM);
|
||||||
|
if (MMO)
|
||||||
|
MIB->addMemOperand(*FuncInfo.MF, MMO);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -237,9 +242,9 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
|
|||||||
/// type VT. The address is either pre-computed, consisted of a base ptr, Ptr
|
/// type VT. The address is either pre-computed, consisted of a base ptr, Ptr
|
||||||
/// and a displacement offset, or a GlobalAddress,
|
/// and a displacement offset, or a GlobalAddress,
|
||||||
/// i.e. V. Return true if it is possible.
|
/// i.e. V. Return true if it is possible.
|
||||||
bool
|
bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
|
||||||
X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
|
const X86AddressMode &AM,
|
||||||
const X86AddressMode &AM, bool Aligned) {
|
MachineMemOperand *MMO, bool Aligned) {
|
||||||
// Get opcode and regclass of the output for the given store instruction.
|
// Get opcode and regclass of the output for the given store instruction.
|
||||||
unsigned Opc = 0;
|
unsigned Opc = 0;
|
||||||
switch (VT.getSimpleVT().SimpleTy) {
|
switch (VT.getSimpleVT().SimpleTy) {
|
||||||
@ -249,7 +254,8 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
|
|||||||
// Mask out all but lowest bit.
|
// Mask out all but lowest bit.
|
||||||
unsigned AndResult = createResultReg(&X86::GR8RegClass);
|
unsigned AndResult = createResultReg(&X86::GR8RegClass);
|
||||||
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
||||||
TII.get(X86::AND8ri), AndResult).addReg(ValReg).addImm(1);
|
TII.get(X86::AND8ri), AndResult)
|
||||||
|
.addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
|
||||||
ValReg = AndResult;
|
ValReg = AndResult;
|
||||||
}
|
}
|
||||||
// FALLTHROUGH, handling i1 as i8.
|
// FALLTHROUGH, handling i1 as i8.
|
||||||
@ -288,13 +294,18 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
|
MachineInstrBuilder MIB =
|
||||||
DbgLoc, TII.get(Opc)), AM).addReg(ValReg);
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
|
||||||
|
addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
|
||||||
|
if (MMO)
|
||||||
|
MIB->addMemOperand(*FuncInfo.MF, MMO);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
|
bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
|
||||||
const X86AddressMode &AM, bool Aligned) {
|
const X86AddressMode &AM,
|
||||||
|
MachineMemOperand *MMO, bool Aligned) {
|
||||||
// Handle 'null' like i32/i64 0.
|
// Handle 'null' like i32/i64 0.
|
||||||
if (isa<ConstantPointerNull>(Val))
|
if (isa<ConstantPointerNull>(Val))
|
||||||
Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
|
Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
|
||||||
@ -317,10 +328,12 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (Opc) {
|
if (Opc) {
|
||||||
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
|
MachineInstrBuilder MIB =
|
||||||
DbgLoc, TII.get(Opc)), AM)
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
|
||||||
.addImm(Signed ? (uint64_t) CI->getSExtValue() :
|
addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
|
||||||
CI->getZExtValue());
|
: CI->getZExtValue());
|
||||||
|
if (MMO)
|
||||||
|
MIB->addMemOperand(*FuncInfo.MF, MMO);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -329,7 +342,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
|
|||||||
if (ValReg == 0)
|
if (ValReg == 0)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return X86FastEmitStore(VT, ValReg, AM, Aligned);
|
bool ValKill = hasTrivialKill(Val);
|
||||||
|
return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
|
/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
|
||||||
@ -740,19 +754,24 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
|
|||||||
if (S->isAtomic())
|
if (S->isAtomic())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
unsigned SABIAlignment =
|
const Value *Val = S->getValueOperand();
|
||||||
DL.getABITypeAlignment(S->getValueOperand()->getType());
|
const Value *Ptr = S->getPointerOperand();
|
||||||
bool Aligned = S->getAlignment() == 0 || S->getAlignment() >= SABIAlignment;
|
|
||||||
|
|
||||||
MVT VT;
|
MVT VT;
|
||||||
if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
|
if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
unsigned Alignment = S->getAlignment();
|
||||||
|
unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
|
||||||
|
if (Alignment == 0) // Ensure that codegen never sees alignment 0
|
||||||
|
Alignment = ABIAlignment;
|
||||||
|
bool Aligned = Alignment >= ABIAlignment;
|
||||||
|
|
||||||
X86AddressMode AM;
|
X86AddressMode AM;
|
||||||
if (!X86SelectAddress(I->getOperand(1), AM))
|
if (!X86SelectAddress(Ptr, AM))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return X86FastEmitStore(VT, I->getOperand(0), AM, Aligned);
|
return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// X86SelectRet - Select and emit code to implement ret instructions.
|
/// X86SelectRet - Select and emit code to implement ret instructions.
|
||||||
@ -887,25 +906,29 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
|
|||||||
|
|
||||||
/// X86SelectLoad - Select and emit code to implement load instructions.
|
/// X86SelectLoad - Select and emit code to implement load instructions.
|
||||||
///
|
///
|
||||||
bool X86FastISel::X86SelectLoad(const Instruction *I) {
|
bool X86FastISel::X86SelectLoad(const Instruction *I) {
|
||||||
|
const LoadInst *LI = cast<LoadInst>(I);
|
||||||
|
|
||||||
// Atomic loads need special handling.
|
// Atomic loads need special handling.
|
||||||
if (cast<LoadInst>(I)->isAtomic())
|
if (LI->isAtomic())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
MVT VT;
|
MVT VT;
|
||||||
if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
|
if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
const Value *Ptr = LI->getPointerOperand();
|
||||||
|
|
||||||
X86AddressMode AM;
|
X86AddressMode AM;
|
||||||
if (!X86SelectAddress(I->getOperand(0), AM))
|
if (!X86SelectAddress(Ptr, AM))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
unsigned ResultReg = 0;
|
unsigned ResultReg = 0;
|
||||||
if (X86FastEmitLoad(VT, AM, ResultReg)) {
|
if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg))
|
||||||
UpdateValueMap(I, ResultReg);
|
return false;
|
||||||
return true;
|
|
||||||
}
|
UpdateValueMap(I, ResultReg);
|
||||||
return false;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
|
static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
|
||||||
@ -1624,8 +1647,8 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
|
|||||||
}
|
}
|
||||||
|
|
||||||
unsigned Reg;
|
unsigned Reg;
|
||||||
bool RV = X86FastEmitLoad(VT, SrcAM, Reg);
|
bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
|
||||||
RV &= X86FastEmitStore(VT, Reg, DestAM);
|
RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
|
||||||
assert(RV && "Failed to emit load or store??");
|
assert(RV && "Failed to emit load or store??");
|
||||||
|
|
||||||
unsigned Size = VT.getSizeInBits()/8;
|
unsigned Size = VT.getSizeInBits()/8;
|
||||||
@ -2322,7 +2345,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
|
|||||||
if (!X86FastEmitStore(ArgVT, ArgVal, AM))
|
if (!X86FastEmitStore(ArgVT, ArgVal, AM))
|
||||||
return false;
|
return false;
|
||||||
} else {
|
} else {
|
||||||
if (!X86FastEmitStore(ArgVT, Arg, AM))
|
if (!X86FastEmitStore(ArgVT, Arg, /*ValIsKill=*/false, AM))
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2719,8 +2742,9 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
|
|||||||
|
|
||||||
bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
|
bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
|
||||||
const LoadInst *LI) {
|
const LoadInst *LI) {
|
||||||
|
const Value *Ptr = LI->getPointerOperand();
|
||||||
X86AddressMode AM;
|
X86AddressMode AM;
|
||||||
if (!X86SelectAddress(LI->getOperand(0), AM))
|
if (!X86SelectAddress(Ptr, AM))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
const X86InstrInfo &XII = (const X86InstrInfo&)TII;
|
const X86InstrInfo &XII = (const X86InstrInfo&)TII;
|
||||||
@ -2728,13 +2752,18 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
|
|||||||
unsigned Size = DL.getTypeAllocSize(LI->getType());
|
unsigned Size = DL.getTypeAllocSize(LI->getType());
|
||||||
unsigned Alignment = LI->getAlignment();
|
unsigned Alignment = LI->getAlignment();
|
||||||
|
|
||||||
|
if (Alignment == 0) // Ensure that codegen never sees alignment 0
|
||||||
|
Alignment = DL.getABITypeAlignment(LI->getType());
|
||||||
|
|
||||||
SmallVector<MachineOperand, 8> AddrOps;
|
SmallVector<MachineOperand, 8> AddrOps;
|
||||||
AM.getFullAddress(AddrOps);
|
AM.getFullAddress(AddrOps);
|
||||||
|
|
||||||
MachineInstr *Result =
|
MachineInstr *Result =
|
||||||
XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
|
XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
|
||||||
if (!Result) return false;
|
if (!Result)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
|
||||||
FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
|
FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
|
||||||
MI->eraseFromParent();
|
MI->eraseFromParent();
|
||||||
return true;
|
return true;
|
||||||
|
12
test/CodeGen/X86/fast-isel-fold-mem.ll
Normal file
12
test/CodeGen/X86/fast-isel-fold-mem.ll
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
|
||||||
|
; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin | FileCheck %s
|
||||||
|
|
||||||
|
define i64 @fold_load(i64* %a, i64 %b) {
|
||||||
|
; CHECK-LABEL: fold_load
|
||||||
|
; CHECK: addq (%rdi), %rsi
|
||||||
|
; CHECK-NEXT: movq %rsi, %rax
|
||||||
|
%1 = load i64* %a, align 8
|
||||||
|
%2 = add i64 %1, %b
|
||||||
|
ret i64 %2
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user