mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 19:52:54 +01:00
ptx: add store instruction
llvm-svn: 122652
This commit is contained in:
parent
f6477b0a90
commit
995a853724
@ -66,6 +66,56 @@ def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||
return false;
|
||||
}]>;
|
||||
|
||||
// store_global - Pattern fragment over `store` taking a value ($d) and a
// pointer ($ptr). The predicate accepts the node only when the Value returned
// by the StoreSDNode's getSrcValue() is non-null, has pointer type, and that
// pointer type's address space equals PTX::GLOBAL; otherwise it returns false.
def store_global
|
||||
: PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
|
||||
const Value *Src;
|
||||
const PointerType *PT;
|
||||
if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
|
||||
(PT = dyn_cast<PointerType>(Src->getType())))
|
||||
return PT->getAddressSpace() == PTX::GLOBAL;
|
||||
return false;
|
||||
}]>;
|
||||
|
||||
// store_constant - Pattern fragment over `store` taking a value ($d) and a
// pointer ($ptr). The predicate accepts the node only when the Value returned
// by the StoreSDNode's getSrcValue() is non-null, has pointer type, and that
// pointer type's address space equals PTX::CONSTANT; otherwise it returns
// false.
// NOTE(review): the PTX ISA describes the .const state space as read-only and
// does not list it among the state spaces accepted by `st` -- confirm whether
// a store fragment for PTX::CONSTANT is actually intended.
def store_constant
|
||||
: PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
|
||||
const Value *Src;
|
||||
const PointerType *PT;
|
||||
if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
|
||||
(PT = dyn_cast<PointerType>(Src->getType())))
|
||||
return PT->getAddressSpace() == PTX::CONSTANT;
|
||||
return false;
|
||||
}]>;
|
||||
|
||||
// store_local - Pattern fragment over `store` taking a value ($d) and a
// pointer ($ptr). The predicate accepts the node only when the Value returned
// by the StoreSDNode's getSrcValue() is non-null, has pointer type, and that
// pointer type's address space equals PTX::LOCAL; otherwise it returns false.
def store_local
|
||||
: PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
|
||||
const Value *Src;
|
||||
const PointerType *PT;
|
||||
if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
|
||||
(PT = dyn_cast<PointerType>(Src->getType())))
|
||||
return PT->getAddressSpace() == PTX::LOCAL;
|
||||
return false;
|
||||
}]>;
|
||||
|
||||
// store_parameter - Pattern fragment over `store` taking a value ($d) and a
// pointer ($ptr). The predicate accepts the node only when the Value returned
// by the StoreSDNode's getSrcValue() is non-null, has pointer type, and that
// pointer type's address space equals PTX::PARAMETER; otherwise it returns
// false.
def store_parameter
|
||||
: PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
|
||||
const Value *Src;
|
||||
const PointerType *PT;
|
||||
if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
|
||||
(PT = dyn_cast<PointerType>(Src->getType())))
|
||||
return PT->getAddressSpace() == PTX::PARAMETER;
|
||||
return false;
|
||||
}]>;
|
||||
|
||||
// store_shared - Pattern fragment over `store` taking a value ($d) and a
// pointer ($ptr). The predicate accepts the node only when the Value returned
// by the StoreSDNode's getSrcValue() is non-null, has pointer type, and that
// pointer type's address space equals PTX::SHARED; otherwise it returns false.
def store_shared
|
||||
: PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
|
||||
const Value *Src;
|
||||
const PointerType *PT;
|
||||
if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
|
||||
(PT = dyn_cast<PointerType>(Src->getType())))
|
||||
return PT->getAddressSpace() == PTX::SHARED;
|
||||
return false;
|
||||
}]>;
|
||||
|
||||
// Addressing modes.
|
||||
def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
|
||||
def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
|
||||
@ -145,6 +195,21 @@ multiclass PTX_LD<string opstr, RegisterClass RC, PatFrag pat_load> {
|
||||
[(set RC:$d, (pat_load ADDRii:$a))]>;
|
||||
}
|
||||
|
||||
// PTX_ST - Defines three store instruction variants (rr, ri, ii) for the
// given mnemonic prefix `opstr`, register class `RC`, and store fragment
// `pat_store`. Each variant has no outputs, takes the value register $d and a
// memory operand $a (MEMrr / MEMri / MEMii), prints as
// opstr + ".%type\t[$a], $d", and is selected by applying `pat_store` to $d
// and the matching addressing-mode pattern (ADDRrr / ADDRri / ADDRii).
multiclass PTX_ST<string opstr, RegisterClass RC, PatFrag pat_store> {
|
||||
def rr : InstPTX<(outs),
|
||||
(ins RC:$d, MEMrr:$a),
|
||||
!strconcat(opstr, ".%type\t[$a], $d"),
|
||||
[(pat_store RC:$d, ADDRrr:$a)]>;
|
||||
def ri : InstPTX<(outs),
|
||||
(ins RC:$d, MEMri:$a),
|
||||
!strconcat(opstr, ".%type\t[$a], $d"),
|
||||
[(pat_store RC:$d, ADDRri:$a)]>;
|
||||
def ii : InstPTX<(outs),
|
||||
(ins RC:$d, MEMii:$a),
|
||||
!strconcat(opstr, ".%type\t[$a], $d"),
|
||||
[(pat_store RC:$d, ADDRii:$a)]>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -185,6 +250,12 @@ defm LDl : PTX_LD<"ld.local", RRegs32, load_local>;
|
||||
defm LDp : PTX_LD<"ld.param", RRegs32, load_parameter>;
|
||||
defm LDs : PTX_LD<"ld.shared", RRegs32, load_shared>;
|
||||
|
||||
// Store instructions: one PTX_ST instantiation per store fragment, all using
// the RRegs32 register class.
defm STg : PTX_ST<"st.global", RRegs32, store_global>;
|
||||
defm STc : PTX_ST<"st.const", RRegs32, store_constant>;
|
||||
defm STl : PTX_ST<"st.local", RRegs32, store_local>;
|
||||
defm STp : PTX_ST<"st.param", RRegs32, store_parameter>;
|
||||
defm STs : PTX_ST<"st.shared", RRegs32, store_shared>;
|
||||
|
||||
///===- Control Flow Instructions -----------------------------------------===//
|
||||
|
||||
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
|
||||
|
@ -22,6 +22,8 @@
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
// NOTE: PTXMFInfoExtract must run after register allocation!
|
||||
|
||||
namespace llvm {
|
||||
/// PTXMFInfoExtract - PTX specific code to extract PTX machine
|
||||
/// function information for PTXAsmPrinter
|
||||
@ -50,22 +52,38 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
|
||||
PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
|
||||
DEBUG(dbgs() << "****** PTX FUNCTION LOCAL VAR REG DEF ******\n");
|
||||
DEBUG(dbgs() << "******** PTX FUNCTION LOCAL VAR REG DEF ********\n");
|
||||
|
||||
unsigned reg_ret = MFI->retReg();
|
||||
unsigned retreg = MFI->retReg();
|
||||
|
||||
DEBUG(dbgs()
|
||||
<< "PTX::NoRegister == " << PTX::NoRegister << "\n"
|
||||
<< "PTX::NUM_TARGET_REGS == " << PTX::NUM_TARGET_REGS << "\n");
|
||||
|
||||
DEBUG(for (unsigned reg = PTX::NoRegister + 1;
|
||||
reg < PTX::NUM_TARGET_REGS; ++reg)
|
||||
if (MRI.isPhysRegUsed(reg))
|
||||
dbgs() << "Used Reg: " << reg << "\n";);
|
||||
|
||||
// FIXME: This is a slow linear scan
|
||||
for (unsigned reg = PTX::NoRegister + 1; reg < PTX::NUM_TARGET_REGS; ++reg)
|
||||
if (MRI.isPhysRegUsed(reg) && reg != reg_ret && !MFI->isArgReg(reg))
|
||||
if (MRI.isPhysRegUsed(reg) && reg != retreg && !MFI->isArgReg(reg))
|
||||
MFI->addLocalVarReg(reg);
|
||||
|
||||
// Notify MachineFunctionInfo that we are done adding local var regs
|
||||
MFI->doneAddLocalVar();
|
||||
|
||||
DEBUG(dbgs() << "Return Reg: " << retreg << "\n");
|
||||
|
||||
DEBUG(for (PTXMachineFunctionInfo::reg_iterator
|
||||
i = MFI->argRegBegin(), e = MFI->argRegEnd();
|
||||
i != e; ++i)
|
||||
dbgs() << "Arg Reg: " << *i << "\n";);
|
||||
|
||||
DEBUG(for (PTXMachineFunctionInfo::reg_iterator
|
||||
i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd();
|
||||
i != e; ++i)
|
||||
dbgs() << "Used Reg: " << *i << "\n";);
|
||||
dbgs() << "Local Var Reg: " << *i << "\n";);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
@ -49,6 +49,12 @@ PTXTargetMachine::PTXTargetMachine(const Target &T,
|
||||
/// Adds the pass created by createPTXISelDag(*this, OptLevel) to \p PM and
/// returns false.
bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
|
||||
CodeGenOpt::Level OptLevel) {
|
||||
PM.add(createPTXISelDag(*this, OptLevel));
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Adds the pass created by createPTXMFInfoExtract(*this, OptLevel) to \p PM
/// and returns false.
bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM,
|
||||
CodeGenOpt::Level OptLevel) {
|
||||
// PTXMFInfoExtract must run after register allocation!
|
||||
PM.add(createPTXMFInfoExtract(*this, OptLevel));
|
||||
return false;
|
||||
}
|
||||
|
@ -50,6 +50,8 @@ class PTXTargetMachine : public LLVMTargetMachine {
|
||||
|
||||
virtual bool addInstSelector(PassManagerBase &PM,
|
||||
CodeGenOpt::Level OptLevel);
|
||||
virtual bool addPostRegAlloc(PassManagerBase &PM,
|
||||
CodeGenOpt::Level OptLevel);
|
||||
}; // class PTXTargetMachine
|
||||
} // namespace llvm
|
||||
|
||||
|
78
test/CodeGen/PTX/st.ll
Normal file
78
test/CodeGen/PTX/st.ll
Normal file
@ -0,0 +1,78 @@
|
||||
; RUN: llc < %s -march=ptx | FileCheck %s
|
||||
|
||||
;CHECK: .extern .global .s32 array[];
|
||||
@array = external global [10 x i32]
|
||||
|
||||
;CHECK: .extern .const .s32 array_constant[];
|
||||
@array_constant = external addrspace(1) constant [10 x i32]
|
||||
|
||||
;CHECK: .extern .local .s32 array_local[];
|
||||
@array_local = external addrspace(2) global [10 x i32]
|
||||
|
||||
;CHECK: .extern .shared .s32 array_shared[];
|
||||
@array_shared = external addrspace(4) global [10 x i32]
|
||||
|
||||
define ptx_device void @t1(i32* %p, i32 %x) {
|
||||
entry:
|
||||
;CHECK: st.global.s32 [r1], r2;
|
||||
store i32 %x, i32* %p
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t2(i32* %p, i32 %x) {
|
||||
entry:
|
||||
;CHECK: st.global.s32 [r1+4], r2;
|
||||
%i = getelementptr i32* %p, i32 1
|
||||
store i32 %x, i32* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t3(i32* %p, i32 %q, i32 %x) {
|
||||
;CHECK: .reg .s32 r0;
|
||||
entry:
|
||||
;CHECK: shl.b32 r0, r2, 2;
|
||||
;CHECK: st.global.s32 [r1+r0], r3;
|
||||
%i = getelementptr i32* %p, i32 %q
|
||||
store i32 %x, i32* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_global(i32 %x) {
|
||||
entry:
|
||||
;CHECK: st.global.s32 [array], r1;
|
||||
%i = getelementptr [10 x i32]* @array, i32 0, i32 0
|
||||
store i32 %x, i32* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_const(i32 %x) {
|
||||
entry:
|
||||
;CHECK: st.const.s32 [array_constant], r1;
|
||||
%i = getelementptr [10 x i32] addrspace(1)* @array_constant, i32 0, i32 0
|
||||
store i32 %x, i32 addrspace(1)* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_local(i32 %x) {
|
||||
entry:
|
||||
;CHECK: st.local.s32 [array_local], r1;
|
||||
%i = getelementptr [10 x i32] addrspace(2)* @array_local, i32 0, i32 0
|
||||
store i32 %x, i32 addrspace(2)* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_shared(i32 %x) {
|
||||
entry:
|
||||
;CHECK: st.shared.s32 [array_shared], r1;
|
||||
%i = getelementptr [10 x i32] addrspace(4)* @array_shared, i32 0, i32 0
|
||||
store i32 %x, i32 addrspace(4)* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t5(i32 %x) {
|
||||
entry:
|
||||
;CHECK: st.global.s32 [array+4], r1;
|
||||
%i = getelementptr [10 x i32]* @array, i32 0, i32 1
|
||||
store i32 %x, i32* %i
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue
Block a user