mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
R600: Add support for local memory atomic add
llvm-svn: 190080
This commit is contained in:
parent
6c1db18560
commit
ce0432a0c3
@ -191,6 +191,11 @@ def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
|||||||
return isLocalLoad(dyn_cast<LoadSDNode>(N));
|
return isLocalLoad(dyn_cast<LoadSDNode>(N));
|
||||||
}]>;
|
}]>;
|
||||||
|
|
||||||
|
def atomic_load_add_local : PatFrag<(ops node:$ptr, node:$value),
|
||||||
|
(atomic_load_add node:$ptr, node:$value), [{
|
||||||
|
return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
|
||||||
|
}]>;
|
||||||
|
|
||||||
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
|
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
|
||||||
(AMDGPUstore_mskor node:$val, node:$ptr), [{
|
(AMDGPUstore_mskor node:$val, node:$ptr), [{
|
||||||
return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
|
return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
|
||||||
|
@ -109,16 +109,24 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
|
|||||||
|
|
||||||
switch (MI->getOpcode()) {
|
switch (MI->getOpcode()) {
|
||||||
default:
|
default:
|
||||||
if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::LDS_1A) {
|
if (TII->isLDSInstr(MI->getOpcode()) &&
|
||||||
MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
|
TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst) != -1) {
|
||||||
TII->get(MI->getOpcode()),
|
int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
|
||||||
AMDGPU::OQAP);
|
assert(DstIdx != -1);
|
||||||
|
MachineInstrBuilder NewMI;
|
||||||
|
if (!MRI.use_empty(MI->getOperand(DstIdx).getReg())) {
|
||||||
|
NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()),
|
||||||
|
AMDGPU::OQAP);
|
||||||
|
TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
|
||||||
|
MI->getOperand(0).getReg(),
|
||||||
|
AMDGPU::OQAP);
|
||||||
|
} else {
|
||||||
|
NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
|
||||||
|
TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
|
||||||
|
}
|
||||||
for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
|
for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
|
||||||
NewMI.addOperand(MI->getOperand(i));
|
NewMI.addOperand(MI->getOperand(i));
|
||||||
}
|
}
|
||||||
TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
|
|
||||||
MI->getOperand(0).getReg(),
|
|
||||||
AMDGPU::OQAP);
|
|
||||||
} else {
|
} else {
|
||||||
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
|
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
|
||||||
}
|
}
|
||||||
|
@ -275,6 +275,12 @@ namespace llvm {
|
|||||||
void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
|
void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
namespace AMDGPU {
|
||||||
|
|
||||||
|
int getLDSNoRetOp(uint16_t Opcode);
|
||||||
|
|
||||||
|
} //End namespace AMDGPU
|
||||||
|
|
||||||
} // End llvm namespace
|
} // End llvm namespace
|
||||||
|
|
||||||
#endif // R600INSTRINFO_H_
|
#endif // R600INSTRINFO_H_
|
||||||
|
@ -1626,23 +1626,39 @@ class R600_LDS_1A <bits<6> lds_op, string name, list<dag> pattern> : R600_LDS <
|
|||||||
let DisableEncoding = "$dst";
|
let DisableEncoding = "$dst";
|
||||||
}
|
}
|
||||||
|
|
||||||
class R600_LDS_1A1D <bits<6> lds_op, string name, list<dag> pattern> :
|
class R600_LDS_1A1D <bits<6> lds_op, dag outs, string name, list<dag> pattern,
|
||||||
|
string dst =""> :
|
||||||
R600_LDS <
|
R600_LDS <
|
||||||
lds_op,
|
lds_op, outs,
|
||||||
(outs),
|
|
||||||
(ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
|
(ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
|
||||||
R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel,
|
R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel,
|
||||||
LAST:$last, R600_Pred:$pred_sel,
|
LAST:$last, R600_Pred:$pred_sel,
|
||||||
BANK_SWIZZLE:$bank_swizzle),
|
BANK_SWIZZLE:$bank_swizzle),
|
||||||
" "#name#" $last $src0$src0_rel, $src1$src1_rel, $pred_sel",
|
" "#name#" $last "#dst#"$src0$src0_rel, $src1$src1_rel, $pred_sel",
|
||||||
pattern
|
pattern
|
||||||
> {
|
> {
|
||||||
|
|
||||||
|
field string BaseOp;
|
||||||
|
|
||||||
let src2 = 0;
|
let src2 = 0;
|
||||||
let src2_rel = 0;
|
let src2_rel = 0;
|
||||||
let LDS_1A1D = 1;
|
let LDS_1A1D = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class R600_LDS_1A1D_NORET <bits<6> lds_op, string name, list<dag> pattern> :
|
||||||
|
R600_LDS_1A1D <lds_op, (outs), name, pattern> {
|
||||||
|
let BaseOp = name;
|
||||||
|
}
|
||||||
|
|
||||||
|
class R600_LDS_1A1D_RET <bits<6> lds_op, string name, list<dag> pattern> :
|
||||||
|
R600_LDS_1A1D <lds_op, (outs R600_Reg32:$dst), name##"_RET", pattern, "OQAP, "> {
|
||||||
|
|
||||||
|
let BaseOp = name;
|
||||||
|
let usesCustomInserter = 1;
|
||||||
|
let DisableEncoding = "$dst";
|
||||||
|
let Defs = [OQAP];
|
||||||
|
}
|
||||||
|
|
||||||
class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> :
|
class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> :
|
||||||
R600_LDS <
|
R600_LDS <
|
||||||
lds_op,
|
lds_op,
|
||||||
@ -1656,15 +1672,19 @@ class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> :
|
|||||||
let LDS_1A2D = 1;
|
let LDS_1A2D = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
def LDS_WRITE : R600_LDS_1A1D <0xD, "LDS_WRITE",
|
def LDS_ADD : R600_LDS_1A1D_NORET <0x0, "LDS_ADD", [] >;
|
||||||
|
def LDS_WRITE : R600_LDS_1A1D_NORET <0xD, "LDS_WRITE",
|
||||||
[(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)]
|
[(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)]
|
||||||
>;
|
>;
|
||||||
def LDS_BYTE_WRITE : R600_LDS_1A1D<0x12, "LDS_BYTE_WRITE",
|
def LDS_BYTE_WRITE : R600_LDS_1A1D_NORET<0x12, "LDS_BYTE_WRITE",
|
||||||
[(truncstorei8_local i32:$src1, i32:$src0)]
|
[(truncstorei8_local i32:$src1, i32:$src0)]
|
||||||
>;
|
>;
|
||||||
def LDS_SHORT_WRITE : R600_LDS_1A1D<0x13, "LDS_SHORT_WRITE",
|
def LDS_SHORT_WRITE : R600_LDS_1A1D_NORET<0x13, "LDS_SHORT_WRITE",
|
||||||
[(truncstorei16_local i32:$src1, i32:$src0)]
|
[(truncstorei16_local i32:$src1, i32:$src0)]
|
||||||
>;
|
>;
|
||||||
|
def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD",
|
||||||
|
[(set i32:$dst, (atomic_load_add_local i32:$src0, i32:$src1))]
|
||||||
|
>;
|
||||||
def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET",
|
def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET",
|
||||||
[(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))]
|
[(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))]
|
||||||
>;
|
>;
|
||||||
@ -2428,3 +2448,11 @@ def : BitConvert <v4i32, v4f32, R600_Reg128>;
|
|||||||
def : DwordAddrPat <i32, R600_Reg32>;
|
def : DwordAddrPat <i32, R600_Reg32>;
|
||||||
|
|
||||||
} // End isR600toCayman Predicate
|
} // End isR600toCayman Predicate
|
||||||
|
|
||||||
|
def getLDSNoRetOp : InstrMapping {
|
||||||
|
let FilterClass = "R600_LDS_1A1D";
|
||||||
|
let RowFields = ["BaseOp"];
|
||||||
|
let ColFields = ["DisableEncoding"];
|
||||||
|
let KeyCol = ["$dst"];
|
||||||
|
let ValueCols = [[""""]];
|
||||||
|
}
|
||||||
|
@ -362,6 +362,18 @@ class DS_Store_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
|
|||||||
let vdst = 0;
|
let vdst = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc> : DS <
|
||||||
|
op,
|
||||||
|
(outs rc:$vdst),
|
||||||
|
(ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, i8imm:$offset0,
|
||||||
|
i8imm:$offset1),
|
||||||
|
asm#" $gds, $vdst, $addr, $data0, $offset0, $offset1, [M0]",
|
||||||
|
[]> {
|
||||||
|
let mayStore = 1;
|
||||||
|
let mayLoad = 1;
|
||||||
|
let data1 = 0;
|
||||||
|
}
|
||||||
|
|
||||||
class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
|
class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
|
||||||
op,
|
op,
|
||||||
(outs),
|
(outs),
|
||||||
|
@ -391,6 +391,7 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">;
|
|||||||
|
|
||||||
} // End isCompare = 1
|
} // End isCompare = 1
|
||||||
|
|
||||||
|
def DS_ADD_U32_RTN : DS_1A1D_RET <0x20, "DS_ADD_U32_RTN", VReg_32>;
|
||||||
def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>;
|
def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>;
|
||||||
def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>;
|
def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>;
|
||||||
def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>;
|
def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>;
|
||||||
@ -1775,6 +1776,9 @@ def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
|
|||||||
def : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
|
def : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
|
||||||
def : DSWritePat <DS_WRITE_B32, i32, local_store>;
|
def : DSWritePat <DS_WRITE_B32, i32, local_store>;
|
||||||
|
|
||||||
|
def : Pat <(atomic_load_add_local i32:$ptr, i32:$val),
|
||||||
|
(DS_ADD_U32_RTN 0, $ptr, $val, 0, 0)>;
|
||||||
|
|
||||||
/********** ================== **********/
|
/********** ================== **********/
|
||||||
/********** SMRD Patterns **********/
|
/********** SMRD Patterns **********/
|
||||||
/********** ================== **********/
|
/********** ================== **********/
|
||||||
|
@ -488,6 +488,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
|
|||||||
NeedWQM = true;
|
NeedWQM = true;
|
||||||
// Fall through
|
// Fall through
|
||||||
case AMDGPU::DS_WRITE_B32:
|
case AMDGPU::DS_WRITE_B32:
|
||||||
|
case AMDGPU::DS_ADD_U32_RTN:
|
||||||
NeedM0 = true;
|
NeedM0 = true;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
23
test/CodeGen/R600/atomic_load_add.ll
Normal file
23
test/CodeGen/R600/atomic_load_add.ll
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
|
||||||
|
; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
|
||||||
|
|
||||||
|
; R600-CHECK-LABEL: @atomic_add_local
|
||||||
|
; R600-CHECK: LDS_ADD *
|
||||||
|
; SI-CHECK-LABEL: @atomic_add_local
|
||||||
|
; SI-CHECK: DS_ADD_U32_RTN 0
|
||||||
|
define void @atomic_add_local(i32 addrspace(3)* %local) {
|
||||||
|
entry:
|
||||||
|
%0 = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; R600-CHECK-LABEL: @atomic_add_ret_local
|
||||||
|
; R600-CHECK: LDS_ADD_RET *
|
||||||
|
; SI-CHECK-LABEL: @atomic_add_ret_local
|
||||||
|
; SI-CHECK: DS_ADD_U32_RTN 0
|
||||||
|
define void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
|
||||||
|
entry:
|
||||||
|
%0 = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
|
||||||
|
store i32 %0, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user