From ce0432a0c38037e5887037b25e9f6d51c3df98df Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 5 Sep 2013 18:38:09 +0000 Subject: [PATCH] R600: Add support for local memory atomic add llvm-svn: 190080 --- lib/Target/R600/AMDGPUInstructions.td | 5 +++ lib/Target/R600/R600ISelLowering.cpp | 22 +++++++++----- lib/Target/R600/R600InstrInfo.h | 6 ++++ lib/Target/R600/R600Instructions.td | 42 +++++++++++++++++++++----- lib/Target/R600/SIInstrInfo.td | 12 ++++++++ lib/Target/R600/SIInstructions.td | 4 +++ lib/Target/R600/SILowerControlFlow.cpp | 1 + test/CodeGen/R600/atomic_load_add.ll | 23 ++++++++++++++ 8 files changed, 101 insertions(+), 14 deletions(-) create mode 100644 test/CodeGen/R600/atomic_load_add.ll diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index dec60827a49..6745fed3bae 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -191,6 +191,11 @@ def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return isLocalLoad(dyn_cast(N)); }]>; +def atomic_load_add_local : PatFrag<(ops node:$ptr, node:$value), + (atomic_load_add node:$ptr, node:$value), [{ + return dyn_cast(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; +}]>; + def mskor_global : PatFrag<(ops node:$val, node:$ptr), (AMDGPUstore_mskor node:$val, node:$ptr), [{ return dyn_cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 450e2a86da3..ff9ba52d0ba 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -109,16 +109,24 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( switch (MI->getOpcode()) { default: - if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::LDS_1A) { - MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), - TII->get(MI->getOpcode()), - AMDGPU::OQAP); + if (TII->isLDSInstr(MI->getOpcode()) && + TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst) != -1) { + int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst); + assert(DstIdx != -1); + MachineInstrBuilder NewMI; + if (!MRI.use_empty(MI->getOperand(DstIdx).getReg())) { + NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()), + AMDGPU::OQAP); + TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV, + MI->getOperand(0).getReg(), + AMDGPU::OQAP); + } else { + NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), + TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode()))); + } for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) { NewMI.addOperand(MI->getOperand(i)); } - TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV, - MI->getOperand(0).getReg(), - AMDGPU::OQAP); } else { return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); } diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index d083fb88631..24cc43dd18a 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -275,6 +275,12 @@ namespace llvm { void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const; }; +namespace AMDGPU { + +int getLDSNoRetOp(uint16_t Opcode); + +} //End namespace AMDGPU + } // End llvm namespace #endif // R600INSTRINFO_H_ diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 99ffa149d8c..efa475198ca 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1626,23 +1626,39 @@ class R600_LDS_1A lds_op, string name, list pattern> : R600_LDS < let DisableEncoding = "$dst"; } -class R600_LDS_1A1D lds_op, string name, list pattern> : +class R600_LDS_1A1D lds_op, dag outs, string name, list pattern, + string dst =""> : R600_LDS < - lds_op, - (outs), + lds_op, outs, (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel, R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel, LAST:$last, R600_Pred:$pred_sel, BANK_SWIZZLE:$bank_swizzle), - " "#name#" $last $src0$src0_rel, $src1$src1_rel, $pred_sel", + " "#name#" $last "#dst#"$src0$src0_rel, $src1$src1_rel, $pred_sel", pattern > { + field string BaseOp; + let src2 = 0; let src2_rel = 0; let LDS_1A1D = 1; } +class R600_LDS_1A1D_NORET lds_op, string name, list pattern> : + R600_LDS_1A1D { + let BaseOp = name; +} + +class R600_LDS_1A1D_RET lds_op, string name, list pattern> : + R600_LDS_1A1D { + + let BaseOp = name; + let usesCustomInserter = 1; + let DisableEncoding = "$dst"; + let Defs = [OQAP]; +} + class R600_LDS_1A2D lds_op, string name, list pattern> : R600_LDS < lds_op, @@ -1656,15 +1672,19 @@ class R600_LDS_1A2D lds_op, string name, list pattern> : let LDS_1A2D = 1; } -def LDS_WRITE : R600_LDS_1A1D <0xD, "LDS_WRITE", +def LDS_ADD : R600_LDS_1A1D_NORET <0x0, "LDS_ADD", [] >; +def LDS_WRITE : R600_LDS_1A1D_NORET <0xD, "LDS_WRITE", [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)] >; -def LDS_BYTE_WRITE : R600_LDS_1A1D<0x12, "LDS_BYTE_WRITE", +def LDS_BYTE_WRITE : R600_LDS_1A1D_NORET<0x12, "LDS_BYTE_WRITE", [(truncstorei8_local i32:$src1, i32:$src0)] >; -def LDS_SHORT_WRITE : R600_LDS_1A1D<0x13, "LDS_SHORT_WRITE", +def LDS_SHORT_WRITE : R600_LDS_1A1D_NORET<0x13, "LDS_SHORT_WRITE", [(truncstorei16_local i32:$src1, i32:$src0)] >; +def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD", + [(set i32:$dst, (atomic_load_add_local i32:$src0, i32:$src1))] +>; def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET", [(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))] >; @@ -2428,3 +2448,11 @@ def : BitConvert ; def : DwordAddrPat ; } // End isR600toCayman Predicate + +def getLDSNoRetOp : InstrMapping { + let FilterClass = "R600_LDS_1A1D"; + let RowFields = ["BaseOp"]; + let ColFields = ["DisableEncoding"]; + let KeyCol = ["$dst"]; + let ValueCols = [[""""]]; +} diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index ecc471817e4..09d5f01dbfb 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -362,6 +362,18 @@ class DS_Store_Helper op, string asm, RegisterClass regClass> : DS < let vdst = 0; } +class DS_1A1D_RET op, string asm, RegisterClass rc> : DS < + op, + (outs rc:$vdst), + (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, i8imm:$offset0, + i8imm:$offset1), + asm#" $gds, $vdst, $addr, $data0, $offset0, $offset1, [M0]", + []> { + let mayStore = 1; + let mayLoad = 1; + let data1 = 0; +} + class MTBUF_Store_Helper op, string asm, RegisterClass regClass> : MTBUF < op, (outs), diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 136f69c07cb..31a5ad237e7 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -391,6 +391,7 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">; } // End isCompare = 1 +def DS_ADD_U32_RTN : DS_1A1D_RET <0x20, "DS_ADD_U32_RTN", VReg_32>; def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>; def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>; def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>; @@ -1775,6 +1776,9 @@ def : DSWritePat ; def : DSWritePat ; def : DSWritePat ; +def : Pat <(atomic_load_add_local i32:$ptr, i32:$val), + (DS_ADD_U32_RTN 0, $ptr, $val, 0, 0)>; + /********** ================== **********/ /********** SMRD Patterns **********/ /********** ================== **********/ diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index 09cf25b7648..a6c43bbb2c5 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -488,6 +488,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { NeedWQM = true; // Fall through case AMDGPU::DS_WRITE_B32: + case AMDGPU::DS_ADD_U32_RTN: NeedM0 = true; break; diff --git a/test/CodeGen/R600/atomic_load_add.ll b/test/CodeGen/R600/atomic_load_add.ll new file mode 100644 index 00000000000..054d9cdc88e --- /dev/null +++ b/test/CodeGen/R600/atomic_load_add.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK +; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK + +; R600-CHECK-LABEL: @atomic_add_local +; R600-CHECK: LDS_ADD * +; SI-CHECK-LABEL: @atomic_add_local +; SI-CHECK: DS_ADD_U32_RTN 0 +define void @atomic_add_local(i32 addrspace(3)* %local) { +entry: + %0 = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst + ret void +} + +; R600-CHECK-LABEL: @atomic_add_ret_local +; R600-CHECK: LDS_ADD_RET * +; SI-CHECK-LABEL: @atomic_add_ret_local +; SI-CHECK: DS_ADD_U32_RTN 0 +define void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) { +entry: + %0 = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst + store i32 %0, i32 addrspace(1)* %out + ret void +}