// Patch summary for lib/Target/R600/SIInstrInfo.cpp and SIInstrInfo.h.
//
// * SIInstrInfo::getVALUOp
//   Adds three cases mapping S_LOAD_DWORD_SGPR, S_LOAD_DWORDX2_SGPR and
//   S_LOAD_DWORDX4_SGPR to BUFFER_LOAD_DWORD_ADDR64,
//   BUFFER_LOAD_DWORDX2_ADDR64 and BUFFER_LOAD_DWORDX4_ADDR64 respectively.
//
// * SIInstrInfo::moveSMRDToVALU(MI, MRI)  (new; declared in SIInstrInfo.h)
//   Rewrites MI in place when its opcode is one of the three *_SGPR loads
//   listed above. The switch has no default, so any other opcode leaves MI
//   untouched. For a matching opcode it:
//     1. looks up the replacement opcode with getVALUOp(*MI) and reads the
//        register held in operand 2 (called Offset in the code);
//     2. creates three SGPR_32 virtual registers and inserts S_MOV_B32
//        instructions before MI that set them to 0,
//        RSRC_DATA_FORMAT & 0xFFFFFFFF and RSRC_DATA_FORMAT >> 32;
//     3. inserts a REG_SEQUENCE before MI that builds a new SReg_128
//        register (SRsrc) whose sub0..sub3 are the operand-2 register
//        followed by those three registers, in that order;
//     4. sets MI's descriptor to the replacement opcode, copies the
//        operand-1 register into operand 2, stores SRsrc in operand 1 and
//        appends an immediate operand with value 0.
//   MI  - instruction to rewrite; the new instructions are inserted before
//         it in its parent basic block, using MI's debug location.
//   MRI - register info used to create the virtual registers.
//   NOTE(review): presumably the resulting operand order is the one the
//   *_ADDR64 buffer loads expect - confirm against their definitions.
//   NOTE(review): the locals are declared directly under the case labels
//   with no braced scope; adding another case label after them would jump
//   past their initialization and need such a scope.
//
// * SIInstrInfo::moveToVALU
//   getVALUOp(*Inst) is now evaluated before the special-case switch
//   instead of after it, and the switch gains a default case that calls
//   moveSMRDToVALU(Inst, MRI) when isSMRD(Inst->getOpcode()) is true.
//
// NOTE(review): the diff text below has lost its original line breaks and
// indentation. It is reproduced unchanged so that hunk contents and @@
// counts are exactly what was recorded.
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index 4c57d16c7f3..5d08b91ea7b 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -548,6 +548,9 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32; case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32; case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32; + case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64; + case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64; + case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64; } } @@ -910,6 +913,44 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { } } +void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const { + MachineBasicBlock *MBB = MI->getParent(); + switch (MI->getOpcode()) { + case AMDGPU::S_LOAD_DWORD_SGPR: + case AMDGPU::S_LOAD_DWORDX2_SGPR: + case AMDGPU::S_LOAD_DWORDX4_SGPR: + unsigned NewOpcode = getVALUOp(*MI); + unsigned Offset = MI->getOperand(2).getReg(); + + + unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); + unsigned DWord0 = Offset; + unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + + BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1) + .addImm(0); + BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2) + .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF); + BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3) + .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32); + BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc) + .addReg(DWord0) + .addImm(AMDGPU::sub0) + .addReg(DWord1) + .addImm(AMDGPU::sub1) + .addReg(DWord2) + .addImm(AMDGPU::sub2) + .addReg(DWord3) + 
.addImm(AMDGPU::sub3); + MI->setDesc(get(NewOpcode)); + MI->getOperand(2).setReg(MI->getOperand(1).getReg()); + MI->getOperand(1).setReg(SRsrc); + MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); + } +} + void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { SmallVector Worklist; Worklist.push_back(&TopInst); @@ -920,9 +961,15 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); unsigned Opcode = Inst->getOpcode(); + unsigned NewOpcode = getVALUOp(*Inst); // Handle some special cases switch (Opcode) { + default: + if (isSMRD(Inst->getOpcode())) { + moveSMRDToVALU(Inst, MRI); + } + break; case AMDGPU::S_MOV_B64: { DebugLoc DL = Inst->getDebugLoc(); @@ -973,7 +1020,6 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { llvm_unreachable("Moving this op to VALU not implemented"); } - unsigned NewOpcode = getVALUOp(*Inst); if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) { // We cannot move this instruction to the VALU, so we should try to // legalize its operands instead. diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index b6a0b1f2920..63f1d7fdee8 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -137,6 +137,8 @@ public: /// create new instruction and insert them before \p MI. void legalizeOperands(MachineInstr *MI) const; + void moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const; + /// \brief Replace this instruction's opcode with the equivalent VALU /// opcode. This function will also move the users of \p MI to the /// VALU if necessary. 
// Test summary for test/CodeGen/R600/salu-to-valu.ll.
//
// Appends the function @smrd_valu, which the test file itself describes as
// "Test moving an SMRD instruction to the VALU".
//
// Shape of the function:
//   entry: compares %a with 0 (icmp ne) and branches to %if or %else.
//   if:    loads an addrspace(2) pointer from %in, which is addrspace(1).
//   else:  takes an index-less getelementptr of %in and loads an
//          addrspace(2) pointer through it.
//   endif: merges the two loaded pointers with a phi, advances the result
//          by 3000 i32 elements, loads an i32 through it and stores that
//          value to %out.
//
// The FileCheck directives expect, after the @smrd_valu label, a
// BUFFER_LOAD_DWORD whose destination is a VGPR (captured as OUT) followed
// by a BUFFER_STORE_DWORD of that same register.
//
// NOTE(review): the diff text below has lost its original line breaks. It
// is reproduced unchanged so the hunk contents stay exactly as recorded.
diff --git a/test/CodeGen/R600/salu-to-valu.ll b/test/CodeGen/R600/salu-to-valu.ll index e461bf9acec..3fd0db34598 100644 --- a/test/CodeGen/R600/salu-to-valu.ll +++ b/test/CodeGen/R600/salu-to-valu.ll @@ -46,3 +46,31 @@ declare i32 @llvm.r600.read.tidig.x() #1 declare i32 @llvm.r600.read.tidig.y() #1 attributes #1 = { nounwind readnone } + +; Test moving an SMRD instruction to the VALU + +; CHECK-LABEL: @smrd_valu +; CHECK: BUFFER_LOAD_DWORD [[OUT:v[0-9]+]] +; CHECK: BUFFER_STORE_DWORD [[OUT]] + +define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 addrspace(1)* %out) { +entry: + %0 = icmp ne i32 %a, 0 + br i1 %0, label %if, label %else + +if: + %1 = load i32 addrspace(2)* addrspace(1)* %in + br label %endif + +else: + %2 = getelementptr i32 addrspace(2)* addrspace(1)* %in + %3 = load i32 addrspace(2)* addrspace(1)* %2 + br label %endif + +endif: + %4 = phi i32 addrspace(2)* [%1, %if], [%3, %else] + %5 = getelementptr i32 addrspace(2)* %4, i32 3000 + %6 = load i32 addrspace(2)* %5 + store i32 %6, i32 addrspace(1)* %out + ret void +}