diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 3227f944e8b..dec60827a49 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -96,6 +96,10 @@ def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i8; }]>; +def az_extloadi8_global : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{ + return isGlobalLoad(dyn_cast(N)); +}]>; + def sextloadi8_global : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{ return isGlobalLoad(dyn_cast(N)); }]>; @@ -108,8 +112,12 @@ def sextloadi8_constant : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{ return isConstantLoad(dyn_cast(N), -1); }]>; -def az_extloadi8_global : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{ - return isGlobalLoad(dyn_cast(N)); +def az_extloadi8_local : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{ + return isLocalLoad(dyn_cast(N)); +}]>; + +def sextloadi8_local : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{ + return isLocalLoad(dyn_cast(N)); }]>; def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{ @@ -132,6 +140,14 @@ def sextloadi16_constant : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{ return isConstantLoad(dyn_cast(N), -1); }]>; +def az_extloadi16_local : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{ + return isLocalLoad(dyn_cast(N)); +}]>; + +def sextloadi16_local : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{ + return isLocalLoad(dyn_cast(N)); +}]>; + def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i32; }]>; diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index e846ff4aee3..9bc8e8a818f 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -104,7 +104,21 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( static_cast(MF->getTarget().getInstrInfo()); switch (MI->getOpcode()) { - default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); + default: + if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::LDS_1A) { + MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), + TII->get(MI->getOpcode()), + AMDGPU::OQAP); + for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) { + NewMI.addOperand(MI->getOperand(i)); + } + TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV, + MI->getOperand(0).getReg(), + AMDGPU::OQAP); + } else { + return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); + } + break; case AMDGPU::CLAMP_R600: { MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV, @@ -140,19 +154,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( break; } - case AMDGPU::LDS_READ_RET: { - MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), - TII->get(MI->getOpcode()), - AMDGPU::OQAP); - for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) { - NewMI.addOperand(MI->getOperand(i)); - } - TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV, - MI->getOperand(0).getReg(), - AMDGPU::OQAP); - break; - } - case AMDGPU::MOV_IMM_F32: TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(), MI->getOperand(1).getFPImm()->getValueAPF() diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 3d92278c3a6..f5c0266a7b5 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1682,6 +1682,18 @@ def LDS_SHORT_WRITE : R600_LDS_1A1D<0x13, "LDS_SHORT_WRITE", def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET", [(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))] >; +def LDS_BYTE_READ_RET : R600_LDS_1A <0x36, "LDS_BYTE_READ_RET", + [(set i32:$dst, (sextloadi8_local i32:$src0))] +>; +def LDS_UBYTE_READ_RET : R600_LDS_1A <0x37, "LDS_UBYTE_READ_RET", + [(set i32:$dst, (az_extloadi8_local i32:$src0))] +>; +def LDS_SHORT_READ_RET : R600_LDS_1A <0x38, "LDS_SHORT_READ_RET", + [(set i32:$dst, (sextloadi16_local i32:$src0))] +>; +def LDS_USHORT_READ_RET : R600_LDS_1A <0x39, "LDS_USHORT_READ_RET", + [(set i32:$dst, (az_extloadi16_local i32:$src0))] +>; // TRUNC is used for the FLT_TO_INT instructions to work around a // perceived problem where the rounding modes are applied differently diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 785dbf17e25..136f69c07cb 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -395,6 +395,10 @@ def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>; def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>; def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>; def DS_READ_B32 : DS_Load_Helper <0x00000036, "DS_READ_B32", VReg_32>; +def DS_READ_I8 : DS_Load_Helper <0x00000039, "DS_READ_I8", VReg_32>; +def DS_READ_U8 : DS_Load_Helper <0x0000003a, "DS_READ_U8", VReg_32>; +def DS_READ_I16 : DS_Load_Helper <0x0000003b, "DS_READ_I16", VReg_32>; +def DS_READ_U16 : DS_Load_Helper <0x0000003c, "DS_READ_U16", VReg_32>; //def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>; //def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>; @@ -1747,6 +1751,16 @@ def : Pat < /********** Load/Store Patterns **********/ /********** ======================= **********/ +class DSReadPat : Pat < + (frag i32:$src0), + (vt (inst 0, $src0, $src0, $src0, 0, 0)) +>; + +def : DSReadPat ; +def : DSReadPat ; +def : DSReadPat ; +def : DSReadPat ; +def : DSReadPat ; def : Pat < (local_load i32:$src0), (i32 (DS_READ_B32 0, $src0, $src0, $src0, 0, 0)) diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll index abb10617dd0..ba8250650fc 100644 --- a/test/CodeGen/R600/load.ll +++ b/test/CodeGen/R600/load.ll @@ -438,3 +438,81 @@ define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace( ret void } +;===------------------------------------------------------------------------===; +; LOCAL ADDRESS SPACE +;===------------------------------------------------------------------------===; + +; Load an i8 value from the local address space. +; R600-CHECK: @load_i8_local +; R600-CHECK: LDS_UBYTE_READ_RET +; SI-CHECK: @load_i8_local +; SI-CHECK: DS_READ_U8 +define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) { + %1 = load i8 addrspace(3)* %in + %2 = zext i8 %1 to i32 + store i32 %2, i32 addrspace(1)* %out + ret void +} + +; R600-CHECK: @load_i8_sext_local +; R600-CHECK: LDS_UBYTE_READ_RET +; R600-CHECK: ASHR +; SI-CHECK: @load_i8_sext_local +; SI-CHECK: DS_READ_I8 +define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) { +entry: + %0 = load i8 addrspace(3)* %in + %1 = sext i8 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; Load an i16 value from the local address space. +; R600-CHECK: @load_i16_local +; R600-CHECK: LDS_USHORT_READ_RET +; SI-CHECK: @load_i16_local +; SI-CHECK: DS_READ_U16 +define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) { +entry: + %0 = load i16 addrspace(3)* %in + %1 = zext i16 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; R600-CHECK: @load_i16_sext_local +; R600-CHECK: LDS_USHORT_READ_RET +; R600-CHECK: ASHR +; SI-CHECK: @load_i16_sext_local +; SI-CHECK: DS_READ_I16 +define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) { +entry: + %0 = load i16 addrspace(3)* %in + %1 = sext i16 %0 to i32 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; load an i32 value from the glocal address space. +; R600-CHECK: @load_i32_local +; R600-CHECK: LDS_READ_RET +; SI-CHECK: @load_i32_local +; SI-CHECK: DS_READ_B32 +define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { +entry: + %0 = load i32 addrspace(3)* %in + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; load a f32 value from the global address space. +; R600-CHECK: @load_f32_local +; R600-CHECK: LDS_READ_RET +; SI-CHECK: @load_f32_local +; SI-CHECK: DS_READ_B32 +define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) { +entry: + %0 = load float addrspace(3)* %in + store float %0, float addrspace(1)* %out + ret void +}