mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
AMDGPU: Implement read_register and write_register intrinsics
Some of the special intrinsics that now correspond to an instruction also have special setting of some registers, e.g. llvm.SI.sendmsg sets m0 as well as using s_sendmsg. Using these explicit register intrinsics may be a better option. Reading the exec mask and others may be useful for debugging. For this I'm not sure this is entirely correct because we would want this to be convergent, although it's possible this is already treated sufficiently conservatively. llvm-svn: 258785
This commit is contained in:
parent
31798fd428
commit
667cd15c1c
@ -27,6 +27,7 @@
|
||||
#include "SIMachineFunctionInfo.h"
|
||||
#include "SIRegisterInfo.h"
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
#include "llvm/ADT/StringSwitch.h"
|
||||
#include "llvm/CodeGen/CallingConvLower.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
@ -989,6 +990,52 @@ SDValue SITargetLowering::LowerReturn(SDValue Chain,
|
||||
return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, RetOps);
|
||||
}
|
||||
|
||||
unsigned SITargetLowering::getRegisterByName(const char* RegName, EVT VT,
|
||||
SelectionDAG &DAG) const {
|
||||
unsigned Reg = StringSwitch<unsigned>(RegName)
|
||||
.Case("m0", AMDGPU::M0)
|
||||
.Case("exec", AMDGPU::EXEC)
|
||||
.Case("exec_lo", AMDGPU::EXEC_LO)
|
||||
.Case("exec_hi", AMDGPU::EXEC_HI)
|
||||
.Case("flat_scratch", AMDGPU::FLAT_SCR)
|
||||
.Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
|
||||
.Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
|
||||
.Default(AMDGPU::NoRegister);
|
||||
|
||||
if (Reg == AMDGPU::NoRegister) {
|
||||
report_fatal_error(Twine("invalid register name \""
|
||||
+ StringRef(RegName) + "\"."));
|
||||
|
||||
}
|
||||
|
||||
if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
|
||||
Subtarget->getRegisterInfo()->regsOverlap(Reg, AMDGPU::FLAT_SCR)) {
|
||||
report_fatal_error(Twine("invalid register \""
|
||||
+ StringRef(RegName) + "\" for subtarget."));
|
||||
}
|
||||
|
||||
switch (Reg) {
|
||||
case AMDGPU::M0:
|
||||
case AMDGPU::EXEC_LO:
|
||||
case AMDGPU::EXEC_HI:
|
||||
case AMDGPU::FLAT_SCR_LO:
|
||||
case AMDGPU::FLAT_SCR_HI:
|
||||
if (VT.getSizeInBits() == 32)
|
||||
return Reg;
|
||||
break;
|
||||
case AMDGPU::EXEC:
|
||||
case AMDGPU::FLAT_SCR:
|
||||
if (VT.getSizeInBits() == 64)
|
||||
return Reg;
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("missing register type checking");
|
||||
}
|
||||
|
||||
report_fatal_error(Twine("invalid type for register \""
|
||||
+ StringRef(RegName) + "\"."));
|
||||
}
|
||||
|
||||
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
|
||||
MachineInstr * MI, MachineBasicBlock * BB) const {
|
||||
|
||||
|
@ -104,6 +104,9 @@ public:
|
||||
const SmallVectorImpl<SDValue> &OutVals,
|
||||
SDLoc DL, SelectionDAG &DAG) const override;
|
||||
|
||||
unsigned getRegisterByName(const char* RegName, EVT VT,
|
||||
SelectionDAG &DAG) const override;
|
||||
|
||||
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
|
||||
MachineBasicBlock * BB) const override;
|
||||
bool enableAggressiveFMAFusion(EVT VT) const override;
|
||||
|
14
test/CodeGen/AMDGPU/read-register-invalid-subtarget.ll
Normal file
14
test/CodeGen/AMDGPU/read-register-invalid-subtarget.ll
Normal file
@ -0,0 +1,14 @@
|
||||
; RUN: not llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s 2>&1 | FileCheck %s
|
||||
|
||||
; CHECK: invalid register "flat_scratch_lo" for subtarget.
|
||||
|
||||
declare i32 @llvm.read_register.i32(metadata) #0
|
||||
|
||||
define void @test_invalid_read_flat_scratch_lo(i32 addrspace(1)* %out) nounwind {
|
||||
store volatile i32 0, i32 addrspace(3)* undef
|
||||
%m0 = call i32 @llvm.read_register.i32(metadata !0)
|
||||
store i32 %m0, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
!0 = !{!"flat_scratch_lo"}
|
14
test/CodeGen/AMDGPU/read-register-invalid-type-i32.ll
Normal file
14
test/CodeGen/AMDGPU/read-register-invalid-type-i32.ll
Normal file
@ -0,0 +1,14 @@
|
||||
; RUN: not llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck %s
|
||||
|
||||
; CHECK: invalid type for register "exec".
|
||||
|
||||
declare i32 @llvm.read_register.i32(metadata) #0
|
||||
|
||||
define void @test_invalid_read_exec(i32 addrspace(1)* %out) nounwind {
|
||||
store volatile i32 0, i32 addrspace(3)* undef
|
||||
%m0 = call i32 @llvm.read_register.i32(metadata !0)
|
||||
store i32 %m0, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
!0 = !{!"exec"}
|
13
test/CodeGen/AMDGPU/read-register-invalid-type-i64.ll
Normal file
13
test/CodeGen/AMDGPU/read-register-invalid-type-i64.ll
Normal file
@ -0,0 +1,13 @@
|
||||
; RUN: not llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck %s
|
||||
|
||||
; CHECK: invalid type for register "m0".
|
||||
|
||||
declare i64 @llvm.read_register.i64(metadata) #0
|
||||
|
||||
define void @test_invalid_read_m0(i64 addrspace(1)* %out) #0 {
|
||||
%exec = call i64 @llvm.read_register.i64(metadata !0)
|
||||
store i64 %exec, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
!0 = !{!"m0"}
|
81
test/CodeGen/AMDGPU/read_register.ll
Normal file
81
test/CodeGen/AMDGPU/read_register.ll
Normal file
@ -0,0 +1,81 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
declare i32 @llvm.read_register.i32(metadata) #0
|
||||
declare i64 @llvm.read_register.i64(metadata) #0
|
||||
|
||||
; CHECK-LABEL: {{^}}test_read_m0:
|
||||
; CHECK: s_mov_b32 m0, -1
|
||||
; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], m0
|
||||
; CHECK: buffer_store_dword [[COPY]]
|
||||
define void @test_read_m0(i32 addrspace(1)* %out) #0 {
|
||||
store volatile i32 0, i32 addrspace(3)* undef
|
||||
%m0 = call i32 @llvm.read_register.i32(metadata !0)
|
||||
store i32 %m0, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_read_exec:
|
||||
; CHECK: v_mov_b32_e32 v[[LO:[0-9]+]], exec_lo
|
||||
; CHECK: v_mov_b32_e32 v[[HI:[0-9]+]], exec_hi
|
||||
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
|
||||
define void @test_read_exec(i64 addrspace(1)* %out) #0 {
|
||||
%exec = call i64 @llvm.read_register.i64(metadata !1)
|
||||
store i64 %exec, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_read_flat_scratch:
|
||||
; CHECK: v_mov_b32_e32 v[[LO:[0-9]+]], flat_scratch_lo
|
||||
; CHECK: v_mov_b32_e32 v[[HI:[0-9]+]], flat_scratch_hi
|
||||
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
|
||||
define void @test_read_flat_scratch(i64 addrspace(1)* %out) #0 {
|
||||
%flat_scratch = call i64 @llvm.read_register.i64(metadata !2)
|
||||
store i64 %flat_scratch, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_read_flat_scratch_lo:
|
||||
; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], flat_scratch_lo
|
||||
; CHECK: buffer_store_dword [[COPY]]
|
||||
define void @test_read_flat_scratch_lo(i32 addrspace(1)* %out) #0 {
|
||||
%flat_scratch_lo = call i32 @llvm.read_register.i32(metadata !3)
|
||||
store i32 %flat_scratch_lo, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_read_flat_scratch_hi:
|
||||
; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], flat_scratch_hi
|
||||
; CHECK: buffer_store_dword [[COPY]]
|
||||
define void @test_read_flat_scratch_hi(i32 addrspace(1)* %out) #0 {
|
||||
%flat_scratch_hi = call i32 @llvm.read_register.i32(metadata !4)
|
||||
store i32 %flat_scratch_hi, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_read_exec_lo:
|
||||
; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], exec_lo
|
||||
; CHECK: buffer_store_dword [[COPY]]
|
||||
define void @test_read_exec_lo(i32 addrspace(1)* %out) #0 {
|
||||
%exec_lo = call i32 @llvm.read_register.i32(metadata !5)
|
||||
store i32 %exec_lo, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_read_exec_hi:
|
||||
; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], exec_hi
|
||||
; CHECK: buffer_store_dword [[COPY]]
|
||||
define void @test_read_exec_hi(i32 addrspace(1)* %out) #0 {
|
||||
%exec_hi = call i32 @llvm.read_register.i32(metadata !6)
|
||||
store i32 %exec_hi, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
||||
!0 = !{!"m0"}
|
||||
!1 = !{!"exec"}
|
||||
!2 = !{!"flat_scratch"}
|
||||
!3 = !{!"flat_scratch_lo"}
|
||||
!4 = !{!"flat_scratch_hi"}
|
||||
!5 = !{!"exec_lo"}
|
||||
!6 = !{!"exec_hi"}
|
22
test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll
Normal file
22
test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll
Normal file
@ -0,0 +1,22 @@
|
||||
; XFAIL: *
|
||||
; REQUIRES: asserts
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s
|
||||
|
||||
; write_register doesn't prevent us from illegally trying to write a
|
||||
; vgpr value into a scalar register, but I don't think there's much we
|
||||
; can do to avoid this.
|
||||
|
||||
declare void @llvm.write_register.i32(metadata, i32) #0
|
||||
declare i32 @llvm.r600.read.tidig.x() #0
|
||||
|
||||
|
||||
define void @write_vgpr_into_sgpr() {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x()
|
||||
call void @llvm.write_register.i32(metadata !0, i32 %tid)
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
||||
|
||||
!0 = !{!"exec_lo"}
|
80
test/CodeGen/AMDGPU/write_register.ll
Normal file
80
test/CodeGen/AMDGPU/write_register.ll
Normal file
@ -0,0 +1,80 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
declare void @llvm.write_register.i32(metadata, i32) #0
|
||||
declare void @llvm.write_register.i64(metadata, i64) #0
|
||||
|
||||
; CHECK-LABEL: {{^}}test_write_m0:
|
||||
define void @test_write_m0(i32 %val) #0 {
|
||||
call void @llvm.write_register.i32(metadata !0, i32 0)
|
||||
call void @llvm.write_register.i32(metadata !0, i32 -1)
|
||||
call void @llvm.write_register.i32(metadata !0, i32 %val)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_write_exec:
|
||||
; CHECK: s_mov_b64 exec, 0
|
||||
; CHECK: s_mov_b64 exec, -1
|
||||
; CHECK: s_mov_b64 exec, s{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @test_write_exec(i64 %val) #0 {
|
||||
call void @llvm.write_register.i64(metadata !1, i64 0)
|
||||
call void @llvm.write_register.i64(metadata !1, i64 -1)
|
||||
call void @llvm.write_register.i64(metadata !1, i64 %val)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_write_flat_scratch:
|
||||
; CHECK: s_mov_b64 flat_scratch, 0
|
||||
; CHECK: s_mov_b64 flat_scratch, -1
|
||||
; CHECK: s_mov_b64 flat_scratch, s{{\[[0-9]+:[0-9]+\]}}
|
||||
define void @test_write_flat_scratch(i64 %val) #0 {
|
||||
call void @llvm.write_register.i64(metadata !2, i64 0)
|
||||
call void @llvm.write_register.i64(metadata !2, i64 -1)
|
||||
call void @llvm.write_register.i64(metadata !2, i64 %val)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_write_flat_scratch_lo:
|
||||
; CHECK: s_mov_b32 flat_scratch_lo, 0
|
||||
; CHECK: s_mov_b32 flat_scratch_lo, s{{[0-9]+}}
|
||||
define void @test_write_flat_scratch_lo(i32 %val) #0 {
|
||||
call void @llvm.write_register.i32(metadata !3, i32 0)
|
||||
call void @llvm.write_register.i32(metadata !3, i32 %val)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_write_flat_scratch_hi:
|
||||
; CHECK: s_mov_b32 flat_scratch_hi, 0
|
||||
; CHECK: s_mov_b32 flat_scratch_hi, s{{[0-9]+}}
|
||||
define void @test_write_flat_scratch_hi(i32 %val) #0 {
|
||||
call void @llvm.write_register.i32(metadata !4, i32 0)
|
||||
call void @llvm.write_register.i32(metadata !4, i32 %val)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_write_exec_lo:
|
||||
; CHECK: s_mov_b32 exec_lo, 0
|
||||
; CHECK: s_mov_b32 exec_lo, s{{[0-9]+}}
|
||||
define void @test_write_exec_lo(i32 %val) #0 {
|
||||
call void @llvm.write_register.i32(metadata !5, i32 0)
|
||||
call void @llvm.write_register.i32(metadata !5, i32 %val)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_write_exec_hi:
|
||||
; CHECK: s_mov_b32 exec_hi, 0
|
||||
; CHECK: s_mov_b32 exec_hi, s{{[0-9]+}}
|
||||
define void @test_write_exec_hi(i32 %val) #0 {
|
||||
call void @llvm.write_register.i32(metadata !6, i32 0)
|
||||
call void @llvm.write_register.i32(metadata !6, i32 %val)
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
||||
!0 = !{!"m0"}
|
||||
!1 = !{!"exec"}
|
||||
!2 = !{!"flat_scratch"}
|
||||
!3 = !{!"flat_scratch_lo"}
|
||||
!4 = !{!"flat_scratch_hi"}
|
||||
!5 = !{!"exec_lo"}
|
||||
!6 = !{!"exec_hi"}
|
Loading…
Reference in New Issue
Block a user