1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 04:02:41 +01:00

AMDGPU: Implement read_register and write_register intrinsics

Some of the special intrinsics that now correspond to an instruction
also set some registers specially, e.g. llvm.SI.sendmsg sets m0 as
well as using s_sendmsg. Using these explicit register intrinsics
may be a better option.

Reading the exec mask and others may be useful for debugging. I'm not
sure this is entirely correct, though, because we would want these
reads to be convergent, although it's possible this is already treated
sufficiently conservatively.

llvm-svn: 258785
This commit is contained in:
Matt Arsenault 2016-01-26 04:29:24 +00:00
parent 31798fd428
commit 667cd15c1c
8 changed files with 274 additions and 0 deletions

View File

@ -27,6 +27,7 @@
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@ -989,6 +990,52 @@ SDValue SITargetLowering::LowerReturn(SDValue Chain,
return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, RetOps);
}
unsigned SITargetLowering::getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const {
unsigned Reg = StringSwitch<unsigned>(RegName)
.Case("m0", AMDGPU::M0)
.Case("exec", AMDGPU::EXEC)
.Case("exec_lo", AMDGPU::EXEC_LO)
.Case("exec_hi", AMDGPU::EXEC_HI)
.Case("flat_scratch", AMDGPU::FLAT_SCR)
.Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
.Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
.Default(AMDGPU::NoRegister);
if (Reg == AMDGPU::NoRegister) {
report_fatal_error(Twine("invalid register name \""
+ StringRef(RegName) + "\"."));
}
if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
Subtarget->getRegisterInfo()->regsOverlap(Reg, AMDGPU::FLAT_SCR)) {
report_fatal_error(Twine("invalid register \""
+ StringRef(RegName) + "\" for subtarget."));
}
switch (Reg) {
case AMDGPU::M0:
case AMDGPU::EXEC_LO:
case AMDGPU::EXEC_HI:
case AMDGPU::FLAT_SCR_LO:
case AMDGPU::FLAT_SCR_HI:
if (VT.getSizeInBits() == 32)
return Reg;
break;
case AMDGPU::EXEC:
case AMDGPU::FLAT_SCR:
if (VT.getSizeInBits() == 64)
return Reg;
break;
default:
llvm_unreachable("missing register type checking");
}
report_fatal_error(Twine("invalid type for register \""
+ StringRef(RegName) + "\"."));
}
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MachineInstr * MI, MachineBasicBlock * BB) const {

View File

@ -104,6 +104,9 @@ public:
const SmallVectorImpl<SDValue> &OutVals,
SDLoc DL, SelectionDAG &DAG) const override;
/// Look up the physical register named \p RegName for a value of type \p VT.
/// The definition accepts only a fixed list of special registers and reports
/// a fatal error for an unknown name, an unsupported subtarget register, or
/// a mismatched type size.
unsigned getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const override;
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
MachineBasicBlock * BB) const override;
bool enableAggressiveFMAFusion(EVT VT) const override;

View File

@ -0,0 +1,14 @@
; RUN: not llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s 2>&1 | FileCheck %s
; CHECK: invalid register "flat_scratch_lo" for subtarget.
declare i32 @llvm.read_register.i32(metadata) #0
; Reads flat_scratch_lo (metadata !0) as an i32. The run line of this file
; selects -mcpu=tahiti and expects the "for subtarget" diagnostic.
; NOTE(review): the result is named %m0 although the register read is
; flat_scratch_lo -- looks like a copy-paste leftover; the volatile store is
; presumably carried over from the m0 read test as well.
define void @test_invalid_read_flat_scratch_lo(i32 addrspace(1)* %out) nounwind {
store volatile i32 0, i32 addrspace(3)* undef
%m0 = call i32 @llvm.read_register.i32(metadata !0)
store i32 %m0, i32 addrspace(1)* %out
ret void
}
!0 = !{!"flat_scratch_lo"}

View File

@ -0,0 +1,14 @@
; RUN: not llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck %s
; CHECK: invalid type for register "exec".
declare i32 @llvm.read_register.i32(metadata) #0
; Reads exec (metadata !0) through the i32 variant of read_register; the
; expected diagnostic is the "invalid type for register" error.
; NOTE(review): the result is named %m0 although the register read is exec.
define void @test_invalid_read_exec(i32 addrspace(1)* %out) nounwind {
store volatile i32 0, i32 addrspace(3)* undef
%m0 = call i32 @llvm.read_register.i32(metadata !0)
store i32 %m0, i32 addrspace(1)* %out
ret void
}
!0 = !{!"exec"}

View File

@ -0,0 +1,13 @@
; RUN: not llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck %s
; CHECK: invalid type for register "m0".
declare i64 @llvm.read_register.i64(metadata) #0
; Reads m0 (metadata !0) through the i64 variant of read_register; the
; expected diagnostic is the "invalid type for register" error.
; NOTE(review): the result is named %exec although the register read is m0.
define void @test_invalid_read_m0(i64 addrspace(1)* %out) #0 {
%exec = call i64 @llvm.read_register.i64(metadata !0)
store i64 %exec, i64 addrspace(1)* %out
ret void
}
!0 = !{!"m0"}

View File

@ -0,0 +1,81 @@
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck %s
declare i32 @llvm.read_register.i32(metadata) #0
declare i64 @llvm.read_register.i64(metadata) #0
; Reads m0 as an i32 after a volatile store to addrspace(3). The checks
; expect m0 to be set to -1 first, then copied into a VGPR and stored.
; (Presumably the addrspace(3) store is what triggers the m0 setup -- confirm.)
; CHECK-LABEL: {{^}}test_read_m0:
; CHECK: s_mov_b32 m0, -1
; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], m0
; CHECK: buffer_store_dword [[COPY]]
define void @test_read_m0(i32 addrspace(1)* %out) #0 {
store volatile i32 0, i32 addrspace(3)* undef
%m0 = call i32 @llvm.read_register.i32(metadata !0)
store i32 %m0, i32 addrspace(1)* %out
ret void
}
; Reads the 64-bit exec register and stores it. The checks expect one
; v_mov per 32-bit half (exec_lo, exec_hi) feeding a dwordx2 store.
; CHECK-LABEL: {{^}}test_read_exec:
; CHECK: v_mov_b32_e32 v[[LO:[0-9]+]], exec_lo
; CHECK: v_mov_b32_e32 v[[HI:[0-9]+]], exec_hi
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @test_read_exec(i64 addrspace(1)* %out) #0 {
%exec = call i64 @llvm.read_register.i64(metadata !1)
store i64 %exec, i64 addrspace(1)* %out
ret void
}
; Reads the 64-bit flat_scratch register and stores it. The checks expect
; one v_mov per 32-bit half (flat_scratch_lo, flat_scratch_hi) feeding a
; dwordx2 store.
; CHECK-LABEL: {{^}}test_read_flat_scratch:
; CHECK: v_mov_b32_e32 v[[LO:[0-9]+]], flat_scratch_lo
; CHECK: v_mov_b32_e32 v[[HI:[0-9]+]], flat_scratch_hi
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @test_read_flat_scratch(i64 addrspace(1)* %out) #0 {
%flat_scratch = call i64 @llvm.read_register.i64(metadata !2)
store i64 %flat_scratch, i64 addrspace(1)* %out
ret void
}
; Reads flat_scratch_lo as an i32; expects a single copy into a VGPR that is
; then stored as a dword.
; CHECK-LABEL: {{^}}test_read_flat_scratch_lo:
; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], flat_scratch_lo
; CHECK: buffer_store_dword [[COPY]]
define void @test_read_flat_scratch_lo(i32 addrspace(1)* %out) #0 {
%flat_scratch_lo = call i32 @llvm.read_register.i32(metadata !3)
store i32 %flat_scratch_lo, i32 addrspace(1)* %out
ret void
}
; Reads flat_scratch_hi as an i32; expects a single copy into a VGPR that is
; then stored as a dword.
; CHECK-LABEL: {{^}}test_read_flat_scratch_hi:
; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], flat_scratch_hi
; CHECK: buffer_store_dword [[COPY]]
define void @test_read_flat_scratch_hi(i32 addrspace(1)* %out) #0 {
%flat_scratch_hi = call i32 @llvm.read_register.i32(metadata !4)
store i32 %flat_scratch_hi, i32 addrspace(1)* %out
ret void
}
; Reads exec_lo as an i32; expects a single copy into a VGPR that is then
; stored as a dword.
; CHECK-LABEL: {{^}}test_read_exec_lo:
; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], exec_lo
; CHECK: buffer_store_dword [[COPY]]
define void @test_read_exec_lo(i32 addrspace(1)* %out) #0 {
%exec_lo = call i32 @llvm.read_register.i32(metadata !5)
store i32 %exec_lo, i32 addrspace(1)* %out
ret void
}
; Reads exec_hi as an i32; expects a single copy into a VGPR that is then
; stored as a dword.
; CHECK-LABEL: {{^}}test_read_exec_hi:
; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], exec_hi
; CHECK: buffer_store_dword [[COPY]]
define void @test_read_exec_hi(i32 addrspace(1)* %out) #0 {
%exec_hi = call i32 @llvm.read_register.i32(metadata !6)
store i32 %exec_hi, i32 addrspace(1)* %out
ret void
}
attributes #0 = { nounwind }
!0 = !{!"m0"}
!1 = !{!"exec"}
!2 = !{!"flat_scratch"}
!3 = !{!"flat_scratch_lo"}
!4 = !{!"flat_scratch_hi"}
!5 = !{!"exec_lo"}
!6 = !{!"exec_hi"}

View File

@ -0,0 +1,22 @@
; XFAIL: *
; REQUIRES: asserts
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s
; write_register doesn't prevent us from illegally trying to write a
; vgpr value into a scalar register, but I don't think there's much we
; can do to avoid this.
declare void @llvm.write_register.i32(metadata, i32) #0
declare i32 @llvm.r600.read.tidig.x() #0
; Writes the result of llvm.r600.read.tidig.x into exec_lo (metadata !0)
; through write_register; the file is marked as an expected failure.
; NOTE(review): attribute group #0 (nounwind readnone) is applied to the
; write_register declaration as well as to the tidig.x one, this function
; carries no attribute group, and group #1 is not referenced anywhere in the
; file -- confirm whether write_register was meant to use #1.
define void @write_vgpr_into_sgpr() {
%tid = call i32 @llvm.r600.read.tidig.x()
call void @llvm.write_register.i32(metadata !0, i32 %tid)
ret void
}
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
!0 = !{!"exec_lo"}

View File

@ -0,0 +1,80 @@
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck %s
declare void @llvm.write_register.i32(metadata, i32) #0
declare void @llvm.write_register.i64(metadata, i64) #0
; Writes 0, -1 and the argument %val to m0 in sequence.
; NOTE(review): unlike the sibling write tests, no instruction patterns are
; checked after the label here -- confirm whether that is deliberate.
; CHECK-LABEL: {{^}}test_write_m0:
define void @test_write_m0(i32 %val) #0 {
call void @llvm.write_register.i32(metadata !0, i32 0)
call void @llvm.write_register.i32(metadata !0, i32 -1)
call void @llvm.write_register.i32(metadata !0, i32 %val)
ret void
}
; Writes 0, -1 and the argument %val to the 64-bit exec register; each write
; is expected to become its own s_mov_b64 (two immediates, then an SGPR pair).
; CHECK-LABEL: {{^}}test_write_exec:
; CHECK: s_mov_b64 exec, 0
; CHECK: s_mov_b64 exec, -1
; CHECK: s_mov_b64 exec, s{{\[[0-9]+:[0-9]+\]}}
define void @test_write_exec(i64 %val) #0 {
call void @llvm.write_register.i64(metadata !1, i64 0)
call void @llvm.write_register.i64(metadata !1, i64 -1)
call void @llvm.write_register.i64(metadata !1, i64 %val)
ret void
}
; Writes 0, -1 and the argument %val to the 64-bit flat_scratch register;
; each write is expected to become its own s_mov_b64 (two immediates, then an
; SGPR pair).
; CHECK-LABEL: {{^}}test_write_flat_scratch:
; CHECK: s_mov_b64 flat_scratch, 0
; CHECK: s_mov_b64 flat_scratch, -1
; CHECK: s_mov_b64 flat_scratch, s{{\[[0-9]+:[0-9]+\]}}
define void @test_write_flat_scratch(i64 %val) #0 {
call void @llvm.write_register.i64(metadata !2, i64 0)
call void @llvm.write_register.i64(metadata !2, i64 -1)
call void @llvm.write_register.i64(metadata !2, i64 %val)
ret void
}
; Writes 0 and then the argument %val to flat_scratch_lo; expects one
; s_mov_b32 with an immediate and one with an SGPR source.
; CHECK-LABEL: {{^}}test_write_flat_scratch_lo:
; CHECK: s_mov_b32 flat_scratch_lo, 0
; CHECK: s_mov_b32 flat_scratch_lo, s{{[0-9]+}}
define void @test_write_flat_scratch_lo(i32 %val) #0 {
call void @llvm.write_register.i32(metadata !3, i32 0)
call void @llvm.write_register.i32(metadata !3, i32 %val)
ret void
}
; Writes 0 and then the argument %val to flat_scratch_hi; expects one
; s_mov_b32 with an immediate and one with an SGPR source.
; CHECK-LABEL: {{^}}test_write_flat_scratch_hi:
; CHECK: s_mov_b32 flat_scratch_hi, 0
; CHECK: s_mov_b32 flat_scratch_hi, s{{[0-9]+}}
define void @test_write_flat_scratch_hi(i32 %val) #0 {
call void @llvm.write_register.i32(metadata !4, i32 0)
call void @llvm.write_register.i32(metadata !4, i32 %val)
ret void
}
; Writes 0 and then the argument %val to exec_lo; expects one s_mov_b32 with
; an immediate and one with an SGPR source.
; CHECK-LABEL: {{^}}test_write_exec_lo:
; CHECK: s_mov_b32 exec_lo, 0
; CHECK: s_mov_b32 exec_lo, s{{[0-9]+}}
define void @test_write_exec_lo(i32 %val) #0 {
call void @llvm.write_register.i32(metadata !5, i32 0)
call void @llvm.write_register.i32(metadata !5, i32 %val)
ret void
}
; Writes 0 and then the argument %val to exec_hi; expects one s_mov_b32 with
; an immediate and one with an SGPR source.
; CHECK-LABEL: {{^}}test_write_exec_hi:
; CHECK: s_mov_b32 exec_hi, 0
; CHECK: s_mov_b32 exec_hi, s{{[0-9]+}}
define void @test_write_exec_hi(i32 %val) #0 {
call void @llvm.write_register.i32(metadata !6, i32 0)
call void @llvm.write_register.i32(metadata !6, i32 %val)
ret void
}
attributes #0 = { nounwind }
!0 = !{!"m0"}
!1 = !{!"exec"}
!2 = !{!"flat_scratch"}
!3 = !{!"flat_scratch_lo"}
!4 = !{!"flat_scratch_hi"}
!5 = !{!"exec_lo"}
!6 = !{!"exec_hi"}