1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

AMDGPU/GlobalISel: Select llvm.returnaddress

This commit is contained in:
Matt Arsenault 2020-07-19 21:26:02 -04:00 committed by Matt Arsenault
parent ba4d17c159
commit 15b7474538
5 changed files with 196 additions and 4 deletions

View File

@ -872,6 +872,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
return selectBallot(I);
case Intrinsic::amdgcn_reloc_constant:
return selectRelocConstant(I);
case Intrinsic::returnaddress:
return selectReturnAddress(I);
default:
return selectImpl(I, *CoverageInfo);
}
@ -1077,6 +1079,54 @@ bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
return true;
}
/// Manually select a G_INTRINSIC call to llvm.returnaddress.
///
/// The destination must be constrainable to a register class that includes
/// SGPR_64. For a nonzero depth, or inside an entry function (kernel/shader),
/// the intrinsic is replaced with a move of constant 0. Otherwise the result
/// is a COPY of a virtual register that carries the function's return-address
/// physical register in from the entry block; that virtual register and its
/// entry-block COPY are created on demand and reused by later selections in
/// the same function.
///
/// \param I the G_INTRINSIC instruction; it is erased on success.
/// \returns true if \p I was replaced, false if the destination register
/// could not be constrained to a suitable class.
bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
  MachineBasicBlock *MBB = I.getParent();
  MachineFunction &MF = *MBB->getParent();
  const DebugLoc &DL = I.getDebugLoc();

  MachineOperand &Dst = I.getOperand(0);
  Register DstReg = Dst.getReg();
  // Operand 0 is the result and operand 1 the intrinsic ID, so operand 2 is
  // the depth argument of llvm.returnaddress.
  unsigned Depth = I.getOperand(2).getImm();

  const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(Dst, *MRI);
  // Bail out instead of dereferencing a null class if no class could be
  // determined for the destination.
  if (!RC || !RC->hasSubClassEq(&AMDGPU::SGPR_64RegClass) ||
      !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
    return false;

  // Check for kernel and shader functions, and for any depth other than the
  // current frame: the result is a constant 0 in both cases.
  if (Depth != 0 ||
      MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg)
        .addImm(0);
    I.eraseFromParent();
    return true;
  }

  // There is a call to @llvm.returnaddress in this function
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  // Get the return address reg and mark it as an implicit live-in
  Register ReturnAddrReg = TRI.getReturnAddressReg(MF);
  Register LiveIn = MRI->getLiveInVirtReg(ReturnAddrReg);
  const bool IsNewLiveIn = !LiveIn;
  if (IsNewLiveIn)
    LiveIn = MF.addLiveIn(ReturnAddrReg, RC);

  // Emit the entry-block COPY when the live-in was just created, and also
  // when the function already recorded the live-in but its virtual register
  // has no defining instruction; otherwise the COPY below would read an
  // undefined register.
  if (IsNewLiveIn || !MRI->getVRegDef(LiveIn)) {
    MachineBasicBlock &EntryMBB = MF.front();
    BuildMI(EntryMBB, EntryMBB.begin(), DL, TII.get(AMDGPU::COPY), LiveIn)
        .addReg(ReturnAddrReg);
    if (!EntryMBB.isLiveIn(ReturnAddrReg))
      EntryMBB.addLiveIn(ReturnAddrReg);
  }

  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), DstReg)
      .addReg(LiveIn);
  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
// FIXME: Manually selecting to avoid dealing with the SReg_1 trick
// SelectionDAG uses for wave32 vs wave64.

View File

@ -109,6 +109,7 @@ private:
bool selectIntrinsicIcmp(MachineInstr &MI) const;
bool selectBallot(MachineInstr &I) const;
bool selectRelocConstant(MachineInstr &I) const;
/// Manually select llvm.returnaddress: a constant 0 for a nonzero depth or
/// in an entry function, otherwise a copy of the return-address register
/// brought in as a function live-in. Returns false if the destination
/// cannot be constrained to a register class that includes SGPR_64.
bool selectReturnAddress(MachineInstr &I) const;
bool selectG_INTRINSIC(MachineInstr &I) const;
bool selectEndCfIntrinsic(MachineInstr &MI) const;

View File

@ -4023,7 +4023,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_kernarg_segment_ptr:
case Intrinsic::amdgcn_s_getpc:
case Intrinsic::amdgcn_groupstaticsize:
case Intrinsic::amdgcn_reloc_constant: {
case Intrinsic::amdgcn_reloc_constant:
case Intrinsic::returnaddress: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;

View File

@ -0,0 +1,122 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
# $sgpr30_sgpr31 is already a function live-in mapped to %0 and copied in the
# entry block; the checks expect the intrinsic result to reuse that COPY
# rather than emit a second one.
---
name: return_address_already_live_in_copy
legalized: true
regBankSelected: true
tracksRegLiveness: true
liveins:
- { reg: '$sgpr30_sgpr31', virtual-reg: '%0' }
body: |
bb.0:
liveins: $sgpr30_sgpr31
; CHECK-LABEL: name: return_address_already_live_in_copy
; CHECK: liveins: $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
; CHECK: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY]]
%0:sgpr(p0) = COPY $sgpr30_sgpr31
%1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
S_ENDPGM 0, implicit %0, implicit %1
...
# $sgpr30_sgpr31 is a block live-in with an explicit COPY, but the function
# has no liveins entry for it; the checks expect a separate COPY to be emitted
# for the intrinsic result.
---
name: return_address_already_block_live_in_copy_not_mf_life_in
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr30_sgpr31
; CHECK-LABEL: name: return_address_already_block_live_in_copy_not_mf_life_in
; CHECK: liveins: $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
; CHECK: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
; CHECK: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY]]
%0:sgpr(p0) = COPY $sgpr30_sgpr31
%1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
S_ENDPGM 0, implicit %0, implicit %1
...
# No live-ins are declared; the checks expect $sgpr30_sgpr31 to be added as a
# block live-in and copied for the intrinsic result.
---
name: return_address_no_live_in
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: return_address_no_live_in
; CHECK: liveins: $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
; CHECK: S_ENDPGM 0, implicit [[COPY]]
%0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
S_ENDPGM 0, implicit %0
...
# The intrinsic is in bb.1; the checks expect the COPY of $sgpr30_sgpr31 to be
# placed in bb.0 and its result used from bb.1.
---
name: return_address_no_live_in_non_entry_block
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: return_address_no_live_in_non_entry_block
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
; CHECK: S_BRANCH %bb.1
; CHECK: bb.1:
; CHECK: S_ENDPGM 0, implicit [[COPY]]
bb.0:
G_BR %bb.1
bb.1:
%0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
S_ENDPGM 0, implicit %0
...
# Two llvm.returnaddress calls in different blocks; the checks expect both
# results to share a single COPY of $sgpr30_sgpr31 in bb.0.
---
name: return_address_multi_use
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: return_address_multi_use
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
; CHECK: S_BRANCH %bb.1
; CHECK: bb.1:
; CHECK: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY]]
bb.0:
%0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
G_BR %bb.1
bb.1:
%1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
S_ENDPGM 0, implicit %0, implicit %1
...
# With isEntryFunction set, the checks expect the intrinsic to be selected to
# S_MOV_B64 0 and no live-in to be added.
---
name: return_address_kernel_is_null
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
; CHECK-LABEL: name: return_address_kernel_is_null
; CHECK: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; CHECK: S_ENDPGM 0, implicit [[S_MOV_B64_]]
%0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
S_ENDPGM 0, implicit %0
...

View File

@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
; RUN: llc -global-isel -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
; Test with zero frame
; GCN-LABEL: {{^}}func1
@ -25,7 +26,7 @@ entry:
; Test with amdgpu_kernel
; GCN-LABEL: {{^}}func3
; GCN: v_mov_b32_e32 v0, 0
; GCN: v_mov_b32_e32 v1, v0
; GCN: v_mov_b32_e32 v1, {{v0|0}}
define amdgpu_kernel void @func3(i8** %out) nounwind {
entry:
%tmp = tail call i8* @llvm.returnaddress(i32 0)
@ -36,7 +37,7 @@ entry:
; Test with use outside the entry-block
; GCN-LABEL: {{^}}func4
; GCN: v_mov_b32_e32 v0, 0
; GCN: v_mov_b32_e32 v1, v0
; GCN: v_mov_b32_e32 v1, {{v0|0}}
define amdgpu_kernel void @func4(i8** %out, i32 %val) nounwind {
entry:
%cmp = icmp ne i32 %val, 0
@ -61,5 +62,22 @@ entry:
unreachable
}
declare void @callee()
; Test with two llvm.returnaddress(0) calls separated by a call to @callee.
; The checks expect s30/s31 to be moved into one VGPR pair (register numbers
; matched by 4[0-9]+) that feeds the stores on both sides of the s_swappc_b64.
; GCN-LABEL: {{^}}multi_use:
; GCN-DAG: v_mov_b32_e32 v[[LO:4[0-9]+]], s30
; GCN-DAG: v_mov_b32_e32 v[[HI:4[0-9]+]], s31
; GCN: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: s_swappc_b64
; GCN: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @multi_use() nounwind {
entry:
%ret0 = tail call i8* @llvm.returnaddress(i32 0)
store volatile i8* %ret0, i8* addrspace(1)* undef
call void @callee()
%ret1 = tail call i8* @llvm.returnaddress(i32 0)
store volatile i8* %ret1, i8* addrspace(1)* undef
ret void
}
declare i8* @llvm.returnaddress(i32) nounwind readnone