1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-22 12:33:33 +02:00

AMDGPU: Insert wait at start of callee functions

llvm-svn: 300000
This commit is contained in:
Matt Arsenault 2017-04-11 22:29:31 +00:00
parent c16c71ecdb
commit 8aa0f7f3d2
3 changed files with 40 additions and 1 deletions

View File

@ -690,5 +690,19 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
for (MachineInstr *I : RemoveMI)
I->eraseFromParent();
if (!MFI->isEntryFunction()) {
// Wait for any outstanding memory operations that the input registers may
// depend on. We can't track them and it's better to do the wait after the
// costly call sequence.
// TODO: Could insert earlier and schedule more liberally with operations
// that only use caller preserved registers.
MachineBasicBlock &EntryBB = MF.front();
BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
.addImm(0);
Changes = true;
}
return Changes;
}

View File

@ -26,7 +26,7 @@
; ELF: Symbol {
; ELF: Name: simple
; ELF: Size: 288
; ELF: Size: 292
; ELF: Type: Function (0x2)
; ELF: }

View File

@ -0,0 +1,25 @@
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs -run-pass si-insert-waits -o - %s | FileCheck %s
# Runs only the si-insert-waits pass over the MIR below and checks that an
# S_WAITCNT 0 is emitted as the first instruction of bb.0, immediately before
# the function's original instructions.
# NOTE(review): presumably this function is treated as a non-entry (callable)
# function by the pass -- confirm against the pass's isEntryFunction() check.
--- |
define float @entry_callee_wait(float %arg) #0 {
ret float %arg
}
attributes #0 = { nounwind }
...
---
# The wait must come first in the block; the CHECK-NEXT lines then pin the
# two original instructions in their original order.
# CHECK-LABEL: name: entry_callee_wait{{$}}
# CHECK: bb.0:
# CHECK-NEXT: S_WAITCNT 0{{$}}
# CHECK-NEXT: V_ADD_F32
# CHECK-NEXT: S_SETPC_B64
liveins:
- { reg: '%sgpr0_sgpr1' }
- { reg: '%vgpr0' }
name: entry_callee_wait
body: |
bb.0:
%vgpr0 = V_ADD_F32_e32 %vgpr0, %vgpr0, implicit %exec
S_SETPC_B64 killed %sgpr0_sgpr1
...