1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[AMDGPU] Skip CFIInstructions in SIInsertWaitcnts

Summary:
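CFI emitted during PEI at the beginning of the prologue needs to apply
to any waitcnts inserted on function entry. Insert those waitcnts after
any leading PHI and meta instructions (which include CFI instructions)
rather than at the first non-PHI instruction.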

Reviewers: arsenm, t-tye, RamNalamothu

Reviewed By: arsenm

Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits

Tags: #llvm, #debug-info

Differential Revision: https://reviews.llvm.org/D76881
This commit is contained in:
Scott Linder 2020-06-17 12:38:34 -04:00
parent a597b94680
commit b04677bc50
4 changed files with 120 additions and 22 deletions

View File

@ -1632,13 +1632,15 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
// TODO: Could insert earlier and schedule more liberally with operations
// that only use caller preserved registers.
MachineBasicBlock &EntryBB = MF.front();
MachineBasicBlock::iterator I = EntryBB.begin();
for (MachineBasicBlock::iterator E = EntryBB.end();
I != E && (I->isPHI() || I->isMetaInstruction()); ++I)
;
BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0);
if (ST->hasVscnt())
BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(),
TII->get(AMDGPU::S_WAITCNT_VSCNT))
.addReg(AMDGPU::SGPR_NULL, RegState::Undef)
.addImm(0);
BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
.addImm(0);
BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT_VSCNT))
.addReg(AMDGPU::SGPR_NULL, RegState::Undef)
.addImm(0);
Modified = true;
}

View File

@ -7,12 +7,12 @@ define hidden <4 x float> @split_v4f32_arg(<4 x float> returned %arg) local_unna
; GCN-NEXT: .file 0
; GCN-NEXT: .loc 0 3 0 ; /tmp/dbg.cl:3:0
; GCN-NEXT: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: .Ltmp0:
; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 96 32] $vgpr3
; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2
; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: .Ltmp0:
; GCN-NEXT: .loc 0 4 5 prologue_end ; /tmp/dbg.cl:4:5
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: .Ltmp1:
@ -25,14 +25,14 @@ define hidden <4 x float> @split_v4f32_multi_arg(<4 x float> %arg0, <2 x float>
; GCN: .Lfunc_begin1:
; GCN-NEXT: .loc 0 7 0 ; /tmp/dbg.cl:7:0
; GCN-NEXT: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: .Ltmp2:
; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg1 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr5
; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg1 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr4
; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 96 32] $vgpr3
; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2
; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: .Ltmp2:
; GCN-NEXT: .loc 0 8 17 prologue_end ; /tmp/dbg.cl:8:17
; GCN-NEXT: v_add_f32_e32 v0, v4, v0
; GCN-NEXT: .Ltmp3:
@ -57,10 +57,10 @@ define hidden <4 x half> @split_v4f16_arg(<4 x half> returned %arg) local_unname
; GCN: .Lfunc_begin2:
; GCN-NEXT: .loc 0 11 0 is_stmt 1 ; /tmp/dbg.cl:11:0
; GCN-NEXT: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: .Ltmp8:
; GCN-NEXT: ;DEBUG_VALUE: split_v4f16_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
; GCN-NEXT: ;DEBUG_VALUE: split_v4f16_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: .Ltmp8:
; GCN-NEXT: .loc 0 12 5 prologue_end ; /tmp/dbg.cl:12:5
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: .Ltmp9:
@ -73,10 +73,10 @@ define hidden double @split_f64_arg(double returned %arg) local_unnamed_addr #0
; GCN: .Lfunc_begin3:
; GCN-NEXT: .loc 0 15 0 ; /tmp/dbg.cl:15:0
; GCN-NEXT: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: .Ltmp10:
; GCN-NEXT: ;DEBUG_VALUE: split_f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
; GCN-NEXT: ;DEBUG_VALUE: split_f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: .Ltmp10:
; GCN-NEXT: .loc 0 16 5 prologue_end ; /tmp/dbg.cl:16:5
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: .Ltmp11:
@ -89,12 +89,12 @@ define hidden <2 x double> @split_v2f64_arg(<2 x double> returned %arg) local_un
; GCN: .Lfunc_begin4:
; GCN-NEXT: .loc 0 19 0 ; /tmp/dbg.cl:19:0
; GCN-NEXT: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: .Ltmp12:
; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 96 32] $vgpr3
; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2
; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: .Ltmp12:
; GCN-NEXT: .loc 0 20 5 prologue_end ; /tmp/dbg.cl:20:5
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: .Ltmp13:
@ -107,10 +107,10 @@ define hidden i64 @split_i64_arg(i64 returned %arg) local_unnamed_addr #0 !dbg !
; GCN: .Lfunc_begin5:
; GCN-NEXT: .loc 0 23 0 ; /tmp/dbg.cl:23:0
; GCN-NEXT: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: .Ltmp14:
; GCN-NEXT: ;DEBUG_VALUE: split_i64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
; GCN-NEXT: ;DEBUG_VALUE: split_i64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: .Ltmp14:
; GCN-NEXT: .loc 0 24 5 prologue_end ; /tmp/dbg.cl:24:5
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: .Ltmp15:
@ -123,10 +123,10 @@ define hidden i8 addrspace(1)* @split_ptr_arg(i8 addrspace(1)* readnone returned
; GCN: .Lfunc_begin6:
; GCN-NEXT: .loc 0 27 0 ; /tmp/dbg.cl:27:0
; GCN-NEXT: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: .Ltmp16:
; GCN-NEXT: ;DEBUG_VALUE: split_ptr_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
; GCN-NEXT: ;DEBUG_VALUE: split_ptr_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: .Ltmp16:
; GCN-NEXT: .loc 0 28 5 prologue_end ; /tmp/dbg.cl:28:5
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: .Ltmp17:

View File

@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 | FileCheck %s --check-prefix=GCN
define void @vgpr_descriptor_waterfall_loop_idom_update(<4 x i32>* %arg) {
define void @vgpr_descriptor_waterfall_loop_idom_update(<4 x i32>* %arg) #0 {
; GCN-LABEL: vgpr_descriptor_waterfall_loop_idom_update:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: ; implicit-def: $vcc_hi
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: ; implicit-def: $vcc_hi
; GCN-NEXT: BB0_1: ; %bb0
; GCN-NEXT: ; =>This Loop Header: Depth=1
; GCN-NEXT: ; Child Loop BB0_2 Depth 2

View File

@ -0,0 +1,96 @@
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-insert-waitcnts %s -o - | FileCheck %s
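# Ensure we insert waitcnts after any meta instructions at the start of
# non-kernel functions. Without this, the inserted waitcnts can affect e.g. the
# PC ranges covered by CFI and debug values.
# Each function below has a single instruction in its entry block; the checks
# verify that S_WAITCNT appears after that instruction in the output.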
---
# CHECK-LABEL: name: skip_implicit_def{{$}}
# CHECK: IMPLICIT_DEF
# CHECK: S_WAITCNT
name: skip_implicit_def
machineFunctionInfo:
body: |
bb.0:
$sgpr0 = IMPLICIT_DEF
...
---
# CHECK-LABEL: name: skip_kill{{$}}
# CHECK: KILL
# CHECK: S_WAITCNT
name: skip_kill
machineFunctionInfo:
body: |
bb.0:
KILL $sgpr0
...
---
# CHECK-LABEL: name: skip_cfi{{$}}
# CHECK: CFI_INSTRUCTION
# CHECK: S_WAITCNT
name: skip_cfi
machineFunctionInfo:
body: |
bb.0:
CFI_INSTRUCTION undefined $sgpr0
...
---
# CHECK-LABEL: name: skip_eh_label{{$}}
# CHECK: EH_LABEL
# CHECK: S_WAITCNT
name: skip_eh_label
machineFunctionInfo:
body: |
bb.0:
EH_LABEL 0
...
---
# CHECK-LABEL: name: skip_gc_label{{$}}
# CHECK: GC_LABEL
# CHECK: S_WAITCNT
name: skip_gc_label
machineFunctionInfo:
body: |
bb.0:
GC_LABEL 0
...
---
# CHECK-LABEL: name: skip_dbg_value{{$}}
# CHECK: DBG_VALUE
# CHECK: S_WAITCNT
name: skip_dbg_value
machineFunctionInfo:
body: |
bb.0:
DBG_VALUE 0
...
---
# CHECK-LABEL: name: skip_dbg_label{{$}}
# CHECK: DBG_LABEL
# CHECK: S_WAITCNT
name: skip_dbg_label
machineFunctionInfo:
body: |
bb.0:
DBG_LABEL 0
...
---
# CHECK-LABEL: name: skip_lifetime_start{{$}}
# CHECK: LIFETIME_START
# CHECK: S_WAITCNT
name: skip_lifetime_start
machineFunctionInfo:
body: |
bb.0:
LIFETIME_START 0
...
---
# CHECK-LABEL: name: skip_lifetime_end{{$}}
# CHECK: LIFETIME_END
# CHECK: S_WAITCNT
name: skip_lifetime_end
machineFunctionInfo:
body: |
bb.0:
LIFETIME_END 0
...