From 6cd7c75463a6d975b57c78e722cc460ef1ab5439 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Thu, 24 Oct 2019 10:34:47 -0700 Subject: [PATCH] [AMDGPU] Fix mfma scheduling crash An SUnit can be neither an instruction nor an SDNode. It is all null if it represents a nop. Fixed a crash on using SU->getInstr(). Differential Revision: https://reviews.llvm.org/D69395 --- lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 7 +++++- test/CodeGen/AMDGPU/mfma-loop.ll | 34 +++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 3bb6dd4571c..46e4d76367f 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -773,6 +773,11 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation { return MI && TII->isSALU(*MI) && !MI->isTerminator(); } + bool isVALU(const SUnit *SU) const { + const MachineInstr *MI = SU->getInstr(); + return MI && TII->isVALU(*MI); + } + bool canAddEdge(const SUnit *Succ, const SUnit *Pred) const { if (Pred->NodeNum < Succ->NodeNum) return true; @@ -821,7 +826,7 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation { for (SDep &SI : From->Succs) { SUnit *SUv = SI.getSUnit(); - if (SUv != From && TII->isVALU(*SUv->getInstr()) && canAddEdge(SUv, SU)) + if (SUv != From && isVALU(SUv) && canAddEdge(SUv, SU)) SUv->addPred(SDep(SU, SDep::Artificial), false); } diff --git a/test/CodeGen/AMDGPU/mfma-loop.ll b/test/CodeGen/AMDGPU/mfma-loop.ll index a67aadfcd27..90c64763899 100644 --- a/test/CodeGen/AMDGPU/mfma-loop.ll +++ b/test/CodeGen/AMDGPU/mfma-loop.ll @@ -76,5 +76,39 @@ exit: ret void } +; GCN-LABEL: {{^}}test_mfma_loop_mfma_forward_init: + +; GCN-COUNT32: v_accvgpr_write_b32 a{{[0-9]+}}, 0 +; GCN: v_mfma_f32_32x32x1f32 +; GCN-NOT: v_accvgpr + +; GCN: [[LOOP:BB[0-9_]+]]: +; GCN-NOT: v_accvgpr +; GCN: v_mfma_f32_32x32x1f32 +; GCN-NOT: v_accvgpr +; GCN: s_cbranch_scc1 [[LOOP]] + +; GCN-COUNT32: v_accvgpr_read_b32 
+define amdgpu_kernel void @test_mfma_loop_mfma_forward_init(<32 x float> addrspace(1)* %arg) { +entry: + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %init = bitcast i32 %tid to float + %mai.0 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> zeroinitializer, i32 0, i32 0, i32 0) + + br label %for.cond.preheader + +for.cond.preheader: + %phi = phi <32 x float> [ %mai.0, %entry ], [ %mai.1, %for.cond.preheader ] + %c = phi i32 [ 0, %entry ], [ %inc, %for.cond.preheader ] + %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %phi, i32 0, i32 0, i32 0) + %inc = add nuw nsw i32 %c, 1 + %cc = icmp eq i32 %inc, 16 + br i1 %cc, label %exit, label %for.cond.preheader + +exit: + store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg + ret void +} + declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32) declare i32 @llvm.amdgcn.workitem.id.x()