AMDGPU: Fix assert when rewriting saddr d16 loads

moveOperands does not handle moving tied operands, since it would generally have to fix up the tied operand references. Avoid the assert by untying the operands before the modification and retying them afterwards. These in-place modifications really aren't manageable.
2024-10-19 11:02:59 +02:00 · 2021-05-11 18:10:47 -04:00 · 2021-05-11 18:10:47 -04:00 · 15058e16a1
commit 15058e16a1
parent 7a6506cfad
2 changed files with 41 additions and 1 deletions
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@ -5034,8 +5034,24 @@ bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const {
  } else {
    assert(OldSAddrIdx == NewVAddrIdx);

-    if (OldVAddrIdx >= 0)
+    if (OldVAddrIdx >= 0) {
+      int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
+                                                 AMDGPU::OpName::vdst_in);
+
+      // RemoveOperand doesn't try to fixup tied operand indexes as it goes, so
+      // it asserts. Untie the operands for now and retie them afterwards.
+      if (NewVDstIn != -1) {
+        int OldVDstIn = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
+        Inst.untieRegOperand(OldVDstIn);
+      }
+
      Inst.RemoveOperand(OldVAddrIdx);
+
+      if (NewVDstIn != -1) {
+        int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
+        Inst.tieOperands(NewVDst, NewVDstIn);
+      }
+    }
  }

  if (VAddrDef && MRI.use_nodbg_empty(VAddrDef->getOperand(0).getReg()))
--- a/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll
+++ b/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll
@ -31,3 +31,27 @@ bb3:                                              ; preds = %bb3, %bb
  %i9 = icmp eq i32 %i8, 256
  br i1 %i9, label %bb2, label %bb3
 }
+
+; GCN-LABEL: {{^}}test_move_load_address_to_vgpr_d16_hi:
+; GCN-NOT: v_readfirstlane_b32
+; GCN: global_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}], off glc
+define amdgpu_kernel void @test_move_load_address_to_vgpr_d16_hi(i16 addrspace(1)* nocapture %arg) {
+bb:
+  %i1 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 0
+  %load.pre = load volatile i16, i16 addrspace(1)* %i1, align 4
+  %i2 = zext i16 %load.pre to i32
+  br label %bb3
+
+bb2:                                              ; preds = %bb3
+  ret void
+
+bb3:                                              ; preds = %bb3, %bb
+  %i = phi i32 [ %i2, %bb ], [ %i8, %bb3 ]
+  %i4 = zext i32 %i to i64
+  %i5 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %i4
+  %i6 = load volatile i16, i16 addrspace(1)* %i5, align 4
+  %insertelt = insertelement <2 x i16> undef, i16 %i6, i32 1
+  %i8 =  bitcast <2 x i16> %insertelt to i32
+  %i9 = icmp eq i32 %i8, 256
+  br i1 %i9, label %bb2, label %bb3
+}