1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

AMDGPU: Fix assert when rewriting saddr d16 loads

moveOperands does not handle moving tied operands since it would
generally have to fix up the tied operand references. Avoid the assert
by untying and retying after the modification. These in-place
modifications really aren't manageable.
This commit is contained in:
Matt Arsenault 2021-05-11 18:10:47 -04:00
parent 7a6506cfad
commit 15058e16a1
2 changed files with 41 additions and 1 deletions

View File

@ -5034,8 +5034,24 @@ bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const {
} else {
assert(OldSAddrIdx == NewVAddrIdx);
if (OldVAddrIdx >= 0)
if (OldVAddrIdx >= 0) {
int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
AMDGPU::OpName::vdst_in);
// RemoveOperand doesn't try to fix up tied operand indexes as it goes, so
// it asserts. Untie the operands for now and retie them afterwards.
if (NewVDstIn != -1) {
int OldVDstIn = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
Inst.untieRegOperand(OldVDstIn);
}
Inst.RemoveOperand(OldVAddrIdx);
if (NewVDstIn != -1) {
int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
Inst.tieOperands(NewVDst, NewVDstIn);
}
}
}
if (VAddrDef && MRI.use_nodbg_empty(VAddrDef->getOperand(0).getReg()))

View File

@ -31,3 +31,27 @@ bb3: ; preds = %bb3, %bb
%i9 = icmp eq i32 %i8, 256
br i1 %i9, label %bb2, label %bb3
}
; Codegen test for a d16_hi global load whose address changes on every loop
; iteration. The checks that follow require a global_load_short_d16_hi that
; takes a VGPR address pair, with no v_readfirstlane_b32 ahead of it.
; GCN-LABEL: {{^}}test_move_load_address_to_vgpr_d16_hi:
; GCN-NOT: v_readfirstlane_b32
; GCN: global_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}], off glc
define amdgpu_kernel void @test_move_load_address_to_vgpr_d16_hi(i16 addrspace(1)* nocapture %arg) {
bb:
; Seed the loop index from a volatile load of the first element of %arg.
%i1 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 0
%load.pre = load volatile i16, i16 addrspace(1)* %i1, align 4
%i2 = zext i16 %load.pre to i32
br label %bb3
bb2: ; preds = %bb3
ret void
bb3: ; preds = %bb3, %bb
; %i is loop-carried: it starts at %i2 and is replaced by %i8 each iteration,
; so the address computed from it below differs per iteration.
%i = phi i32 [ %i2, %bb ], [ %i8, %bb3 ]
%i4 = zext i32 %i to i64
%i5 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %i4
%i6 = load volatile i16, i16 addrspace(1)* %i5, align 4
; The loaded value goes into element 1 of a <2 x i16>; presumably this is what
; selects the d16_hi load form (confirm against the instruction selection
; patterns).
%insertelt = insertelement <2 x i16> undef, i16 %i6, i32 1
%i8 = bitcast <2 x i16> %insertelt to i32
; Leave the loop once the packed value equals 256.
%i9 = icmp eq i32 %i8, 256
br i1 %i9, label %bb2, label %bb3
}