1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 19:52:54 +01:00

AMDGPU: Fix crash from inconsistent register types for v3i16/v3f16

This is something of a workaround since computeRegisterProperties
seems to be doing the wrong thing.

llvm-svn: 370086
This commit is contained in:
Matt Arsenault 2019-08-27 17:51:56 +00:00
parent 54e79945ee
commit e31d0615ec
2 changed files with 92 additions and 3 deletions

View File

@ -1351,9 +1351,9 @@ bool SITargetLowering::isMemOpUniform(const SDNode *N) const {
/// Select the legalization action for the vector type \p VT.
///
/// A vector with more than one element whose scalar type is no wider than
/// i16 is split when its element count is a power of two and widened
/// otherwise. Every other type is handed to the TargetLoweringBase default.
TargetLoweringBase::LegalizeTypeAction
SITargetLowering::getPreferredVectorAction(MVT VT) const {
  int NumElts = VT.getVectorNumElements();
  if (NumElts != 1 && VT.getScalarType().bitsLE(MVT::i16)) {
    // This must be the only check on this condition: an unconditional
    // TypeSplitVector return ahead of it would make the widen case
    // unreachable. Non-power-of-two counts (v3i16/v3f16) are widened; this
    // is something of a workaround, since computeRegisterProperties seems
    // to produce inconsistent register types for them otherwise.
    return VT.isPow2VectorType() ? TypeSplitVector : TypeWidenVector;
  }

  return TargetLoweringBase::getPreferredVectorAction(VT);
}

View File

@ -165,11 +165,100 @@ bb1:
ret { i32, half } %ins1
}
; Kernel that branches on %cond: the if.else path calls @func_v3i16 and the
; if.then path contributes undef; the merged <3 x i16> phi is stored in if.end.
; The assertions below expect that store to be emitted as one short store
; (from v1) and one dword store (from v0).
; NOTE(review): presumably regression coverage for a crash involving
; <3 x i16> values that are live across basic blocks -- confirm.
define amdgpu_kernel void @v3i16_registers(i1 %cond) #0 {
; GCN-LABEL: v3i16_registers:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
; GCN-NEXT: s_mov_b32 s33, s9
; GCN-NEXT: s_add_u32 flat_scratch_lo, s6, s33
; GCN-NEXT: s_addc_u32 flat_scratch_hi, s7, 0
; GCN-NEXT: s_mov_b32 s32, s33
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_and_b32 s4, 1, s4
; GCN-NEXT: v_cmp_eq_u32_e64 s[4:5], s4, 1
; GCN-NEXT: s_and_b64 vcc, exec, s[4:5]
; GCN-NEXT: s_cbranch_vccz BB4_2
; GCN-NEXT: ; %bb.1:
; GCN-NEXT: s_mov_b32 s4, 0
; GCN-NEXT: s_mov_b32 s5, s4
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: v_mov_b32_e32 v1, s5
; GCN-NEXT: s_branch BB4_3
; GCN-NEXT: BB4_2: ; %if.else
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, func_v3i16@rel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, func_v3i16@rel32@hi+4
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GCN-NEXT: BB4_3: ; %if.end
; GCN-NEXT: global_store_short v[0:1], v1, off
; GCN-NEXT: global_store_dword v[0:1], v0, off
; GCN-NEXT: s_endpgm
entry:
br i1 %cond, label %if.then, label %if.else
if.then: ; preds = %entry
br label %if.end
if.else: ; preds = %entry
%call6 = tail call <3 x i16> @func_v3i16() #0
br label %if.end
if.end: ; preds = %if.else, %if.then
%call6.sink = phi <3 x i16> [ %call6, %if.else ], [ undef, %if.then ]
store <3 x i16> %call6.sink, <3 x i16> addrspace(1)* undef
ret void
}
; Kernel that branches on %cond: the if.else path calls @func_v3f16 and the
; if.then path contributes undef; the merged <3 x half> phi is stored in
; if.end. The assertions below expect that store to be emitted as one short
; store (from v1) and one dword store (from v0).
; NOTE(review): presumably regression coverage for a crash involving
; <3 x half> values that are live across basic blocks -- confirm.
define amdgpu_kernel void @v3f16_registers(i1 %cond) #0 {
; GCN-LABEL: v3f16_registers:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
; GCN-NEXT: s_mov_b32 s33, s9
; GCN-NEXT: s_add_u32 flat_scratch_lo, s6, s33
; GCN-NEXT: s_addc_u32 flat_scratch_hi, s7, 0
; GCN-NEXT: s_mov_b32 s32, s33
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_and_b32 s4, 1, s4
; GCN-NEXT: v_cmp_eq_u32_e64 s[4:5], s4, 1
; GCN-NEXT: s_and_b64 vcc, exec, s[4:5]
; GCN-NEXT: s_cbranch_vccz BB5_2
; GCN-NEXT: ; %bb.1:
; GCN-NEXT: s_mov_b32 s4, 0
; GCN-NEXT: s_mov_b32 s5, s4
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: v_mov_b32_e32 v1, s5
; GCN-NEXT: s_branch BB5_3
; GCN-NEXT: BB5_2: ; %if.else
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, func_v3f16@rel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, func_v3f16@rel32@hi+4
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GCN-NEXT: BB5_3: ; %if.end
; GCN-NEXT: global_store_short v[0:1], v1, off
; GCN-NEXT: global_store_dword v[0:1], v0, off
; GCN-NEXT: s_endpgm
entry:
br i1 %cond, label %if.then, label %if.else
if.then: ; preds = %entry
br label %if.end
if.else: ; preds = %entry
%call6 = tail call <3 x half> @func_v3f16() #0
br label %if.end
if.end: ; preds = %if.else, %if.then
%call6.sink = phi <3 x half> [ %call6, %if.else ], [ undef, %if.then ]
store <3 x half> %call6.sink, <3 x half> addrspace(1)* undef
ret void
}
; External callees referenced by the tests in this file; only declarations
; are provided here, no bodies.
; NOTE(review): @func_v3i16 and @func_v3f16 are declared without attribute
; group #0, unlike the other declarations, although their call sites carry
; #0 -- confirm this difference is intentional.
declare hidden <2 x float> @func_v2f32() #0
declare hidden <3 x float> @func_v3f32() #0
declare hidden <4 x float> @func_v4f32() #0
declare hidden <4 x half> @func_v4f16() #0
declare hidden <3 x i16> @func_v3i16()
declare hidden <3 x half> @func_v3f16()
declare hidden { <4 x i32>, <4 x half> } @func_struct() #0