mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
AMDGPU: Improve <2 x i24> arguments and return value handling
Previously, <2 x i24> arguments and return values hit an assertion in GlobalISel, and SelectionDAG passed them on the stack. Instead, scalarize such vectors as if they had 32-bit elements, so each element is passed in its own 32-bit register.
This commit is contained in:
parent
6aba538d9d
commit
f2aa3ef913
@ -921,15 +921,18 @@ MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
|
||||
if (VT.isVector()) {
|
||||
EVT ScalarVT = VT.getScalarType();
|
||||
unsigned Size = ScalarVT.getSizeInBits();
|
||||
if (Size == 32)
|
||||
return ScalarVT.getSimpleVT();
|
||||
if (Size == 16) {
|
||||
if (Subtarget->has16BitInsts())
|
||||
return VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
|
||||
return VT.isInteger() ? MVT::i32 : MVT::f32;
|
||||
}
|
||||
|
||||
if (Size > 32)
|
||||
return MVT::i32;
|
||||
if (Size < 16)
|
||||
return Subtarget->has16BitInsts() ? MVT::i16 : MVT::i32;
|
||||
return Size == 32 ? ScalarVT.getSimpleVT() : MVT::i32;
|
||||
}
|
||||
|
||||
if (Size == 16 && Subtarget->has16BitInsts())
|
||||
return VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
|
||||
} else if (VT.getSizeInBits() > 32)
|
||||
if (VT.getSizeInBits() > 32)
|
||||
return MVT::i32;
|
||||
|
||||
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
|
||||
@ -946,14 +949,15 @@ unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
|
||||
EVT ScalarVT = VT.getScalarType();
|
||||
unsigned Size = ScalarVT.getSizeInBits();
|
||||
|
||||
if (Size == 32)
|
||||
// FIXME: Should probably promote 8-bit vectors to i16.
|
||||
if (Size == 16 && Subtarget->has16BitInsts())
|
||||
return (NumElts + 1) / 2;
|
||||
|
||||
if (Size <= 32)
|
||||
return NumElts;
|
||||
|
||||
if (Size > 32)
|
||||
return NumElts * ((Size + 31) / 32);
|
||||
|
||||
if (Size == 16 && Subtarget->has16BitInsts())
|
||||
return (NumElts + 1) / 2;
|
||||
} else if (VT.getSizeInBits() > 32)
|
||||
return (VT.getSizeInBits() + 31) / 32;
|
||||
|
||||
@ -968,6 +972,16 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
EVT ScalarVT = VT.getScalarType();
|
||||
unsigned Size = ScalarVT.getSizeInBits();
|
||||
// FIXME: We should fix the ABI to be the same on targets without 16-bit
|
||||
// support, but unless we can properly handle 3-vectors, it will still be
|
||||
// inconsistent.
|
||||
if (Size == 16 && Subtarget->has16BitInsts()) {
|
||||
RegisterVT = VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
|
||||
IntermediateVT = RegisterVT;
|
||||
NumIntermediates = (NumElts + 1) / 2;
|
||||
return NumIntermediates;
|
||||
}
|
||||
|
||||
if (Size == 32) {
|
||||
RegisterVT = ScalarVT.getSimpleVT();
|
||||
IntermediateVT = RegisterVT;
|
||||
@ -975,22 +989,28 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
|
||||
return NumIntermediates;
|
||||
}
|
||||
|
||||
if (Size < 16 && Subtarget->has16BitInsts()) {
|
||||
// FIXME: Should probably form v2i16 pieces
|
||||
RegisterVT = MVT::i16;
|
||||
IntermediateVT = ScalarVT;
|
||||
NumIntermediates = NumElts;
|
||||
return NumIntermediates;
|
||||
}
|
||||
|
||||
|
||||
if (Size != 16 && Size <= 32) {
|
||||
RegisterVT = MVT::i32;
|
||||
IntermediateVT = ScalarVT;
|
||||
NumIntermediates = NumElts;
|
||||
return NumIntermediates;
|
||||
}
|
||||
|
||||
if (Size > 32) {
|
||||
RegisterVT = MVT::i32;
|
||||
IntermediateVT = RegisterVT;
|
||||
NumIntermediates = NumElts * ((Size + 31) / 32);
|
||||
return NumIntermediates;
|
||||
}
|
||||
|
||||
// FIXME: We should fix the ABI to be the same on targets without 16-bit
|
||||
// support, but unless we can properly handle 3-vectors, it will still be
|
||||
// inconsistent.
|
||||
if (Size == 16 && Subtarget->has16BitInsts()) {
|
||||
RegisterVT = VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
|
||||
IntermediateVT = RegisterVT;
|
||||
NumIntermediates = (NumElts + 1) / 2;
|
||||
return NumIntermediates;
|
||||
}
|
||||
}
|
||||
|
||||
return TargetLowering::getVectorTypeBreakdownForCallingConv(
|
||||
|
@ -196,6 +196,89 @@ define half @f16_func_void() #0 {
|
||||
ret half %val
|
||||
}
|
||||
|
||||
define i24 @i24_func_void() #0 {
|
||||
; CHECK-LABEL: name: i24_func_void
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load 3 from `i24 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s24)
|
||||
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
|
||||
; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0
|
||||
%val = load i24, i24 addrspace(1)* undef
|
||||
ret i24 %val
|
||||
}
|
||||
|
||||
define zeroext i24 @i24_zeroext_func_void() #0 {
|
||||
; CHECK-LABEL: name: i24_zeroext_func_void
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load 3 from `i24 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s24)
|
||||
; CHECK: $vgpr0 = COPY [[ZEXT]](s32)
|
||||
; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0
|
||||
%val = load i24, i24 addrspace(1)* undef
|
||||
ret i24 %val
|
||||
}
|
||||
|
||||
define signext i24 @i24_signext_func_void() #0 {
|
||||
; CHECK-LABEL: name: i24_signext_func_void
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load 3 from `i24 addrspace(1)* undef`, align 4, addrspace 1)
|
||||
; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s24)
|
||||
; CHECK: $vgpr0 = COPY [[SEXT]](s32)
|
||||
; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0
|
||||
%val = load i24, i24 addrspace(1)* undef
|
||||
ret i24 %val
|
||||
}
|
||||
|
||||
define <2 x i24> @v2i24_func_void() #0 {
|
||||
; CHECK-LABEL: name: v2i24_func_void
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s24>) = G_LOAD [[DEF]](p1) :: (load 6 from `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1)
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<2 x s24>)
|
||||
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24)
|
||||
; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24)
|
||||
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
|
||||
; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32)
|
||||
; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
|
||||
%val = load <2 x i24>, <2 x i24> addrspace(1)* undef
|
||||
ret <2 x i24> %val
|
||||
}
|
||||
|
||||
define <3 x i24> @v3i24_func_void() #0 {
|
||||
; CHECK-LABEL: name: v3i24_func_void
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s24>) = G_LOAD [[DEF]](p1) :: (load 9 from `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1)
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24), [[UV2:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<3 x s24>)
|
||||
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24)
|
||||
; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24)
|
||||
; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s24)
|
||||
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
|
||||
; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32)
|
||||
; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32)
|
||||
; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
|
||||
%val = load <3 x i24>, <3 x i24> addrspace(1)* undef
|
||||
ret <3 x i24> %val
|
||||
}
|
||||
|
||||
define i32 @i32_func_void() #0 {
|
||||
; CHECK-LABEL: name: i32_func_void
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
@ -977,6 +1060,44 @@ define <16 x i8> @v16i8_func_void() #0 {
|
||||
ret <16 x i8> %val
|
||||
}
|
||||
|
||||
define <2 x i8> @v2i8_func_void() #0 {
|
||||
; CHECK-LABEL: name: v2i8_func_void
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[DEF]](p1) :: (load 2 from `<2 x i8> addrspace(1)* undef`, addrspace 1)
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<2 x s8>)
|
||||
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
|
||||
; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8)
|
||||
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
|
||||
; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32)
|
||||
; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
|
||||
%val = load <2 x i8>, <2 x i8> addrspace(1)* undef
|
||||
ret <2 x i8> %val
|
||||
}
|
||||
|
||||
define <3 x i8> @v3i8_func_void() #0 {
|
||||
; CHECK-LABEL: name: v3i8_func_void
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[DEF]](p1) :: (load 3 from `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1)
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<3 x s8>)
|
||||
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
|
||||
; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8)
|
||||
; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
|
||||
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
|
||||
; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32)
|
||||
; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32)
|
||||
; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
|
||||
%val = load <3 x i8>, <3 x i8> addrspace(1)* undef
|
||||
ret <3 x i8> %val
|
||||
}
|
||||
|
||||
define <4 x i8> @v4i8_func_void() #0 {
|
||||
; CHECK-LABEL: name: v4i8_func_void
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
|
@ -553,6 +553,104 @@ define void @void_func_v2i32(<2 x i32> %arg0) #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @void_func_v2i24(<2 x i24> %arg0) #0 {
|
||||
; CHECK-LABEL: name: void_func_v2i24
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s24>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: G_STORE [[TRUNC]](<2 x s24>), [[DEF]](p1) :: (store 6 into `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1)
|
||||
; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY3]]
|
||||
store <2 x i24> %arg0, <2 x i24> addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @void_func_v3i24(<3 x i24> %arg0) #0 {
|
||||
; CHECK-LABEL: name: void_func_v3i24
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s24>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: G_STORE [[TRUNC]](<3 x s24>), [[DEF]](p1) :: (store 9 into `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY4]]
|
||||
store <3 x i24> %arg0, <3 x i24> addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @void_func_v2i8(<2 x i8> %arg0) #0 {
|
||||
; CHECK-LABEL: name: void_func_v2i8
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
|
||||
; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s16>)
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: G_STORE [[TRUNC2]](<2 x s8>), [[DEF]](p1) :: (store 2 into `<2 x i8> addrspace(1)* undef`, addrspace 1)
|
||||
; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY3]]
|
||||
store <2 x i8> %arg0, <2 x i8> addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @void_func_v3i8(<3 x i8> %arg0) #0 {
|
||||
; CHECK-LABEL: name: void_func_v3i8
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
|
||||
; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16)
|
||||
; CHECK: [[TRUNC3:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s16>)
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: G_STORE [[TRUNC3]](<3 x s8>), [[DEF]](p1) :: (store 3 into `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY4]]
|
||||
store <3 x i8> %arg0, <3 x i8> addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @void_func_v4i8(<4 x i8> %arg0) #0 {
|
||||
; CHECK-LABEL: name: void_func_v4i8
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
|
||||
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
|
||||
; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
|
||||
; CHECK: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s16>)
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: G_STORE [[TRUNC4]](<4 x s8>), [[DEF]](p1) :: (store 4 into `<4 x i8> addrspace(1)* undef`, addrspace 1)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY5]]
|
||||
store <4 x i8> %arg0, <4 x i8> addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @void_func_v2p3i8(<2 x i8 addrspace(3)*> %arg0) #0 {
|
||||
; CHECK-LABEL: name: void_func_v2p3i8
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
|
@ -30,6 +30,8 @@ declare <3 x float> @external_v3f32_func_void() #0
|
||||
declare <5 x float> @external_v5f32_func_void() #0
|
||||
declare <2 x double> @external_v2f64_func_void() #0
|
||||
|
||||
declare <2 x i24> @external_v2i24_func_void() #0
|
||||
|
||||
declare <2 x i32> @external_v2i32_func_void() #0
|
||||
declare <3 x i32> @external_v3i32_func_void() #0
|
||||
declare <4 x i32> @external_v4i32_func_void() #0
|
||||
@ -250,6 +252,18 @@ define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_call_external_v2i24_func_void:
|
||||
; GCN: s_swappc_b64
|
||||
; GCN: v_add_{{i|u}}32_e32 v0, {{(vcc, )?}}v0, v1
|
||||
define amdgpu_kernel void @test_call_external_v2i24_func_void() #0 {
|
||||
%val = call <2 x i24> @external_v2i24_func_void()
|
||||
%elt0 = extractelement <2 x i24> %val, i32 0
|
||||
%elt1 = extractelement <2 x i24> %val, i32 1
|
||||
%add = add i24 %elt0, %elt1
|
||||
store volatile i24 %add, i24 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_call_external_v3f32_func_void:
|
||||
; GCN: s_swappc
|
||||
; GFX7-DAG: flat_store_dwordx3 {{.*}}, v[0:2]
|
||||
|
@ -981,127 +981,61 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2
|
||||
; SI-LABEL: v_fshr_v2i24:
|
||||
; SI: ; %bb.0:
|
||||
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; SI-NEXT: buffer_load_dword v1, off, s[0:3], s32
|
||||
; SI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16
|
||||
; SI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20
|
||||
; SI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12
|
||||
; SI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:4
|
||||
; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:8
|
||||
; SI-NEXT: s_mov_b32 s4, 0xaaaaaaab
|
||||
; SI-NEXT: v_add_i32_e32 v7, vcc, 3, v0
|
||||
; SI-NEXT: v_add_i32_e32 v8, vcc, 4, v0
|
||||
; SI-NEXT: v_add_i32_e32 v9, vcc, 5, v0
|
||||
; SI-NEXT: v_add_i32_e32 v10, vcc, 2, v0
|
||||
; SI-NEXT: s_waitcnt vmcnt(4)
|
||||
; SI-NEXT: v_mul_hi_u32 v11, v2, s4
|
||||
; SI-NEXT: s_waitcnt vmcnt(3)
|
||||
; SI-NEXT: v_mul_hi_u32 v12, v3, s4
|
||||
; SI-NEXT: s_waitcnt vmcnt(2)
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v4, 8, v4
|
||||
; SI-NEXT: v_lshrrev_b32_e32 v11, 4, v11
|
||||
; SI-NEXT: v_lshrrev_b32_e32 v12, 4, v12
|
||||
; SI-NEXT: v_mul_lo_u32 v11, v11, 24
|
||||
; SI-NEXT: v_mul_lo_u32 v12, v12, 24
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v6, 8, v6
|
||||
; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v11
|
||||
; SI-NEXT: v_sub_i32_e32 v3, vcc, v3, v12
|
||||
; SI-NEXT: v_add_i32_e32 v2, vcc, 8, v2
|
||||
; SI-NEXT: v_mul_hi_u32 v6, v4, s4
|
||||
; SI-NEXT: v_mul_hi_u32 v7, v5, s4
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v2, 8, v2
|
||||
; SI-NEXT: v_lshrrev_b32_e32 v6, 4, v6
|
||||
; SI-NEXT: v_mul_lo_u32 v6, v6, 24
|
||||
; SI-NEXT: v_sub_i32_e32 v4, vcc, v4, v6
|
||||
; SI-NEXT: v_lshrrev_b32_e32 v6, 4, v7
|
||||
; SI-NEXT: v_mul_lo_u32 v6, v6, 24
|
||||
; SI-NEXT: v_add_i32_e32 v4, vcc, 8, v4
|
||||
; SI-NEXT: v_alignbit_b32 v0, v0, v2, v4
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v2, 8, v3
|
||||
; SI-NEXT: v_sub_i32_e32 v3, vcc, v5, v6
|
||||
; SI-NEXT: v_add_i32_e32 v3, vcc, 8, v3
|
||||
; SI-NEXT: v_alignbit_b32 v1, v1, v6, v2
|
||||
; SI-NEXT: v_alignbit_b32 v2, v5, v4, v3
|
||||
; SI-NEXT: buffer_store_byte v2, v7, s[0:3], 0 offen
|
||||
; SI-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen
|
||||
; SI-NEXT: v_lshrrev_b32_e32 v0, 8, v2
|
||||
; SI-NEXT: s_waitcnt expcnt(1)
|
||||
; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
||||
; SI-NEXT: buffer_store_byte v0, v8, s[0:3], 0 offen
|
||||
; SI-NEXT: buffer_store_byte v2, v9, s[0:3], 0 offen
|
||||
; SI-NEXT: buffer_store_byte v1, v10, s[0:3], 0 offen
|
||||
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
|
||||
; SI-NEXT: v_alignbit_b32 v1, v1, v2, v3
|
||||
; SI-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; VI-LABEL: v_fshr_v2i24:
|
||||
; VI: ; %bb.0:
|
||||
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32
|
||||
; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16
|
||||
; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20
|
||||
; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12
|
||||
; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:4
|
||||
; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:8
|
||||
; VI-NEXT: s_mov_b32 s4, 0xaaaaaaab
|
||||
; VI-NEXT: v_add_u32_e32 v7, vcc, 3, v0
|
||||
; VI-NEXT: v_add_u32_e32 v8, vcc, 4, v0
|
||||
; VI-NEXT: v_add_u32_e32 v9, vcc, 5, v0
|
||||
; VI-NEXT: v_add_u32_e32 v10, vcc, 2, v0
|
||||
; VI-NEXT: s_waitcnt vmcnt(4)
|
||||
; VI-NEXT: v_mul_hi_u32 v11, v2, s4
|
||||
; VI-NEXT: s_waitcnt vmcnt(3)
|
||||
; VI-NEXT: v_mul_hi_u32 v12, v3, s4
|
||||
; VI-NEXT: s_waitcnt vmcnt(2)
|
||||
; VI-NEXT: v_lshlrev_b32_e32 v4, 8, v4
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v11, 4, v11
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v12, 4, v12
|
||||
; VI-NEXT: v_mul_lo_u32 v11, v11, 24
|
||||
; VI-NEXT: v_mul_lo_u32 v12, v12, 24
|
||||
; VI-NEXT: s_waitcnt vmcnt(0)
|
||||
; VI-NEXT: v_lshlrev_b32_e32 v6, 8, v6
|
||||
; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v11
|
||||
; VI-NEXT: v_sub_u32_e32 v3, vcc, v3, v12
|
||||
; VI-NEXT: v_add_u32_e32 v2, vcc, 8, v2
|
||||
; VI-NEXT: v_mul_hi_u32 v6, v4, s4
|
||||
; VI-NEXT: v_mul_hi_u32 v7, v5, s4
|
||||
; VI-NEXT: v_lshlrev_b32_e32 v2, 8, v2
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v6, 4, v6
|
||||
; VI-NEXT: v_mul_lo_u32 v6, v6, 24
|
||||
; VI-NEXT: v_sub_u32_e32 v4, vcc, v4, v6
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v6, 4, v7
|
||||
; VI-NEXT: v_mul_lo_u32 v6, v6, 24
|
||||
; VI-NEXT: v_add_u32_e32 v4, vcc, 8, v4
|
||||
; VI-NEXT: v_alignbit_b32 v0, v0, v2, v4
|
||||
; VI-NEXT: v_lshlrev_b32_e32 v2, 8, v3
|
||||
; VI-NEXT: v_sub_u32_e32 v3, vcc, v5, v6
|
||||
; VI-NEXT: v_add_u32_e32 v3, vcc, 8, v3
|
||||
; VI-NEXT: v_alignbit_b32 v1, v1, v6, v2
|
||||
; VI-NEXT: v_alignbit_b32 v2, v5, v4, v3
|
||||
; VI-NEXT: buffer_store_byte v2, v7, s[0:3], 0 offen
|
||||
; VI-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v0, 8, v2
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
||||
; VI-NEXT: buffer_store_byte v0, v8, s[0:3], 0 offen
|
||||
; VI-NEXT: buffer_store_byte v2, v9, s[0:3], 0 offen
|
||||
; VI-NEXT: buffer_store_byte v1, v10, s[0:3], 0 offen
|
||||
; VI-NEXT: s_waitcnt vmcnt(0)
|
||||
; VI-NEXT: v_alignbit_b32 v1, v1, v2, v3
|
||||
; VI-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9-LABEL: v_fshr_v2i24:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16
|
||||
; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:20
|
||||
; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:12
|
||||
; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:4
|
||||
; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:8
|
||||
; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32
|
||||
; GFX9-NEXT: s_mov_b32 s4, 0xaaaaaaab
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(5)
|
||||
; GFX9-NEXT: v_mul_hi_u32 v6, v1, s4
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(4)
|
||||
; GFX9-NEXT: v_mul_hi_u32 v7, v2, s4
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(3)
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v3, 8, v3
|
||||
; GFX9-NEXT: v_mul_hi_u32 v6, v4, s4
|
||||
; GFX9-NEXT: v_mul_hi_u32 v7, v5, s4
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 8, v2
|
||||
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 4, v6
|
||||
; GFX9-NEXT: v_lshrrev_b32_e32 v7, 4, v7
|
||||
; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24
|
||||
; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(1)
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v5, 8, v5
|
||||
; GFX9-NEXT: v_sub_u32_e32 v1, v1, v6
|
||||
; GFX9-NEXT: v_sub_u32_e32 v2, v2, v7
|
||||
; GFX9-NEXT: v_add_u32_e32 v1, 8, v1
|
||||
; GFX9-NEXT: v_add_u32_e32 v2, 8, v2
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: v_alignbit_b32 v1, v8, v5, v1
|
||||
; GFX9-NEXT: v_alignbit_b32 v2, v4, v3, v2
|
||||
; GFX9-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:3
|
||||
; GFX9-NEXT: buffer_store_byte_d16_hi v2, v0, s[0:3], 0 offen offset:5
|
||||
; GFX9-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], 0 offen offset:2
|
||||
; GFX9-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen
|
||||
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v2
|
||||
; GFX9-NEXT: buffer_store_byte v1, v0, s[0:3], 0 offen offset:4
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6
|
||||
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 4, v7
|
||||
; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24
|
||||
; GFX9-NEXT: v_add_u32_e32 v4, 8, v4
|
||||
; GFX9-NEXT: v_alignbit_b32 v0, v0, v2, v4
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 8, v3
|
||||
; GFX9-NEXT: v_sub_u32_e32 v3, v5, v6
|
||||
; GFX9-NEXT: v_add_u32_e32 v3, 8, v3
|
||||
; GFX9-NEXT: v_alignbit_b32 v1, v1, v2, v3
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; R600-LABEL: v_fshr_v2i24:
|
||||
|
@ -344,6 +344,16 @@ define void @void_func_v16i16(<16 x i16> %arg0) #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}void_func_v2i24:
|
||||
; GCN: v_add_{{i|u}}32_e32 v0, {{(vcc, )?}}v0, v1
|
||||
define void @void_func_v2i24(<2 x i24> %arg0) #0 {
|
||||
%elt0 = extractelement <2 x i24> %arg0, i32 0
|
||||
%elt1 = extractelement <2 x i24> %arg0, i32 1
|
||||
%add = add i24 %elt0, %elt1
|
||||
store i24 %add, i24 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}void_func_v2f32:
|
||||
; GCN-NOT: v[0:1]
|
||||
; GCN-NOT: v0
|
||||
|
Loading…
x
Reference in New Issue
Block a user