mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
AMDGPU: Fix scalar_to_vector for v4i16/v4f16
llvm-svn: 335161
This commit is contained in:
parent
808144795a
commit
5622f9d716
@ -493,9 +493,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
||||
case ISD::BUILD_VECTOR: {
|
||||
EVT VT = N->getValueType(0);
|
||||
unsigned NumVectorElts = VT.getVectorNumElements();
|
||||
|
||||
if (VT == MVT::v2i16 || VT == MVT::v2f16) {
|
||||
if (Opc == ISD::BUILD_VECTOR) {
|
||||
if (VT.getScalarSizeInBits() == 16) {
|
||||
if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
|
||||
uint32_t LHSVal, RHSVal;
|
||||
if (getConstantValue(N->getOperand(0), LHSVal) &&
|
||||
getConstantValue(N->getOperand(1), RHSVal)) {
|
||||
|
@ -1489,6 +1489,16 @@ def : GCNPat <
|
||||
// (COPY $src0)
|
||||
// >;
|
||||
|
||||
def : GCNPat <
|
||||
(v4i16 (scalar_to_vector i16:$src0)),
|
||||
(INSERT_SUBREG (IMPLICIT_DEF), $src0, sub0)
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(v4f16 (scalar_to_vector f16:$src0)),
|
||||
(INSERT_SUBREG (IMPLICIT_DEF), $src0, sub0)
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Fract Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -29,6 +29,39 @@ define amdgpu_kernel void @scalar_to_vector_v2f32(<4 x i16> addrspace(1)* %out,
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}scalar_to_vector_v4i16:
|
||||
; VI: v_lshlrev_b16_e32
|
||||
; VI: v_lshlrev_b16_e32
|
||||
; VI: v_or_b32_e32
|
||||
; VI: v_lshlrev_b32
|
||||
; VI: v_or_b32_sdwa
|
||||
; VI: v_or_b32_sdwa
|
||||
define amdgpu_kernel void @scalar_to_vector_v4i16() {
|
||||
bb:
|
||||
%tmp = load <2 x i8>, <2 x i8> addrspace(1)* undef, align 1
|
||||
%tmp1 = shufflevector <2 x i8> %tmp, <2 x i8> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 0, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
|
||||
store <8 x i8> %tmp2, <8 x i8> addrspace(1)* undef, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}scalar_to_vector_v4f16:
|
||||
; VI: v_lshlrev_b16_e32
|
||||
; VI: v_lshlrev_b16_e32
|
||||
; VI: v_or_b32_e32
|
||||
; VI: v_lshlrev_b32
|
||||
; VI: v_or_b32_sdwa
|
||||
; VI: v_or_b32_sdwa
|
||||
define amdgpu_kernel void @scalar_to_vector_v4f16() {
|
||||
bb:
|
||||
%load = load half, half addrspace(1)* undef, align 1
|
||||
%tmp = bitcast half %load to <2 x i8>
|
||||
%tmp1 = shufflevector <2 x i8> %tmp, <2 x i8> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 0, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
|
||||
store <8 x i8> %tmp2, <8 x i8> addrspace(1)* undef, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; Getting a SCALAR_TO_VECTOR seems to be tricky. These cases managed
|
||||
; to produce one, but for some reason never made it to selection.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user