1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00
llvm-mirror/test/CodeGen/AMDGPU/extract_vector_elt-i8.ll
Yaxun Liu c23a93c71f CodeGen: Fix pointer info in SplitVecOp_EXTRACT_VECTOR_ELT/SplitVecRes_INSERT_VECTOR_ELT
Two issues found when doing codegen for splitting vector with non-zero alloca addr space:

DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT/SplitVecOp_EXTRACT_VECTOR_ELT uses dummy pointer info for creating
SDStore. Since one pointer operand contains multiply and add, InferPointerInfo is unable to
infer the correct pointer info, which ends up with a dummy pointer info for the target to lower
store and results in isel failure. The fix is to introduce MachinePointerInfo::getUnknownStack to
represent MachinePointerInfo which is known in alloca address space but without other information.

TargetLowering::getVectorElementPointer uses value type of pointer in addr space 0 for
multiplication of index and then add it to the pointer. However the pointer may be in an addr
space which has different size than addr space 0. The fix is to use the pointer value type for
index multiplication.

Differential Revision: https://reviews.llvm.org/D39758

llvm-svn: 319622
2017-12-02 22:13:22 +00:00

152 lines
5.0 KiB
LLVM

; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}extract_vector_elt_v1i8:
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
define amdgpu_kernel void @extract_vector_elt_v1i8(i8 addrspace(1)* %out, <1 x i8> %foo) #0 {
%p0 = extractelement <1 x i8> %foo, i32 0
store i8 %p0, i8 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}extract_vector_elt_v2i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
define amdgpu_kernel void @extract_vector_elt_v2i8(i8 addrspace(1)* %out, <2 x i8> %foo) #0 {
%p0 = extractelement <2 x i8> %foo, i32 0
%p1 = extractelement <2 x i8> %foo, i32 1
%out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
store i8 %p1, i8 addrspace(1)* %out
store i8 %p0, i8 addrspace(1)* %out1
ret void
}
; FUNC-LABEL: {{^}}extract_vector_elt_v3i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
define amdgpu_kernel void @extract_vector_elt_v3i8(i8 addrspace(1)* %out, <3 x i8> %foo) #0 {
%p0 = extractelement <3 x i8> %foo, i32 0
%p1 = extractelement <3 x i8> %foo, i32 2
%out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
store i8 %p1, i8 addrspace(1)* %out
store i8 %p0, i8 addrspace(1)* %out1
ret void
}
; FUNC-LABEL: {{^}}extract_vector_elt_v4i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
define amdgpu_kernel void @extract_vector_elt_v4i8(i8 addrspace(1)* %out, <4 x i8> %foo) #0 {
%p0 = extractelement <4 x i8> %foo, i32 0
%p1 = extractelement <4 x i8> %foo, i32 2
%out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
store i8 %p1, i8 addrspace(1)* %out
store i8 %p0, i8 addrspace(1)* %out1
ret void
}
; FUNC-LABEL: {{^}}extract_vector_elt_v8i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
define amdgpu_kernel void @extract_vector_elt_v8i8(i8 addrspace(1)* %out, <8 x i8> %foo) #0 {
%p0 = extractelement <8 x i8> %foo, i32 0
%p1 = extractelement <8 x i8> %foo, i32 2
%out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
store i8 %p1, i8 addrspace(1)* %out
store i8 %p0, i8 addrspace(1)* %out1
ret void
}
; FUNC-LABEL: {{^}}extract_vector_elt_v16i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
define amdgpu_kernel void @extract_vector_elt_v16i8(i8 addrspace(1)* %out, <16 x i8> %foo) #0 {
%p0 = extractelement <16 x i8> %foo, i32 0
%p1 = extractelement <16 x i8> %foo, i32 2
%out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
store i8 %p1, i8 addrspace(1)* %out
store i8 %p0, i8 addrspace(1)* %out1
ret void
}
; FUNC-LABEL: {{^}}extract_vector_elt_v32i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
define amdgpu_kernel void @extract_vector_elt_v32i8(i8 addrspace(1)* %out, <32 x i8> %foo) #0 {
%p0 = extractelement <32 x i8> %foo, i32 0
%p1 = extractelement <32 x i8> %foo, i32 2
%out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
store i8 %p1, i8 addrspace(1)* %out
store i8 %p0, i8 addrspace(1)* %out1
ret void
}
; FUNC-LABEL: {{^}}extract_vector_elt_v64i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
define amdgpu_kernel void @extract_vector_elt_v64i8(i8 addrspace(1)* %out, <64 x i8> %foo) #0 {
%p0 = extractelement <64 x i8> %foo, i32 0
%p1 = extractelement <64 x i8> %foo, i32 2
%out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
store i8 %p1, i8 addrspace(1)* %out
store i8 %p0, i8 addrspace(1)* %out1
ret void
}
; FUNC-LABEL: {{^}}dynamic_extract_vector_elt_v3i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
define amdgpu_kernel void @dynamic_extract_vector_elt_v3i8(i8 addrspace(1)* %out, <3 x i8> %foo, i32 %idx) #0 {
%p0 = extractelement <3 x i8> %foo, i32 %idx
%out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
store i8 %p0, i8 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}dynamic_extract_vector_elt_v4i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
define amdgpu_kernel void @dynamic_extract_vector_elt_v4i8(i8 addrspace(1)* %out, <4 x i8> %foo, i32 %idx) #0 {
%p0 = extractelement <4 x i8> %foo, i32 %idx
%out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
store i8 %p0, i8 addrspace(1)* %out
ret void
}
attributes #0 = { nounwind }