1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-20 19:42:54 +02:00
llvm-mirror/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
Matt Arsenault 7932e530a0 AMDGPU: Make i64 loads/stores promote to v2i32
Now that unaligned access expansion should not attempt
to produce i64 accesses, we can remove the hack in
PreprocessISelDAG where this is done.

This allows splitting i64 private accesses while
allowing the new add nodes indexing the vector components
can be folded with the base pointer arithmetic.

llvm-svn: 268293
2016-05-02 20:07:26 +00:00

121 lines
5.9 KiB
LLVM

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; Extract the high bit of the 1st quarter
; GCN-LABEL: {{^}}v_uextract_bit_31_i128:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
define void @v_uextract_bit_31_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
%ld.64 = load i128, i128 addrspace(1)* %in.gep
%srl = lshr i128 %ld.64, 31
%bit = and i128 %srl, 1
store i128 %bit, i128 addrspace(1)* %out.gep
ret void
}
; Extract the high bit of the 2nd quarter
; GCN-LABEL: {{^}}v_uextract_bit_63_i128:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
define void @v_uextract_bit_63_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
%ld.64 = load i128, i128 addrspace(1)* %in.gep
%srl = lshr i128 %ld.64, 63
%bit = and i128 %srl, 1
store i128 %bit, i128 addrspace(1)* %out.gep
ret void
}
; Extract the high bit of the 3rd quarter
; GCN-LABEL: {{^}}v_uextract_bit_95_i128:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
define void @v_uextract_bit_95_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
%ld.64 = load i128, i128 addrspace(1)* %in.gep
%srl = lshr i128 %ld.64, 95
%bit = and i128 %srl, 1
store i128 %bit, i128 addrspace(1)* %out.gep
ret void
}
; Extract the high bit of the 4th quarter
; GCN-LABEL: {{^}}v_uextract_bit_127_i128:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
define void @v_uextract_bit_127_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
%ld.64 = load i128, i128 addrspace(1)* %in.gep
%srl = lshr i128 %ld.64, 127
%bit = and i128 %srl, 1
store i128 %bit, i128 addrspace(1)* %out.gep
ret void
}
; Spans more than 2 dword boundaries
; GCN-LABEL: {{^}}v_uextract_bit_34_100_i128:
; GCN: buffer_load_dwordx2 v{{\[}}[[VAL2:[0-9]+]]:[[VAL3:[0-9]+]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN: buffer_load_dword v[[VAL1:[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_lshl_b64 v{{\[}}[[SHLLO:[0-9]+]]:[[SHLHI:[0-9]+]]{{\]}}, v{{\[}}[[VAL2]]:[[VAL3]]{{\]}}, 30
; GCN-DAG: v_lshrrev_b32_e32 v[[ELT1PART:[0-9]+]], 2, v[[VAL1]]
; GCN-DAG: v_bfe_u32 v[[ELT2PART:[0-9]+]], v[[VAL3]], 2, 2{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN-DAG: v_or_b32_e32 v[[OR0:[0-9]+]], v[[SHLLO]], v[[ELT1PART]]
; GCN-DAG: v_or_b32_e32 v[[OR1:[0-9]+]], 0, v[[SHLHI]]{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ELT2PART]]:[[ZERO]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[OR0]]:[[OR1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
define void @v_uextract_bit_34_100_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
%out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
%ld.64 = load i128, i128 addrspace(1)* %in.gep
%srl = lshr i128 %ld.64, 34
%bit = and i128 %srl, 73786976294838206463
store i128 %bit, i128 addrspace(1)* %out.gep
ret void
}
declare i32 @llvm.amdgcn.workitem.id.x() #0
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }