mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-23 13:02:52 +02:00
7932e530a0
Now that unaligned access expansion should not attempt to produce i64 accesses, we can remove the hack in PreprocessISelDAG where this is done. This allows splitting i64 private accesses while allowing the new add nodes indexing the vector components to be folded with the base pointer arithmetic. llvm-svn: 268293
121 lines
5.9 KiB
LLVM
121 lines
5.9 KiB
LLVM
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Extract the high bit of the 1st quarter
; Each work item loads the i128 at %in[id.x], shifts it right by 31, masks the
; result with 1, and stores the i128 result to %out[id.x]. The directives
; below expect a single dword load with no offset, a 32-bit right shift by 31,
; and two dwordx2 stores whose other three dwords come from zero registers.
; GCN-LABEL: {{^}}v_uextract_bit_31_i128:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}

; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]

; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
define void @v_uextract_bit_31_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
  ; Index both buffers by the work item's x id.
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
  ; Full-width load; only bit 31 of it is used by the shift/mask below.
  %ld.128 = load i128, i128 addrspace(1)* %in.gep
  ; Move bit 31 down to bit 0 and clear everything else.
  %srl = lshr i128 %ld.128, 31
  %bit = and i128 %srl, 1
  store i128 %bit, i128 addrspace(1)* %out.gep
  ret void
}

; Extract the high bit of the 2nd quarter
; Each work item loads the i128 at %in[id.x], shifts it right by 63, masks the
; result with 1, and stores the i128 result to %out[id.x]. The directives
; below expect a single dword load at byte offset 4, a 32-bit right shift by
; 31, and two dwordx2 stores whose other three dwords come from zero registers.
; GCN-LABEL: {{^}}v_uextract_bit_63_i128:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}

; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]

; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
define void @v_uextract_bit_63_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
  ; Index both buffers by the work item's x id.
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
  ; Full-width load; only bit 63 of it is used by the shift/mask below.
  %ld.128 = load i128, i128 addrspace(1)* %in.gep
  ; Move bit 63 down to bit 0 and clear everything else.
  %srl = lshr i128 %ld.128, 63
  %bit = and i128 %srl, 1
  store i128 %bit, i128 addrspace(1)* %out.gep
  ret void
}

; Extract the high bit of the 3rd quarter
; Each work item loads the i128 at %in[id.x], shifts it right by 95, masks the
; result with 1, and stores the i128 result to %out[id.x]. The directives
; below expect a single dword load at byte offset 8, a 32-bit right shift by
; 31, and two dwordx2 stores whose other three dwords come from zero registers.
; GCN-LABEL: {{^}}v_uextract_bit_95_i128:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}

; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]

; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
define void @v_uextract_bit_95_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
  ; Index both buffers by the work item's x id.
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
  ; Full-width load; only bit 95 of it is used by the shift/mask below.
  %ld.128 = load i128, i128 addrspace(1)* %in.gep
  ; Move bit 95 down to bit 0 and clear everything else.
  %srl = lshr i128 %ld.128, 95
  %bit = and i128 %srl, 1
  store i128 %bit, i128 addrspace(1)* %out.gep
  ret void
}

; Extract the high bit of the 4th quarter
; Each work item loads the i128 at %in[id.x], shifts it right by 127, masks
; the result with 1, and stores the i128 result to %out[id.x]. The directives
; below expect a single dword load at byte offset 12, a 32-bit right shift by
; 31, and two dwordx2 stores whose other three dwords come from zero registers.
; GCN-LABEL: {{^}}v_uextract_bit_127_i128:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}

; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]

; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO0]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
define void @v_uextract_bit_127_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
  ; Index both buffers by the work item's x id.
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
  ; Full-width load; only bit 127 of it is used by the shift/mask below.
  %ld.128 = load i128, i128 addrspace(1)* %in.gep
  ; Move bit 127 (the top bit) down to bit 0 and clear everything else.
  %srl = lshr i128 %ld.128, 127
  %bit = and i128 %srl, 1
  store i128 %bit, i128 addrspace(1)* %out.gep
  ret void
}

; Spans more than 2 dword boundaries
; Each work item loads the i128 at %in[id.x], shifts it right by 34, keeps the
; low 66 bits of the shifted value (source bits 34 through 99), and stores the
; i128 result to %out[id.x]. The directives below expect a dwordx2 load at
; byte offset 8 plus a dword load at byte offset 4, with the result assembled
; from a 64-bit shift, a 32-bit shift, a bitfield extract and two ORs.
; GCN-LABEL: {{^}}v_uextract_bit_34_100_i128:
; GCN: buffer_load_dwordx2 v{{\[}}[[VAL2:[0-9]+]]:[[VAL3:[0-9]+]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN: buffer_load_dword v[[VAL1:[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}

; GCN-DAG: v_lshl_b64 v{{\[}}[[SHLLO:[0-9]+]]:[[SHLHI:[0-9]+]]{{\]}}, v{{\[}}[[VAL2]]:[[VAL3]]{{\]}}, 30
; GCN-DAG: v_lshrrev_b32_e32 v[[ELT1PART:[0-9]+]], 2, v[[VAL1]]
; GCN-DAG: v_bfe_u32 v[[ELT2PART:[0-9]+]], v[[VAL3]], 2, 2{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN-DAG: v_or_b32_e32 v[[OR0:[0-9]+]], v[[SHLLO]], v[[ELT1PART]]
; GCN-DAG: v_or_b32_e32 v[[OR1:[0-9]+]], 0, v[[SHLHI]]{{$}}

; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ELT2PART]]:[[ZERO]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[OR0]]:[[OR1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: s_endpgm
define void @v_uextract_bit_34_100_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
  ; Index both buffers by the work item's x id.
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
  %ld.128 = load i128, i128 addrspace(1)* %in.gep
  ; Drop the low 34 bits, then keep 66 bits: the mask is 2^66 - 1.
  %srl = lshr i128 %ld.128, 34
  %bit = and i128 %srl, 73786976294838206463
  store i128 %bit, i128 addrspace(1)* %out.gep
  ret void
}

; Intrinsic called by every kernel in this file to obtain the index used for
; the per-work-item %in/%out element pointers.
declare i32 @llvm.amdgcn.workitem.id.x() #0

; #0 is attached to the intrinsic declaration above; #1 is attached to the
; kernel definitions in this file.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }