mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
AMDGPU/GlobalISel: Work around forming illegal zextload after legalize
Selection would fail after the post-legalize combiner put an illegal zextload back together. The base combiner has parameters to only allow legal operations, but they appear to be unused. I also don't see a nice way to remove a single entry from all_combines, so just hack around this.
This commit is contained in:
parent
8105dac4cc
commit
70f3ab2891
@ -37,8 +37,18 @@ def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<
|
||||
let DisableRuleOption = "amdgpuprelegalizercombiner-disable-rule";
|
||||
}
|
||||
|
||||
|
||||
// FIXME: combines_for_extload can introduce illegal extloads which
|
||||
// aren't re-legalized.
|
||||
// FIXME: Is there a way to remove a single item from all_combines?
|
||||
def all_combines_minus_extload : GICombineGroup<[trivial_combines,
|
||||
ptr_add_immed_chain, combine_indexed_load_store, undef_combines,
|
||||
identity_combines]
|
||||
>;
|
||||
|
||||
def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper<
|
||||
"AMDGPUGenPostLegalizerCombinerHelper", [all_combines,
|
||||
gfx6gfx7_combines, uchar_to_float]> {
|
||||
"AMDGPUGenPostLegalizerCombinerHelper",
|
||||
[all_combines_minus_extload, gfx6gfx7_combines,
|
||||
uchar_to_float]> {
|
||||
let DisableRuleOption = "amdgpupostlegalizercombiner-disable-rule";
|
||||
}
|
||||
|
217
test/CodeGen/AMDGPU/GlobalISel/zextload.ll
Normal file
217
test/CodeGen/AMDGPU/GlobalISel/zextload.ll
Normal file
@ -0,0 +1,217 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
|
||||
|
||||
define i64 @zextload_global_i1_to_i64(i1 addrspace(1)* %ptr) {
|
||||
; GFX9-LABEL: zextload_global_i1_to_i64:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: zextload_global_i1_to_i64:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: zextload_global_i1_to_i64:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_mov_b32 s6, 0
|
||||
; GFX6-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GFX6-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: v_and_b32_e32 v0, 1, v0
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
%load = load i1, i1 addrspace(1)* %ptr
|
||||
%ext = zext i1 %load to i64
|
||||
ret i64 %ext
|
||||
}
|
||||
|
||||
define i64 @zextload_global_i8_to_i64(i8 addrspace(1)* %ptr) {
|
||||
; GFX9-LABEL: zextload_global_i8_to_i64:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: zextload_global_i8_to_i64:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: zextload_global_i8_to_i64:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_mov_b32 s6, 0
|
||||
; GFX6-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GFX6-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
%load = load i8, i8 addrspace(1)* %ptr
|
||||
%ext = zext i8 %load to i64
|
||||
ret i64 %ext
|
||||
}
|
||||
|
||||
define i64 @zextload_global_i16_to_i64(i16 addrspace(1)* %ptr) {
|
||||
; GFX9-LABEL: zextload_global_i16_to_i64:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: global_load_ushort v0, v[0:1], off
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: zextload_global_i16_to_i64:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: zextload_global_i16_to_i64:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_mov_b32 s6, 0
|
||||
; GFX6-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GFX6-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
%load = load i16, i16 addrspace(1)* %ptr
|
||||
%ext = zext i16 %load to i64
|
||||
ret i64 %ext
|
||||
}
|
||||
|
||||
define i64 @zextload_global_i32_to_i64(i32 addrspace(1)* %ptr) {
|
||||
; GFX9-LABEL: zextload_global_i32_to_i64:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: global_load_dword v0, v[0:1], off
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: zextload_global_i32_to_i64:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: flat_load_dword v0, v[0:1]
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: zextload_global_i32_to_i64:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_mov_b32 s6, 0
|
||||
; GFX6-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GFX6-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
%load = load i32, i32 addrspace(1)* %ptr
|
||||
%ext = zext i32 %load to i64
|
||||
ret i64 %ext
|
||||
}
|
||||
|
||||
define i96 @zextload_global_i32_to_i96(i32 addrspace(1)* %ptr) {
|
||||
; GFX9-LABEL: zextload_global_i32_to_i96:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: global_load_dword v0, v[0:1], off
|
||||
; GFX9-NEXT: s_mov_b32 s4, 0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: zextload_global_i32_to_i96:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: flat_load_dword v0, v[0:1]
|
||||
; GFX8-NEXT: s_mov_b32 s4, 0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX8-NEXT: v_mov_b32_e32 v2, s4
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: zextload_global_i32_to_i96:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: s_mov_b32 s6, 0
|
||||
; GFX6-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GFX6-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
|
||||
; GFX6-NEXT: s_mov_b32 s8, 0
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s8
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, s8
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
%load = load i32, i32 addrspace(1)* %ptr
|
||||
%ext = zext i32 %load to i96
|
||||
ret i96 %ext
|
||||
}
|
||||
|
||||
define i128 @zextload_global_i32_to_i128(i32 addrspace(1)* %ptr) {
|
||||
; GFX9-LABEL: zextload_global_i32_to_i128:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v3, v1
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX9-NEXT: global_load_dword v0, v[2:3], off
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v3, 0
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX8-LABEL: zextload_global_i32_to_i128:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_mov_b32_e32 v3, v1
|
||||
; GFX8-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX8-NEXT: flat_load_dword v0, v[2:3]
|
||||
; GFX8-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v3, 0
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: zextload_global_i32_to_i128:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: v_mov_b32_e32 v3, v1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX6-NEXT: s_mov_b32 s6, 0
|
||||
; GFX6-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GFX6-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX6-NEXT: buffer_load_dword v0, v[2:3], s[4:7], 0 addr64
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX6-NEXT: v_mov_b32_e32 v3, 0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||
%load = load i32, i32 addrspace(1)* %ptr
|
||||
%ext = zext i32 %load to i128
|
||||
ret i128 %ext
|
||||
}
|
Loading…
Reference in New Issue
Block a user