mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
AMDGPU: Always allocate emergency stack slot at offset 0
This allows us to ensure that 0 is never a valid pointer to a user object, and ensures that the offset is always legal without needing a register to access it. This comes at the cost of usable offsets and wasted stack space. llvm-svn: 295877
This commit is contained in:
parent
16f94c0682
commit
d47230b13f
@ -391,11 +391,25 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
|
||||
if (!MFI.hasStackObjects())
|
||||
return;
|
||||
|
||||
assert(RS && "RegScavenger required if spilling");
|
||||
int ScavengeFI = MFI.CreateStackObject(
|
||||
AMDGPU::SGPR_32RegClass.getSize(),
|
||||
AMDGPU::SGPR_32RegClass.getAlignment(), false);
|
||||
RS->addScavengingFrameIndex(ScavengeFI);
|
||||
bool MayNeedScavengingEmergencySlot = MFI.hasStackObjects();
|
||||
if (MayNeedScavengingEmergencySlot) {
|
||||
// We force this to be at offset 0 so no user object ever has 0 as an
|
||||
// address, so we may use 0 as an invalid pointer value. This is because
|
||||
// LLVM assumes 0 is an invalid pointer in address space 0. Because alloca
|
||||
// is required to be address space 0, we are forced to accept this for
|
||||
// now. Ideally we could have the stack in another address space with 0 as a
|
||||
// valid pointer, and -1 as the null value.
|
||||
//
|
||||
// This will also waste additional space when user stack objects require > 4
|
||||
// byte alignment.
|
||||
//
|
||||
// The main cost here is losing the offset for addressing modes. However
|
||||
// this also ensures we shouldn't need a register for the offset when
|
||||
// emergency scavenging.
|
||||
int ScavengeFI = MFI.CreateFixedObject(
|
||||
AMDGPU::SGPR_32RegClass.getSize(), 0, false);
|
||||
RS->addScavengingFrameIndex(ScavengeFI);
|
||||
}
|
||||
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
|
@ -227,8 +227,8 @@ for.end:
|
||||
|
||||
; R600: MOVA_INT
|
||||
|
||||
; SI-ALLOCA-DAG: buffer_store_short v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} ; encoding: [0x00,0x00,0x68,0xe0
|
||||
; SI-ALLOCA-DAG: buffer_store_short v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:2 ; encoding: [0x02,0x00,0x68,0xe0
|
||||
; SI-ALLOCA-DAG: buffer_store_short v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:6 ; encoding: [0x06,0x00,0x68,0xe0
|
||||
; SI-ALLOCA-DAG: buffer_store_short v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4 ; encoding: [0x04,0x00,0x68,0xe0
|
||||
; Loaded value is 0 or 1, so sext will become zext, so we get buffer_load_ushort instead of buffer_load_sshort.
|
||||
; SI-ALLOCA: buffer_load_sshort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
|
||||
|
||||
@ -253,11 +253,11 @@ entry:
|
||||
|
||||
; R600: MOVA_INT
|
||||
|
||||
; SI-PROMOTE-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} ; encoding:
|
||||
; SI-PROMOTE-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:1 ; encoding:
|
||||
; SI-PROMOTE-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4 ; encoding:
|
||||
; SI-PROMOTE-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:5 ; encoding:
|
||||
|
||||
; SI-ALLOCA-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} ; encoding: [0x00,0x00,0x60,0xe0
|
||||
; SI-ALLOCA-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:1 ; encoding: [0x01,0x00,0x60,0xe0
|
||||
; SI-ALLOCA-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4 ; encoding: [0x04,0x00,0x60,0xe0
|
||||
; SI-ALLOCA-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:5 ; encoding: [0x05,0x00,0x60,0xe0
|
||||
define void @char_array(i32 addrspace(1)* %out, i32 %index) #0 {
|
||||
entry:
|
||||
%0 = alloca [2 x i8]
|
||||
|
@ -12,11 +12,7 @@ declare void @llvm.amdgcn.s.barrier() #2
|
||||
|
||||
; SI-LABEL: {{^}}test_private_array_ptr_calc:
|
||||
|
||||
; FIXME: We end up with zero argument for ADD, because
|
||||
; SIRegisterInfo::eliminateFrameIndex() blindly replaces the frame index
|
||||
; with the appropriate offset. We should fold this into the store.
|
||||
|
||||
; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 0, v{{[0-9]+}}
|
||||
; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 16, v{{[0-9]+}}
|
||||
; SI-ALLOCA: buffer_store_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:64
|
||||
; SI-ALLOCA: s_barrier
|
||||
; SI-ALLOCA: buffer_load_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:64
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: llc -mtriple=amdgcn-- -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}store_fi_lifetime:
|
||||
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}}
|
||||
; GCN: buffer_store_dword [[FI]]
|
||||
define void @store_fi_lifetime(i32 addrspace(1)* %out, i32 %in) #0 {
|
||||
entry:
|
||||
@ -15,7 +15,7 @@ entry:
|
||||
; GCN-LABEL: {{^}}stored_fi_to_lds:
|
||||
; GCN: s_load_dword [[LDSPTR:s[0-9]+]]
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, off,
|
||||
; GCN: v_mov_b32_e32 [[ZERO0:v[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 [[ZERO0:v[0-9]+]], 4{{$}}
|
||||
; GCN: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]]
|
||||
; GCN: ds_write_b32 [[VLDSPTR]], [[ZERO0]]
|
||||
define void @stored_fi_to_lds(float* addrspace(3)* %ptr) #0 {
|
||||
@ -27,16 +27,16 @@ define void @stored_fi_to_lds(float* addrspace(3)* %ptr) #0 {
|
||||
|
||||
; Offset is applied
|
||||
; GCN-LABEL: {{^}}stored_fi_to_lds_2_small_objects:
|
||||
; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}{{$}}
|
||||
; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 4{{$}}
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4{{$}}
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:8{{$}}
|
||||
|
||||
; GCN-DAG: s_load_dword [[LDSPTR:s[0-9]+]]
|
||||
|
||||
; GCN-DAG: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]]
|
||||
; GCN: ds_write_b32 [[VLDSPTR]], [[ZERO]]
|
||||
|
||||
; GCN-DAG: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}}
|
||||
; GCN-DAG: v_mov_b32_e32 [[FI1:v[0-9]+]], 8{{$}}
|
||||
; GCN: ds_write_b32 [[VLDSPTR]], [[FI1]]
|
||||
define void @stored_fi_to_lds_2_small_objects(float* addrspace(3)* %ptr) #0 {
|
||||
%tmp0 = alloca float
|
||||
@ -51,9 +51,9 @@ define void @stored_fi_to_lds_2_small_objects(float* addrspace(3)* %ptr) #0 {
|
||||
; Same frame index is used multiple times in the store
|
||||
; GCN-LABEL: {{^}}stored_fi_to_self:
|
||||
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x4d2{{$}}
|
||||
; GCN: buffer_store_dword [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}{{$}}
|
||||
; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dword [[ZERO]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}{{$}}
|
||||
; GCN: buffer_store_dword [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4{{$}}
|
||||
; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 4{{$}}
|
||||
; GCN: buffer_store_dword [[ZERO]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4{{$}}
|
||||
define void @stored_fi_to_self() #0 {
|
||||
%tmp = alloca i32*
|
||||
|
||||
@ -66,13 +66,13 @@ define void @stored_fi_to_self() #0 {
|
||||
|
||||
; GCN-LABEL: {{^}}stored_fi_to_self_offset:
|
||||
; GCN-DAG: v_mov_b32_e32 [[K0:v[0-9]+]], 32{{$}}
|
||||
; GCN: buffer_store_dword [[K0]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}{{$}}
|
||||
; GCN: buffer_store_dword [[K0]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4{{$}}
|
||||
|
||||
; GCN-DAG: v_mov_b32_e32 [[K1:v[0-9]+]], 0x4d2{{$}}
|
||||
; GCN: buffer_store_dword [[K1]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:2048{{$}}
|
||||
; GCN: buffer_store_dword [[K1]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:2052{{$}}
|
||||
|
||||
; GCN: v_mov_b32_e32 [[OFFSETK:v[0-9]+]], 0x800{{$}}
|
||||
; GCN: buffer_store_dword [[OFFSETK]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:2048{{$}}
|
||||
; GCN: v_mov_b32_e32 [[OFFSETK:v[0-9]+]], 0x804{{$}}
|
||||
; GCN: buffer_store_dword [[OFFSETK]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:2052{{$}}
|
||||
define void @stored_fi_to_self_offset() #0 {
|
||||
%tmp0 = alloca [512 x i32]
|
||||
%tmp1 = alloca i32*
|
||||
@ -89,15 +89,15 @@ define void @stored_fi_to_self_offset() #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}stored_fi_to_fi:
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}{{$}}
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4{{$}}
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:8{{$}}
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:12{{$}}
|
||||
|
||||
; GCN: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}}
|
||||
; GCN: buffer_store_dword [[FI1]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:8{{$}}
|
||||
; GCN: v_mov_b32_e32 [[FI1:v[0-9]+]], 8{{$}}
|
||||
; GCN: buffer_store_dword [[FI1]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:12{{$}}
|
||||
|
||||
; GCN: v_mov_b32_e32 [[FI2:v[0-9]+]], 8{{$}}
|
||||
; GCN: buffer_store_dword [[FI2]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4{{$}}
|
||||
; GCN: v_mov_b32_e32 [[FI2:v[0-9]+]], 12{{$}}
|
||||
; GCN: buffer_store_dword [[FI2]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:8{{$}}
|
||||
define void @stored_fi_to_fi() #0 {
|
||||
%tmp0 = alloca i32*
|
||||
%tmp1 = alloca i32*
|
||||
@ -115,8 +115,8 @@ define void @stored_fi_to_fi() #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}stored_fi_to_global:
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
|
||||
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4{{$}}
|
||||
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}}
|
||||
; GCN: buffer_store_dword [[FI]]
|
||||
define void @stored_fi_to_global(float* addrspace(1)* %ptr) #0 {
|
||||
%tmp = alloca float
|
||||
@ -127,14 +127,14 @@ define void @stored_fi_to_global(float* addrspace(1)* %ptr) #0 {
|
||||
|
||||
; Offset is applied
|
||||
; GCN-LABEL: {{^}}stored_fi_to_global_2_small_objects:
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}{{$}}
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4{{$}}
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:8{{$}}
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:12{{$}}
|
||||
|
||||
; GCN: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}}
|
||||
; GCN: v_mov_b32_e32 [[FI1:v[0-9]+]], 8{{$}}
|
||||
; GCN: buffer_store_dword [[FI1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
|
||||
; GCN-DAG: v_mov_b32_e32 [[FI2:v[0-9]+]], 8{{$}}
|
||||
; GCN-DAG: v_mov_b32_e32 [[FI2:v[0-9]+]], 12{{$}}
|
||||
; GCN: buffer_store_dword [[FI2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
define void @stored_fi_to_global_2_small_objects(float* addrspace(1)* %ptr) #0 {
|
||||
%tmp0 = alloca float
|
||||
@ -150,10 +150,10 @@ define void @stored_fi_to_global_2_small_objects(float* addrspace(1)* %ptr) #0 {
|
||||
|
||||
; GCN-LABEL: {{^}}stored_fi_to_global_huge_frame_offset:
|
||||
; GCN: v_mov_b32_e32 [[BASE_0:v[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dword [[BASE_0]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
|
||||
; GCN: buffer_store_dword [[BASE_0]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4{{$}}
|
||||
|
||||
; FIXME: Re-initialize
|
||||
; GCN: v_mov_b32_e32 [[BASE_0_1:v[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 [[BASE_0_1:v[0-9]+]], 4{{$}}
|
||||
|
||||
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
|
||||
; GCN-DAG: v_add_i32_e32 [[BASE_1_OFF_1:v[0-9]+]], vcc, 0x3ffc, [[BASE_0_1]]
|
||||
@ -184,7 +184,7 @@ define void @stored_fi_to_global_huge_frame_offset(i32* addrspace(1)* %ptr) #0 {
|
||||
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
|
||||
; GCN: s_add_u32 s{{[0-9]+}}, s[[PC_LO]], g1@gotpcrel32@lo+4
|
||||
; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC_HI]], g1@gotpcrel32@hi+4
|
||||
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}}
|
||||
; GCN: buffer_store_dword [[FI]]
|
||||
define void @cannot_select_assertzext_valuetype(i32 addrspace(1)* %out, i32 %idx) #0 {
|
||||
entry:
|
||||
|
@ -130,6 +130,47 @@ done:
|
||||
; GCN: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}}
|
||||
; GCN: {{^}}BB4_2:
|
||||
; Codegen test body: allocates a [512 x i32] stack array and accesses it only
; through %alloca.gep, a GEP at element index 1022 (1022 * 4 = 4088 bytes from
; the start of the alloca; note this index is past the declared 512 elements).
; The %if block is entered when the mbcnt.lo result is non-zero; it does a
; volatile store and load through that pointer. %endif reloads the same slot
; and writes both values to the two global output pointers.
; NOTE(review): the GCN check preceding this function expects offset:4092,
; presumably 4088 plus the 4-byte emergency slot reserved at offset 0 -- confirm.
define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
|
||||
entry:
|
||||
%alloca = alloca [512 x i32], align 4
|
||||
%out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
|
||||
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
|
||||
; %add.arg is not referenced again in this function body.
%add.arg = add i32 %arg, 8
|
||||
%alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1022
|
||||
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
|
||||
%tmp0 = icmp eq i32 %tid, 0
|
||||
br i1 %tmp0, label %endif, label %if
|
||||
|
||||
if:
|
||||
store volatile i32 123, i32* %alloca.gep
|
||||
%tmp1 = load volatile i32, i32* %alloca.gep
|
||||
br label %endif
|
||||
|
||||
endif:
|
||||
; %x is the value loaded in %if, or 0 when control arrives straight from %entry.
%x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
|
||||
store i32 %x, i32 addrspace(1)* %out.gep.0
|
||||
%load = load volatile i32, i32* %alloca.gep
|
||||
store i32 %load, i32 addrspace(1)* %out.gep.1
|
||||
br label %done
|
||||
|
||||
done:
|
||||
ret void
|
||||
}
|
||||
|
||||
; This ends up not fitting due to the reserved 4 bytes at offset 0
|
||||
; OPT-LABEL: @test_sink_scratch_small_offset_i32_reserved(
|
||||
; OPT-NOT: getelementptr [512 x i32]
|
||||
; OPT: br i1
|
||||
; OPT: ptrtoint
|
||||
|
||||
; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32_reserved:
|
||||
; GCN: s_and_saveexec_b64
|
||||
; GCN: v_mov_b32_e32 [[BASE_FI0:v[0-9]+]], 4
|
||||
; GCN: buffer_store_dword {{v[0-9]+}}, [[BASE_FI0]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
|
||||
; GCN: v_mov_b32_e32 [[BASE_FI1:v[0-9]+]], 4
|
||||
; GCN: buffer_load_dword {{v[0-9]+}}, [[BASE_FI1]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
|
||||
; GCN: {{^BB[0-9]+}}_2:
|
||||
|
||||
define void @test_sink_scratch_small_offset_i32_reserved(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
|
||||
entry:
|
||||
%alloca = alloca [512 x i32], align 4
|
||||
%out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
|
||||
@ -165,7 +206,7 @@ done:
|
||||
; GCN: s_and_saveexec_b64
|
||||
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
|
||||
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
|
||||
; GCN: {{^}}BB5_2:
|
||||
; GCN: {{^BB[0-9]+}}_2:
|
||||
define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
|
||||
entry:
|
||||
%alloca = alloca [512 x i32], align 4
|
||||
@ -197,7 +238,7 @@ done:
|
||||
; GCN: s_and_saveexec_b64
|
||||
; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
|
||||
; GCN: {{^}}BB6_2:
|
||||
; GCN: {{^BB[0-9]+}}_2:
|
||||
define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
|
||||
entry:
|
||||
%offset.ext = zext i32 %offset to i64
|
||||
|
@ -701,7 +701,7 @@ define void @commute_uno_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
|
||||
; GCN-LABEL: {{^}}commute_frameindex:
|
||||
; XGCN: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}}
|
||||
|
||||
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}}
|
||||
; GCN: v_cmp_eq_u32_e32 vcc, [[FI]], v{{[0-9]+}}
|
||||
define void @commute_frameindex(i32 addrspace(1)* nocapture %out) #0 {
|
||||
entry:
|
||||
|
@ -28,13 +28,12 @@
|
||||
|
||||
|
||||
; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]]
|
||||
; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 ; 4-byte Folded Spill
|
||||
; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:4 ; 4-byte Folded Spill
|
||||
; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]]
|
||||
; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:4 ; 4-byte Folded Spill
|
||||
; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:8 ; 4-byte Folded Spill
|
||||
|
||||
; Spill load
|
||||
; VMEM: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill
|
||||
; VGPR: buffer_store_dword [[LOAD0]], off, s[0:3], s7 ; 4-byte Folded Spill
|
||||
; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill
|
||||
|
||||
; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
|
||||
|
||||
@ -44,8 +43,7 @@
|
||||
; GCN: {{^}}BB{{[0-9]+}}_1: ; %if
|
||||
; GCN: s_mov_b32 m0, -1
|
||||
; GCN: ds_read_b32 [[LOAD1:v[0-9]+]]
|
||||
; VMEM: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
|
||||
; VGPR: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], s7 ; 4-byte Folded Reload
|
||||
; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
|
||||
; GCN: s_waitcnt vmcnt(0)
|
||||
|
||||
; Spill val register
|
||||
@ -60,11 +58,11 @@
|
||||
|
||||
|
||||
|
||||
; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 ; 4-byte Folded Reload
|
||||
; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload
|
||||
; VMEM: s_waitcnt vmcnt(0)
|
||||
; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]]
|
||||
|
||||
; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload
|
||||
; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:8 ; 4-byte Folded Reload
|
||||
; VMEM: s_waitcnt vmcnt(0)
|
||||
; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]]
|
||||
|
||||
@ -107,7 +105,7 @@ endif:
|
||||
; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}
|
||||
|
||||
; Spill load
|
||||
; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 ; 4-byte Folded Spill
|
||||
; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:4 ; 4-byte Folded Spill
|
||||
|
||||
; Spill saved exec
|
||||
; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]]
|
||||
@ -115,9 +113,9 @@ endif:
|
||||
|
||||
|
||||
; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]]
|
||||
; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:16 ; 4-byte Folded Spill
|
||||
; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:20 ; 4-byte Folded Spill
|
||||
; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]]
|
||||
; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:20 ; 4-byte Folded Spill
|
||||
; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:24 ; 4-byte Folded Spill
|
||||
|
||||
; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
|
||||
|
||||
@ -127,7 +125,7 @@ endif:
|
||||
|
||||
|
||||
; GCN: [[LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], s7 ; 4-byte Folded Reload
|
||||
; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload
|
||||
; GCN: v_subrev_i32_e32 [[VAL_LOOP:v[0-9]+]], vcc, v{{[0-9]+}}, v[[VAL_LOOP_RELOAD]]
|
||||
; GCN: v_cmp_ne_u32_e32 vcc,
|
||||
; GCN: s_and_b64 vcc, exec, vcc
|
||||
@ -140,11 +138,11 @@ endif:
|
||||
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]]
|
||||
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]]
|
||||
|
||||
; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:16 ; 4-byte Folded Reload
|
||||
; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:20 ; 4-byte Folded Reload
|
||||
; VMEM: s_waitcnt vmcnt(0)
|
||||
; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]]
|
||||
|
||||
; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:20 ; 4-byte Folded Reload
|
||||
; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:24 ; 4-byte Folded Reload
|
||||
; VMEM: s_waitcnt vmcnt(0)
|
||||
; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]]
|
||||
|
||||
@ -187,7 +185,7 @@ end:
|
||||
; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}
|
||||
|
||||
; Spill load
|
||||
; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 ; 4-byte Folded Spill
|
||||
; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:4 ; 4-byte Folded Spill
|
||||
|
||||
; Spill saved exec
|
||||
; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]]
|
||||
@ -244,14 +242,14 @@ end:
|
||||
|
||||
; GCN: BB{{[0-9]+}}_2: ; %if
|
||||
; GCN: ds_read_b32
|
||||
; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 ; 4-byte Folded Reload
|
||||
; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload
|
||||
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]]
|
||||
; GCN: buffer_store_dword [[ADD]], off, s[0:3], s7 offset:[[RESULT_OFFSET]] ; 4-byte Folded Spill
|
||||
; GCN: s_waitcnt vmcnt(0) expcnt(0)
|
||||
; GCN-NEXT: s_branch [[ENDIF:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: [[ELSE]]: ; %else
|
||||
; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 ; 4-byte Folded Reload
|
||||
; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload
|
||||
; GCN: v_subrev_i32_e32 [[SUB:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]]
|
||||
; GCN: buffer_store_dword [[ADD]], off, s[0:3], s7 offset:[[FLOW_RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill
|
||||
; GCN: s_waitcnt vmcnt(0) expcnt(0)
|
||||
|
@ -2,7 +2,7 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}load_i8_sext_private:
|
||||
; SI: buffer_load_sbyte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+$}}
|
||||
; SI: buffer_load_sbyte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4{{$}}
|
||||
define void @load_i8_sext_private(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%tmp0 = alloca i8
|
||||
@ -13,7 +13,7 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}load_i8_zext_private:
|
||||
; SI: buffer_load_ubyte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+$}}
|
||||
; SI: buffer_load_ubyte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4{{$}}
|
||||
define void @load_i8_zext_private(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%tmp0 = alloca i8
|
||||
@ -24,7 +24,7 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}load_i16_sext_private:
|
||||
; SI: buffer_load_sshort v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+$}}
|
||||
; SI: buffer_load_sshort v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4{{$}}
|
||||
define void @load_i16_sext_private(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%tmp0 = alloca i16
|
||||
@ -35,7 +35,7 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}load_i16_zext_private:
|
||||
; SI: buffer_load_ushort v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+$}}
|
||||
; SI: buffer_load_ushort v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4{{$}}
|
||||
define void @load_i16_zext_private(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%tmp0 = alloca i16
|
||||
|
@ -207,14 +207,14 @@ define void @dynamic_insertelement_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16>
|
||||
; GCN: buffer_load_ushort v{{[0-9]+}}, off
|
||||
; GCN: buffer_load_ushort v{{[0-9]+}}, off
|
||||
|
||||
; GCN-DAG: v_mov_b32_e32 [[BASE_FI:v[0-9]+]], 0{{$}}
|
||||
; GCN-DAG: v_mov_b32_e32 [[BASE_FI:v[0-9]+]], 8{{$}}
|
||||
; GCN-DAG: s_and_b32 [[MASK_IDX:s[0-9]+]], s{{[0-9]+}}, 3{{$}}
|
||||
; GCN-DAG: v_or_b32_e32 [[IDX:v[0-9]+]], [[MASK_IDX]], [[BASE_FI]]{{$}}
|
||||
|
||||
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:6
|
||||
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4
|
||||
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:2
|
||||
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
|
||||
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:14
|
||||
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:12
|
||||
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:10
|
||||
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:8
|
||||
; GCN: buffer_store_short v{{[0-9]+}}, [[IDX]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
|
||||
|
||||
; GCN: s_waitcnt
|
||||
@ -235,8 +235,8 @@ define void @dynamic_insertelement_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16>
|
||||
; GCN: buffer_load_ubyte v{{[0-9]+}}, off
|
||||
; GCN: buffer_load_ubyte v{{[0-9]+}}, off
|
||||
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:1
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}{{$}}
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:5
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4
|
||||
|
||||
; GCN: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
|
||||
|
||||
@ -255,9 +255,9 @@ define void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> %a
|
||||
; GCN: buffer_load_ubyte v{{[0-9]+}}, off
|
||||
; GCN: buffer_load_ubyte v{{[0-9]+}}, off
|
||||
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:2
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:1
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}{{$}}
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:6
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:5
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4
|
||||
|
||||
; GCN: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
|
||||
|
||||
@ -279,10 +279,10 @@ define void @dynamic_insertelement_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> %a
|
||||
; GCN: buffer_load_ubyte v{{[0-9]+}}, off
|
||||
; GCN: buffer_load_ubyte v{{[0-9]+}}, off
|
||||
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:3
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:2
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:1
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}{{$}}
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:7
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:6
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:5
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4
|
||||
|
||||
; GCN: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
|
||||
|
||||
@ -396,15 +396,15 @@ define void @dynamic_insertelement_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64>
|
||||
|
||||
; Stack store
|
||||
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:16{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:32{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:48{{$}}
|
||||
|
||||
; Write element
|
||||
; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
|
||||
|
||||
; Stack reload
|
||||
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:16{{$}}
|
||||
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}{{$}}
|
||||
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:32{{$}}
|
||||
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:48{{$}}
|
||||
|
||||
; Store result
|
||||
; GCN: buffer_store_dwordx4
|
||||
@ -421,17 +421,17 @@ define void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x d
|
||||
; GCN-LABEL: {{^}}dynamic_insertelement_v8f64:
|
||||
; GCN-DAG: SCRATCH_RSRC_DWORD
|
||||
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:16{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:32{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:48{{$}}
|
||||
; GCN: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:64{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:80{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:96{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:112{{$}}
|
||||
|
||||
; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
|
||||
|
||||
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:48{{$}}
|
||||
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:32{{$}}
|
||||
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:16{{$}}
|
||||
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}{{$}}
|
||||
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:64{{$}}
|
||||
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:80{{$}}
|
||||
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:96{{$}}
|
||||
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:112{{$}}
|
||||
|
||||
; GCN: buffer_store_dwordx4
|
||||
; GCN: buffer_store_dwordx4
|
||||
|
@ -4,7 +4,7 @@
|
||||
; alignment of the stack
|
||||
|
||||
; CHECK-LABEL: {{^}}no_args:
|
||||
; CHECK: ScratchSize: 8{{$}}
|
||||
; CHECK: ScratchSize: 5{{$}}
|
||||
define void @no_args() {
|
||||
%alloca = alloca i8
|
||||
store volatile i8 0, i8* %alloca
|
||||
@ -12,7 +12,7 @@ define void @no_args() {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}force_align32:
|
||||
; CHECK: ScratchSize: 8{{$}}
|
||||
; CHECK: ScratchSize: 5{{$}}
|
||||
define void @force_align32(<8 x i32>) {
|
||||
%alloca = alloca i8
|
||||
store volatile i8 0, i8* %alloca
|
||||
@ -20,7 +20,7 @@ define void @force_align32(<8 x i32>) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}force_align64:
|
||||
; CHECK: ScratchSize: 8{{$}}
|
||||
; CHECK: ScratchSize: 5{{$}}
|
||||
define void @force_align64(<16 x i32>) {
|
||||
%alloca = alloca i8
|
||||
store volatile i8 0, i8* %alloca
|
||||
@ -28,7 +28,7 @@ define void @force_align64(<16 x i32>) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}force_align128:
|
||||
; CHECK: ScratchSize: 8{{$}}
|
||||
; CHECK: ScratchSize: 5{{$}}
|
||||
define void @force_align128(<32 x i32>) {
|
||||
%alloca = alloca i8
|
||||
store volatile i8 0, i8* %alloca
|
||||
@ -36,7 +36,7 @@ define void @force_align128(<32 x i32>) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}force_align256:
|
||||
; CHECK: ScratchSize: 8{{$}}
|
||||
; CHECK: ScratchSize: 5{{$}}
|
||||
define void @force_align256(<64 x i32>) {
|
||||
%alloca = alloca i8
|
||||
store volatile i8 0, i8* %alloca
|
||||
|
@ -8,13 +8,12 @@
|
||||
; CHECK-LABEL: {{^}}main:
|
||||
|
||||
; CHECK-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x200
|
||||
; CHECK-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
|
||||
; CHECK-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0x400{{$}}
|
||||
; CHECK-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
|
||||
; CHECK-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]]
|
||||
|
||||
; TODO: add 0?
|
||||
; CHECK-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], [[CLAMP_IDX]], [[ZERO]]
|
||||
; CHECK-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], [[CLAMP_IDX]], [[K]]
|
||||
; CHECK-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], [[CLAMP_IDX]], [[K]]
|
||||
; CHECK-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], [[CLAMP_IDX]], [[ZERO]]
|
||||
|
||||
; CHECK: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
|
||||
; CHECK: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
|
||||
|
@ -10,27 +10,27 @@
|
||||
; HSA-ELT4: private_element_size = 1
|
||||
|
||||
|
||||
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9{{$}}
|
||||
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:16
|
||||
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:32
|
||||
; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
|
||||
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9{{$}}
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:8
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:24{{$}}
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:16
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:24
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:32
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:40
|
||||
|
||||
; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen
|
||||
; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen
|
||||
|
||||
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:4{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:8{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:12{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:16{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:20{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:24{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:28{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:32{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:36{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:40{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:44{{$}}
|
||||
|
||||
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
|
||||
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
|
||||
@ -59,36 +59,28 @@ entry:
|
||||
; HSA-ELT8: private_element_size = 2
|
||||
; HSA-ELT4: private_element_size = 1
|
||||
|
||||
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9{{$}}
|
||||
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:16
|
||||
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:32
|
||||
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:48
|
||||
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:64
|
||||
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:80
|
||||
|
||||
; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
|
||||
; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
|
||||
|
||||
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9{{$}}
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:8
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:16
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:24
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:32
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:40
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:48
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:56
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:88
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:80
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:72
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:64
|
||||
|
||||
; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen
|
||||
; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen
|
||||
|
||||
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:4{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:8{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:12{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:16{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:20{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:24{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:28{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:32{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:36{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:40{{$}}
|
||||
@ -97,6 +89,14 @@ entry:
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:52{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:56{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:60{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:64{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:68{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:72{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:76{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:80{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:84{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:88{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:92{{$}}
|
||||
|
||||
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
|
||||
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
|
||||
@ -130,16 +130,16 @@ entry:
|
||||
; HSA-ELT8: private_element_size = 2
|
||||
; HSA-ELT4: private_element_size = 1
|
||||
|
||||
; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9{{$}}
|
||||
; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:8
|
||||
; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:16
|
||||
; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:24
|
||||
|
||||
; HSA-ELTGE8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen
|
||||
|
||||
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:4{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:8{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:12{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:16{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:20{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:24{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:28{{$}}
|
||||
|
||||
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
|
||||
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
|
||||
@ -166,16 +166,16 @@ entry:
|
||||
; HSA-ELT8: private_element_size = 2
|
||||
; HSA-ELT4: private_element_size = 1
|
||||
|
||||
; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9{{$}}
|
||||
; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:8
|
||||
; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:16
|
||||
; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:24
|
||||
|
||||
; HSA-ELTGE8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen
|
||||
|
||||
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:4{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:8{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:12{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:16{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:20{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:24{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:28{{$}}
|
||||
|
||||
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
|
||||
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
|
||||
@ -202,27 +202,27 @@ entry:
|
||||
; HSA-ELT8: private_element_size = 2
|
||||
; HSA-ELT4: private_element_size = 1
|
||||
|
||||
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9{{$}}
|
||||
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:16
|
||||
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:32
|
||||
; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
|
||||
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9{{$}}
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:8
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:16
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:16{{$}}
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:24
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:40
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:32
|
||||
|
||||
; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen
|
||||
; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen
|
||||
|
||||
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:4{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:8{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:12{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:16{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:20{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:24{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:28{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:32{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:36{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:40{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:44{{$}}
|
||||
|
||||
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
|
||||
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
|
||||
|
@ -9,8 +9,8 @@
|
||||
; should be able to reuse the same register for each scratch buffer access.
|
||||
|
||||
; GCN-LABEL: {{^}}legal_offset_fi:
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+$}}
|
||||
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x8000
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offset:4{{$}}
|
||||
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x8004
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
|
||||
|
||||
define void @legal_offset_fi(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) {
|
||||
@ -49,7 +49,7 @@ done:
|
||||
; GCN-LABEL: {{^}}legal_offset_fi_offset:
|
||||
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
|
||||
; This constant isn't folded, because it has multiple uses.
|
||||
; GCN-DAG: v_mov_b32_e32 [[K8000:v[0-9]+]], 0x8000
|
||||
; GCN-DAG: v_mov_b32_e32 [[K8000:v[0-9]+]], 0x8004
|
||||
; GCN-DAG: v_add_i32_e32 [[OFFSET:v[0-9]+]], vcc, [[K8000]]
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
|
||||
|
||||
@ -98,7 +98,7 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}pos_vaddr_offset:
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:16
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:20
|
||||
define void @pos_vaddr_offset(i32 addrspace(1)* %out, i32 %offset) {
|
||||
entry:
|
||||
%array = alloca [8192 x i32]
|
||||
|
@ -8,7 +8,7 @@
|
||||
; ALL: s_mov_b32 s[[HI:[0-9]+]], 0xe80000
|
||||
|
||||
; Make sure we are handling hazards correctly.
|
||||
; SGPR: buffer_load_dword [[VHI:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:12
|
||||
; SGPR: buffer_load_dword [[VHI:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:16
|
||||
; SGPR-NEXT: s_waitcnt vmcnt(0)
|
||||
; SGPR-NEXT: v_readfirstlane_b32 s[[HI:[0-9]+]], [[VHI]]
|
||||
; SGPR-NEXT: s_nop 4
|
||||
@ -16,10 +16,10 @@
|
||||
|
||||
; Make sure scratch wave offset register is correctly incremented and
|
||||
; then restored.
|
||||
; SMEM: s_mov_b32 m0, s[[OFF]]{{$}}
|
||||
; SMEM: s_add_u32 m0, s[[OFF]], 0x100{{$}}
|
||||
; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[LO]]:[[HI]]], m0 ; 16-byte Folded Spill
|
||||
|
||||
; SMEM: s_mov_b32 m0, s[[OFF]]{{$}}
|
||||
; SMEM: s_add_u32 m0, s[[OFF]], 0x100{{$}}
|
||||
; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[LO]]:[[HI]]], m0 ; 16-byte Folded Reload
|
||||
|
||||
; SMEM: s_dcache_wb
|
||||
|
@ -17,11 +17,11 @@
|
||||
|
||||
; TOVMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
|
||||
; TOVMEM-DAG: v_mov_b32_e32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]]
|
||||
; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; 4-byte Folded Spill
|
||||
; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4 ; 4-byte Folded Spill
|
||||
; TOVMEM: s_waitcnt vmcnt(0)
|
||||
|
||||
; TOSMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
|
||||
; TOSMEM: s_mov_b32 m0, s3{{$}}
|
||||
; TOSMEM: s_add_u32 m0, s3, 0x100{{$}}
|
||||
; TOSMEM-NOT: [[M0_COPY]]
|
||||
; TOSMEM: s_buffer_store_dword [[M0_COPY]], s{{\[}}[[LO]]:[[HI]]], m0 ; 4-byte Folded Spill
|
||||
; TOSMEM: s_waitcnt lgkmcnt(0)
|
||||
@ -32,12 +32,12 @@
|
||||
; TOVGPR: v_readlane_b32 [[M0_RESTORE:s[0-9]+]], [[SPILL_VREG]], 0
|
||||
; TOVGPR: s_mov_b32 m0, [[M0_RESTORE]]
|
||||
|
||||
; TOVMEM: buffer_load_dword [[RELOAD_VREG:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; 4-byte Folded Reload
|
||||
; TOVMEM: buffer_load_dword [[RELOAD_VREG:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4 ; 4-byte Folded Reload
|
||||
; TOVMEM: s_waitcnt vmcnt(0)
|
||||
; TOVMEM: v_readfirstlane_b32 [[M0_RESTORE:s[0-9]+]], [[RELOAD_VREG]]
|
||||
; TOVMEM: s_mov_b32 m0, [[M0_RESTORE]]
|
||||
|
||||
; TOSMEM: s_mov_b32 m0, s3{{$}}
|
||||
; TOSMEM: s_add_u32 m0, s3, 0x100{{$}}
|
||||
; TOSMEM: s_buffer_load_dword [[M0_RESTORE:s[0-9]+]], s{{\[}}[[LO]]:[[HI]]], m0 ; 4-byte Folded Reload
|
||||
; TOSMEM-NOT: [[M0_RESTORE]]
|
||||
; TOSMEM: s_mov_b32 m0, [[M0_RESTORE]]
|
||||
@ -67,12 +67,12 @@ endif:
|
||||
; GCN: v_interp_mov_f32
|
||||
|
||||
; TOSMEM-NOT: s_m0
|
||||
; TOSMEM: s_mov_b32 m0, s7
|
||||
; TOSMEM: s_add_u32 m0, s7, 0x100
|
||||
; TOSMEM-NEXT: s_buffer_store_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 4-byte Folded Spill
|
||||
; TOSMEM-NOT: m0
|
||||
|
||||
; TOSMEM-NOT: m0
|
||||
; TOSMEM: s_add_u32 m0, s7, 0x100
|
||||
; TOSMEM: s_add_u32 m0, s7, 0x200
|
||||
; TOSMEM: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Spill
|
||||
; TOSMEM-NOT: m0
|
||||
|
||||
@ -81,7 +81,7 @@ endif:
|
||||
; TOSMEM: s_branch
|
||||
|
||||
; TOSMEM: BB{{[0-9]+_[0-9]+}}:
|
||||
; TOSMEM-NEXT: s_add_u32 m0, s7, 0x100
|
||||
; TOSMEM-NEXT: s_add_u32 m0, s7, 0x200
|
||||
; TOSMEM-NEXT: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Reload
|
||||
|
||||
|
||||
@ -121,7 +121,7 @@ endif: ; preds = %else, %if
|
||||
; GCN: ; clobber m0
|
||||
|
||||
; TOSMEM: s_mov_b32 vcc_hi, m0
|
||||
; TOSMEM: s_mov_b32 m0, s3
|
||||
; TOSMEM: s_add_u32 m0, s3, 0x100
|
||||
; TOSMEM-NEXT: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Spill
|
||||
; TOSMEM: s_mov_b32 m0, vcc_hi
|
||||
|
||||
@ -130,7 +130,7 @@ endif: ; preds = %else, %if
|
||||
; TOSMEM: s_branch
|
||||
|
||||
; TOSMEM: BB{{[0-9]+_[0-9]+}}:
|
||||
; TOSMEM-NEXT: s_mov_b32 m0, s3
|
||||
; TOSMEM-NEXT: s_add_u32 m0, s3, 0x100
|
||||
; TOSMEM-NEXT: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Reload
|
||||
|
||||
; GCN-NOT: v_readlane_b32 m0
|
||||
@ -160,10 +160,10 @@ endif:
|
||||
; TOSMEM: s_load_dwordx2 [[REG:s\[[0-9]+:[0-9]+\]]]
|
||||
; TOSMEM: s_cmp_eq_u32
|
||||
; TOSMEM-NOT: m0
|
||||
; TOSMEM: s_mov_b32 m0, s3
|
||||
; TOSMEM: s_add_u32 m0, s3, 0x100
|
||||
; TOSMEM: s_buffer_store_dwordx2 [[REG]], s[88:91], m0 ; 8-byte Folded Spill
|
||||
; TOSMEM-NOT: m0
|
||||
; TOSMEM: s_add_u32 m0, s3, 0x200
|
||||
; TOSMEM: s_add_u32 m0, s3, 0x300
|
||||
; TOSMEM: s_buffer_store_dword s{{[0-9]+}}, s[88:91], m0 ; 4-byte Folded Spill
|
||||
; TOSMEM-NOT: m0
|
||||
; TOSMEM: s_cbranch_scc1
|
||||
@ -171,7 +171,7 @@ endif:
|
||||
; TOSMEM: s_mov_b32 m0, -1
|
||||
|
||||
; TOSMEM: s_mov_b32 vcc_hi, m0
|
||||
; TOSMEM: s_mov_b32 m0, s3
|
||||
; TOSMEM: s_add_u32 m0, s3, 0x100
|
||||
; TOSMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[88:91], m0 ; 8-byte Folded Reload
|
||||
; TOSMEM: s_mov_b32 m0, vcc_hi
|
||||
; TOSMEM: s_waitcnt lgkmcnt(0)
|
||||
@ -179,7 +179,7 @@ endif:
|
||||
; TOSMEM: ds_write_b64
|
||||
|
||||
; TOSMEM-NOT: m0
|
||||
; TOSMEM: s_add_u32 m0, s3, 0x200
|
||||
; TOSMEM: s_add_u32 m0, s3, 0x300
|
||||
; TOSMEM: s_buffer_load_dword s0, s[88:91], m0 ; 4-byte Folded Reload
|
||||
; TOSMEM-NOT: m0
|
||||
; TOSMEM: s_waitcnt lgkmcnt(0)
|
||||
|
@ -3,11 +3,11 @@
|
||||
; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=VMEM %s
|
||||
|
||||
; ALL-LABEL: {{^}}spill_sgpr_x2:
|
||||
; SMEM: s_mov_b32 m0, s3{{$}}
|
||||
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
|
||||
; SMEM: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[8:11], m0 ; 8-byte Folded Spill
|
||||
; SMEM: s_cbranch_scc1
|
||||
|
||||
; SMEM: s_mov_b32 m0, s3{{$}}
|
||||
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
|
||||
; SMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[8:11], m0 ; 8-byte Folded Reload
|
||||
|
||||
; SMEM: s_dcache_wb
|
||||
@ -44,11 +44,11 @@ ret:
|
||||
}
|
||||
|
||||
; ALL-LABEL: {{^}}spill_sgpr_x4:
|
||||
; SMEM: s_mov_b32 m0, s3{{$}}
|
||||
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
|
||||
; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[12:15], m0 ; 16-byte Folded Spill
|
||||
; SMEM: s_cbranch_scc1
|
||||
|
||||
; SMEM: s_mov_b32 m0, s3{{$}}
|
||||
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
|
||||
; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[12:15], m0 ; 16-byte Folded Reload
|
||||
; SMEM: s_dcache_wb
|
||||
; SMEM: s_endpgm
|
||||
@ -93,15 +93,15 @@ ret:
|
||||
|
||||
; ALL-LABEL: {{^}}spill_sgpr_x8:
|
||||
|
||||
; SMEM: s_mov_b32 m0, s3{{$}}
|
||||
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
|
||||
; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[16:19], m0 ; 16-byte Folded Spill
|
||||
; SMEM: s_add_u32 m0, s3, 16
|
||||
; SMEM: s_add_u32 m0, s3, 0x110{{$}}
|
||||
; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[16:19], m0 ; 16-byte Folded Spill
|
||||
; SMEM: s_cbranch_scc1
|
||||
|
||||
; SMEM: s_mov_b32 m0, s3{{$}}
|
||||
; SMEM: s_add_u32 m0, s3, 0x100{{$}}
|
||||
; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[16:19], m0 ; 16-byte Folded Reload
|
||||
; SMEM: s_add_u32 m0, s3, 16
|
||||
; SMEM: s_add_u32 m0, s3, 0x110{{$}}
|
||||
; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[16:19], m0 ; 16-byte Folded Reload
|
||||
|
||||
; SMEM: s_dcache_wb
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
; HSA: enable_sgpr_private_segment_buffer = 1
|
||||
; HSA: enable_sgpr_flat_scratch_init = 0
|
||||
; HSA: workitem_private_segment_byte_size = 1024
|
||||
; HSA: workitem_private_segment_byte_size = 1536
|
||||
|
||||
; GCN-NOT: flat_scr
|
||||
|
||||
@ -40,7 +40,7 @@
|
||||
; GCN: buffer_load_dword {{v[0-9]+}}, off, s[12:15], s16 offset:{{[0-9]+}}
|
||||
|
||||
; GCN: NumVgprs: 256
|
||||
; GCN: ScratchSize: 1024
|
||||
; GCN: ScratchSize: 1536
|
||||
|
||||
; s[0:3] input user SGPRs. s4,s5,s6 = workgroup IDs. s8 scratch offset.
|
||||
define void @spill_vgpr_compute(<4 x float> %arg6, float addrspace(1)* %arg, i32 %arg1, i32 %arg2, float %arg3, float %arg4, float %arg5) #0 {
|
||||
|
@ -23,7 +23,7 @@
|
||||
; GCN: buffer_store_dword {{v[0-9]+}}, off, s{{\[}}[[DESC0]]:[[DESC3]]], s[[OFFREG]] offset:{{[0-9]+}} ; 4-byte Folded Spill
|
||||
; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[DESC0]]:[[DESC3]]], s[[OFFREG]] offset:{{[0-9]+}} ; 4-byte Folded Reload
|
||||
; GCN: NumVgprs: 256
|
||||
; GCN: ScratchSize: 1024
|
||||
; GCN: ScratchSize: 1536
|
||||
|
||||
define amdgpu_vs void @main([9 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, [16 x <16 x i8>] addrspace(2)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
|
||||
bb:
|
||||
|
@ -392,7 +392,7 @@ break:
|
||||
; CHECK: s_and_b64 exec, exec, [[LIVE]]
|
||||
; CHECK: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0
|
||||
; CHECK: s_wqm_b64 exec, exec
|
||||
; CHECK: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+$}}
|
||||
; CHECK: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4{{$}}
|
||||
; CHECK: s_and_b64 exec, exec, [[LIVE]]
|
||||
; CHECK: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen
|
||||
; CHECK: s_wqm_b64 exec, exec
|
||||
|
Loading…
Reference in New Issue
Block a user