mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
AMDGPU: Don't use offen if it is 0
This removes many re-initializations of a base register to 0. llvm-svn: 282999
This commit is contained in:
parent
d309b57e6e
commit
58e2ff3f3c
@ -320,14 +320,82 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
|
||||
}
|
||||
}
|
||||
|
||||
void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
|
||||
unsigned LoadStoreOp,
|
||||
const MachineOperand *SrcDst,
|
||||
unsigned ScratchRsrcReg,
|
||||
unsigned ScratchOffset,
|
||||
int64_t Offset,
|
||||
RegScavenger *RS) const {
|
||||
static int getOffsetMUBUFStore(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
|
||||
return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
|
||||
case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
|
||||
return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
|
||||
case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
|
||||
return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
|
||||
case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
|
||||
return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
|
||||
case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
|
||||
return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
static int getOffsetMUBUFLoad(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
|
||||
return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
|
||||
case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
|
||||
return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
|
||||
case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
|
||||
return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
|
||||
case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
|
||||
return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
|
||||
case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
|
||||
return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
|
||||
case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
|
||||
return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
|
||||
case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
|
||||
return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
|
||||
// need to handle the case where an SGPR may need to be spilled while spilling.
|
||||
static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
|
||||
MachineFrameInfo &MFI,
|
||||
MachineBasicBlock::iterator MI,
|
||||
int Index,
|
||||
int64_t Offset) {
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
const DebugLoc &DL = MI->getDebugLoc();
|
||||
bool IsStore = MI->mayStore();
|
||||
|
||||
unsigned Opc = MI->getOpcode();
|
||||
int LoadStoreOp = IsStore ?
|
||||
getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
|
||||
if (LoadStoreOp == -1)
|
||||
return false;
|
||||
|
||||
unsigned Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata)->getReg();
|
||||
|
||||
BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
|
||||
.addReg(Reg, getDefRegState(!IsStore))
|
||||
.addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
|
||||
.addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
|
||||
.addImm(Offset)
|
||||
.addImm(0) // glc
|
||||
.addImm(0) // slc
|
||||
.addImm(0) // tfe
|
||||
.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
|
||||
return true;
|
||||
}
|
||||
|
||||
void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
|
||||
unsigned LoadStoreOp,
|
||||
const MachineOperand *SrcDst,
|
||||
unsigned ScratchRsrcReg,
|
||||
unsigned ScratchOffset,
|
||||
int64_t Offset,
|
||||
RegScavenger *RS) const {
|
||||
unsigned Value = SrcDst->getReg();
|
||||
bool IsKill = SrcDst->isKill();
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
@ -574,7 +642,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
|
||||
case AMDGPU::SI_SPILL_V96_SAVE:
|
||||
case AMDGPU::SI_SPILL_V64_SAVE:
|
||||
case AMDGPU::SI_SPILL_V32_SAVE:
|
||||
buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
|
||||
buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
|
||||
TII->getNamedOperand(*MI, AMDGPU::OpName::vdata),
|
||||
TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
|
||||
TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
|
||||
@ -589,7 +657,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
|
||||
case AMDGPU::SI_SPILL_V128_RESTORE:
|
||||
case AMDGPU::SI_SPILL_V256_RESTORE:
|
||||
case AMDGPU::SI_SPILL_V512_RESTORE: {
|
||||
buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
|
||||
buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
|
||||
TII->getNamedOperand(*MI, AMDGPU::OpName::vdata),
|
||||
TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
|
||||
TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
|
||||
@ -600,6 +668,24 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
|
||||
}
|
||||
|
||||
default: {
|
||||
if (TII->isMUBUF(*MI)) {
|
||||
// Disable offen so we don't need a 0 vgpr base.
|
||||
assert(static_cast<int>(FIOperandNum) ==
|
||||
AMDGPU::getNamedOperandIdx(MI->getOpcode(),
|
||||
AMDGPU::OpName::vaddr));
|
||||
|
||||
int64_t Offset = FrameInfo.getObjectOffset(Index);
|
||||
int64_t OldImm
|
||||
= TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
|
||||
int64_t NewOffset = OldImm + Offset;
|
||||
|
||||
if (isUInt<12>(NewOffset) &&
|
||||
buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
|
||||
MI->eraseFromParent();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int64_t Offset = FrameInfo.getObjectOffset(Index);
|
||||
FIOp.ChangeToImmediate(Offset);
|
||||
if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
|
||||
|
@ -240,11 +240,11 @@ public:
|
||||
unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
|
||||
|
||||
private:
|
||||
void buildScratchLoadStore(MachineBasicBlock::iterator MI,
|
||||
unsigned LoadStoreOp, const MachineOperand *SrcDst,
|
||||
unsigned ScratchRsrcReg, unsigned ScratchOffset,
|
||||
int64_t Offset,
|
||||
RegScavenger *RS) const;
|
||||
void buildSpillLoadStore(MachineBasicBlock::iterator MI,
|
||||
unsigned LoadStoreOp, const MachineOperand *SrcDst,
|
||||
unsigned ScratchRsrcReg, unsigned ScratchOffset,
|
||||
int64_t Offset,
|
||||
RegScavenger *RS) const;
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
@ -227,8 +227,8 @@ for.end:
|
||||
|
||||
; R600: MOVA_INT
|
||||
|
||||
; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x68,0xe0
|
||||
; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:2 ; encoding: [0x02,0x10,0x68,0xe0
|
||||
; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} ; encoding: [0x00,0x00,0x68,0xe0,
|
||||
; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:2 ; encoding: [0x02,0x00,0x68,0xe0,
|
||||
; SI-PROMOTE: buffer_load_sshort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
|
||||
define void @short_array(i32 addrspace(1)* %out, i32 %index) #0 {
|
||||
entry:
|
||||
@ -248,8 +248,11 @@ entry:
|
||||
|
||||
; R600: MOVA_INT
|
||||
|
||||
; SI-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x60,0xe0
|
||||
; SI-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:1 ; encoding: [0x01,0x10,0x60,0xe0
|
||||
; SI-PROMOTE-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} ; encoding:
|
||||
; SI-PROMOTE-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:1 ; encoding:
|
||||
|
||||
; SI-ALLOCA-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x60,0xe0
|
||||
; SI-ALLOCA-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:1 ; encoding: [0x01,0x10,0x60,0xe0
|
||||
define void @char_array(i32 addrspace(1)* %out, i32 %index) #0 {
|
||||
entry:
|
||||
%0 = alloca [2 x i8]
|
||||
@ -262,14 +265,13 @@ entry:
|
||||
%5 = sext i8 %4 to i32
|
||||
store i32 %5, i32 addrspace(1)* %out
|
||||
ret void
|
||||
|
||||
}
|
||||
|
||||
; Test that two stack objects are not stored in the same register
|
||||
; The second stack object should be in T3.X
|
||||
; FUNC-LABEL: {{^}}no_overlap:
|
||||
; R600_CHECK: MOV
|
||||
; R600_CHECK: [[CHAN:[XYZW]]]+
|
||||
; R600-CHECK: MOV
|
||||
; R600-CHECK: [[CHAN:[XYZW]]]+
|
||||
; R600-NOT: [[CHAN]]+
|
||||
; SI: v_mov_b32_e32 v3
|
||||
define void @no_overlap(i32 addrspace(1)* %out, i32 %in) #0 {
|
||||
|
@ -14,8 +14,7 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}stored_fi_to_lds:
|
||||
; GCN: s_load_dword [[LDSPTR:s[0-9]+]]
|
||||
; GCN: v_mov_b32_e32 [[ZERO1:v[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO1]]
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, off,
|
||||
; GCN: v_mov_b32_e32 [[ZERO0:v[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]]
|
||||
; GCN: ds_write_b32 [[VLDSPTR]], [[ZERO0]]
|
||||
@ -118,7 +117,7 @@ define void @stored_fi_to_fi() #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}stored_fi_to_global:
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
|
||||
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dword [[FI]]
|
||||
define void @stored_fi_to_global(float* addrspace(1)* %ptr) #0 {
|
||||
@ -152,18 +151,20 @@ define void @stored_fi_to_global_2_small_objects(float* addrspace(1)* %ptr) #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}stored_fi_to_global_huge_frame_offset:
|
||||
; GCN: v_mov_b32_e32 [[VAL_0:v[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 [[BASE_0:v[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dword [[VAL_0]], [[BASE_0]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
|
||||
; GCN: buffer_store_dword [[BASE_0]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
|
||||
|
||||
; FIXME: Re-initialize
|
||||
; GCN: v_mov_b32_e32 [[BASE_0_1:v[0-9]+]], 0{{$}}
|
||||
; GCN: v_add_i32_e32 [[BASE_1_OFF_0:v[0-9]+]], vcc, 0x3ffc, [[BASE_0_1]]
|
||||
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
|
||||
; GCN: v_add_i32_e32 [[BASE_1_OFF_1:v[0-9]+]], vcc, 56, [[BASE_0_1]]
|
||||
; GCN: buffer_store_dword [[K]], [[BASE_1_OFF_0]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
|
||||
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
|
||||
; GCN-DAG: v_add_i32_e32 [[BASE_1_OFF_1:v[0-9]+]], vcc, 0x3ffc, [[BASE_0_1]]
|
||||
|
||||
; GCN: buffer_store_dword [[BASE_1_OFF_1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
|
||||
; GCN: v_add_i32_e32 [[BASE_1_OFF_2:v[0-9]+]], vcc, 56, [[BASE_0_1]]
|
||||
; GCN: buffer_store_dword [[K]], [[BASE_1_OFF_1]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
|
||||
|
||||
; GCN: buffer_store_dword [[BASE_1_OFF_2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
define void @stored_fi_to_global_huge_frame_offset(i32* addrspace(1)* %ptr) #0 {
|
||||
%tmp0 = alloca [4096 x i32]
|
||||
%tmp1 = alloca [4096 x i32]
|
||||
|
@ -126,8 +126,8 @@ done:
|
||||
|
||||
; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32:
|
||||
; GCN: s_and_saveexec_b64
|
||||
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
|
||||
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
|
||||
; GCN: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}}
|
||||
; GCN: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}}
|
||||
; GCN: {{^}}BB4_2:
|
||||
define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
|
||||
entry:
|
||||
|
@ -2,7 +2,7 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}load_i8_sext_private:
|
||||
; SI: buffer_load_sbyte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
|
||||
; SI: buffer_load_sbyte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+$}}
|
||||
define void @load_i8_sext_private(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%tmp0 = alloca i8
|
||||
@ -13,7 +13,7 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}load_i8_zext_private:
|
||||
; SI: buffer_load_ubyte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
|
||||
; SI: buffer_load_ubyte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+$}}
|
||||
define void @load_i8_zext_private(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%tmp0 = alloca i8
|
||||
@ -24,7 +24,7 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}load_i16_sext_private:
|
||||
; SI: buffer_load_sshort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
|
||||
; SI: buffer_load_sshort v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+$}}
|
||||
define void @load_i16_sext_private(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%tmp0 = alloca i16
|
||||
@ -35,7 +35,7 @@ entry:
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}load_i16_zext_private:
|
||||
; SI: buffer_load_ushort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
|
||||
; SI: buffer_load_ushort v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+$}}
|
||||
define void @load_i16_zext_private(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%tmp0 = alloca i16
|
||||
|
@ -207,12 +207,17 @@ define void @dynamic_insertelement_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16>
|
||||
; GCN: buffer_load_ushort v{{[0-9]+}}, off
|
||||
; GCN: buffer_load_ushort v{{[0-9]+}}, off
|
||||
|
||||
; GCN-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:6
|
||||
; GCN-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:4
|
||||
; GCN-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:2
|
||||
; GCN-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
|
||||
; GCN: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
|
||||
; GCN: v_mov_b32_e32 [[BASE_FI:v[0-9]+]], 0{{$}}
|
||||
|
||||
; GCN-DAG: buffer_store_short v{{[0-9]+}}, [[BASE_FI]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:6
|
||||
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4
|
||||
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:2
|
||||
; GCN-DAG: buffer_store_short v{{[0-9]+}}, [[BASE_FI]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
|
||||
; GCN: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
|
||||
|
||||
; GCN: s_waitcnt
|
||||
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
; GCN: buffer_load_ushort
|
||||
@ -229,7 +234,7 @@ define void @dynamic_insertelement_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16>
|
||||
; GCN: buffer_load_ubyte v{{[0-9]+}}, off
|
||||
; GCN: buffer_load_ubyte v{{[0-9]+}}, off
|
||||
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:1
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:1
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
|
||||
|
||||
; GCN: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
|
||||
@ -250,7 +255,7 @@ define void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> %a
|
||||
; GCN: buffer_load_ubyte v{{[0-9]+}}, off
|
||||
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:2
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:1
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:1
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
|
||||
|
||||
; GCN: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
|
||||
@ -274,8 +279,8 @@ define void @dynamic_insertelement_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> %a
|
||||
; GCN: buffer_load_ubyte v{{[0-9]+}}, off
|
||||
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:3
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:2
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:1
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:2
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:1
|
||||
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
|
||||
|
||||
; GCN: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
|
||||
@ -390,8 +395,8 @@ define void @dynamic_insertelement_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64>
|
||||
|
||||
; Stack store
|
||||
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:16{{$}}
|
||||
|
||||
; Write element
|
||||
; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
|
||||
@ -416,8 +421,8 @@ define void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x d
|
||||
; GCN: SCRATCH_RSRC_DWORD
|
||||
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:32{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:16{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:32{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:48{{$}}
|
||||
|
||||
; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
|
||||
|
@ -6,8 +6,14 @@
|
||||
; from https://bugs.freedesktop.org/show_bug.cgi?id=96602
|
||||
;
|
||||
; CHECK-LABEL: {{^}}main:
|
||||
; CHECK: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
|
||||
; CHECK-DAG: v_mov_b32_e32 [[ZERO_BASE_FI:v[0-9]+]], 0{{$}}
|
||||
|
||||
; FIXME: add 0?
|
||||
; CHECK-DAG: s_movk_i32 [[K0:s[0-9]+]], 0x138
|
||||
; CHECK-DAG: v_add_i32_e64 [[ADD_K0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, [[K0]], 0
|
||||
|
||||
; CHECK-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
|
||||
; CHECK-DAG: buffer_store_dword {{v[0-9]+}}, [[ADD_K0]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
|
||||
|
||||
; CHECK-DAG: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, 0x200, [[BYTES]]
|
||||
; CHECK-DAG: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, 0, [[BYTES]]
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
|
||||
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:8
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:16
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:24
|
||||
|
||||
@ -24,10 +24,10 @@
|
||||
|
||||
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:4{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:8{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:16{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:16{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:20{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:24{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:28{{$}}
|
||||
@ -60,7 +60,7 @@ entry:
|
||||
; HSA-ELT4: private_element_size = 1
|
||||
|
||||
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
|
||||
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:16
|
||||
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:16
|
||||
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:32
|
||||
; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:48
|
||||
|
||||
@ -69,10 +69,10 @@ entry:
|
||||
|
||||
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:16
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:8
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:16
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:24
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:32
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:32
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:40
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:48
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:56
|
||||
@ -82,14 +82,14 @@ entry:
|
||||
|
||||
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:16{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:20{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:24{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:4{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:8{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:12{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:16{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:20{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:24{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:28{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:32{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:32{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:36{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:40{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:44{{$}}
|
||||
@ -137,7 +137,7 @@ entry:
|
||||
|
||||
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:4{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}
|
||||
|
||||
@ -173,7 +173,7 @@ entry:
|
||||
|
||||
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:4{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}
|
||||
|
||||
@ -207,7 +207,7 @@ entry:
|
||||
; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
|
||||
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:8
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:16
|
||||
; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen offset:24
|
||||
|
||||
@ -216,10 +216,10 @@ entry:
|
||||
|
||||
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:4{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:8{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:16{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:16{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:20{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:24{{$}}
|
||||
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:28{{$}}
|
||||
|
@ -9,9 +9,8 @@
|
||||
; should be able to reuse the same register for each scratch buffer access.
|
||||
|
||||
; GCN-LABEL: {{^}}legal_offset_fi:
|
||||
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
|
||||
; GCN: v_mov_b32_e32 [[OFFSET]], 0x8000
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+$}}
|
||||
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x8000
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
|
||||
|
||||
define void @legal_offset_fi(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) {
|
||||
@ -97,7 +96,7 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}pos_vaddr_offset:
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:16
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:16
|
||||
define void @pos_vaddr_offset(i32 addrspace(1)* %out, i32 %offset) {
|
||||
entry:
|
||||
%array = alloca [8192 x i32]
|
||||
|
@ -29,10 +29,10 @@
|
||||
|
||||
; GCN: buffer_store_dword {{v[0-9]+}}, off, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte Folded Spill
|
||||
|
||||
; GCN: buffer_store_dword {{v[0-9]}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
|
||||
; GCN: buffer_store_dword {{v[0-9]}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
|
||||
; GCN: buffer_store_dword {{v[0-9]}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
|
||||
; GCN: buffer_store_dword {{v[0-9]}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
|
||||
; GCN: buffer_store_dword {{v[0-9]}}, off, s[12:15], s16 offset:{{[0-9]+}}
|
||||
; GCN: buffer_store_dword {{v[0-9]}}, off, s[12:15], s16 offset:{{[0-9]+}}
|
||||
; GCN: buffer_store_dword {{v[0-9]}}, off, s[12:15], s16 offset:{{[0-9]+}}
|
||||
; GCN: buffer_store_dword {{v[0-9]}}, off, s[12:15], s16 offset:{{[0-9]+}}
|
||||
|
||||
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
|
||||
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
|
||||
|
@ -395,7 +395,7 @@ break:
|
||||
; CHECK: s_and_b64 exec, exec, [[LIVE]]
|
||||
; CHECK: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0
|
||||
; CHECK: s_wqm_b64 exec, exec
|
||||
; CHECK: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
|
||||
; CHECK: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+$}}
|
||||
; CHECK: s_and_b64 exec, exec, [[LIVE]]
|
||||
; CHECK: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen
|
||||
; CHECK: s_wqm_b64 exec, exec
|
||||
|
Loading…
Reference in New Issue
Block a user