mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
Revert "AMDGPU/SI: Use flat for global load/store when targeting HSA"
This reverts commit r256273. It broke CodeGen/AMDGPU/llvm.dbg.value.ll. llvm-svn: 256275
This commit is contained in:
parent
2af3ff098d
commit
6880ff7f5d
@ -108,11 +108,6 @@ def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <"unsafe-ds-offset-fol
|
||||
"true",
|
||||
"Force using DS instruction immediate offsets on SI">;
|
||||
|
||||
def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
|
||||
"FlatForGlobal",
|
||||
"true",
|
||||
"Force to generate flat instruction for global">;
|
||||
|
||||
def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
|
||||
"FlatAddressSpace",
|
||||
"true",
|
||||
|
@ -95,7 +95,7 @@ private:
|
||||
bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
|
||||
bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
|
||||
SDValue &Offset1) const;
|
||||
bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
|
||||
void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
|
||||
SDValue &SOffset, SDValue &Offset, SDValue &Offen,
|
||||
SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
|
||||
SDValue &TFE) const;
|
||||
@ -920,16 +920,12 @@ static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
|
||||
return isUInt<12>(Imm->getZExtValue());
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
|
||||
void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
|
||||
SDValue &VAddr, SDValue &SOffset,
|
||||
SDValue &Offset, SDValue &Offen,
|
||||
SDValue &Idxen, SDValue &Addr64,
|
||||
SDValue &GLC, SDValue &SLC,
|
||||
SDValue &TFE) const {
|
||||
// Subtarget prefers to use flat instruction
|
||||
if (Subtarget->useFlatForGlobal())
|
||||
return false;
|
||||
|
||||
SDLoc DL(Addr);
|
||||
|
||||
GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
|
||||
@ -962,14 +958,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
|
||||
|
||||
if (isLegalMUBUFImmOffset(C1)) {
|
||||
Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
|
||||
return true;
|
||||
return;
|
||||
} else if (isUInt<32>(C1->getZExtValue())) {
|
||||
// Illegal offset, store it in soffset.
|
||||
Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
|
||||
SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
|
||||
CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
|
||||
0);
|
||||
return true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@ -981,15 +977,13 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
|
||||
Ptr = N0;
|
||||
VAddr = N1;
|
||||
Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
|
||||
return true;
|
||||
return;
|
||||
}
|
||||
|
||||
// default case -> offset
|
||||
VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
|
||||
Ptr = Addr;
|
||||
Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
|
||||
@ -1002,9 +996,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
|
||||
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
return false;
|
||||
|
||||
if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
|
||||
GLC, SLC, TFE))
|
||||
return false;
|
||||
SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
|
||||
GLC, SLC, TFE);
|
||||
|
||||
ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
|
||||
if (C->getSExtValue()) {
|
||||
@ -1070,9 +1063,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
|
||||
const SIInstrInfo *TII =
|
||||
static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
|
||||
|
||||
if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
|
||||
GLC, SLC, TFE))
|
||||
return false;
|
||||
SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
|
||||
GLC, SLC, TFE);
|
||||
|
||||
if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
|
||||
!cast<ConstantSDNode>(Idxen)->getSExtValue() &&
|
||||
|
@ -45,8 +45,6 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
|
||||
// disable it.
|
||||
|
||||
SmallString<256> FullFS("+promote-alloca,+fp64-denormals,");
|
||||
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
|
||||
FullFS += "+flat-for-global,";
|
||||
FullFS += FS;
|
||||
|
||||
if (GPU == "" && TT.getArch() == Triple::amdgcn)
|
||||
@ -70,9 +68,9 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
DumpCode(false), R600ALUInst(false), HasVertexCache(false),
|
||||
TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false),
|
||||
FP64Denormals(false), FP32Denormals(false), FastFMAF32(false),
|
||||
CaymanISA(false), FlatAddressSpace(false), FlatForGlobal(false),
|
||||
EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true),
|
||||
EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false),
|
||||
CaymanISA(false), FlatAddressSpace(false), EnableIRStructurizer(true),
|
||||
EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false),
|
||||
EnableUnsafeDSOffsetFolding(false),
|
||||
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
|
||||
EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
|
||||
GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
|
||||
|
@ -70,7 +70,6 @@ private:
|
||||
bool FastFMAF32;
|
||||
bool CaymanISA;
|
||||
bool FlatAddressSpace;
|
||||
bool FlatForGlobal;
|
||||
bool EnableIRStructurizer;
|
||||
bool EnablePromoteAlloca;
|
||||
bool EnableIfCvt;
|
||||
@ -160,10 +159,6 @@ public:
|
||||
return FlatAddressSpace;
|
||||
}
|
||||
|
||||
bool useFlatForGlobal() const {
|
||||
return FlatForGlobal;
|
||||
}
|
||||
|
||||
bool hasBFE() const {
|
||||
return (getGeneration() >= EVERGREEN);
|
||||
}
|
||||
|
@ -234,63 +234,3 @@ def : Pat <
|
||||
>;
|
||||
|
||||
} // End Predicates = [isCI]
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Patterns to generate flat for global
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def useFlatForGlobal : Predicate <
|
||||
"Subtarget->useFlatForGlobal() || "
|
||||
"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">;
|
||||
|
||||
let Predicates = [useFlatForGlobal] in {
|
||||
|
||||
// 1. Offset as 20bit DWORD immediate
|
||||
def : Pat <
|
||||
(SIload_constant v4i32:$sbase, IMM20bit:$offset),
|
||||
(S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
|
||||
>;
|
||||
|
||||
// Patterns for global loads with no offset
|
||||
class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
|
||||
(vt (node i64:$addr)),
|
||||
(inst $addr, 0, 0, 0)
|
||||
>;
|
||||
|
||||
def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_global, i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_global, i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_USHORT, az_extloadi16_global, i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_global, i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_DWORD, global_load, i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_DWORDX2, global_load, v2i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_DWORDX4, global_load, v4i32>;
|
||||
|
||||
class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
|
||||
(node vt:$data, i64:$addr),
|
||||
(inst $data, $addr, 0, 0, 0)
|
||||
>;
|
||||
|
||||
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_global, i32>;
|
||||
def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_global, i32>;
|
||||
def : FlatStorePat <FLAT_STORE_DWORD, global_store, i32>;
|
||||
def : FlatStorePat <FLAT_STORE_DWORDX2, global_store, v2i32>;
|
||||
def : FlatStorePat <FLAT_STORE_DWORDX4, global_store, v4i32>;
|
||||
|
||||
class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
|
||||
(vt (node i64:$addr, vt:$data)),
|
||||
(inst $addr, $data, 0, 0)
|
||||
>;
|
||||
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
|
||||
|
||||
} // End Predicates = [useFlatForGlobal]
|
||||
|
@ -101,3 +101,58 @@ def S_DCACHE_WB_VOL : SMEM_Inval <0x23,
|
||||
|
||||
} // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SMEM Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let Predicates = [isVI] in {
|
||||
|
||||
// 1. Offset as 20bit DWORD immediate
|
||||
def : Pat <
|
||||
(SIload_constant v4i32:$sbase, IMM20bit:$offset),
|
||||
(S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
|
||||
>;
|
||||
|
||||
// Patterns for global loads with no offset
|
||||
class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
|
||||
(vt (node i64:$addr)),
|
||||
(inst $addr, 0, 0, 0)
|
||||
>;
|
||||
|
||||
def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_global, i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_global, i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_USHORT, az_extloadi16_global, i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_global, i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_DWORD, global_load, i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_DWORDX2, global_load, v2i32>;
|
||||
def : FlatLoadPat <FLAT_LOAD_DWORDX4, global_load, v4i32>;
|
||||
|
||||
class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
|
||||
(node vt:$data, i64:$addr),
|
||||
(inst $data, $addr, 0, 0, 0)
|
||||
>;
|
||||
|
||||
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_global, i32>;
|
||||
def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_global, i32>;
|
||||
def : FlatStorePat <FLAT_STORE_DWORD, global_store, i32>;
|
||||
def : FlatStorePat <FLAT_STORE_DWORDX2, global_store, v2i32>;
|
||||
def : FlatStorePat <FLAT_STORE_DWORDX4, global_store, v4i32>;
|
||||
|
||||
class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
|
||||
(vt (node i64:$addr, vt:$data)),
|
||||
(inst $addr, $data, 0, 0)
|
||||
>;
|
||||
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
|
||||
|
||||
|
||||
} // End Predicates = [isVI]
|
||||
|
@ -1,15 +0,0 @@
|
||||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck -check-prefix=HSA-DEFAULT %s
|
||||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global | FileCheck -check-prefix=HSA-NODEFAULT %s
|
||||
; RUN: llc < %s -mtriple=amdgcn -mcpu=kaveri | FileCheck -check-prefix=NOHSA-DEFAULT %s
|
||||
; RUN: llc < %s -mtriple=amdgcn -mcpu=kaveri -mattr=+flat-for-global | FileCheck -check-prefix=NOHSA-NODEFAULT %s
|
||||
|
||||
|
||||
; HSA-DEFAULT: flat_store_dword
|
||||
; HSA-NODEFAULT: buffer_store_dword
|
||||
; NOHSA-DEFAULT: buffer_store_dword
|
||||
; NOHSA-NODEFAULT: flat_store_dword
|
||||
define void @test(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
store i32 0, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
@ -1,8 +1,6 @@
|
||||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s
|
||||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global | FileCheck --check-prefix=HSA-CI %s
|
||||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA %s
|
||||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-flat-for-global | FileCheck --check-prefix=HSA-VI %s
|
||||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj | llvm-readobj -symbols -s -sd | FileCheck --check-prefix=ELF %s
|
||||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA-CI --check-prefix=HSA %s
|
||||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA-VI --check-prefix=HSA %s
|
||||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj | llvm-readobj -symbols -s -sd | FileCheck --check-prefix=ELF %s
|
||||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri | llvm-readobj -symbols -s -sd | FileCheck %s --check-prefix=ELF
|
||||
|
||||
; The SHT_NOTE section contains the output from the .hsa_code_object_*
|
||||
@ -49,8 +47,7 @@
|
||||
; HSA-CI: s_mov_b32 s[[HI:[0-9]]], 0x100f000
|
||||
; On VI+ we also need to set MTYPE = 2
|
||||
; HSA-VI: s_mov_b32 s[[HI:[0-9]]], 0x1100f000
|
||||
; Make sure we generate flat store for HSA
|
||||
; HSA: flat_store_dword v{{[0-9]+}}
|
||||
; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0
|
||||
|
||||
define void @simple(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=ALL %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa < %s -mattr=-flat-for-global | FileCheck -check-prefix=GCNHSA -check-prefix=CIHSA -check-prefix=ALL %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCNHSA -check-prefix=VIHSA -check-prefix=ALL %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefix=GCNHSA -check-prefix=CIHSA -check-prefix=ALL %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefix=GCNHSA -check-prefix=VIHSA -check-prefix=ALL %s
|
||||
|
||||
; FIXME: align on alloca seems to be ignored for private_segment_alignment
|
||||
|
||||
|
@ -3,7 +3,7 @@
|
||||
; CHECK-LABEL: {{^}}test_debug_value:
|
||||
; CHECK: s_load_dwordx2 s[4:5]
|
||||
; CHECK: DEBUG_VALUE: test_debug_value:globalptr_arg <- %SGPR4_SGPR5
|
||||
; CHECK: flat_store_dword
|
||||
; CHECK: buffer_store_dword
|
||||
; CHECK: s_endpgm
|
||||
define void @test_debug_value(i32 addrspace(1)* nocapture %globalptr_arg) #0 !dbg !4 {
|
||||
entry:
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=kaveri -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=kaveri -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; Check that when mubuf addr64 instruction is handled in moveToVALU
|
||||
; from the pointer, dead register writes are not emitted.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs -asm-verbose < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs -asm-verbose -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs -asm-verbose < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
declare i32 @llvm.SI.tid() nounwind readnone
|
||||
|
||||
|
@ -129,8 +129,7 @@ entry:
|
||||
|
||||
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}}
|
||||
; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6{{$}}
|
||||
; GCN-NOHSA: buffer_store_dword [[VVAL]]
|
||||
; HSA: flat_store_dword [[VVAL]]
|
||||
; GCN: buffer_store_dword [[VVAL]]
|
||||
|
||||
; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6
|
||||
; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
|
||||
@ -156,8 +155,7 @@ entry:
|
||||
; HSA: enable_sgpr_grid_workgroup_count_z = 0
|
||||
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3
|
||||
; GCN-HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s7
|
||||
; GCN-NOHSA: buffer_store_dword [[VVAL]]
|
||||
; HSA: flat_store_dword [[VVAL]]
|
||||
; GCN: buffer_store_dword [[VVAL]]
|
||||
|
||||
; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6
|
||||
; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
|
||||
@ -192,8 +190,7 @@ entry:
|
||||
|
||||
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
|
||||
; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s7{{$}}
|
||||
; GCN-NOHSA: buffer_store_dword [[VVAL]]
|
||||
; HSA: flat_store_dword [[VVAL]]
|
||||
; GCN: buffer_store_dword [[VVAL]]
|
||||
|
||||
; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6
|
||||
; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
|
||||
@ -214,8 +211,7 @@ entry:
|
||||
|
||||
; FUNC-LABEL: {{^}}tidig_x:
|
||||
; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0
|
||||
; GCN-NOHSA: buffer_store_dword v0
|
||||
; HSA: flat_store_dword v0
|
||||
; GCN: buffer_store_dword v0
|
||||
define void @tidig_x(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%0 = call i32 @llvm.r600.read.tidig.x() #0
|
||||
@ -230,8 +226,7 @@ entry:
|
||||
; FUNC-LABEL: {{^}}tidig_y:
|
||||
|
||||
; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 1
|
||||
; GCN-NOHSA: buffer_store_dword v1
|
||||
; HSA: flat_store_dword v1
|
||||
; GCN: buffer_store_dword v1
|
||||
define void @tidig_y(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%0 = call i32 @llvm.r600.read.tidig.y() #0
|
||||
@ -245,8 +240,7 @@ entry:
|
||||
|
||||
; FUNC-LABEL: {{^}}tidig_z:
|
||||
; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 2
|
||||
; GCN-NOHSA: buffer_store_dword v2
|
||||
; HSA: flat_store_dword v2
|
||||
; GCN: buffer_store_dword v2
|
||||
define void @tidig_z(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%0 = call i32 @llvm.r600.read.tidig.z() #0
|
||||
|
Loading…
Reference in New Issue
Block a user