1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

AMDGPU: Enable FeatureFlatForGlobal on Volcanic Islands

Accomplishes what r292982 was supposed to, which ended up
only really making the necessary test changes.

This should be applied to the 4.0 branch.

Patch by Vedran Miletić <vedran@miletic.net>

llvm-svn: 293310
This commit is contained in:
Matt Arsenault 2017-01-27 17:42:26 +00:00
parent 401964cda6
commit 9317a1de75
5 changed files with 63 additions and 37 deletions

View File

@ -305,12 +305,6 @@ def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
"Enable SI Machine Scheduler"
>;
// Subtarget feature selected by the "mubuf-no-addr64" attribute string; it is
// tied to the NoAddr64 subtarget field with the value "true".
// NOTE(review): the help text below says the instructions "have" the addr64
// bit, which reads opposite to the feature and field names — confirm wording.
def FeatureNoAddr64 : SubtargetFeature<"mubuf-no-addr64",
"NoAddr64",
"true",
"MUBUF instructions have addr64 bit"
>;
// Unless +-flat-for-global is specified, turn on FlatForGlobal for
// all OS-es on VI and newer hardware to avoid assertion failures due
// to missing ADDR64 variants of MUBUF instructions.
@ -320,8 +314,7 @@ def FeatureNoAddr64 : SubtargetFeature<"mubuf-no-addr64",
def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
"FlatForGlobal",
"true",
"Force to generate flat instruction for global",
[FeatureNoAddr64]
"Force to generate flat instruction for global"
>;
// Dummy feature used to disable assembler instructions.
@ -374,7 +367,7 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
FeatureScalarStores, FeatureInv2PiInlineImm, FeatureSDWA,
FeatureDPP, FeatureNoAddr64
FeatureDPP
]
>;

View File

@ -49,6 +49,13 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
ParseSubtargetFeatures(GPU, FullFS);
// Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
// on VI and newer hardware to avoid assertion failures due to missing ADDR64
// variants of MUBUF instructions.
if (!hasAddr64() && !FS.contains("flat-for-global")) {
FlatForGlobal = true;
}
// FIXME: I don't think Evergreen has any useful support for
// denormals, but should be checked. Should we issue a warning somewhere
// if someone tries to enable these?
@ -82,7 +89,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
FP64FP16Denormals(false),
FPExceptions(false),
FlatForGlobal(false),
NoAddr64(false),
UnalignedScratchAccess(false),
UnalignedBufferAccess(false),

View File

@ -85,7 +85,6 @@ protected:
bool FP64FP16Denormals;
bool FPExceptions;
bool FlatForGlobal;
bool NoAddr64;
bool UnalignedScratchAccess;
bool UnalignedBufferAccess;
bool EnableXNACK;

View File

@ -1,26 +0,0 @@
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s
; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s
; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s
; There are no stack objects even though flat is used by default, so
; flat_scratch_init should be disabled.
; ALL-LABEL: {{^}}test:
; HSA: .amd_kernel_code_t
; HSA: enable_sgpr_flat_scratch_init = 0
; HSA: .end_amd_kernel_code_t
; ALL-NOT: flat_scr
; HSA-DEFAULT: flat_store_dword
; HSA-NODEFAULT: buffer_store_dword
; NOHSA-DEFAULT: buffer_store_dword
; NOHSA-NODEFAULT: flat_store_dword
; Minimal function under test: a single store of i32 0 through the
; addrspace(1) pointer %out. The check lines above pin which store
; instruction (flat or buffer) each RUN configuration is expected to emit.
define void @test(i32 addrspace(1)* %out) {
entry:
store i32 0, i32 addrspace(1)* %out
ret void
}

View File

@ -0,0 +1,54 @@
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s
; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=HSA-NOADDR64 -check-prefix=ALL %s
; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s
; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s
; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=NOHSA-NOADDR64 -check-prefix=ALL %s
; There are no stack objects even though flat is used by default, so
; flat_scratch_init should be disabled.
; ALL-LABEL: {{^}}test:
; HSA: .amd_kernel_code_t
; HSA: enable_sgpr_flat_scratch_init = 0
; HSA: .end_amd_kernel_code_t
; ALL-NOT: flat_scr
; HSA-DEFAULT: flat_store_dword
; HSA-NODEFAULT: buffer_store_dword
; HSA-NOADDR64: flat_store_dword
; NOHSA-DEFAULT: buffer_store_dword
; NOHSA-NODEFAULT: flat_store_dword
; NOHSA-NOADDR64: flat_store_dword
; Minimal function under test: a single store of i32 0 through the
; addrspace(1) pointer %out. The check lines above pin which store
; instruction (flat or buffer) each RUN configuration is expected to emit.
define void @test(i32 addrspace(1)* %out) {
entry:
store i32 0, i32 addrspace(1)* %out
ret void
}
; HSA-DEFAULT: flat_store_dword
; HSA-NODEFAULT: buffer_store_dword
; HSA-NOADDR64: flat_store_dword
; NOHSA-DEFAULT: buffer_store_dword
; NOHSA-NODEFAULT: flat_store_dword
; NOHSA-NOADDR64: flat_store_dword
; Variant of @test in which the pointer takes a round trip through a stack
; slot: %out is saved to an alloca, reloaded twice, and the two reloads are
; used to store 1 at element 0 and 2 at element 1 of the addrspace(1) buffer.
; NOTE(review): presumably this exists to exercise the addr64 addressing path
; named in the function — confirm against the commit that added it.
define void @test_addr64(i32 addrspace(1)* %out) {
entry:
; Stack slot holding the incoming pointer.
%out.addr = alloca i32 addrspace(1)*, align 4
store i32 addrspace(1)* %out, i32 addrspace(1)** %out.addr, align 4
; First reload: store 1 at index 0.
%ld0 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %ld0, i32 0
store i32 1, i32 addrspace(1)* %arrayidx, align 4
; Second reload: store 2 at index 1.
%ld1 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %ld1, i32 1
store i32 2, i32 addrspace(1)* %arrayidx1, align 4
ret void
}