From 9317a1de75ebc3f31d0288a98c82c205332284bf Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 27 Jan 2017 17:42:26 +0000 Subject: [PATCH] AMDGPU: Enable FeatureFlatForGlobal on Volcanic Islands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Accomplishes what r292982 was supposed to, which ended up only really making the necessary test changes. This should be applied to the 4.0 branch. Patch by Vedran Miletić llvm-svn: 293310 --- lib/Target/AMDGPU/AMDGPU.td | 11 +--- lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 8 ++- lib/Target/AMDGPU/AMDGPUSubtarget.h | 1 - test/CodeGen/AMDGPU/ci-use-flat-for-global.ll | 26 --------- .../flat-for-global-subtarget-feature.ll | 54 +++++++++++++++++++ 5 files changed, 63 insertions(+), 37 deletions(-) delete mode 100644 test/CodeGen/AMDGPU/ci-use-flat-for-global.ll create mode 100644 test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index 371042d8b4c..4f3c6df3a22 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -305,12 +305,6 @@ def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler", "Enable SI Machine Scheduler" >; -def FeatureNoAddr64 : SubtargetFeature<"mubuf-no-addr64", - "NoAddr64", - "true", - "MUBUF instructions have addr64 bit" ->; - // Unless +-flat-for-global is specified, turn on FlatForGlobal for // all OS-es on VI and newer hardware to avoid assertion failures due // to missing ADDR64 variants of MUBUF instructions. @@ -320,8 +314,7 @@ def FeatureNoAddr64 : SubtargetFeature<"mubuf-no-addr64", def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global", "FlatForGlobal", "true", - "Force to generate flat instruction for global", - [FeatureNoAddr64] + "Force to generate flat instruction for global" >; // Dummy feature used to disable assembler instructions. 
@@ -374,7 +367,7 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS", FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel, FeatureScalarStores, FeatureInv2PiInlineImm, FeatureSDWA, - FeatureDPP, FeatureNoAddr64 + FeatureDPP ] >; diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index f19382ea0a9..c85d2159bdb 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -49,6 +49,13 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, ParseSubtargetFeatures(GPU, FullFS); + // Unless +flat-for-global or -flat-for-global is specified, turn on + // FlatForGlobal for all OS-es on VI and newer hardware to avoid assertion + // failures due to missing ADDR64 variants of MUBUF instructions. + if (!hasAddr64() && !FS.contains("flat-for-global")) { + FlatForGlobal = true; + } + // FIXME: I don't think think Evergreen has any useful support for // denormals, but should be checked. Should we issue a warning somewhere // if someone tries to enable these? 
@@ -82,7 +89,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, FP64FP16Denormals(false), FPExceptions(false), FlatForGlobal(false), - NoAddr64(false), UnalignedScratchAccess(false), UnalignedBufferAccess(false), diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 37a21c25a50..bbe69003944 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -85,7 +85,6 @@ protected: bool FP64FP16Denormals; bool FPExceptions; bool FlatForGlobal; - bool NoAddr64; bool UnalignedScratchAccess; bool UnalignedBufferAccess; bool EnableXNACK; diff --git a/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll b/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll deleted file mode 100644 index 8227d4c873e..00000000000 --- a/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll +++ /dev/null @@ -1,26 +0,0 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s -; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s -; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s - - -; There are no stack objects even though flat is used by default, so -; flat_scratch_init should be disabled. 
- -; ALL-LABEL: {{^}}test: -; HSA: .amd_kernel_code_t -; HSA: enable_sgpr_flat_scratch_init = 0 -; HSA: .end_amd_kernel_code_t - -; ALL-NOT: flat_scr - -; HSA-DEFAULT: flat_store_dword -; HSA-NODEFAULT: buffer_store_dword - -; NOHSA-DEFAULT: buffer_store_dword -; NOHSA-NODEFAULT: flat_store_dword -define void @test(i32 addrspace(1)* %out) { -entry: - store i32 0, i32 addrspace(1)* %out - ret void -} diff --git a/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll b/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll new file mode 100644 index 00000000000..df9ba00c697 --- /dev/null +++ b/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll @@ -0,0 +1,54 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s +; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=HSA-NOADDR64 -check-prefix=ALL %s +; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s +; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s +; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=NOHSA-NOADDR64 -check-prefix=ALL %s + + +; There are no stack objects even though flat is used by default, so +; flat_scratch_init should be disabled. 
+ +; ALL-LABEL: {{^}}test: +; HSA: .amd_kernel_code_t +; HSA: enable_sgpr_flat_scratch_init = 0 +; HSA: .end_amd_kernel_code_t + +; ALL-NOT: flat_scr + +; HSA-DEFAULT: flat_store_dword +; HSA-NODEFAULT: buffer_store_dword +; HSA-NOADDR64: flat_store_dword + +; NOHSA-DEFAULT: buffer_store_dword +; NOHSA-NODEFAULT: flat_store_dword +; NOHSA-NOADDR64: flat_store_dword +define void @test(i32 addrspace(1)* %out) { +entry: + store i32 0, i32 addrspace(1)* %out + ret void +} + +; HSA-DEFAULT: flat_store_dword +; HSA-NODEFAULT: buffer_store_dword +; HSA-NOADDR64: flat_store_dword + +; NOHSA-DEFAULT: buffer_store_dword +; NOHSA-NODEFAULT: flat_store_dword +; NOHSA-NOADDR64: flat_store_dword +define void @test_addr64(i32 addrspace(1)* %out) { +entry: + %out.addr = alloca i32 addrspace(1)*, align 4 + + store i32 addrspace(1)* %out, i32 addrspace(1)** %out.addr, align 4 + %ld0 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4 + + %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %ld0, i32 0 + store i32 1, i32 addrspace(1)* %arrayidx, align 4 + + %ld1 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4 + %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %ld1, i32 1 + store i32 2, i32 addrspace(1)* %arrayidx1, align 4 + + ret void +}