1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

AMDGPU: Partially fix default device for HSA

There are a few different issues, mostly stemming from using
generation based checks for anything instead of subtarget
features. Stop adding flat-address-space as a feature for HSA, as it
should only be a device property. This was incorrectly allowing flat
instructions to select for SI.

Increase the default generation for HSA to avoid the encoding error
when emitting objects. This has some other side effects from various
checks which probably should be separate subtarget features (in the
cost model and for dealing with the DS offset folding issue).

Partial fix for bug 41070. It should probably be an error to try using
amdhsa without flat support.

llvm-svn: 356347
This commit is contained in:
Matt Arsenault 2019-03-17 21:31:35 +00:00
parent 1135cfa6a7
commit 73010faa28
7 changed files with 35 additions and 10 deletions

View File

@ -169,12 +169,14 @@ void AMDGPU::fillValidArchListR600(SmallVectorImpl<StringRef> &Values) {
}
AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
if (GPU == "generic")
return {7, 0, 0};
AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
if (AK == AMDGPU::GPUKind::GK_NONE)
if (AK == AMDGPU::GPUKind::GK_NONE) {
if (GPU == "generic-hsa")
return {7, 0, 0};
if (GPU == "generic")
return {6, 0, 0};
return {0, 0, 0};
}
switch (AK) {
case GK_GFX600: return {6, 0, 0};

View File

@ -80,7 +80,7 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
SmallString<256> FullFS("+promote-alloca,+dx10-clamp,+load-store-opt,");
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
FullFS += "+flat-address-space,+flat-for-global,+unaligned-buffer-access,+trap-handler,";
FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
// FIXME: I don't think think Evergreen has any useful support for
// denormals, but should be checked. Should we issue a warning somewhere
@ -155,7 +155,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
AMDGPUGenSubtargetInfo(TT, GPU, FS),
AMDGPUSubtarget(TT),
TargetTriple(TT),
Gen(SOUTHERN_ISLANDS),
Gen(TT.getOS() == Triple::AMDHSA ? SEA_ISLANDS : SOUTHERN_ISLANDS),
InstrItins(getInstrItineraryForCPU(GPU)),
LDSBankCount(0),
MaxPrivateElementSize(0),

View File

@ -306,8 +306,9 @@ static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
if (!GPU.empty())
return GPU;
// Need to default to a target with flat support for HSA.
if (TT.getArch() == Triple::amdgcn)
return "generic";
return TT.getOS() == Triple::AMDHSA ? "generic-hsa" : "generic";
return "r600";
}

View File

@ -12,6 +12,10 @@ def : ProcessorModel<"generic", NoSchedModel,
[FeatureWavefrontSize64]
>;
def : ProcessorModel<"generic-hsa", NoSchedModel,
[FeatureWavefrontSize64, FeatureFlatAddressSpace]
>;
//===------------------------------------------------------------===//
// GCN GFX6 (Southern Islands (SI)).
//===------------------------------------------------------------===//

View File

@ -1,7 +1,7 @@
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,CIFASTF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,CISLOWF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=tahiti -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SIFASTF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=verde -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SISLOWF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s
; RUN: opt -cost-model -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SIFASTF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s
; RUN: opt -cost-model -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=verde -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SISLOWF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -mattr=+fp32-denormals < %s | FileCheck -check-prefixes=ALL,FP32DENORMS,SLOWFP32DENORMS,NOFP16,NOFP16-FP32DENORM %s
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+fp32-denormals < %s | FileCheck -check-prefixes=ALL,FP32DENORMS,FASTFP32DENORMS,FP16 %s

View File

@ -0,0 +1,15 @@
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERROR %s
; RUN: not llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx600 -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERROR %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -o - %s | FileCheck -check-prefix=HSA-DEFAULT %s
; RUN: not llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx600 -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERROR %s
; Flat instructions should not select if the target device doesn't
; support them. The default device should be able to select for HSA.
; ERROR: LLVM ERROR: Cannot select: t{{[0-9]+}}: i32,ch = load<(volatile load 4 from %ir.flat.ptr.load)>
; HSA-DEFAULT: flat_load_dword
define amdgpu_kernel void @load_flat_i32(i32* %flat.ptr) {
%load = load volatile i32, i32* %flat.ptr, align 4
ret void
}

View File

@ -531,7 +531,10 @@ define amdgpu_kernel void @kern_lds_ptr(i32 addrspace(3)* %lds) #0 {
define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #2 {
; HSA-LABEL: @kern_lds_ptr_si(
; HSA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4
; HSA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_SI_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)*
; HSA-NEXT: [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
; HSA-NEXT: store i32 0, i32 addrspace(3)* [[LDS_LOAD]], align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_lds_ptr_si(