1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

AMDGPU: Add sram-ecc feature

Differential Revision: https://reviews.llvm.org/D53222

llvm-svn: 346177
This commit is contained in:
Konstantin Zhuravlyov 2018-11-05 22:44:19 +00:00
parent 2635bd856c
commit 085d608821
14 changed files with 181 additions and 43 deletions

View File

@ -207,6 +207,8 @@ names from both the *Processor* and *Alternative Processor* can be used.
names.
``gfx906`` ``amdgcn`` dGPU - xnack *TBA*
[off]
sram-ecc
[on]
.. TODO
Add product
names.
@ -246,24 +248,26 @@ For example:
.. table:: AMDGPU Target Features
:name: amdgpu-target-feature-table
============== ==================================================
Target Feature Description
============== ==================================================
-m[no-]xnack Enable/disable generating code that has
memory clauses that are compatible with
having XNACK replay enabled.
=============== ==================================================
Target Feature Description
=============== ==================================================
-m[no-]xnack Enable/disable generating code that has
memory clauses that are compatible with
having XNACK replay enabled.
This is used for demand paging and page
migration. If XNACK replay is enabled in
the device, then if a page fault occurs
the code may execute incorrectly if the
``xnack`` feature is not enabled. Executing
code that has the feature enabled on a
device that does not have XNACK replay
enabled will execute correctly, but may
be less performant than code with the
feature disabled.
============== ==================================================
This is used for demand paging and page
migration. If XNACK replay is enabled in
the device, then if a page fault occurs
the code may execute incorrectly if the
``xnack`` feature is not enabled. Executing
code that has the feature enabled on a
device that does not have XNACK replay
enabled will execute correctly, but may
be less performant than code with the
feature disabled.
-m[no-]sram-ecc Enable/disable generating code that assumes SRAM
ECC is enabled/disabled.
=============== ==================================================
.. _amdgpu-address-spaces:
@ -549,6 +553,17 @@ The AMDGPU backend uses the following ELF header:
be 0.
See
:ref:`amdgpu-target-features`.
``EF_AMDGPU_SRAM_ECC`` 0x00000200 Indicates if the ``sram-ecc``
target feature is
enabled for all code
contained in the code object.
If the processor
does not support the
``sram-ecc`` target
feature then it must
be 0.
See
:ref:`amdgpu-target-features`.
================================= ========== =============================
.. table:: AMDGPU ``EF_AMDGPU_MACH`` Values

View File

@ -711,9 +711,12 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX909,
// Indicates if the xnack target feature is enabled for all code contained in
// the object.
// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
EF_AMDGPU_XNACK = 0x100,
// Indicates if the "sram-ecc" target feature is enabled for all code
// contained in the object.
EF_AMDGPU_SRAM_ECC = 0x200,
};
// ELF Relocation types for AMDGPU

View File

@ -404,6 +404,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX906, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH);
BCase(EF_AMDGPU_XNACK);
BCase(EF_AMDGPU_SRAM_ECC);
break;
case ELF::EM_X86_64:
break;

View File

@ -266,13 +266,10 @@ def FeatureDLInsts : SubtargetFeature<"dl-insts",
"Has deep learning instructions"
>;
def FeatureD16PreservesUnusedBits : SubtargetFeature<
"d16-preserves-unused-bits",
"D16PreservesUnusedBits",
def FeatureSRAMECC : SubtargetFeature<"sram-ecc",
"EnableSRAMECC",
"true",
"If present, then instructions defined by HasD16LoadStore predicate preserve "
"unused bits. Otherwise instructions defined by HasD16LoadStore predicate "
"zero unused bits."
"Enable SRAM ECC"
>;
//===------------------------------------------------------------===//
@ -524,35 +521,32 @@ def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0,
def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,
[FeatureGFX9,
FeatureMadMixInsts,
FeatureLDSBankCount32,
FeatureD16PreservesUnusedBits]>;
FeatureLDSBankCount32]>;
def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2,
[FeatureGFX9,
FeatureMadMixInsts,
FeatureLDSBankCount32,
FeatureXNACK,
FeatureD16PreservesUnusedBits]>;
FeatureXNACK]>;
def FeatureISAVersion9_0_4 : SubtargetFeatureISAVersion <9,0,4,
[FeatureGFX9,
FeatureLDSBankCount32,
FeatureFmaMixInsts,
FeatureD16PreservesUnusedBits]>;
FeatureFmaMixInsts]>;
def FeatureISAVersion9_0_6 : SubtargetFeatureISAVersion <9,0,6,
[FeatureGFX9,
HalfRate64Ops,
FeatureFmaMixInsts,
FeatureLDSBankCount32,
FeatureDLInsts]>;
FeatureDLInsts,
FeatureSRAMECC]>;
def FeatureISAVersion9_0_9 : SubtargetFeatureISAVersion <9,0,9,
[FeatureGFX9,
FeatureMadMixInsts,
FeatureLDSBankCount32,
FeatureXNACK,
FeatureD16PreservesUnusedBits]>;
FeatureXNACK]>;
//===----------------------------------------------------------------------===//
// Debugger related subtarget features.
@ -684,8 +678,9 @@ def HasUnpackedD16VMem : Predicate<"Subtarget->hasUnpackedD16VMem()">,
def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">,
AssemblerPredicate<"!FeatureUnpackedD16VMem">;
def D16PreservesUnusedBits : Predicate<"Subtarget->d16PreservesUnusedBits()">,
AssemblerPredicate<"FeatureD16PreservesUnusedBits">;
def D16PreservesUnusedBits :
Predicate<"Subtarget->hasD16LoadStore() && !Subtarget->isSRAMECCEnabled()">,
AssemblerPredicate<"FeatureGFX9Insts,!FeatureSRAMECC">;
def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">;
def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">;

View File

@ -198,7 +198,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasDPP(false),
HasR128A16(false),
HasDLInsts(false),
D16PreservesUnusedBits(false),
EnableSRAMECC(false),
FlatAddressSpace(false),
FlatInstOffsets(false),
FlatGlobalInsts(false),

View File

@ -353,7 +353,7 @@ protected:
bool HasDPP;
bool HasR128A16;
bool HasDLInsts;
bool D16PreservesUnusedBits;
bool EnableSRAMECC;
bool FlatAddressSpace;
bool FlatInstOffsets;
bool FlatGlobalInsts;
@ -679,8 +679,8 @@ public:
return HasDLInsts;
}
bool d16PreservesUnusedBits() const {
return D16PreservesUnusedBits;
/// \returns the value of EnableSRAMECC, the subtarget flag that the
/// "sram-ecc" feature sets to true.
bool isSRAMECCEnabled() const {
return EnableSRAMECC;
}
// Scratch is allocated in 256 dword per wave blocks for the entire

View File

@ -347,6 +347,10 @@ AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(
if (AMDGPU::hasXNACK(STI))
EFlags |= ELF::EF_AMDGPU_XNACK;
EFlags &= ~ELF::EF_AMDGPU_SRAM_ECC;
if (AMDGPU::hasSRAMECC(STI))
EFlags |= ELF::EF_AMDGPU_SRAM_ECC;
MCA.setELFHeaderEFlags(EFlags);
}

View File

@ -152,6 +152,8 @@ void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
if (hasXNACK(*STI))
Stream << "+xnack";
if (hasSRAMECC(*STI))
Stream << "+sram-ecc";
Stream.flush();
}
@ -593,6 +595,10 @@ bool hasXNACK(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}
/// Reports whether the "sram-ecc" feature bit (AMDGPU::FeatureSRAMECC) is set
/// in the feature bits of \p STI.
bool hasSRAMECC(const MCSubtargetInfo &STI) {
  const auto &EnabledFeatures = STI.getFeatureBits();
  return EnabledFeatures[AMDGPU::FeatureSRAMECC];
}
/// Reports whether the AMDGPU::FeatureMIMG_R128 bit is set in the feature
/// bits of \p STI.
bool hasMIMG_R128(const MCSubtargetInfo &STI) {
  const auto &EnabledFeatures = STI.getFeatureBits();
  return EnabledFeatures[AMDGPU::FeatureMIMG_R128];
}

View File

@ -342,6 +342,7 @@ inline bool isKernel(CallingConv::ID CC) {
}
bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);

View File

@ -34,6 +34,12 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+code-object-v3,+xnack < %s | FileCheck --check-prefixes=XNACK-GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=+code-object-v3,-xnack < %s | FileCheck --check-prefixes=NO-XNACK-GFX902 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+code-object-v3,+sram-ecc < %s | FileCheck --check-prefixes=SRAM-ECC-GFX904 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+code-object-v3,-sram-ecc < %s | FileCheck --check-prefixes=NO-SRAM-ECC-GFX906 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+code-object-v3,+sram-ecc,+xnack < %s | FileCheck --check-prefixes=SRAM-ECC-XNACK-GFX904 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+code-object-v3,+xnack < %s | FileCheck --check-prefixes=XNACK-GFX906 %s
; GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600"
; GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601"
; GFX700: .amdgcn_target "amdgcn-amd-amdhsa--gfx700"
@ -48,10 +54,16 @@
; GFX900: .amdgcn_target "amdgcn-amd-amdhsa--gfx900"
; GFX902: .amdgcn_target "amdgcn-amd-amdhsa--gfx902+xnack"
; GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904"
; GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906"
; GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+sram-ecc"
; XNACK-GFX900: .amdgcn_target "amdgcn-amd-amdhsa--gfx900+xnack"
; NO-XNACK-GFX902: .amdgcn_target "amdgcn-amd-amdhsa--gfx902
; NO-XNACK-GFX902: .amdgcn_target "amdgcn-amd-amdhsa--gfx902"
; SRAM-ECC-GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+sram-ecc"
; NO-SRAM-ECC-GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906"
; SRAM-ECC-XNACK-GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack+sram-ecc"
; XNACK-GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack+sram-ecc"
define amdgpu_kernel void @directive_amdgcn_target() {
ret void

View File

@ -86,6 +86,7 @@
; GFX902-NEXT: EF_AMDGPU_XNACK (0x100)
; GFX904: EF_AMDGPU_MACH_AMDGCN_GFX904 (0x2E)
; GFX906: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
; GFX906-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
; GFX909: EF_AMDGPU_MACH_AMDGCN_GFX909 (0x31)
; ALL: ]

View File

@ -0,0 +1,38 @@
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx902 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=NO-SRAM-ECC-GFX902 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx902 -mattr=-sram-ecc < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=NO-SRAM-ECC-GFX902 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx902 -mattr=+sram-ecc < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=SRAM-ECC-GFX902 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=SRAM-ECC-GFX906 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 -mattr=-sram-ecc < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=NO-SRAM-ECC-GFX906 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 -mattr=+sram-ecc < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=SRAM-ECC-GFX906 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 -mattr=+sram-ecc,+xnack < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=SRAM-ECC-XNACK-GFX906 %s
; NO-SRAM-ECC-GFX902: Flags [
; NO-SRAM-ECC-GFX902-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX902 (0x2D)
; NO-SRAM-ECC-GFX902-NEXT: EF_AMDGPU_XNACK (0x100)
; NO-SRAM-ECC-GFX902-NEXT: ]
; SRAM-ECC-GFX902: Flags [
; SRAM-ECC-GFX902-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX902 (0x2D)
; SRAM-ECC-GFX902-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
; SRAM-ECC-GFX902-NEXT: EF_AMDGPU_XNACK (0x100)
; SRAM-ECC-GFX902-NEXT: ]
; NO-SRAM-ECC-GFX906: Flags [
; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
; NO-SRAM-ECC-GFX906-NEXT: ]
; SRAM-ECC-GFX906: Flags [
; SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
; SRAM-ECC-GFX906-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
; SRAM-ECC-GFX906-NEXT: ]
; SRAM-ECC-XNACK-GFX906: Flags [
; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
; SRAM-ECC-XNACK-GFX906-NEXT: EF_AMDGPU_XNACK (0x100)
; SRAM-ECC-XNACK-GFX906-NEXT: ]
define amdgpu_kernel void @elf_header() {
ret void
}

View File

@ -0,0 +1,61 @@
# RUN: yaml2obj -docnum=1 %s > %t.o.1
# RUN: llvm-readobj -s -file-headers %t.o.1 | FileCheck --check-prefixes=ELF-ALL,ELF-SRAM-ECC-NONE %s
# RUN: obj2yaml %t.o.1 | FileCheck --check-prefixes=YAML-SRAM-ECC-NONE %s
# RUN: yaml2obj -docnum=2 %s > %t.o.2
# RUN: llvm-readobj -s -file-headers %t.o.2 | FileCheck --check-prefixes=ELF-ALL,ELF-SRAM-ECC-GFX900 %s
# RUN: obj2yaml %t.o.2 | FileCheck --check-prefixes=YAML-SRAM-ECC-GFX900 %s
# RUN: yaml2obj -docnum=3 %s > %t.o.3
# RUN: llvm-readobj -s -file-headers %t.o.3 | FileCheck --check-prefixes=ELF-ALL,ELF-SRAM-ECC-XNACK-GFX900 %s
# RUN: obj2yaml %t.o.3 | FileCheck --check-prefixes=YAML-SRAM-ECC-XNACK-GFX900 %s
# ELF-SRAM-ECC-NONE: Flags [
# ELF-SRAM-ECC-NONE-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
# ELF-SRAM-ECC-NONE-NEXT: ]
# ELF-SRAM-ECC-GFX900: Flags [
# ELF-SRAM-ECC-GFX900-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
# ELF-SRAM-ECC-GFX900-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
# ELF-SRAM-ECC-GFX900-NEXT: ]
# ELF-SRAM-ECC-XNACK-GFX900: Flags [
# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_SRAM_ECC (0x200)
# ELF-SRAM-ECC-XNACK-GFX900-NEXT: EF_AMDGPU_XNACK (0x100)
# ELF-SRAM-ECC-XNACK-GFX900-NEXT: ]
# YAML-SRAM-ECC-NONE: Flags: [ EF_AMDGPU_MACH_NONE, EF_AMDGPU_SRAM_ECC ]
# YAML-SRAM-ECC-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_SRAM_ECC ]
# YAML-SRAM-ECC-XNACK-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_XNACK, EF_AMDGPU_SRAM_ECC ]
# Doc1
--- !ELF
FileHeader:
Class: ELFCLASS64
Data: ELFDATA2LSB
OSABI: ELFOSABI_NONE
Type: ET_REL
Machine: EM_AMDGPU
Flags: [ EF_AMDGPU_SRAM_ECC ]
...
# Doc2
--- !ELF
FileHeader:
Class: ELFCLASS64
Data: ELFDATA2LSB
OSABI: ELFOSABI_NONE
Type: ET_REL
Machine: EM_AMDGPU
Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_SRAM_ECC ]
...
# Doc3
--- !ELF
FileHeader:
Class: ELFCLASS64
Data: ELFDATA2LSB
OSABI: ELFOSABI_NONE
Type: ET_REL
Machine: EM_AMDGPU
Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_XNACK, EF_AMDGPU_SRAM_ECC ]
...

View File

@ -1355,7 +1355,8 @@ static const EnumEntry<unsigned> ElfHeaderAMDGPUFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX904),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX906),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK)
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_SRAM_ECC)
};
static const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {