mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
Reland "[AMDGPU] Add gfx1013 target"
This reverts commit 211e584fa2a4c032e4d573e7cdbffd622aad0a8f. Fixed a use-after-free error that caused the sanitizers to fail.
This commit is contained in:
parent
81cfcb1033
commit
3a664dba6e
@ -386,6 +386,13 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following
|
||||
``gfx1012`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 5500
|
||||
- wavefrontsize64 flat - *pal-amdhsa* - Radeon RX 5500 XT
|
||||
- xnack scratch - *pal-amdpal*
|
||||
``gfx1013`` ``amdgcn`` APU - cumode - Absolute - *rocm-amdhsa* *TBA*
|
||||
- wavefrontsize64 flat - *pal-amdhsa*
|
||||
- xnack scratch - *pal-amdpal* .. TODO::
|
||||
|
||||
Add product
|
||||
names.
|
||||
|
||||
**GCN GFX10 (RDNA 2)** [AMD-GCN-GFX10-RDNA2]_
|
||||
-----------------------------------------------------------------------------------------------------------------------
|
||||
``gfx1030`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 6800
|
||||
@ -1149,6 +1156,7 @@ The AMDGPU backend uses the following ELF header:
|
||||
``EF_AMDGPU_MACH_AMDGCN_GFX90A`` 0x03f ``gfx90a``
|
||||
*reserved* 0x040 Reserved.
|
||||
*reserved* 0x041 Reserved.
|
||||
``EF_AMDGPU_MACH_AMDGCN_GFX1013`` 0x042 ``gfx1013``
|
||||
==================================== ========== =============================
|
||||
|
||||
Sections
|
||||
|
@ -742,10 +742,11 @@ enum : unsigned {
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX90A = 0x03f,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X40 = 0x040,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X41 = 0x041,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1013 = 0x042,
|
||||
|
||||
// First/last AMDGCN-based processors.
|
||||
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
|
||||
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX90A,
|
||||
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1013,
|
||||
|
||||
// Indicates if the "xnack" target feature is enabled for all code contained
|
||||
// in the object.
|
||||
|
@ -89,6 +89,7 @@ enum GPUKind : uint32_t {
|
||||
GK_GFX1010 = 71,
|
||||
GK_GFX1011 = 72,
|
||||
GK_GFX1012 = 73,
|
||||
GK_GFX1013 = 74,
|
||||
GK_GFX1030 = 75,
|
||||
GK_GFX1031 = 76,
|
||||
GK_GFX1032 = 77,
|
||||
|
@ -469,6 +469,8 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
|
||||
return "gfx1011";
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012:
|
||||
return "gfx1012";
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013:
|
||||
return "gfx1013";
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030:
|
||||
return "gfx1030";
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031:
|
||||
|
@ -549,6 +549,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1013, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1030, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1031, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1032, EF_AMDGPU_MACH);
|
||||
|
@ -109,6 +109,7 @@ constexpr GPUInfo AMDGCNGPUs[] = {
|
||||
{{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
|
||||
{{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
|
||||
{{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
|
||||
{{"gfx1013"}, {"gfx1013"}, GK_GFX1013, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
|
||||
{{"gfx1030"}, {"gfx1030"}, GK_GFX1030, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
|
||||
{{"gfx1031"}, {"gfx1031"}, GK_GFX1031, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
|
||||
{{"gfx1032"}, {"gfx1032"}, GK_GFX1032, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
|
||||
@ -220,6 +221,7 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
|
||||
case GK_GFX1010: return {10, 1, 0};
|
||||
case GK_GFX1011: return {10, 1, 1};
|
||||
case GK_GFX1012: return {10, 1, 2};
|
||||
case GK_GFX1013: return {10, 1, 3};
|
||||
case GK_GFX1030: return {10, 3, 0};
|
||||
case GK_GFX1031: return {10, 3, 1};
|
||||
case GK_GFX1032: return {10, 3, 2};
|
||||
|
@ -465,6 +465,12 @@ def FeatureExtendedImageInsts : SubtargetFeature<"extended-image-insts",
|
||||
"Support mips != 0, lod != 0, gather4, and get_lod"
|
||||
>;
|
||||
|
||||
def FeatureGFX10_AEncoding : SubtargetFeature<"gfx10_a-encoding",
|
||||
"GFX10_AEncoding",
|
||||
"true",
|
||||
"Has BVH ray tracing instructions"
|
||||
>;
|
||||
|
||||
def FeatureGFX10_BEncoding : SubtargetFeature<"gfx10_b-encoding",
|
||||
"GFX10_BEncoding",
|
||||
"true",
|
||||
@ -1077,8 +1083,26 @@ def FeatureISAVersion10_1_2 : FeatureSet<
|
||||
FeatureLdsMisalignedBug,
|
||||
FeatureSupportsXNACK])>;
|
||||
|
||||
def FeatureISAVersion10_1_3 : FeatureSet<
|
||||
!listconcat(FeatureGroup.GFX10_1_Bugs,
|
||||
[FeatureGFX10,
|
||||
FeatureGFX10_AEncoding,
|
||||
FeatureLDSBankCount32,
|
||||
FeatureDLInsts,
|
||||
FeatureNSAEncoding,
|
||||
FeatureWavefrontSize32,
|
||||
FeatureScalarStores,
|
||||
FeatureScalarAtomics,
|
||||
FeatureScalarFlatScratchInsts,
|
||||
FeatureGetWaveIdInst,
|
||||
FeatureMadMacF32Insts,
|
||||
FeatureDsSrc2Insts,
|
||||
FeatureLdsMisalignedBug,
|
||||
FeatureSupportsXNACK])>;
|
||||
|
||||
def FeatureISAVersion10_3_0 : FeatureSet<
|
||||
[FeatureGFX10,
|
||||
FeatureGFX10_AEncoding,
|
||||
FeatureGFX10_BEncoding,
|
||||
FeatureGFX10_3Insts,
|
||||
FeatureLDSBankCount32,
|
||||
@ -1291,6 +1315,9 @@ def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">,
|
||||
def HasFlatScratchSTMode : Predicate<"Subtarget->hasFlatScratchSTMode()">,
|
||||
AssemblerPredicate<(any_of FeatureGFX10_3Insts)>;
|
||||
|
||||
def HasGFX10_AEncoding : Predicate<"Subtarget->hasGFX10_AEncoding()">,
|
||||
AssemblerPredicate<(all_of FeatureGFX10_AEncoding)>;
|
||||
|
||||
def HasGFX10_BEncoding : Predicate<"Subtarget->hasGFX10_BEncoding()">,
|
||||
AssemblerPredicate<(all_of FeatureGFX10_BEncoding)>;
|
||||
|
||||
|
@ -4686,6 +4686,14 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
|
||||
Register RayInvDir = MI.getOperand(6).getReg();
|
||||
Register TDescr = MI.getOperand(7).getReg();
|
||||
|
||||
if (!ST.hasGFX10_AEncoding()) {
|
||||
DiagnosticInfoUnsupported BadIntrin(B.getMF().getFunction(),
|
||||
"intrinsic not supported on subtarget",
|
||||
MI.getDebugLoc());
|
||||
B.getMF().getFunction().getContext().diagnose(BadIntrin);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool IsA16 = MRI.getType(RayDir).getElementType().getSizeInBits() == 16;
|
||||
bool Is64 = MRI.getType(NodePtr).getSizeInBits() == 64;
|
||||
unsigned Opcode = IsA16 ? Is64 ? AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa
|
||||
|
@ -262,6 +262,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
HasGFX10A16(false),
|
||||
HasG16(false),
|
||||
HasNSAEncoding(false),
|
||||
GFX10_AEncoding(false),
|
||||
GFX10_BEncoding(false),
|
||||
HasDLInsts(false),
|
||||
HasDot1Insts(false),
|
||||
|
@ -208,6 +208,10 @@ def : ProcessorModel<"gfx1012", GFX10SpeedModel,
|
||||
FeatureISAVersion10_1_2.Features
|
||||
>;
|
||||
|
||||
def : ProcessorModel<"gfx1013", GFX10SpeedModel,
|
||||
FeatureISAVersion10_1_3.Features
|
||||
>;
|
||||
|
||||
def : ProcessorModel<"gfx1030", GFX10SpeedModel,
|
||||
FeatureISAVersion10_3_0.Features
|
||||
>;
|
||||
|
@ -136,6 +136,7 @@ protected:
|
||||
bool HasGFX10A16;
|
||||
bool HasG16;
|
||||
bool HasNSAEncoding;
|
||||
bool GFX10_AEncoding;
|
||||
bool GFX10_BEncoding;
|
||||
bool HasDLInsts;
|
||||
bool HasDot1Insts;
|
||||
@ -872,6 +873,10 @@ public:
|
||||
|
||||
bool hasNSAEncoding() const { return HasNSAEncoding; }
|
||||
|
||||
bool hasGFX10_AEncoding() const {
|
||||
return GFX10_AEncoding;
|
||||
}
|
||||
|
||||
bool hasGFX10_BEncoding() const {
|
||||
return GFX10_BEncoding;
|
||||
}
|
||||
|
@ -105,6 +105,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013: AK = GK_GFX1013; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: AK = GK_GFX1030; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031: AK = GK_GFX1031; break;
|
||||
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032: AK = GK_GFX1032; break;
|
||||
@ -166,6 +167,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
|
||||
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
|
||||
case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
|
||||
case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
|
||||
case GK_GFX1013: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013;
|
||||
case GK_GFX1030: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030;
|
||||
case GK_GFX1031: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031;
|
||||
case GK_GFX1032: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032;
|
||||
|
@ -886,8 +886,8 @@ class MIMG_IntersectRay_nsa_gfx10<mimgopc op, string opcode, int num_addrs, bit
|
||||
|
||||
multiclass MIMG_IntersectRay<mimgopc op, string opcode, int num_addrs, bit A16> {
|
||||
def "" : MIMGBaseOpcode;
|
||||
let SubtargetPredicate = HasGFX10_BEncoding,
|
||||
AssemblerPredicate = HasGFX10_BEncoding,
|
||||
let SubtargetPredicate = HasGFX10_AEncoding,
|
||||
AssemblerPredicate = HasGFX10_AEncoding,
|
||||
AsmMatchConverter = !if(A16, "cvtIntersectRay", ""),
|
||||
dmask = 0xf,
|
||||
unorm = 1,
|
||||
@ -1036,7 +1036,7 @@ defm IMAGE_SAMPLE_C_CD_CL_O_G16 : MIMG_Sampler <mimgopc<0xef>, AMDGPUSample_c_cd
|
||||
//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"image_rsrc256", 0x0000007e>;
|
||||
//def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", 0x0000007f>;
|
||||
|
||||
let SubtargetPredicate = HasGFX10_BEncoding in
|
||||
let SubtargetPredicate = HasGFX10_AEncoding in
|
||||
defm IMAGE_MSAA_LOAD_X : MIMG_NoSampler <mimgopc<0x80>, "image_msaa_load", 1, 0, 0, 1>;
|
||||
|
||||
defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0xe6>, "image_bvh_intersect_ray", 11, 0>;
|
||||
|
@ -7341,6 +7341,11 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
||||
assert(RayDir.getValueType() == MVT::v4f16 ||
|
||||
RayDir.getValueType() == MVT::v4f32);
|
||||
|
||||
if (!Subtarget->hasGFX10_AEncoding()) {
|
||||
emitRemovedIntrinsicError(DAG, DL, Op.getValueType());
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
bool IsA16 = RayDir.getValueType().getVectorElementType() == MVT::f16;
|
||||
bool Is64 = NodePtr.getValueType() == MVT::i64;
|
||||
unsigned Opcode = IsA16 ? Is64 ? AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa
|
||||
|
@ -1447,6 +1447,10 @@ bool isGCN3Encoding(const MCSubtargetInfo &STI) {
|
||||
return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
|
||||
}
|
||||
|
||||
bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
|
||||
return STI.getFeatureBits()[AMDGPU::FeatureGFX10_AEncoding];
|
||||
}
|
||||
|
||||
bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
|
||||
return STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding];
|
||||
}
|
||||
|
@ -737,6 +737,7 @@ bool isGFX9Plus(const MCSubtargetInfo &STI);
|
||||
bool isGFX10(const MCSubtargetInfo &STI);
|
||||
bool isGFX10Plus(const MCSubtargetInfo &STI);
|
||||
bool isGCN3Encoding(const MCSubtargetInfo &STI);
|
||||
bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
|
||||
bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
|
||||
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
|
||||
bool isGFX90A(const MCSubtargetInfo &STI);
|
||||
|
@ -1,5 +1,7 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1013 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: not --crash llc -global-isel -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s
|
||||
|
||||
; uint4 llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(uint node_ptr, float ray_extent, float4 ray_origin, float4 ray_dir, float4 ray_inv_dir, uint4 texture_descr)
|
||||
; uint4 llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(uint node_ptr, float ray_extent, float4 ray_origin, half4 ray_dir, half4 ray_inv_dir, uint4 texture_descr)
|
||||
@ -17,6 +19,7 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray(i32 %node_ptr, float %ray_
|
||||
; GCN-NEXT: image_bvh_intersect_ray v[0:3], [v0, v1, v2, v3, v4, v6, v7, v8, v10, v11, v12], s[0:3]
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: ; return to shader part epilog
|
||||
; ERR: in function image_bvh_intersect_ray{{.*}}intrinsic not supported on subtarget
|
||||
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr)
|
||||
%r = bitcast <4 x i32> %v to <4 x float>
|
||||
ret <4 x float> %r
|
||||
|
@ -80,6 +80,9 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1012 < %s | FileCheck --check-prefixes=V3-GFX1012-XNACK %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1012 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1012-NOXNACK %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1012 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1012-XNACK %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1013 < %s | FileCheck --check-prefixes=V3-GFX1013-XNACK %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1013 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1013-NOXNACK %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1013 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1013-XNACK %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1030 < %s | FileCheck --check-prefixes=V3-GFX1030 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1031 < %s | FileCheck --check-prefixes=V3-GFX1031 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1032 < %s | FileCheck --check-prefixes=V3-GFX1032 %s
|
||||
@ -168,6 +171,9 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 < %s | FileCheck --check-prefixes=GFX1012 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX1012-NOXNACK %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX1012-XNACK %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 < %s | FileCheck --check-prefixes=GFX1013 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX1013-NOXNACK %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX1013-XNACK %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck --check-prefixes=GFX1030 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX1031 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1032 < %s | FileCheck --check-prefixes=GFX1032 %s
|
||||
@ -214,6 +220,8 @@
|
||||
; V3-GFX1011-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011+xnack"
|
||||
; V3-GFX1012-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012"
|
||||
; V3-GFX1012-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012+xnack"
|
||||
; V3-GFX1013-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013"
|
||||
; V3-GFX1013-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013+xnack"
|
||||
; V3-GFX1030: .amdgcn_target "amdgcn-amd-amdhsa--gfx1030"
|
||||
; V3-GFX1031: .amdgcn_target "amdgcn-amd-amdhsa--gfx1031"
|
||||
; V3-GFX1032: .amdgcn_target "amdgcn-amd-amdhsa--gfx1032"
|
||||
@ -280,6 +288,9 @@
|
||||
; GFX1012: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012"
|
||||
; GFX1012-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012:xnack-"
|
||||
; GFX1012-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012:xnack+"
|
||||
; GFX1013: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013"
|
||||
; GFX1013-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013:xnack-"
|
||||
; GFX1013-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013:xnack+"
|
||||
; GFX1030: .amdgcn_target "amdgcn-amd-amdhsa--gfx1030"
|
||||
; GFX1031: .amdgcn_target "amdgcn-amd-amdhsa--gfx1031"
|
||||
; GFX1032: .amdgcn_target "amdgcn-amd-amdhsa--gfx1032"
|
||||
|
@ -57,6 +57,7 @@
|
||||
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1010 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1010 %s
|
||||
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1011 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1011 %s
|
||||
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1012 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1012 %s
|
||||
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1013 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1013 %s
|
||||
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1030 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1030 %s
|
||||
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1031 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1031 %s
|
||||
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1032 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1032 %s
|
||||
@ -116,6 +117,7 @@
|
||||
; GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
|
||||
; GFX1011: EF_AMDGPU_MACH_AMDGCN_GFX1011 (0x34)
|
||||
; GFX1012: EF_AMDGPU_MACH_AMDGCN_GFX1012 (0x35)
|
||||
; GFX1013: EF_AMDGPU_MACH_AMDGCN_GFX1013 (0x42)
|
||||
; GFX1030: EF_AMDGPU_MACH_AMDGCN_GFX1030 (0x36)
|
||||
; GFX1031: EF_AMDGPU_MACH_AMDGCN_GFX1031 (0x37)
|
||||
; GFX1032: EF_AMDGPU_MACH_AMDGCN_GFX1032 (0x38)
|
||||
|
@ -1,4 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1013 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: not --crash llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERR %s
|
||||
|
||||
; uint4 llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(uint node_ptr, float ray_extent, float4 ray_origin, float4 ray_dir, float4 ray_inv_dir, uint4 texture_descr)
|
||||
; uint4 llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(uint node_ptr, float ray_extent, float4 ray_origin, half4 ray_dir, half4 ray_inv_dir, uint4 texture_descr)
|
||||
@ -12,6 +14,7 @@ declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64, float, <4
|
||||
|
||||
; GCN-LABEL: {{^}}image_bvh_intersect_ray:
|
||||
; GCN: image_bvh_intersect_ray v[0:3], v[0:15], s[0:3]{{$}}
|
||||
; ERR: in function image_bvh_intersect_ray{{.*}}intrinsic not supported on subtarget
|
||||
; Arguments are flattened to represent the actual VGPR_A layout, so we have no
|
||||
; extra moves in the generated kernel.
|
||||
define amdgpu_ps <4 x float> @image_bvh_intersect_ray(i32 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, float %ray_inv_dir_x, float %ray_inv_dir_y, float %ray_inv_dir_z, <4 x i32> inreg %tdescr) {
|
||||
|
@ -2,6 +2,7 @@
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx906 %s 2>&1 | FileCheck %s --check-prefix=GFX906-GFX908
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 %s 2>&1 | FileCheck %s --check-prefix=GFX906-GFX908
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1013 %s 2>&1 | FileCheck %s --check-prefix=GFX1013
|
||||
|
||||
//
|
||||
// Test unsupported GPUs.
|
||||
@ -12,18 +13,25 @@ v_fmac_f32 v0, v1, v2
|
||||
// CHECK: error: instruction not supported on this GPU
|
||||
v_xnor_b32 v0, v1, v2
|
||||
// CHECK: error: instruction not supported on this GPU
|
||||
// GFX1013: error: instruction not supported on this GPU
|
||||
v_dot2_f32_f16 v0, v1, v2, v3
|
||||
// CHECK: error: instruction not supported on this GPU
|
||||
// GFX1013: error: instruction not supported on this GPU
|
||||
v_dot2_i32_i16 v0, v1, v2, v3
|
||||
// CHECK: error: instruction not supported on this GPU
|
||||
// GFX1013: error: instruction not supported on this GPU
|
||||
v_dot2_u32_u16 v0, v1, v2, v3
|
||||
// CHECK: error: instruction not supported on this GPU
|
||||
// GFX1013: error: instruction not supported on this GPU
|
||||
v_dot4_i32_i8 v0, v1, v2, v3
|
||||
// CHECK: error: instruction not supported on this GPU
|
||||
// GFX1013: error: instruction not supported on this GPU
|
||||
v_dot4_u32_u8 v0, v1, v2, v3
|
||||
// CHECK: error: instruction not supported on this GPU
|
||||
// GFX1013: error: instruction not supported on this GPU
|
||||
v_dot8_i32_i4 v0, v1, v2, v3
|
||||
// CHECK: error: instruction not supported on this GPU
|
||||
// GFX1013: error: instruction not supported on this GPU
|
||||
v_dot8_u32_u4 v0, v1, v2, v3
|
||||
|
||||
//
|
||||
|
@ -1,5 +1,6 @@
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --implicit-check-not=error: %s
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --implicit-check-not=error: %s
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1013 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --implicit-check-not=error: %s
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Unsupported instructions.
|
||||
|
@ -162,6 +162,10 @@
|
||||
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1012 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1012 %s
|
||||
# RUN: obj2yaml %t.o.AMDGCN_GFX1012 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1012 %s
|
||||
|
||||
# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX1013/' %s | yaml2obj -o %t.o.AMDGCN_GFX1013
|
||||
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1013 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1013 %s
|
||||
# RUN: obj2yaml %t.o.AMDGCN_GFX1013 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1013 %s
|
||||
|
||||
# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX1030/' %s | yaml2obj -o %t.o.AMDGCN_GFX1030
|
||||
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1030 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1030 %s
|
||||
# RUN: obj2yaml %t.o.AMDGCN_GFX1030 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1030 %s
|
||||
@ -322,6 +326,9 @@
|
||||
# ELF-AMDGCN-GFX1012: EF_AMDGPU_MACH_AMDGCN_GFX1012 (0x35)
|
||||
# YAML-AMDGCN-GFX1012: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1012 ]
|
||||
|
||||
# ELF-AMDGCN-GFX1013: EF_AMDGPU_MACH_AMDGCN_GFX1013 (0x42)
|
||||
# YAML-AMDGCN-GFX1013: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1013 ]
|
||||
|
||||
# ELF-AMDGCN-GFX1030: EF_AMDGPU_MACH_AMDGCN_GFX1030 (0x36)
|
||||
# YAML-AMDGCN-GFX1030: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1030 ]
|
||||
|
||||
|
@ -31,6 +31,11 @@ define amdgpu_kernel void @test_kernel() {
|
||||
; RUN: llvm-objdump -D %t.o > %t-detect.txt
|
||||
; RUN: diff %t-specify.txt %t-detect.txt
|
||||
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 -filetype=obj -O0 -o %t.o %s
|
||||
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1013 %t.o > %t-specify.txt
|
||||
; RUN: llvm-objdump -D %t.o > %t-detect.txt
|
||||
; RUN: diff %t-specify.txt %t-detect.txt
|
||||
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 -filetype=obj -O0 -o %t.o %s
|
||||
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1012 %t.o > %t-specify.txt
|
||||
; RUN: llvm-objdump -D %t.o > %t-detect.txt
|
||||
|
@ -223,6 +223,15 @@
|
||||
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1012
|
||||
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1012 -DFLAG_VALUE=0x35
|
||||
|
||||
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013
|
||||
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 -DFLAG_VALUE=0x42
|
||||
|
||||
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013
|
||||
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 -DFLAG_VALUE=0x42
|
||||
|
||||
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013
|
||||
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 -DFLAG_VALUE=0x42
|
||||
|
||||
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1030
|
||||
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1030 -DFLAG_VALUE=0x36
|
||||
|
||||
|
@ -1482,6 +1482,7 @@ static const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011),
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012),
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1013),
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1030),
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1031),
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1032),
|
||||
@ -1534,6 +1535,7 @@ static const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011),
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012),
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1013),
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1030),
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1031),
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1032),
|
||||
|
Loading…
Reference in New Issue
Block a user