mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[AMDGPU] Reorganize GCN subtarget features for unaligned access
Features UnalignedBufferAccess and UnalignedDSAccess are now used to determine whether the hardware supports such access. UnalignedAccessMode should be used to enable them. hasUnalignedBufferAccessEnabled() and hasUnalignedDSAccessEnabled() can now be used to quickly check both hardware support and the access mode. Differential Revision: https://reviews.llvm.org/D84522
This commit is contained in:
parent
49f2d14543
commit
08706e7bce
@ -90,7 +90,7 @@ def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts",
|
||||
def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
|
||||
"UnalignedBufferAccess",
|
||||
"true",
|
||||
"Support unaligned global loads and stores"
|
||||
"Hardware supports unaligned global loads and stores"
|
||||
>;
|
||||
|
||||
def FeatureTrapHandler: SubtargetFeature<"trap-handler",
|
||||
@ -105,18 +105,10 @@ def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
|
||||
"Support unaligned scratch loads and stores"
|
||||
>;
|
||||
|
||||
// LDS alignment enforcement is controlled by a configuration register:
|
||||
// SH_MEM_CONFIG.alignment_mode
|
||||
def FeatureUnalignedAccessMode : SubtargetFeature<"unaligned-access-mode",
|
||||
"UnalignedAccessMode",
|
||||
"true",
|
||||
"Support unaligned local and region loads and stores"
|
||||
>;
|
||||
|
||||
def FeatureUnalignedDSAccess : SubtargetFeature<"unaligned-ds-access",
|
||||
"UnalignedDSAccess",
|
||||
"true",
|
||||
"Does not requires 16 byte alignment for certain local and region loads and stores"
|
||||
"Hardware supports unaligned local and region loads and stores"
|
||||
>;
|
||||
|
||||
def FeatureApertureRegs : SubtargetFeature<"aperture-regs",
|
||||
@ -653,6 +645,15 @@ def FeatureTrigReducedRange : SubtargetFeature<"trig-reduced-range",
|
||||
"Requires use of fract on arguments to trig instructions"
|
||||
>;
|
||||
|
||||
// Alignment enforcement is controlled by a configuration register:
|
||||
// SH_MEM_CONFIG.alignment_mode
|
||||
def FeatureUnalignedAccessMode : SubtargetFeature<"unaligned-access-mode",
|
||||
"UnalignedAccessMode",
|
||||
"true",
|
||||
"Enable unaligned global, local and region loads and stores if the hardware"
|
||||
" supports it"
|
||||
>;
|
||||
|
||||
// Dummy feature used to disable assembler instructions.
|
||||
def FeatureDisable : SubtargetFeature<"",
|
||||
"FeatureDisable","true",
|
||||
@ -679,7 +680,8 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
|
||||
FeatureWavefrontSize64, FeatureFlatAddressSpace,
|
||||
FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
|
||||
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
|
||||
FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC]
|
||||
FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC,
|
||||
FeatureUnalignedBufferAccess]
|
||||
>;
|
||||
|
||||
def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
|
||||
@ -692,7 +694,8 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
|
||||
FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
|
||||
FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts,
|
||||
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
|
||||
FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC, FeatureFastDenormalF32
|
||||
FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC, FeatureFastDenormalF32,
|
||||
FeatureUnalignedBufferAccess
|
||||
]
|
||||
>;
|
||||
|
||||
@ -709,7 +712,8 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
|
||||
FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
|
||||
FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16,
|
||||
FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts,
|
||||
FeatureFastDenormalF32, FeatureUnalignedDSAccess
|
||||
FeatureFastDenormalF32, FeatureUnalignedBufferAccess,
|
||||
FeatureUnalignedDSAccess
|
||||
]
|
||||
>;
|
||||
|
||||
@ -728,7 +732,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
|
||||
FeatureVOP3Literal, FeatureDPP8,
|
||||
FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC,
|
||||
FeatureGFX10A16, FeatureFastDenormalF32, FeatureG16,
|
||||
FeatureUnalignedDSAccess
|
||||
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess
|
||||
]
|
||||
>;
|
||||
|
||||
|
@ -1051,9 +1051,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
|
||||
return false;
|
||||
};
|
||||
|
||||
unsigned GlobalAlign32 = ST.hasUnalignedBufferAccess() ? 0 : 32;
|
||||
unsigned GlobalAlign16 = ST.hasUnalignedBufferAccess() ? 0 : 16;
|
||||
unsigned GlobalAlign8 = ST.hasUnalignedBufferAccess() ? 0 : 8;
|
||||
unsigned GlobalAlign32 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 32;
|
||||
unsigned GlobalAlign16 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 16;
|
||||
unsigned GlobalAlign8 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 8;
|
||||
|
||||
// TODO: Refine based on subtargets which support unaligned access or 128-bit
|
||||
// LDS
|
||||
|
@ -81,7 +81,7 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
|
||||
SmallString<256> FullFS("+promote-alloca,+load-store-opt,+enable-ds128,+sram-ecc,+xnack,");
|
||||
|
||||
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
|
||||
FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
|
||||
FullFS += "+flat-for-global,+unaligned-access-mode,+trap-handler,";
|
||||
|
||||
FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS
|
||||
|
||||
@ -186,7 +186,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
AutoWaitcntBeforeBarrier(false),
|
||||
CodeObjectV3(false),
|
||||
UnalignedScratchAccess(false),
|
||||
UnalignedBufferAccess(false),
|
||||
UnalignedAccessMode(false),
|
||||
|
||||
HasApertureRegs(false),
|
||||
@ -258,6 +257,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
HasUnpackedD16VMem(false),
|
||||
LDSMisalignedBug(false),
|
||||
HasMFMAInlineLiteralBug(false),
|
||||
UnalignedBufferAccess(false),
|
||||
UnalignedDSAccess(false),
|
||||
|
||||
ScalarizeGlobal(false),
|
||||
|
@ -314,7 +314,6 @@ protected:
|
||||
bool AutoWaitcntBeforeBarrier;
|
||||
bool CodeObjectV3;
|
||||
bool UnalignedScratchAccess;
|
||||
bool UnalignedBufferAccess;
|
||||
bool UnalignedAccessMode;
|
||||
bool HasApertureRegs;
|
||||
bool EnableXNACK;
|
||||
@ -395,6 +394,7 @@ protected:
|
||||
bool HasMFMAInlineLiteralBug;
|
||||
bool HasVertexCache;
|
||||
short TexVTXClauseSize;
|
||||
bool UnalignedBufferAccess;
|
||||
bool UnalignedDSAccess;
|
||||
bool ScalarizeGlobal;
|
||||
|
||||
@ -697,6 +697,18 @@ public:
|
||||
return UnalignedBufferAccess;
|
||||
}
|
||||
|
||||
bool hasUnalignedBufferAccessEnabled() const {
|
||||
return UnalignedBufferAccess && UnalignedAccessMode;
|
||||
}
|
||||
|
||||
bool hasUnalignedDSAccess() const {
|
||||
return UnalignedDSAccess;
|
||||
}
|
||||
|
||||
bool hasUnalignedDSAccessEnabled() const {
|
||||
return UnalignedDSAccess && UnalignedAccessMode;
|
||||
}
|
||||
|
||||
bool hasUnalignedScratchAccess() const {
|
||||
return UnalignedScratchAccess;
|
||||
}
|
||||
@ -705,10 +717,6 @@ public:
|
||||
return UnalignedAccessMode;
|
||||
}
|
||||
|
||||
bool hasUnalignedDSAccess() const {
|
||||
return UnalignedDSAccess;
|
||||
}
|
||||
|
||||
bool hasApertureRegs() const {
|
||||
return HasApertureRegs;
|
||||
}
|
||||
|
@ -88,7 +88,6 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
|
||||
AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
|
||||
AMDGPU::FeatureFlatForGlobal,
|
||||
AMDGPU::FeaturePromoteAlloca,
|
||||
AMDGPU::FeatureUnalignedBufferAccess,
|
||||
AMDGPU::FeatureUnalignedScratchAccess,
|
||||
AMDGPU::FeatureUnalignedAccessMode,
|
||||
|
||||
|
@ -1398,8 +1398,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
|
||||
AddrSpace == AMDGPUAS::REGION_ADDRESS) {
|
||||
// Check if alignment requirements for ds_read/write instructions are
|
||||
// disabled.
|
||||
if (Subtarget->hasUnalignedDSAccess() &&
|
||||
Subtarget->hasUnalignedAccessMode()) {
|
||||
if (Subtarget->hasUnalignedDSAccessEnabled()) {
|
||||
if (IsFast)
|
||||
*IsFast = true;
|
||||
return true;
|
||||
@ -1450,7 +1449,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
|
||||
return AlignedBy4;
|
||||
}
|
||||
|
||||
if (Subtarget->hasUnalignedBufferAccess() &&
|
||||
if (Subtarget->hasUnalignedBufferAccessEnabled() &&
|
||||
!(AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
|
||||
AddrSpace == AMDGPUAS::REGION_ADDRESS)) {
|
||||
// If we have an uniform constant load, it still requires using a slow
|
||||
|
@ -1,8 +1,8 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-NOUNALIGNED %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-UNALIGNED %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=-unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-NOUNALIGNED %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-NOUNALIGNED %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-UNALIGNED %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-NOUNALIGNED %s
|
||||
|
||||
; FIXME:
|
||||
; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
|
||||
|
@ -1,10 +1,10 @@
|
||||
; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s
|
||||
; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s
|
||||
; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
|
||||
; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s
|
||||
; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s
|
||||
; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -data-layout=A5 -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck -enable-var-scope -check-prefix=HSAOPT -check-prefix=OPT %s
|
||||
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -data-layout=A5 -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck -enable-var-scope -check-prefix=NOHSAOPT -check-prefix=OPT %s
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX900 %s
|
||||
|
||||
define <2 x half> @chain_hi_to_lo_private() {
|
||||
; GCN-LABEL: chain_hi_to_lo_private:
|
||||
|
@ -1,7 +1,7 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=-unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7-ALIGNED %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7-UNALIGNED %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7-ALIGNED %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7-UNALIGNED %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
|
||||
; Should not merge this to a dword load
|
||||
define i32 @global_load_2xi16_align2(i16 addrspace(1)* %p) #0 {
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=ALIGNED %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=+unaligned-buffer-access -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=UNALIGNED %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=+unaligned-access-mode -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=UNALIGNED %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=ALIGNED %s
|
||||
|
||||
; SI-LABEL: {{^}}local_unaligned_load_store_i16:
|
||||
|
@ -1,5 +1,5 @@
|
||||
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=none -o - %s | FileCheck -check-prefix=MCPU %s
|
||||
# RUN: llc -march=amdgcn -mattr=+unaligned-buffer-access -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s
|
||||
# RUN: llc -march=amdgcn -mattr=+unaligned-access-mode -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s
|
||||
|
||||
# FIXME: This overrides attributes that already are present. It should probably
|
||||
# only touch functions without an existing attribute.
|
||||
@ -10,8 +10,8 @@
|
||||
# MCPU: attributes #0 = { "target-cpu"="fiji" }
|
||||
# MCPU: attributes #1 = { "target-cpu"="hawaii" }
|
||||
|
||||
# MATTR: attributes #0 = { "target-cpu"="fiji" "target-features"="+unaligned-buffer-access" }
|
||||
# MATTR: attributes #1 = { "target-features"="+unaligned-buffer-access" }
|
||||
# MATTR: attributes #0 = { "target-cpu"="fiji" "target-features"="+unaligned-access-mode" }
|
||||
# MATTR: attributes #1 = { "target-features"="+unaligned-access-mode" }
|
||||
|
||||
--- |
|
||||
define amdgpu_kernel void @with_cpu_attr() #0 {
|
||||
|
@ -1,10 +1,10 @@
|
||||
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=none -o - %s | FileCheck -check-prefix=MCPU %s
|
||||
# RUN: llc -march=amdgcn -mattr=+unaligned-buffer-access -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s
|
||||
# RUN: llc -march=amdgcn -mattr=+unaligned-access-mode -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s
|
||||
|
||||
# The command line arguments for -mcpu and -mattr should manifest themselves by adding the corresponding attributes to the stub IR function.
|
||||
|
||||
# MCPU: attributes #0 = { "target-cpu"="hawaii" }
|
||||
# MATTR: attributes #0 = { "target-features"="+unaligned-buffer-access" }
|
||||
# MATTR: attributes #0 = { "target-features"="+unaligned-access-mode" }
|
||||
|
||||
---
|
||||
name: no_ir
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: opt -S -load-store-vectorizer -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
|
||||
; RUN: opt -S -load-store-vectorizer -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
|
||||
; RUN: opt -S -passes='function(load-store-vectorizer)' -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
|
||||
; RUN: opt -S -passes='function(load-store-vectorizer)' -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
|
||||
; RUN: opt -S -load-store-vectorizer --mcpu=hawaii -mattr=-unaligned-access-mode,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
|
||||
; RUN: opt -S -load-store-vectorizer --mcpu=hawaii -mattr=+unaligned-access-mode,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
|
||||
; RUN: opt -S -passes='function(load-store-vectorizer)' --mcpu=hawaii -mattr=-unaligned-access-mode,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
|
||||
; RUN: opt -S -passes='function(load-store-vectorizer)' --mcpu=hawaii -mattr=+unaligned-access-mode,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
|
||||
|
||||
target triple = "amdgcn--"
|
||||
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: opt -mtriple=amdgcn-amd-amdhsa -load-store-vectorizer -S -o - %s | FileCheck %s
|
||||
; RUN: opt -mtriple=amdgcn-amd-amdhsa --mcpu=hawaii -load-store-vectorizer -S -o - %s | FileCheck %s
|
||||
; Copy of test/CodeGen/AMDGPU/merge-stores.ll with some additions
|
||||
|
||||
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
|
||||
|
Loading…
Reference in New Issue
Block a user