mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
Revert "[AMDGPU] Reorganize GCN subtarget features for unaligned access"
This reverts commit f5cd7ec9f3fc969ff5e1feed961996844333de3b. Certain rocPRIM/rocThrust/hipCUB tests were failing because of this change.
This commit is contained in:
parent
e606604b45
commit
367c918b83
@ -90,7 +90,7 @@ def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts",
|
||||
def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
|
||||
"UnalignedBufferAccess",
|
||||
"true",
|
||||
"Hardware supports unaligned global loads and stores"
|
||||
"Support unaligned global loads and stores"
|
||||
>;
|
||||
|
||||
def FeatureTrapHandler: SubtargetFeature<"trap-handler",
|
||||
@ -105,10 +105,18 @@ def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
|
||||
"Support unaligned scratch loads and stores"
|
||||
>;
|
||||
|
||||
// LDS alignment enforcement is controlled by a configuration register:
|
||||
// SH_MEM_CONFIG.alignment_mode
|
||||
def FeatureUnalignedAccessMode : SubtargetFeature<"unaligned-access-mode",
|
||||
"UnalignedAccessMode",
|
||||
"true",
|
||||
"Support unaligned local and region loads and stores"
|
||||
>;
|
||||
|
||||
def FeatureUnalignedDSAccess : SubtargetFeature<"unaligned-ds-access",
|
||||
"UnalignedDSAccess",
|
||||
"true",
|
||||
"Hardware supports unaligned local and region loads and stores"
|
||||
"Does not requires 16 byte alignment for certain local and region loads and stores"
|
||||
>;
|
||||
|
||||
def FeatureApertureRegs : SubtargetFeature<"aperture-regs",
|
||||
@ -645,15 +653,6 @@ def FeatureTrigReducedRange : SubtargetFeature<"trig-reduced-range",
|
||||
"Requires use of fract on arguments to trig instructions"
|
||||
>;
|
||||
|
||||
// Alignment enforcement is controlled by a configuration register:
|
||||
// SH_MEM_CONFIG.alignment_mode
|
||||
def FeatureUnalignedAccessMode : SubtargetFeature<"unaligned-access-mode",
|
||||
"UnalignedAccessMode",
|
||||
"true",
|
||||
"Enable unaligned global, local and region loads and stores if the hardware"
|
||||
" supports it"
|
||||
>;
|
||||
|
||||
// Dummy feature used to disable assembler instructions.
|
||||
def FeatureDisable : SubtargetFeature<"",
|
||||
"FeatureDisable","true",
|
||||
@ -680,8 +679,7 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
|
||||
FeatureWavefrontSize64, FeatureFlatAddressSpace,
|
||||
FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
|
||||
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
|
||||
FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC,
|
||||
FeatureUnalignedBufferAccess]
|
||||
FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC]
|
||||
>;
|
||||
|
||||
def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
|
||||
@ -694,8 +692,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
|
||||
FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
|
||||
FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts,
|
||||
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
|
||||
FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC, FeatureFastDenormalF32,
|
||||
FeatureUnalignedBufferAccess
|
||||
FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC, FeatureFastDenormalF32
|
||||
]
|
||||
>;
|
||||
|
||||
@ -712,8 +709,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
|
||||
FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
|
||||
FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16,
|
||||
FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts,
|
||||
FeatureFastDenormalF32, FeatureUnalignedBufferAccess,
|
||||
FeatureUnalignedDSAccess
|
||||
FeatureFastDenormalF32, FeatureUnalignedDSAccess
|
||||
]
|
||||
>;
|
||||
|
||||
@ -732,7 +728,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
|
||||
FeatureVOP3Literal, FeatureDPP8,
|
||||
FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC,
|
||||
FeatureGFX10A16, FeatureFastDenormalF32, FeatureG16,
|
||||
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess
|
||||
FeatureUnalignedDSAccess
|
||||
]
|
||||
>;
|
||||
|
||||
|
@ -1068,9 +1068,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
|
||||
return false;
|
||||
};
|
||||
|
||||
unsigned GlobalAlign32 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 32;
|
||||
unsigned GlobalAlign16 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 16;
|
||||
unsigned GlobalAlign8 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 8;
|
||||
unsigned GlobalAlign32 = ST.hasUnalignedBufferAccess() ? 0 : 32;
|
||||
unsigned GlobalAlign16 = ST.hasUnalignedBufferAccess() ? 0 : 16;
|
||||
unsigned GlobalAlign8 = ST.hasUnalignedBufferAccess() ? 0 : 8;
|
||||
|
||||
// TODO: Refine based on subtargets which support unaligned access or 128-bit
|
||||
// LDS
|
||||
|
@ -81,7 +81,7 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
|
||||
SmallString<256> FullFS("+promote-alloca,+load-store-opt,+enable-ds128,+sram-ecc,+xnack,");
|
||||
|
||||
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
|
||||
FullFS += "+flat-for-global,+unaligned-access-mode,+trap-handler,";
|
||||
FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
|
||||
|
||||
FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS
|
||||
|
||||
@ -186,6 +186,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
AutoWaitcntBeforeBarrier(false),
|
||||
CodeObjectV3(false),
|
||||
UnalignedScratchAccess(false),
|
||||
UnalignedBufferAccess(false),
|
||||
UnalignedAccessMode(false),
|
||||
|
||||
HasApertureRegs(false),
|
||||
@ -257,7 +258,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
HasUnpackedD16VMem(false),
|
||||
LDSMisalignedBug(false),
|
||||
HasMFMAInlineLiteralBug(false),
|
||||
UnalignedBufferAccess(false),
|
||||
UnalignedDSAccess(false),
|
||||
|
||||
ScalarizeGlobal(false),
|
||||
|
@ -318,6 +318,7 @@ protected:
|
||||
bool AutoWaitcntBeforeBarrier;
|
||||
bool CodeObjectV3;
|
||||
bool UnalignedScratchAccess;
|
||||
bool UnalignedBufferAccess;
|
||||
bool UnalignedAccessMode;
|
||||
bool HasApertureRegs;
|
||||
bool EnableXNACK;
|
||||
@ -398,7 +399,6 @@ protected:
|
||||
bool HasMFMAInlineLiteralBug;
|
||||
bool HasVertexCache;
|
||||
short TexVTXClauseSize;
|
||||
bool UnalignedBufferAccess;
|
||||
bool UnalignedDSAccess;
|
||||
bool ScalarizeGlobal;
|
||||
|
||||
@ -706,18 +706,6 @@ public:
|
||||
return UnalignedBufferAccess;
|
||||
}
|
||||
|
||||
bool hasUnalignedBufferAccessEnabled() const {
|
||||
return UnalignedBufferAccess && UnalignedAccessMode;
|
||||
}
|
||||
|
||||
bool hasUnalignedDSAccess() const {
|
||||
return UnalignedDSAccess;
|
||||
}
|
||||
|
||||
bool hasUnalignedDSAccessEnabled() const {
|
||||
return UnalignedDSAccess && UnalignedAccessMode;
|
||||
}
|
||||
|
||||
bool hasUnalignedScratchAccess() const {
|
||||
return UnalignedScratchAccess;
|
||||
}
|
||||
@ -726,6 +714,10 @@ public:
|
||||
return UnalignedAccessMode;
|
||||
}
|
||||
|
||||
bool hasUnalignedDSAccess() const {
|
||||
return UnalignedDSAccess;
|
||||
}
|
||||
|
||||
bool hasApertureRegs() const {
|
||||
return HasApertureRegs;
|
||||
}
|
||||
|
@ -88,6 +88,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
|
||||
AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
|
||||
AMDGPU::FeatureFlatForGlobal,
|
||||
AMDGPU::FeaturePromoteAlloca,
|
||||
AMDGPU::FeatureUnalignedBufferAccess,
|
||||
AMDGPU::FeatureUnalignedScratchAccess,
|
||||
AMDGPU::FeatureUnalignedAccessMode,
|
||||
|
||||
|
@ -1433,7 +1433,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
|
||||
AddrSpace == AMDGPUAS::REGION_ADDRESS) {
|
||||
// Check if alignment requirements for ds_read/write instructions are
|
||||
// disabled.
|
||||
if (Subtarget->hasUnalignedDSAccessEnabled() &&
|
||||
if (Subtarget->hasUnalignedDSAccess() &&
|
||||
Subtarget->hasUnalignedAccessMode() &&
|
||||
!Subtarget->hasLDSMisalignedBug()) {
|
||||
if (IsFast)
|
||||
*IsFast = Alignment != Align(2);
|
||||
@ -1483,7 +1484,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
|
||||
return AlignedBy4;
|
||||
}
|
||||
|
||||
if (Subtarget->hasUnalignedBufferAccessEnabled() &&
|
||||
if (Subtarget->hasUnalignedBufferAccess() &&
|
||||
!(AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
|
||||
AddrSpace == AMDGPUAS::REGION_ADDRESS)) {
|
||||
// If we have a uniform constant load, it still requires using a slow
|
||||
|
@ -1,8 +1,8 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-NOUNALIGNED %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-UNALIGNED %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-NOUNALIGNED %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-NOUNALIGNED %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-UNALIGNED %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=-unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-NOUNALIGNED %s
|
||||
|
||||
; FIXME:
|
||||
; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
|
||||
|
@ -1,10 +1,10 @@
|
||||
; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s
|
||||
; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s
|
||||
; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
|
||||
; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s
|
||||
; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s
|
||||
; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -show-mc-encoding -mattr=-code-object-v3,+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -show-mc-encoding -mattr=-code-object-v3,-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-code-object-v3,-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -data-layout=A5 -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck -enable-var-scope -check-prefix=HSAOPT -check-prefix=OPT %s
|
||||
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -data-layout=A5 -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck -enable-var-scope -check-prefix=NOHSAOPT -check-prefix=OPT %s
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX900 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900 %s
|
||||
|
||||
define <2 x half> @chain_hi_to_lo_private() {
|
||||
; GCN-LABEL: chain_hi_to_lo_private:
|
||||
|
@ -1,7 +1,7 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7-ALIGNED %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7-UNALIGNED %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=-unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7-ALIGNED %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7-UNALIGNED %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
|
||||
; Should not merge this to a dword load
|
||||
define i32 @global_load_2xi16_align2(i16 addrspace(1)* %p) #0 {
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=ALIGNED %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=+unaligned-access-mode -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=UNALIGNED %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=+unaligned-buffer-access -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=UNALIGNED %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=ALIGNED %s
|
||||
|
||||
; SI-LABEL: {{^}}local_unaligned_load_store_i16:
|
||||
|
@ -1,5 +1,5 @@
|
||||
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=none -o - %s | FileCheck -check-prefix=MCPU %s
|
||||
# RUN: llc -march=amdgcn -mattr=+unaligned-access-mode -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s
|
||||
# RUN: llc -march=amdgcn -mattr=+unaligned-buffer-access -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s
|
||||
|
||||
# FIXME: This overrides attributes that already are present. It should probably
|
||||
# only touch functions without an existing attribute.
|
||||
@ -10,8 +10,8 @@
|
||||
# MCPU: attributes #0 = { "target-cpu"="fiji" }
|
||||
# MCPU: attributes #1 = { "target-cpu"="hawaii" }
|
||||
|
||||
# MATTR: attributes #0 = { "target-cpu"="fiji" "target-features"="+unaligned-access-mode" }
|
||||
# MATTR: attributes #1 = { "target-features"="+unaligned-access-mode" }
|
||||
# MATTR: attributes #0 = { "target-cpu"="fiji" "target-features"="+unaligned-buffer-access" }
|
||||
# MATTR: attributes #1 = { "target-features"="+unaligned-buffer-access" }
|
||||
|
||||
--- |
|
||||
define amdgpu_kernel void @with_cpu_attr() #0 {
|
||||
|
@ -1,10 +1,10 @@
|
||||
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=none -o - %s | FileCheck -check-prefix=MCPU %s
|
||||
# RUN: llc -march=amdgcn -mattr=+unaligned-access-mode -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s
|
||||
# RUN: llc -march=amdgcn -mattr=+unaligned-buffer-access -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s
|
||||
|
||||
# The command line arguments for -mcpu and -mattr should manifest themselves by adding the corresponding attributes to the stub IR function.
|
||||
|
||||
# MCPU: attributes #0 = { "target-cpu"="hawaii" }
|
||||
# MATTR: attributes #0 = { "target-features"="+unaligned-access-mode" }
|
||||
# MATTR: attributes #0 = { "target-features"="+unaligned-buffer-access" }
|
||||
|
||||
---
|
||||
name: no_ir
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: opt -S -load-store-vectorizer --mcpu=hawaii -mattr=-unaligned-access-mode,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
|
||||
; RUN: opt -S -load-store-vectorizer --mcpu=hawaii -mattr=+unaligned-access-mode,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
|
||||
; RUN: opt -S -passes='function(load-store-vectorizer)' --mcpu=hawaii -mattr=-unaligned-access-mode,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
|
||||
; RUN: opt -S -passes='function(load-store-vectorizer)' --mcpu=hawaii -mattr=+unaligned-access-mode,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
|
||||
; RUN: opt -S -load-store-vectorizer -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
|
||||
; RUN: opt -S -load-store-vectorizer -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
|
||||
; RUN: opt -S -passes='function(load-store-vectorizer)' -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
|
||||
; RUN: opt -S -passes='function(load-store-vectorizer)' -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
|
||||
|
||||
target triple = "amdgcn--"
|
||||
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: opt -mtriple=amdgcn-amd-amdhsa --mcpu=hawaii -load-store-vectorizer -S -o - %s | FileCheck %s
|
||||
; RUN: opt -mtriple=amdgcn-amd-amdhsa -load-store-vectorizer -S -o - %s | FileCheck %s
|
||||
; Copy of test/CodeGen/AMDGPU/merge-stores.ll with some additions
|
||||
|
||||
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -basic-aa -load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=GCN,GFX7 %s
|
||||
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -basic-aa -load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -basic-aa -load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=GCN %s
|
||||
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -basic-aa -load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=GCN %s
|
||||
|
||||
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
|
||||
|
||||
@ -31,20 +31,13 @@ define amdgpu_kernel void @no_crash(i32 %arg) {
|
||||
|
||||
; GCN-LABEL: @interleave_get_longest
|
||||
|
||||
; GFX7: load <2 x i32>
|
||||
; GFX7: load i32
|
||||
; GFX7: store <2 x i32> zeroinitializer
|
||||
; GFX7: load i32
|
||||
; GFX7: load <2 x i32>
|
||||
; GFX7: load i32
|
||||
; GFX7: load i32
|
||||
|
||||
; GFX9: load <4 x i32>
|
||||
; GFX9: load i32
|
||||
; GFX9: store <2 x i32> zeroinitializer
|
||||
; GFX9: load i32
|
||||
; GFX9: load i32
|
||||
; GFX9: load i32
|
||||
; GCN: load <2 x i32>
|
||||
; GCN: load i32
|
||||
; GCN: store <2 x i32> zeroinitializer
|
||||
; GCN: load i32
|
||||
; GCN: load <2 x i32>
|
||||
; GCN: load i32
|
||||
; GCN: load i32
|
||||
|
||||
define amdgpu_kernel void @interleave_get_longest(i32 %arg) {
|
||||
%a1 = add i32 %arg, 1
|
||||
|
Loading…
x
Reference in New Issue
Block a user