2012-12-11 22:25:42 +01:00
|
|
|
//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
/// \file
|
2018-05-01 17:54:18 +02:00
|
|
|
/// Implementation of the TargetInstrInfo class that is common to all
|
2012-12-11 22:25:42 +01:00
|
|
|
/// AMD GPUs.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "AMDGPUInstrInfo.h"
|
|
|
|
#include "AMDGPURegisterInfo.h"
|
|
|
|
#include "AMDGPUTargetMachine.h"
|
AMDGPU: Remove #include "MCTargetDesc/AMDGPUMCTargetDesc.h" from common headers
Summary:
MCTargetDesc/AMDGPUMCTargetDesc.h contains enums for all the instuction
and register defintions, which are huge so we only want to include
them where needed.
This will also make it easier if we want to split the R600 and GCN
definitions into separate tablegenerated files.
I was unable to remove AMDGPUMCTargetDesc.h from SIMachineFunctionInfo.h
because it uses some enums from the header to initialize default values
for the SIMachineFunction class, so I ended up having to remove includes of
SIMachineFunctionInfo.h from headers too.
Reviewers: arsenm, nhaehnle
Reviewed By: nhaehnle
Subscribers: MatzeB, kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46272
llvm-svn: 332930
2018-05-22 04:03:23 +02:00
|
|
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
2012-12-11 22:25:42 +01:00
|
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
|
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
|
|
|
2014-04-22 04:03:14 +02:00
|
|
|
using namespace llvm;

// Pull in the tablegen-erated constructor and destructor definitions for
// AMDGPUGenInstrInfo (the generated base class of AMDGPUInstrInfo).
#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"
|
|
|
|
|
AMDGPU: Make getTgtMemIntrinsic table-driven for resource-based intrinsics
Summary:
Avoids having to list all intrinsics manually.
This is in preparation for the new dimension-aware image intrinsics,
which I'd rather not have to list here by hand.
Change-Id: If7ced04998397ef68c4cb8f7de66b5050fb767e5
Reviewers: arsenm, rampitec, b-sumner
Subscribers: kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D44937
llvm-svn: 328938
2018-04-01 19:09:07 +02:00
|
|
|
namespace llvm {
namespace AMDGPU {

// Instantiate the tablegen-erated lookup tables for resource-based
// intrinsics (used to drive getTgtMemIntrinsic without listing each
// intrinsic by hand).
#define GET_RSRCINTRINSIC_IMPL
#include "AMDGPUGenSearchableTables.inc"

// Instantiate the lookup tables for the D16 image-dimension intrinsics.
// Note: the same .inc file is included twice on purpose — each GET_* macro
// selects a different generated table from it.
#define GET_D16IMAGEDIMINTRINSIC_IMPL
#include "AMDGPUGenSearchableTables.inc"

} // namespace AMDGPU
} // namespace llvm
|
|
|
|
|
2013-11-19 01:57:56 +01:00
|
|
|
// Pin the vtable to this file. Defining one out-of-line virtual method here
// gives the class a "home" translation unit so the vtable is emitted once.
void AMDGPUInstrInfo::anchor() {}
|
|
|
|
|
2016-06-24 08:30:11 +02:00
|
|
|
// Construct the instruction info shared by all AMD GPU subtargets, caching
// the subtarget and its address-space mapping.
// NOTE(review): the two opcodes forwarded to the generated base class are
// presumably the call-frame setup/destroy pseudos — confirm against
// AMDGPUGenInstrInfo's constructor signature.
AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
    : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
      ST(ST),
      AMDGPUASI(ST.getAMDGPUAS()) {}
|
2012-12-11 22:25:42 +01:00
|
|
|
|
2014-08-06 02:29:49 +02:00
|
|
|
// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
|
|
|
|
// the first 16 loads will be interleaved with the stores, and the next 16 will
|
|
|
|
// be clustered as expected. It should really split into 2 16 store batches.
|
|
|
|
//
|
|
|
|
// Loads are clustered until this returns false, rather than trying to schedule
|
|
|
|
// groups of stores. This also means we have to deal with saying different
|
|
|
|
// address space loads should be clustered, and ones which might cause bank
|
|
|
|
// conflicts.
|
|
|
|
//
|
|
|
|
// This might be deprecated so it might not be worth that much effort to fix.
|
|
|
|
bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
|
|
|
|
int64_t Offset0, int64_t Offset1,
|
|
|
|
unsigned NumLoads) const {
|
|
|
|
assert(Offset1 > Offset0 &&
|
|
|
|
"Second offset should be larger than first offset!");
|
|
|
|
// If we have less than 16 loads in a row, and the offsets are within 64
|
|
|
|
// bytes, then schedule together.
|
|
|
|
|
|
|
|
// A cacheline is 64 bytes (for global memory).
|
|
|
|
return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
|
2012-12-11 22:25:42 +01:00
|
|
|
}
|
|
|
|
|
2016-06-24 08:30:11 +02:00
|
|
|
// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
enum SIEncodingFamily {
  SI = 0,     // SOUTHERN_ISLANDS / SEA_ISLANDS (see subtargetEncodingFamily).
  VI = 1,     // VOLCANIC_ISLANDS and the GFX9 base encoding.
  SDWA = 2,   // SDWA instructions on pre-GFX9 subtargets.
  SDWA9 = 3,  // SDWA instructions on GFX9 (see pseudoToMCOpcode).
  GFX80 = 4,  // Unpacked-D16 memory instructions (UnpackedD16VMem feature).
  GFX9 = 5    // Instructions renamed in GFX9 (SIInstrFlags::renamedInGFX9).
};
|
|
|
|
|
|
|
|
/// Map a subtarget's generation to the encoding family used to translate
/// pseudo opcodes into real machine opcodes.
static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
  switch (ST.getGeneration()) {
  case AMDGPUSubtarget::VOLCANIC_ISLANDS:
  case AMDGPUSubtarget::GFX9:
    return SIEncodingFamily::VI;

  case AMDGPUSubtarget::SOUTHERN_ISLANDS:
  case AMDGPUSubtarget::SEA_ISLANDS:
  // FIXME: This should never be called for r600 GPUs; the r600 generations
  // fall through to the SI family only so every enumerator is covered.
  case AMDGPUSubtarget::R600:
  case AMDGPUSubtarget::R700:
  case AMDGPUSubtarget::EVERGREEN:
  case AMDGPUSubtarget::NORTHERN_ISLANDS:
    return SIEncodingFamily::SI;
  }

  llvm_unreachable("Unknown subtarget generation!");
}
|
|
|
|
|
|
|
|
int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
|
2017-06-21 10:53:38 +02:00
|
|
|
SIEncodingFamily Gen = subtargetEncodingFamily(ST);
|
2017-11-20 19:24:21 +01:00
|
|
|
|
|
|
|
if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
|
|
|
|
ST.getGeneration() >= AMDGPUSubtarget::GFX9)
|
|
|
|
Gen = SIEncodingFamily::GFX9;
|
|
|
|
|
2017-06-21 10:53:38 +02:00
|
|
|
if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
|
|
|
|
Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
|
|
|
|
: SIEncodingFamily::SDWA;
|
2018-02-01 19:41:33 +01:00
|
|
|
// Adjust the encoding family to GFX80 for D16 buffer instructions when the
|
|
|
|
// subtarget has UnpackedD16VMem feature.
|
|
|
|
// TODO: remove this when we discard GFX80 encoding.
|
|
|
|
if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16)
|
|
|
|
&& !(get(Opcode).TSFlags & SIInstrFlags::MIMG))
|
|
|
|
Gen = SIEncodingFamily::GFX80;
|
2017-06-21 10:53:38 +02:00
|
|
|
|
|
|
|
int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
|
2015-01-15 19:42:51 +01:00
|
|
|
|
|
|
|
// -1 means that Opcode is already a native instruction.
|
|
|
|
if (MCOp == -1)
|
|
|
|
return Opcode;
|
|
|
|
|
|
|
|
// (uint16_t)-1 means that Opcode is a pseudo instruction that has
|
|
|
|
// no encoding in the given subtarget generation.
|
|
|
|
if (MCOp == (uint16_t)-1)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
return MCOp;
|
|
|
|
}
|
2018-02-09 17:57:48 +01:00
|
|
|
|
|
|
|
// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
|
|
|
|
bool AMDGPUInstrInfo::isUniformMMO(const MachineMemOperand *MMO) {
|
|
|
|
const Value *Ptr = MMO->getValue();
|
|
|
|
// UndefValue means this is a load of a kernel input. These are uniform.
|
|
|
|
// Sometimes LDS instructions have constant pointers.
|
|
|
|
// If Ptr is null, then that means this mem operand contains a
|
|
|
|
// PseudoSourceValue like GOT.
|
|
|
|
if (!Ptr || isa<UndefValue>(Ptr) ||
|
|
|
|
isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
|
|
|
|
return true;
|
|
|
|
|
2018-02-09 17:57:57 +01:00
|
|
|
if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
|
|
|
|
return true;
|
|
|
|
|
2018-02-09 17:57:48 +01:00
|
|
|
if (const Argument *Arg = dyn_cast<Argument>(Ptr))
|
|
|
|
return AMDGPU::isArgPassedInSGPR(Arg);
|
|
|
|
|
|
|
|
const Instruction *I = dyn_cast<Instruction>(Ptr);
|
|
|
|
return I && I->getMetadata("amdgpu.uniform");
|
|
|
|
}
|