AMDGPU: Add an option -disable-promote-alloca-to-lds

Add this option for debugging and as a workaround. The option is off by default (promotion of allocas to LDS stays enabled), so there is no behavior change in the backend. Differential Revision: https://reviews.llvm.org/D54158 llvm-svn: 346267
2024-11-22 10:42:39 +01:00 · 2018-11-06 21:28:17 +00:00 · 2018-11-06 21:28:17 +00:00 · c32fcd8c26
commit c32fcd8c26
parent 155b10f347
2 changed files with 11 additions and 0 deletions
--- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@ -70,6 +70,11 @@ static cl::opt<bool> DisablePromoteAllocaToVector(
  cl::desc("Disable promote alloca to vector"),
  cl::init(false));

+static cl::opt<bool> DisablePromoteAllocaToLDS(
+  "disable-promote-alloca-to-lds",
+  cl::desc("Disable promote alloca to LDS"),
+  cl::init(false));
+
 // FIXME: This can create globals so should be a module pass.
 class AMDGPUPromoteAlloca : public FunctionPass {
 private:
@ -706,6 +711,9 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
  if (tryPromoteAllocaToVector(&I))
    return true; // Promoted to vector.

+  if (DisablePromoteAllocaToLDS)
+    return false;
+
  const Function &ContainingFunction = *I.getParent()->getParent();
  CallingConv::ID CC = ContainingFunction.getCallingConv();

--- a/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll
+++ b/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll
@ -1,8 +1,11 @@
 ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-lds< %s | FileCheck -check-prefix=NOLDS %s

 ; This normally would be fixed by instcombine to be compare to the GEP
 ; indices

+; NOLDS-NOT: addrspace(3)
+
 ; CHECK-LABEL: @lds_promoted_alloca_icmp_same_derived_pointer(
 ; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_icmp_same_derived_pointer.alloca, i32 0, i32 %{{[0-9]+}}
 ; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a