From c32fcd8c26e79f7ecd4a37e5f5b04d96d362034d Mon Sep 17 00:00:00 2001 From: Yaxun Liu Date: Tue, 6 Nov 2018 21:28:17 +0000 Subject: [PATCH] AMDGPU: Add an option -disable-promote-alloca-to-lds Add this option for debugging and providing workaround. By default it is off so no behavior change in backend. Differential Revision: https://reviews.llvm.org/D54158 llvm-svn: 346267 --- lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 8 ++++++++ test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll | 3 +++ 2 files changed, 11 insertions(+) diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index fe9e4ca0ca4..ec7ea2baec0 100644 --- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -70,6 +70,11 @@ static cl::opt<bool> DisablePromoteAllocaToVector( cl::desc("Disable promote alloca to vector"), cl::init(false)); +static cl::opt<bool> DisablePromoteAllocaToLDS( + "disable-promote-alloca-to-lds", + cl::desc("Disable promote alloca to LDS"), + cl::init(false)); + // FIXME: This can create globals so should be a module pass. class AMDGPUPromoteAlloca : public FunctionPass { private: @@ -706,6 +711,9 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) { if (tryPromoteAllocaToVector(&I)) return true; // Promoted to vector. 
+ if (DisablePromoteAllocaToLDS) + return false; + const Function &ContainingFunction = *I.getParent()->getParent(); CallingConv::ID CC = ContainingFunction.getCallingConv(); diff --git a/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll b/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll index ebef6122990..8d12a725594 100644 --- a/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll +++ b/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll @@ -1,8 +1,11 @@ ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-lds< %s | FileCheck -check-prefix=NOLDS %s ; This normally would be fixed by instcombine to be compare to the GEP ; indices +; NOLDS-NOT: addrspace(3) + ; CHECK-LABEL: @lds_promoted_alloca_icmp_same_derived_pointer( ; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_icmp_same_derived_pointer.alloca, i32 0, i32 %{{[0-9]+}} ; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a