1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

[NVPTX] Enable the load-store vectorizer on nvptx.

Reviewers: tra

Subscribers: jholewinski, arsenm, asbirlea

Differential Revision: https://reviews.llvm.org/D22592

llvm-svn: 276196
This commit is contained in:
Justin Lebar 2016-07-20 22:11:36 +00:00
parent 31d2c7e14d
commit 63ae2eb95c
4 changed files with 44 additions and 17 deletions

View File

@ -28,5 +28,5 @@ has_asmprinter = 1
type = Library type = Library
name = NVPTXCodeGen name = NVPTXCodeGen
parent = NVPTX parent = NVPTX
required_libraries = Analysis AsmPrinter CodeGen Core MC NVPTXAsmPrinter NVPTXDesc NVPTXInfo Scalar SelectionDAG Support Target TransformUtils required_libraries = Analysis AsmPrinter CodeGen Core MC NVPTXAsmPrinter NVPTXDesc NVPTXInfo Scalar SelectionDAG Support Target TransformUtils Vectorize
add_to_library_groups = NVPTX add_to_library_groups = NVPTX

View File

@ -46,6 +46,7 @@
#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Vectorize.h"
using namespace llvm; using namespace llvm;
@ -54,6 +55,13 @@ static cl::opt<bool> UseInferAddressSpaces(
cl::desc("Optimize address spaces using NVPTXInferAddressSpaces instead of " cl::desc("Optimize address spaces using NVPTXInferAddressSpaces instead of "
"NVPTXFavorNonGenericAddrSpaces")); "NVPTXFavorNonGenericAddrSpaces"));
// LSV is still relatively new; this switch lets us turn it off in case we
// encounter (or suspect) a bug.
static cl::opt<bool>
DisableLoadStoreVectorizer("disable-nvptx-load-store-vectorizer",
cl::desc("Disable load/store vectorizer"),
cl::init(false), cl::Hidden);
namespace llvm { namespace llvm {
void initializeNVVMIntrRangePass(PassRegistry&); void initializeNVVMIntrRangePass(PassRegistry&);
void initializeNVVMReflectPass(PassRegistry&); void initializeNVVMReflectPass(PassRegistry&);
@ -258,6 +266,8 @@ void NVPTXPassConfig::addIRPasses() {
addPass(createNVPTXLowerArgsPass(&getNVPTXTargetMachine())); addPass(createNVPTXLowerArgsPass(&getNVPTXTargetMachine()));
if (getOptLevel() != CodeGenOpt::None) { if (getOptLevel() != CodeGenOpt::None) {
addAddressSpaceInferencePasses(); addAddressSpaceInferencePasses();
if (!DisableLoadStoreVectorizer)
addPass(createLoadStoreVectorizerPass());
addStraightLineScalarOptimizationPasses(); addStraightLineScalarOptimizationPasses();
} }

View File

@ -0,0 +1,17 @@
; RUN: llc < %s | FileCheck -check-prefix=ENABLED %s
; RUN: llc -disable-nvptx-load-store-vectorizer < %s | FileCheck -check-prefix=DISABLED %s
target triple = "nvptx64-nvidia-cuda"
; Check that the load-store vectorizer is enabled by default for nvptx, and
; that it's disabled by the appropriate flag.
; ENABLED: ld.v2.{{.}}32
; DISABLED: ld.{{.}}32
; DISABLED: ld.{{.}}32
define i32 @f(i32* %p) {
%p.1 = getelementptr i32, i32* %p, i32 1
%v0 = load i32, i32* %p, align 8
%v1 = load i32, i32* %p.1, align 4
%sum = add i32 %v0, %v1
ret i32 %sum
}

View File

@ -45,10 +45,10 @@ define void @sum_of_array(i32 %x, i32 %y, float* nocapture %output) {
ret void ret void
} }
; PTX-LABEL: sum_of_array( ; PTX-LABEL: sum_of_array(
; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}} ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}} ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}} ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}} ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
; IR-LABEL: @sum_of_array( ; IR-LABEL: @sum_of_array(
; TODO: GVN is unable to preserve the "inbounds" keyword on the first GEP. Need ; TODO: GVN is unable to preserve the "inbounds" keyword on the first GEP. Need
@ -90,10 +90,10 @@ define void @sum_of_array2(i32 %x, i32 %y, float* nocapture %output) {
ret void ret void
} }
; PTX-LABEL: sum_of_array2( ; PTX-LABEL: sum_of_array2(
; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}} ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}} ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}} ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}} ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
; IR-LABEL: @sum_of_array2( ; IR-LABEL: @sum_of_array2(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} ; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
@ -140,10 +140,10 @@ define void @sum_of_array3(i32 %x, i32 %y, float* nocapture %output) {
ret void ret void
} }
; PTX-LABEL: sum_of_array3( ; PTX-LABEL: sum_of_array3(
; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}} ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}} ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}} ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}} ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
; IR-LABEL: @sum_of_array3( ; IR-LABEL: @sum_of_array3(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} ; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
@ -186,10 +186,10 @@ define void @sum_of_array4(i32 %x, i32 %y, float* nocapture %output) {
ret void ret void
} }
; PTX-LABEL: sum_of_array4( ; PTX-LABEL: sum_of_array4(
; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}} ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}} ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}} ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}} ; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
; IR-LABEL: @sum_of_array4( ; IR-LABEL: @sum_of_array4(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} ; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}