From 26b66eafe7ef337d7fe832da1b276dd3eb62c3c0 Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Fri, 21 Jun 2013 18:51:49 +0000 Subject: [PATCH] [NVPTX] Add support for selecting CUDA vs OCL mode based on triple IR for CUDA should use "nvptx[64]-nvidia-cuda", and IR for NV OpenCL should use "nvptx[64]-nvidia-nvcl" llvm-svn: 184579 --- include/llvm/ADT/Triple.h | 7 +++++-- lib/Support/Triple.cpp | 6 ++++++ lib/Target/NVPTX/NVPTX.h | 3 +-- lib/Target/NVPTX/NVPTXSubtarget.cpp | 17 ++++++----------- test/CodeGen/NVPTX/generic-to-nvvm.ll | 3 ++- test/CodeGen/NVPTX/i1-global.ll | 4 ++-- test/CodeGen/NVPTX/i1-param.ll | 3 ++- test/CodeGen/NVPTX/load-sext-i1.ll | 4 ++-- test/CodeGen/NVPTX/refl1.ll | 4 +++- 9 files changed, 29 insertions(+), 22 deletions(-) diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h index 41e463d57b6..8406cc2861a 100644 --- a/include/llvm/ADT/Triple.h +++ b/include/llvm/ADT/Triple.h @@ -81,7 +81,8 @@ public: BGP, BGQ, Freescale, - IBM + IBM, + NVIDIA }; enum OSType { UnknownOS, @@ -107,7 +108,9 @@ public: NaCl, // Native Client CNK, // BG/P Compute-Node Kernel Bitrig, - AIX + AIX, + CUDA, // NVIDIA CUDA + NVCL // NVIDIA OpenCL }; enum EnvironmentType { UnknownEnvironment, diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 7c02ffb4ac7..3b1bff92130 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -104,6 +104,7 @@ const char *Triple::getVendorTypeName(VendorType Kind) { case BGQ: return "bgq"; case Freescale: return "fsl"; case IBM: return "ibm"; + case NVIDIA: return "nvidia"; } llvm_unreachable("Invalid VendorType!"); @@ -135,6 +136,8 @@ const char *Triple::getOSTypeName(OSType Kind) { case CNK: return "cnk"; case Bitrig: return "bitrig"; case AIX: return "aix"; + case CUDA: return "cuda"; + case NVCL: return "nvcl"; } llvm_unreachable("Invalid OSType"); @@ -260,6 +263,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) { .Case("bgq", Triple::BGQ) .Case("fsl", Triple::Freescale) 
.Case("ibm", Triple::IBM) + .Case("nvidia", Triple::NVIDIA) .Default(Triple::UnknownVendor); } @@ -287,6 +291,8 @@ static Triple::OSType parseOS(StringRef OSName) { .StartsWith("cnk", Triple::CNK) .StartsWith("bitrig", Triple::Bitrig) .StartsWith("aix", Triple::AIX) + .StartsWith("cuda", Triple::CUDA) + .StartsWith("nvcl", Triple::NVCL) .Default(Triple::UnknownOS); } diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h index 179dc277f4e..85cdb8b642d 100644 --- a/lib/Target/NVPTX/NVPTX.h +++ b/lib/Target/NVPTX/NVPTX.h @@ -77,8 +77,7 @@ extern Target TheNVPTXTarget64; namespace NVPTX { enum DrvInterface { NVCL, - CUDA, - TEST + CUDA }; // A field inside TSFlags needs a shift and a mask. The usage is diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp index 2dcd73dcff9..c4d0d6e4193 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -19,23 +19,18 @@ using namespace llvm; -// Select Driver Interface -#include "llvm/Support/CommandLine.h" -namespace { -cl::opt<NVPTX::DrvInterface> DriverInterface( - cl::desc("Choose driver interface:"), - cl::values(clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"), - clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"), - clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"), clEnumValEnd), - cl::init(NVPTX::NVCL)); -} NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool is64Bit) : NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0), SmVersion(20) { - drvInterface = DriverInterface; + Triple T(TT); + + if (T.getOS() == Triple::NVCL) + drvInterface = NVPTX::NVCL; + else + drvInterface = NVPTX::CUDA; // Provide the default CPU if none std::string defCPU = "sm_20"; diff --git a/test/CodeGen/NVPTX/generic-to-nvvm.ll b/test/CodeGen/NVPTX/generic-to-nvvm.ll index c9cb2f71f42..2a527989e41 100644 --- a/test/CodeGen/NVPTX/generic-to-nvvm.ll +++ b/test/CodeGen/NVPTX/generic-to-nvvm.ll @@ -1,6 +1,7 @@ -; 
RUN: llc < %s -march=nvptx -mcpu=sm_20 -drvcuda | FileCheck %s +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" +target triple = "nvptx-nvidia-cuda" ; Ensure global variables in address space 0 are promoted to address space 1 diff --git a/test/CodeGen/NVPTX/i1-global.ll b/test/CodeGen/NVPTX/i1-global.ll index 0595325977e..1dd8ae40db4 100644 --- a/test/CodeGen/NVPTX/i1-global.ll +++ b/test/CodeGen/NVPTX/i1-global.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -drvcuda | FileCheck %s +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" - +target triple = "nvptx-nvidia-cuda" ; CHECK: .visible .global .align 1 .u8 mypred @mypred = addrspace(1) global i1 true, align 1 diff --git a/test/CodeGen/NVPTX/i1-param.ll b/test/CodeGen/NVPTX/i1-param.ll index fabd61a25d2..f4df8743932 100644 --- a/test/CodeGen/NVPTX/i1-param.ll +++ b/test/CodeGen/NVPTX/i1-param.ll @@ -1,6 +1,7 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -drvcuda | FileCheck %s +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" +target triple = "nvptx-nvidia-cuda" ; Make sure predicate (i1) operands to kernels get expanded out to .u8 diff --git a/test/CodeGen/NVPTX/load-sext-i1.ll b/test/CodeGen/NVPTX/load-sext-i1.ll index c9b2e9793bb..d836740eed9 100644 --- a/test/CodeGen/NVPTX/load-sext-i1.ll +++ b/test/CodeGen/NVPTX/load-sext-i1.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -drvcuda | FileCheck %s +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s target datalayout = 
"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" - +target triple = "nvptx-nvidia-cuda" define void @main(i1* %a1, i32 %a2, i32* %arg3) { ; CHECK: ld.u8 diff --git a/test/CodeGen/NVPTX/refl1.ll b/test/CodeGen/NVPTX/refl1.ll index 5a9dac152e4..4aeff092495 100644 --- a/test/CodeGen/NVPTX/refl1.ll +++ b/test/CodeGen/NVPTX/refl1.ll @@ -1,4 +1,6 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 -drvcuda | FileCheck %s +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +target triple = "nvptx-nvidia-cuda" ; Function Attrs: nounwind ; CHECK: .entry foo