1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

This patch adds a new NVPTX back-end to LLVM which supports code generation for NVIDIA PTX 3.0. This back-end will (eventually) replace the current PTX back-end, while maintaining compatibility with it.

The new target machines are:

nvptx (old ptx32) => 32-bit PTX
nvptx64 (old ptx64) => 64-bit PTX

The sources are based on the internal NVIDIA NVPTX back-end, and
contain more functionality than the current PTX back-end
provides.

NV_CONTRIB

llvm-svn: 156196
This commit is contained in:
Justin Holewinski 2012-05-04 20:18:50 +00:00
parent 2b868d474e
commit 4ca961430f
86 changed files with 25901 additions and 11 deletions

View File

@ -78,6 +78,7 @@ set(LLVM_ALL_TARGETS
Mips
MBlaze
MSP430
NVPTX
PowerPC
PTX
Sparc

View File

@ -370,6 +370,7 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
hexagon-*) llvm_cv_target_arch="Hexagon" ;;
mblaze-*) llvm_cv_target_arch="MBlaze" ;;
ptx-*) llvm_cv_target_arch="PTX" ;;
nvptx-*) llvm_cv_target_arch="NVPTX" ;;
*) llvm_cv_target_arch="Unknown" ;;
esac])
@ -517,6 +518,7 @@ else
Hexagon) AC_SUBST(TARGET_HAS_JIT,0) ;;
MBlaze) AC_SUBST(TARGET_HAS_JIT,0) ;;
PTX) AC_SUBST(TARGET_HAS_JIT,0) ;;
NVPTX) AC_SUBST(TARGET_HAS_JIT,0) ;;
*) AC_SUBST(TARGET_HAS_JIT,0) ;;
esac
fi
@ -628,13 +630,13 @@ TARGETS_TO_BUILD=""
AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
[Build specific host targets: all or target1,target2,... Valid targets are:
host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon,
xcore, msp430, ptx, and cpp (default=all)]),,
xcore, msp430, ptx, nvptx, and cpp (default=all)]),,
enableval=all)
if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX Hexagon" ;;
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX NVPTX Hexagon" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@ -651,6 +653,7 @@ case "$enableval" in
hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
host) case "$llvm_cv_target_arch" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@ -664,6 +667,7 @@ case "$enableval" in
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
NVPTX) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
*) AC_MSG_ERROR([Can not set target to build]) ;;
esac ;;
*) AC_MSG_ERROR([Unrecognized target $a_target]) ;;

11
configure vendored
View File

@ -1420,7 +1420,7 @@ Optional Features:
--enable-targets Build specific host targets: all or
target1,target2,... Valid targets are: host, x86,
x86_64, sparc, powerpc, arm, mips, spu, hexagon,
xcore, msp430, ptx, and cpp (default=all)
xcore, msp430, ptx, nvptx, and cpp (default=all)
--enable-bindings Build specific language bindings:
all,auto,none,{binding-name} (default=auto)
--enable-libffi Check for the presence of libffi (default is NO)
@ -3903,6 +3903,7 @@ else
hexagon-*) llvm_cv_target_arch="Hexagon" ;;
mblaze-*) llvm_cv_target_arch="MBlaze" ;;
ptx-*) llvm_cv_target_arch="PTX" ;;
nvptx-*) llvm_cv_target_arch="NVPTX" ;;
*) llvm_cv_target_arch="Unknown" ;;
esac
fi
@ -5125,6 +5126,8 @@ else
MBlaze) TARGET_HAS_JIT=0
;;
PTX) TARGET_HAS_JIT=0
;;
NVPTX) TARGET_HAS_JIT=0
;;
*) TARGET_HAS_JIT=0
;;
@ -5310,7 +5313,7 @@ if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX Hexagon" ;;
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX NVPTX Hexagon" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@ -5327,6 +5330,7 @@ case "$enableval" in
hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
host) case "$llvm_cv_target_arch" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@ -5340,6 +5344,7 @@ case "$enableval" in
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
NVPTX) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
*) { { echo "$as_me:$LINENO: error: Can not set target to build" >&5
echo "$as_me: error: Can not set target to build" >&2;}
{ (exit 1); exit 1; }; } ;;
@ -10401,7 +10406,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
#line 10404 "configure"
#line 10409 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H

View File

@ -64,6 +64,8 @@ public:
mblaze, // MBlaze: mblaze
ptx32, // PTX: ptx (32-bit)
ptx64, // PTX: ptx (64-bit)
nvptx, // NVPTX: 32-bit
nvptx64, // NVPTX: 64-bit
le32, // le32: generic little-endian 32-bit CPU (PNaCl / Emscripten)
amdil // amdil: amd IL
};

View File

@ -441,3 +441,4 @@ include "llvm/IntrinsicsCellSPU.td"
include "llvm/IntrinsicsXCore.td"
include "llvm/IntrinsicsPTX.td"
include "llvm/IntrinsicsHexagon.td"
include "llvm/IntrinsicsNVVM.td"

View File

@ -0,0 +1,872 @@
//===- IntrinsicsNVVM.td - Defines NVVM intrinsics ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines all of the NVVM-specific intrinsics for use with NVPTX.
//
//===----------------------------------------------------------------------===//
def llvm_anyi64ptr_ty : LLVMAnyPointerType<llvm_i64_ty>; // (space)i64*
//
// MISC
//
// Count-leading-zeros and population-count over 32- and 64-bit operands;
// both families return i32 and are pure (IntrNoMem).
def int_nvvm_clz_i : GCCBuiltin<"__nvvm_clz_i">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_nvvm_clz_ll : GCCBuiltin<"__nvvm_clz_ll">,
Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
def int_nvvm_popc_i : GCCBuiltin<"__nvvm_popc_i">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_nvvm_popc_ll : GCCBuiltin<"__nvvm_popc_ll">,
Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
// Byte permute (PTX prmt). NOTE(review): the Commutative flag has been
// dropped — prmt selects bytes from its first two operands under control of
// the third, so swapping the first two operands changes the result.
def int_nvvm_prmt : GCCBuiltin<"__nvvm_prmt">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
//
// Min Max
//
// Signed/unsigned 32- and 64-bit integer min/max, plus float/double
// fmin/fmax. The "_ftz" suffix presumably denotes flush-to-zero variants —
// confirm against the PTX ISA. All are pure (IntrNoMem) and genuinely
// commutative in their two operands.
def int_nvvm_min_i : GCCBuiltin<"__nvvm_min_i">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_min_ui : GCCBuiltin<"__nvvm_min_ui">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_min_ll : GCCBuiltin<"__nvvm_min_ll">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_min_ull : GCCBuiltin<"__nvvm_min_ull">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_max_i : GCCBuiltin<"__nvvm_max_i">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_max_ui : GCCBuiltin<"__nvvm_max_ui">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_max_ll : GCCBuiltin<"__nvvm_max_ll">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_max_ull : GCCBuiltin<"__nvvm_max_ull">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_fmin_f : GCCBuiltin<"__nvvm_fmin_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_fmin_ftz_f : GCCBuiltin<"__nvvm_fmin_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
// Reflowed: the trailing comma previously began the continuation line
// ("...], \n , [IntrNoMem..."), unlike every other def in this file.
def int_nvvm_fmax_f : GCCBuiltin<"__nvvm_fmax_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_fmax_ftz_f : GCCBuiltin<"__nvvm_fmax_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_fmin_d : GCCBuiltin<"__nvvm_fmin_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_fmax_d : GCCBuiltin<"__nvvm_fmax_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem, Commutative]>;
//
// Multiplication
//
// mulhi: high half of the full-width product (signed/unsigned, 32/64-bit).
// mul_r{n,z,m,p}: float/double multiplies with an explicit rounding mode —
// presumably round-to-nearest-even / toward-zero / toward-neg-inf /
// toward-pos-inf per PTX; confirm against the PTX ISA. "_ftz" presumably
// flushes subnormals to zero. mul24: 24-bit multiply. All are pure and
// commutative, which is correct for multiplication.
def int_nvvm_mulhi_i : GCCBuiltin<"__nvvm_mulhi_i">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_mulhi_ui : GCCBuiltin<"__nvvm_mulhi_ui">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_mulhi_ll : GCCBuiltin<"__nvvm_mulhi_ll">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_mulhi_ull : GCCBuiltin<"__nvvm_mulhi_ull">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_mul_rn_ftz_f : GCCBuiltin<"__nvvm_mul_rn_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_mul_rn_f : GCCBuiltin<"__nvvm_mul_rn_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_mul_rz_ftz_f : GCCBuiltin<"__nvvm_mul_rz_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_mul_rz_f : GCCBuiltin<"__nvvm_mul_rz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_mul_rm_ftz_f : GCCBuiltin<"__nvvm_mul_rm_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_mul_rm_f : GCCBuiltin<"__nvvm_mul_rm_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_mul_rp_ftz_f : GCCBuiltin<"__nvvm_mul_rp_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_mul_rp_f : GCCBuiltin<"__nvvm_mul_rp_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_mul_rn_d : GCCBuiltin<"__nvvm_mul_rn_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_mul_rz_d : GCCBuiltin<"__nvvm_mul_rz_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_mul_rm_d : GCCBuiltin<"__nvvm_mul_rm_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_mul_rp_d : GCCBuiltin<"__nvvm_mul_rp_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_mul24_i : GCCBuiltin<"__nvvm_mul24_i">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_mul24_ui : GCCBuiltin<"__nvvm_mul24_ui">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
//
// Div
//
// Float/double division: approx variants plus explicit-rounding-mode
// variants (rn/rz/rm/rp). NOTE(review): the Commutative flag has been
// dropped from every definition below — division is not commutative
// (a/b != b/a), so the original flag was incorrect and could license
// invalid operand swaps by the optimizer. All remain pure (IntrNoMem).
def int_nvvm_div_approx_ftz_f : GCCBuiltin<"__nvvm_div_approx_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem]>;
def int_nvvm_div_approx_f : GCCBuiltin<"__nvvm_div_approx_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem]>;
def int_nvvm_div_rn_ftz_f : GCCBuiltin<"__nvvm_div_rn_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem]>;
def int_nvvm_div_rn_f : GCCBuiltin<"__nvvm_div_rn_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem]>;
def int_nvvm_div_rz_ftz_f : GCCBuiltin<"__nvvm_div_rz_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem]>;
def int_nvvm_div_rz_f : GCCBuiltin<"__nvvm_div_rz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem]>;
def int_nvvm_div_rm_ftz_f : GCCBuiltin<"__nvvm_div_rm_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem]>;
def int_nvvm_div_rm_f : GCCBuiltin<"__nvvm_div_rm_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem]>;
def int_nvvm_div_rp_ftz_f : GCCBuiltin<"__nvvm_div_rp_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem]>;
def int_nvvm_div_rp_f : GCCBuiltin<"__nvvm_div_rp_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem]>;
def int_nvvm_div_rn_d : GCCBuiltin<"__nvvm_div_rn_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem]>;
def int_nvvm_div_rz_d : GCCBuiltin<"__nvvm_div_rz_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem]>;
def int_nvvm_div_rm_d : GCCBuiltin<"__nvvm_div_rm_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem]>;
def int_nvvm_div_rp_d : GCCBuiltin<"__nvvm_div_rp_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem]>;
//
// Brev
//
// Bit reversal of a 32- or 64-bit value; pure (IntrNoMem).
def int_nvvm_brev32 : GCCBuiltin<"__nvvm_brev32">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_nvvm_brev64 : GCCBuiltin<"__nvvm_brev64">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem]>;
//
// Sad
//
// Sum of absolute difference (3 operands, i32 result). Commutative here
// refers to the first two operands; |a-b| == |b-a|, so the flag is valid.
def int_nvvm_sad_i : GCCBuiltin<"__nvvm_sad_i">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_sad_ui : GCCBuiltin<"__nvvm_sad_ui">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
//
// Floor Ceil
//
// Unary rounding-to-integer ops for float (_f, with _ftz flush-to-zero
// variant — presumed; confirm against PTX ISA) and double (_d).
def int_nvvm_floor_ftz_f : GCCBuiltin<"__nvvm_floor_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_floor_f : GCCBuiltin<"__nvvm_floor_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_floor_d : GCCBuiltin<"__nvvm_floor_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_ceil_ftz_f : GCCBuiltin<"__nvvm_ceil_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_ceil_f : GCCBuiltin<"__nvvm_ceil_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_ceil_d : GCCBuiltin<"__nvvm_ceil_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
//
// Abs
//
// Integer absolute value (32/64-bit) and float/double fabs.
def int_nvvm_abs_i : GCCBuiltin<"__nvvm_abs_i">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_nvvm_abs_ll : GCCBuiltin<"__nvvm_abs_ll">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem]>;
def int_nvvm_fabs_ftz_f : GCCBuiltin<"__nvvm_fabs_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_fabs_f : GCCBuiltin<"__nvvm_fabs_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_fabs_d : GCCBuiltin<"__nvvm_fabs_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
//
// Round
//
def int_nvvm_round_ftz_f : GCCBuiltin<"__nvvm_round_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_round_f : GCCBuiltin<"__nvvm_round_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_round_d : GCCBuiltin<"__nvvm_round_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
//
// Trunc
//
def int_nvvm_trunc_ftz_f : GCCBuiltin<"__nvvm_trunc_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_trunc_f : GCCBuiltin<"__nvvm_trunc_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_trunc_d : GCCBuiltin<"__nvvm_trunc_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
//
// Saturate
//
def int_nvvm_saturate_ftz_f : GCCBuiltin<"__nvvm_saturate_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_saturate_f : GCCBuiltin<"__nvvm_saturate_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_saturate_d : GCCBuiltin<"__nvvm_saturate_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
//
// Exp2 Log2
//
// Approximate base-2 exponential and logarithm.
def int_nvvm_ex2_approx_ftz_f : GCCBuiltin<"__nvvm_ex2_approx_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_ex2_approx_f : GCCBuiltin<"__nvvm_ex2_approx_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_ex2_approx_d : GCCBuiltin<"__nvvm_ex2_approx_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_lg2_approx_ftz_f : GCCBuiltin<"__nvvm_lg2_approx_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_lg2_approx_f : GCCBuiltin<"__nvvm_lg2_approx_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_lg2_approx_d : GCCBuiltin<"__nvvm_lg2_approx_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
//
// Sin Cos
//
// Approximate sine/cosine; float only (no double variants defined here).
def int_nvvm_sin_approx_ftz_f : GCCBuiltin<"__nvvm_sin_approx_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sin_approx_f : GCCBuiltin<"__nvvm_sin_approx_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_cos_approx_ftz_f : GCCBuiltin<"__nvvm_cos_approx_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_cos_approx_f : GCCBuiltin<"__nvvm_cos_approx_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
//
// Fma
//
// Fused multiply-add a*b+c with explicit rounding mode (rn/rz/rm/rp) for
// float (with _ftz variants) and double. Commutative refers to the first
// two operands (the multiplicands), which do commute.
def int_nvvm_fma_rn_ftz_f : GCCBuiltin<"__nvvm_fma_rn_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_fma_rn_f : GCCBuiltin<"__nvvm_fma_rn_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_fma_rz_ftz_f : GCCBuiltin<"__nvvm_fma_rz_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_fma_rz_f : GCCBuiltin<"__nvvm_fma_rz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_fma_rm_ftz_f : GCCBuiltin<"__nvvm_fma_rm_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_fma_rm_f : GCCBuiltin<"__nvvm_fma_rm_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_fma_rp_ftz_f : GCCBuiltin<"__nvvm_fma_rp_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_fma_rp_f : GCCBuiltin<"__nvvm_fma_rp_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_fma_rn_d : GCCBuiltin<"__nvvm_fma_rn_d">,
Intrinsic<[llvm_double_ty],
[llvm_double_ty, llvm_double_ty, llvm_double_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_fma_rz_d : GCCBuiltin<"__nvvm_fma_rz_d">,
Intrinsic<[llvm_double_ty],
[llvm_double_ty, llvm_double_ty, llvm_double_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_fma_rm_d : GCCBuiltin<"__nvvm_fma_rm_d">,
Intrinsic<[llvm_double_ty],
[llvm_double_ty, llvm_double_ty, llvm_double_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_fma_rp_d : GCCBuiltin<"__nvvm_fma_rp_d">,
Intrinsic<[llvm_double_ty],
[llvm_double_ty, llvm_double_ty, llvm_double_ty],
[IntrNoMem, Commutative]>;
//
// Rcp
//
// Reciprocal (1/x), unary: rounding-mode variants for float/double plus an
// approximate flush-to-zero double variant.
def int_nvvm_rcp_rn_ftz_f : GCCBuiltin<"__nvvm_rcp_rn_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rcp_rn_f : GCCBuiltin<"__nvvm_rcp_rn_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rcp_rz_ftz_f : GCCBuiltin<"__nvvm_rcp_rz_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rcp_rz_f : GCCBuiltin<"__nvvm_rcp_rz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rcp_rm_ftz_f : GCCBuiltin<"__nvvm_rcp_rm_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rcp_rm_f : GCCBuiltin<"__nvvm_rcp_rm_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rcp_rp_ftz_f : GCCBuiltin<"__nvvm_rcp_rp_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rcp_rp_f : GCCBuiltin<"__nvvm_rcp_rp_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rcp_rn_d : GCCBuiltin<"__nvvm_rcp_rn_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_rcp_rz_d : GCCBuiltin<"__nvvm_rcp_rz_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_rcp_rm_d : GCCBuiltin<"__nvvm_rcp_rm_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_rcp_rp_d : GCCBuiltin<"__nvvm_rcp_rp_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_rcp_approx_ftz_d : GCCBuiltin<"__nvvm_rcp_approx_ftz_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
//
// Sqrt
//
// Square root: rounding-mode variants for float/double plus approximate
// float variants.
def int_nvvm_sqrt_rn_ftz_f : GCCBuiltin<"__nvvm_sqrt_rn_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rn_f : GCCBuiltin<"__nvvm_sqrt_rn_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rz_ftz_f : GCCBuiltin<"__nvvm_sqrt_rz_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rz_f : GCCBuiltin<"__nvvm_sqrt_rz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rm_ftz_f : GCCBuiltin<"__nvvm_sqrt_rm_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rm_f : GCCBuiltin<"__nvvm_sqrt_rm_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rp_ftz_f : GCCBuiltin<"__nvvm_sqrt_rp_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rp_f : GCCBuiltin<"__nvvm_sqrt_rp_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_approx_ftz_f : GCCBuiltin<"__nvvm_sqrt_approx_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_approx_f : GCCBuiltin<"__nvvm_sqrt_approx_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rn_d : GCCBuiltin<"__nvvm_sqrt_rn_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rz_d : GCCBuiltin<"__nvvm_sqrt_rz_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rm_d : GCCBuiltin<"__nvvm_sqrt_rm_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rp_d : GCCBuiltin<"__nvvm_sqrt_rp_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
//
// Rsqrt
//
// Approximate reciprocal square root (no rounding-mode variants defined).
def int_nvvm_rsqrt_approx_ftz_f : GCCBuiltin<"__nvvm_rsqrt_approx_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rsqrt_approx_f : GCCBuiltin<"__nvvm_rsqrt_approx_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rsqrt_approx_d : GCCBuiltin<"__nvvm_rsqrt_approx_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
//
// Add
//
// Addition with explicit rounding mode; commutative is correct here.
def int_nvvm_add_rn_ftz_f : GCCBuiltin<"__nvvm_add_rn_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_add_rn_f : GCCBuiltin<"__nvvm_add_rn_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_add_rz_ftz_f : GCCBuiltin<"__nvvm_add_rz_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_add_rz_f : GCCBuiltin<"__nvvm_add_rz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_add_rm_ftz_f : GCCBuiltin<"__nvvm_add_rm_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_add_rm_f : GCCBuiltin<"__nvvm_add_rm_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_add_rp_ftz_f : GCCBuiltin<"__nvvm_add_rp_ftz_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_add_rp_f : GCCBuiltin<"__nvvm_add_rp_f">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_add_rn_d : GCCBuiltin<"__nvvm_add_rn_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_add_rz_d : GCCBuiltin<"__nvvm_add_rz_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_add_rm_d : GCCBuiltin<"__nvvm_add_rm_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_add_rp_d : GCCBuiltin<"__nvvm_add_rp_d">,
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem, Commutative]>;
//
// Convert
//
// Conversions, named source2dest (d=double, f=float, i=i32, ui=unsigned
// i32) with rounding-mode suffixes rn/rz/rm/rp and _ftz float variants.
def int_nvvm_d2f_rn_ftz : GCCBuiltin<"__nvvm_d2f_rn_ftz">,
Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_d2f_rn : GCCBuiltin<"__nvvm_d2f_rn">,
Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_d2f_rz_ftz : GCCBuiltin<"__nvvm_d2f_rz_ftz">,
Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_d2f_rz : GCCBuiltin<"__nvvm_d2f_rz">,
Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_d2f_rm_ftz : GCCBuiltin<"__nvvm_d2f_rm_ftz">,
Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_d2f_rm : GCCBuiltin<"__nvvm_d2f_rm">,
Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_d2f_rp_ftz : GCCBuiltin<"__nvvm_d2f_rp_ftz">,
Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_d2f_rp : GCCBuiltin<"__nvvm_d2f_rp">,
Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
// double -> signed/unsigned i32 (both use llvm_i32_ty; signedness is
// encoded only in the intrinsic name).
def int_nvvm_d2i_rn : GCCBuiltin<"__nvvm_d2i_rn">,
Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_d2i_rz : GCCBuiltin<"__nvvm_d2i_rz">,
Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_d2i_rm : GCCBuiltin<"__nvvm_d2i_rm">,
Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_d2i_rp : GCCBuiltin<"__nvvm_d2i_rp">,
Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_d2ui_rn : GCCBuiltin<"__nvvm_d2ui_rn">,
Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_d2ui_rz : GCCBuiltin<"__nvvm_d2ui_rz">,
Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_d2ui_rm : GCCBuiltin<"__nvvm_d2ui_rm">,
Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_d2ui_rp : GCCBuiltin<"__nvvm_d2ui_rp">,
Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_i2d_rn : GCCBuiltin<"__nvvm_i2d_rn">,
Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_nvvm_i2d_rz : GCCBuiltin<"__nvvm_i2d_rz">,
Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_nvvm_i2d_rm : GCCBuiltin<"__nvvm_i2d_rm">,
Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_nvvm_i2d_rp : GCCBuiltin<"__nvvm_i2d_rp">,
Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_nvvm_ui2d_rn : GCCBuiltin<"__nvvm_ui2d_rn">,
Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_nvvm_ui2d_rz : GCCBuiltin<"__nvvm_ui2d_rz">,
Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_nvvm_ui2d_rm : GCCBuiltin<"__nvvm_ui2d_rm">,
Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_nvvm_ui2d_rp : GCCBuiltin<"__nvvm_ui2d_rp">,
Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_nvvm_f2i_rn_ftz : GCCBuiltin<"__nvvm_f2i_rn_ftz">,
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2i_rn : GCCBuiltin<"__nvvm_f2i_rn">,
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2i_rz_ftz : GCCBuiltin<"__nvvm_f2i_rz_ftz">,
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2i_rz : GCCBuiltin<"__nvvm_f2i_rz">,
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2i_rm_ftz : GCCBuiltin<"__nvvm_f2i_rm_ftz">,
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2i_rm : GCCBuiltin<"__nvvm_f2i_rm">,
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2i_rp_ftz : GCCBuiltin<"__nvvm_f2i_rp_ftz">,
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2i_rp : GCCBuiltin<"__nvvm_f2i_rp">,
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2ui_rn_ftz : GCCBuiltin<"__nvvm_f2ui_rn_ftz">,
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2ui_rn : GCCBuiltin<"__nvvm_f2ui_rn">,
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2ui_rz_ftz : GCCBuiltin<"__nvvm_f2ui_rz_ftz">,
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2ui_rz : GCCBuiltin<"__nvvm_f2ui_rz">,
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2ui_rm_ftz : GCCBuiltin<"__nvvm_f2ui_rm_ftz">,
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2ui_rm : GCCBuiltin<"__nvvm_f2ui_rm">,
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2ui_rp_ftz : GCCBuiltin<"__nvvm_f2ui_rp_ftz">,
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2ui_rp : GCCBuiltin<"__nvvm_f2ui_rp">,
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_i2f_rn : GCCBuiltin<"__nvvm_i2f_rn">,
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_nvvm_i2f_rz : GCCBuiltin<"__nvvm_i2f_rz">,
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_nvvm_i2f_rm : GCCBuiltin<"__nvvm_i2f_rm">,
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_nvvm_i2f_rp : GCCBuiltin<"__nvvm_i2f_rp">,
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_nvvm_ui2f_rn : GCCBuiltin<"__nvvm_ui2f_rn">,
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_nvvm_ui2f_rz : GCCBuiltin<"__nvvm_ui2f_rz">,
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_nvvm_ui2f_rm : GCCBuiltin<"__nvvm_ui2f_rm">,
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_nvvm_ui2f_rp : GCCBuiltin<"__nvvm_ui2f_rp">,
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
// Assemble a double from two distinct 32-bit halves (low word, high word).
// NOTE(review): the Commutative flag has been dropped — the operands play
// different roles (lo vs. hi), so swapping them changes the result.
def int_nvvm_lohi_i2d : GCCBuiltin<"__nvvm_lohi_i2d">,
Intrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
// Extract the low/high 32-bit word of a double's bit pattern.
def int_nvvm_d2i_lo : GCCBuiltin<"__nvvm_d2i_lo">,
Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_d2i_hi : GCCBuiltin<"__nvvm_d2i_hi">,
Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
// float/double <-> 64-bit signed/unsigned integer conversions (ll/ull);
// both signednesses use llvm_i64_ty — signedness lives in the name only.
def int_nvvm_f2ll_rn_ftz : GCCBuiltin<"__nvvm_f2ll_rn_ftz">,
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2ll_rn : GCCBuiltin<"__nvvm_f2ll_rn">,
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2ll_rz_ftz : GCCBuiltin<"__nvvm_f2ll_rz_ftz">,
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2ll_rz : GCCBuiltin<"__nvvm_f2ll_rz">,
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2ll_rm_ftz : GCCBuiltin<"__nvvm_f2ll_rm_ftz">,
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2ll_rm : GCCBuiltin<"__nvvm_f2ll_rm">,
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2ll_rp_ftz : GCCBuiltin<"__nvvm_f2ll_rp_ftz">,
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2ll_rp : GCCBuiltin<"__nvvm_f2ll_rp">,
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2ull_rn_ftz : GCCBuiltin<"__nvvm_f2ull_rn_ftz">,
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2ull_rn : GCCBuiltin<"__nvvm_f2ull_rn">,
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2ull_rz_ftz : GCCBuiltin<"__nvvm_f2ull_rz_ftz">,
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2ull_rz : GCCBuiltin<"__nvvm_f2ull_rz">,
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2ull_rm_ftz : GCCBuiltin<"__nvvm_f2ull_rm_ftz">,
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2ull_rm : GCCBuiltin<"__nvvm_f2ull_rm">,
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2ull_rp_ftz : GCCBuiltin<"__nvvm_f2ull_rp_ftz">,
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2ull_rp : GCCBuiltin<"__nvvm_f2ull_rp">,
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_d2ll_rn : GCCBuiltin<"__nvvm_d2ll_rn">,
Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_d2ll_rz : GCCBuiltin<"__nvvm_d2ll_rz">,
Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_d2ll_rm : GCCBuiltin<"__nvvm_d2ll_rm">,
Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_d2ll_rp : GCCBuiltin<"__nvvm_d2ll_rp">,
Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_d2ull_rn : GCCBuiltin<"__nvvm_d2ull_rn">,
Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_d2ull_rz : GCCBuiltin<"__nvvm_d2ull_rz">,
Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_d2ull_rm : GCCBuiltin<"__nvvm_d2ull_rm">,
Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_d2ull_rp : GCCBuiltin<"__nvvm_d2ull_rp">,
Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_ll2f_rn : GCCBuiltin<"__nvvm_ll2f_rn">,
Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
def int_nvvm_ll2f_rz : GCCBuiltin<"__nvvm_ll2f_rz">,
Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
def int_nvvm_ll2f_rm : GCCBuiltin<"__nvvm_ll2f_rm">,
Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
def int_nvvm_ll2f_rp : GCCBuiltin<"__nvvm_ll2f_rp">,
Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
def int_nvvm_ull2f_rn : GCCBuiltin<"__nvvm_ull2f_rn">,
Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
def int_nvvm_ull2f_rz : GCCBuiltin<"__nvvm_ull2f_rz">,
Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
def int_nvvm_ull2f_rm : GCCBuiltin<"__nvvm_ull2f_rm">,
Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
def int_nvvm_ull2f_rp : GCCBuiltin<"__nvvm_ull2f_rp">,
Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
def int_nvvm_ll2d_rn : GCCBuiltin<"__nvvm_ll2d_rn">,
Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
def int_nvvm_ll2d_rz : GCCBuiltin<"__nvvm_ll2d_rz">,
Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
def int_nvvm_ll2d_rm : GCCBuiltin<"__nvvm_ll2d_rm">,
Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
def int_nvvm_ll2d_rp : GCCBuiltin<"__nvvm_ll2d_rp">,
Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
def int_nvvm_ull2d_rn : GCCBuiltin<"__nvvm_ull2d_rn">,
Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
def int_nvvm_ull2d_rz : GCCBuiltin<"__nvvm_ull2d_rz">,
Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
def int_nvvm_ull2d_rm : GCCBuiltin<"__nvvm_ull2d_rm">,
Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
def int_nvvm_ull2d_rp : GCCBuiltin<"__nvvm_ull2d_rp">,
Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
def int_nvvm_f2h_rn_ftz : GCCBuiltin<"__nvvm_f2h_rn_ftz">,
Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_f2h_rn : GCCBuiltin<"__nvvm_f2h_rn">,
Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_h2f : GCCBuiltin<"__nvvm_h2f">,
Intrinsic<[llvm_float_ty], [llvm_i16_ty], [IntrNoMem]>;
//
// Bitcast
//
// Reinterpret the bit pattern between same-sized integer and floating-point
// types (no value conversion).
def int_nvvm_bitcast_f2i : GCCBuiltin<"__nvvm_bitcast_f2i">,
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_bitcast_i2f : GCCBuiltin<"__nvvm_bitcast_i2f">,
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_nvvm_bitcast_ll2d : GCCBuiltin<"__nvvm_bitcast_ll2d">,
Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
def int_nvvm_bitcast_d2ll : GCCBuiltin<"__nvvm_bitcast_d2ll">,
Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
// Atomic not available as an llvm intrinsic.
// These take a pointer (any address space) and an operand; they read and
// write only through that pointer (IntrReadWriteArgMem), which must not be
// captured (NoCapture<0>).
def int_nvvm_atomic_load_add_f32 : Intrinsic<[llvm_float_ty],
[LLVMAnyPointerType<llvm_float_ty>, llvm_float_ty],
[IntrReadWriteArgMem, NoCapture<0>]>;
def int_nvvm_atomic_load_inc_32 : Intrinsic<[llvm_i32_ty],
[LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
[IntrReadWriteArgMem, NoCapture<0>]>;
def int_nvvm_atomic_load_dec_32 : Intrinsic<[llvm_i32_ty],
[LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
[IntrReadWriteArgMem, NoCapture<0>]>;
// Bar.Sync
// Thread-block barriers. No attributes: these must not be moved or deleted.
def int_cuda_syncthreads : GCCBuiltin<"__syncthreads">,
Intrinsic<[], [], []>;
def int_nvvm_barrier0 : GCCBuiltin<"__nvvm_bar0">,
Intrinsic<[], [], []>;
// Barrier-with-reduction forms: combine the i32 predicate across the block.
def int_nvvm_barrier0_popc : GCCBuiltin<"__nvvm_bar0_popc">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
def int_nvvm_barrier0_and : GCCBuiltin<"__nvvm_bar0_and">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
def int_nvvm_barrier0_or : GCCBuiltin<"__nvvm_bar0_or">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
// Membar
// Memory fences at CTA, GPU (gl), and system scope.
def int_nvvm_membar_cta : GCCBuiltin<"__nvvm_membar_cta">,
Intrinsic<[], [], []>;
def int_nvvm_membar_gl : GCCBuiltin<"__nvvm_membar_gl">,
Intrinsic<[], [], []>;
def int_nvvm_membar_sys : GCCBuiltin<"__nvvm_membar_sys">,
Intrinsic<[], [], []>;
// Accessing special registers
// Readers for the PTX special registers: thread index (tid), block size
// (ntid), block index (ctaid), grid size (nctaid) and warp size. All are
// IntrNoMem: constant for the lifetime of a thread.
def int_nvvm_read_ptx_sreg_tid_x :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_tid_x">;
def int_nvvm_read_ptx_sreg_tid_y :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_tid_y">;
def int_nvvm_read_ptx_sreg_tid_z :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_tid_z">;
def int_nvvm_read_ptx_sreg_ntid_x :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_x">;
def int_nvvm_read_ptx_sreg_ntid_y :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_y">;
def int_nvvm_read_ptx_sreg_ntid_z :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_z">;
def int_nvvm_read_ptx_sreg_ctaid_x :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_x">;
def int_nvvm_read_ptx_sreg_ctaid_y :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_y">;
def int_nvvm_read_ptx_sreg_ctaid_z :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_z">;
def int_nvvm_read_ptx_sreg_nctaid_x :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_x">;
def int_nvvm_read_ptx_sreg_nctaid_y :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_y">;
def int_nvvm_read_ptx_sreg_nctaid_z :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_z">;
def int_nvvm_read_ptx_sreg_warpsize :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_warpsize">;
// Generated within nvvm. Use for ldu on sm_20 or later
// @TODO: Revisit this, Changed LLVMAnyPointerType to LLVMPointerType
// ldu = load via the read-only data cache; the pointer argument is not
// captured and only read (IntrReadMem). Overloaded on int / fp / pointer
// result types; names fixed explicitly since they are overloaded.
def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
[LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
"llvm.nvvm.ldu.global.i">;
def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
[LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
"llvm.nvvm.ldu.global.f">;
def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
[LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
"llvm.nvvm.ldu.global.p">;
// Use for generic pointers
// - These intrinsics are used to convert address spaces.
// - The input pointer and output pointer must have the same type, except for
// the address-space. (This restriction is not enforced here as there is
// currently no way to describe it).
// - This complements the llvm bitcast, which can be used to cast one type
// of pointer to another type of pointer, while the address space remains
// the same.
def int_nvvm_ptr_local_to_gen: Intrinsic<[llvm_anyptr_ty],
[llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
"llvm.nvvm.ptr.local.to.gen">;
def int_nvvm_ptr_shared_to_gen: Intrinsic<[llvm_anyptr_ty],
[llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
"llvm.nvvm.ptr.shared.to.gen">;
def int_nvvm_ptr_global_to_gen: Intrinsic<[llvm_anyptr_ty],
[llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
"llvm.nvvm.ptr.global.to.gen">;
def int_nvvm_ptr_constant_to_gen: Intrinsic<[llvm_anyptr_ty],
[llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
"llvm.nvvm.ptr.constant.to.gen">;
// Inverse direction: narrow a generic pointer back to a specific space.
def int_nvvm_ptr_gen_to_global: Intrinsic<[llvm_anyptr_ty],
[llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
"llvm.nvvm.ptr.gen.to.global">;
def int_nvvm_ptr_gen_to_shared: Intrinsic<[llvm_anyptr_ty],
[llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
"llvm.nvvm.ptr.gen.to.shared">;
def int_nvvm_ptr_gen_to_local: Intrinsic<[llvm_anyptr_ty],
[llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
"llvm.nvvm.ptr.gen.to.local">;
def int_nvvm_ptr_gen_to_constant: Intrinsic<[llvm_anyptr_ty],
[llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
"llvm.nvvm.ptr.gen.to.constant">;
// Used in nvvm internally to help address space opt and ptx code generation
// This is for params that are passed to kernel functions by pointer by-val.
def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty],
[llvm_anyptr_ty],
[IntrNoMem, NoCapture<0>],
"llvm.nvvm.ptr.gen.to.param">;
// Move intrinsics, used in nvvm internally
// Identity moves, one per scalar type plus an overloaded pointer form.
def int_nvvm_move_i8 : Intrinsic<[llvm_i8_ty], [llvm_i8_ty], [IntrNoMem],
"llvm.nvvm.move.i8">;
def int_nvvm_move_i16 : Intrinsic<[llvm_i16_ty], [llvm_i16_ty], [IntrNoMem],
"llvm.nvvm.move.i16">;
def int_nvvm_move_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem],
"llvm.nvvm.move.i32">;
def int_nvvm_move_i64 : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem],
"llvm.nvvm.move.i64">;
def int_nvvm_move_float : Intrinsic<[llvm_float_ty], [llvm_float_ty],
[IntrNoMem], "llvm.nvvm.move.float">;
def int_nvvm_move_double : Intrinsic<[llvm_double_ty], [llvm_double_ty],
[IntrNoMem], "llvm.nvvm.move.double">;
def int_nvvm_move_ptr : Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty],
[IntrNoMem, NoCapture<0>], "llvm.nvvm.move.ptr">;
/// Error / Warn
/// Emit a compile-time diagnostic; the pointer argument carries the message.
def int_nvvm_compiler_error :
Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.error">;
def int_nvvm_compiler_warn :
Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.warn">;

View File

@ -40,6 +40,8 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case mblaze: return "mblaze";
case ptx32: return "ptx32";
case ptx64: return "ptx64";
case nvptx: return "nvptx";
case nvptx64: return "nvptx64";
case le32: return "le32";
case amdil: return "amdil";
}
@ -76,6 +78,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case ptx32: return "ptx";
case ptx64: return "ptx";
case nvptx: return "nvptx";
case nvptx64: return "nvptx";
case le32: return "le32";
case amdil: return "amdil";
}
@ -162,6 +166,8 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("xcore", xcore)
.Case("ptx32", ptx32)
.Case("ptx64", ptx64)
.Case("nvptx", nvptx)
.Case("nvptx64", nvptx64)
.Case("le32", le32)
.Case("amdil", amdil)
.Default(UnknownArch);
@ -194,6 +200,8 @@ Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) {
.Case("r600", Triple::r600)
.Case("ptx32", Triple::ptx32)
.Case("ptx64", Triple::ptx64)
.Case("nvptx", Triple::nvptx)
.Case("nvptx64", Triple::nvptx64)
.Case("amdil", Triple::amdil)
.Default(Triple::UnknownArch);
}
@ -217,6 +225,8 @@ const char *Triple::getArchNameForAssembler() {
.Case("r600", "r600")
.Case("ptx32", "ptx32")
.Case("ptx64", "ptx64")
.Case("nvptx", "nvptx")
.Case("nvptx64", "nvptx64")
.Case("le32", "le32")
.Case("amdil", "amdil")
.Default(NULL);
@ -251,6 +261,8 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("xcore", Triple::xcore)
.Case("ptx32", Triple::ptx32)
.Case("ptx64", Triple::ptx64)
.Case("nvptx", Triple::nvptx)
.Case("nvptx64", Triple::nvptx64)
.Case("le32", Triple::le32)
.Case("amdil", Triple::amdil)
.Default(Triple::UnknownArch);
@ -652,6 +664,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::mblaze:
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::nvptx:
case llvm::Triple::ppc:
case llvm::Triple::ptx32:
case llvm::Triple::r600:
@ -664,6 +677,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
case llvm::Triple::nvptx64:
case llvm::Triple::ppc64:
case llvm::Triple::ptx64:
case llvm::Triple::sparcv9:
@ -701,6 +715,7 @@ Triple Triple::get32BitArchVariant() const {
case Triple::mblaze:
case Triple::mips:
case Triple::mipsel:
case Triple::nvptx:
case Triple::ppc:
case Triple::ptx32:
case Triple::r600:
@ -714,6 +729,7 @@ Triple Triple::get32BitArchVariant() const {
case Triple::mips64: T.setArch(Triple::mips); break;
case Triple::mips64el: T.setArch(Triple::mipsel); break;
case Triple::nvptx64: T.setArch(Triple::nvptx); break;
case Triple::ppc64: T.setArch(Triple::ppc); break;
case Triple::ptx64: T.setArch(Triple::ptx32); break;
case Triple::sparcv9: T.setArch(Triple::sparc); break;
@ -742,6 +758,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::mips64:
case Triple::mips64el:
case Triple::nvptx64:
case Triple::ppc64:
case Triple::ptx64:
case Triple::sparcv9:
@ -751,6 +768,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::mips: T.setArch(Triple::mips64); break;
case Triple::mipsel: T.setArch(Triple::mips64el); break;
case Triple::nvptx: T.setArch(Triple::nvptx64); break;
case Triple::ppc: T.setArch(Triple::ppc64); break;
case Triple::ptx32: T.setArch(Triple::ptx64); break;
case Triple::sparc: T.setArch(Triple::sparcv9); break;

View File

@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 Mips PTX PowerPC Sparc X86 XCore
subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 Mips NVPTX PTX PowerPC Sparc X86 XCore
; This is a special group whose required libraries are extended (by llvm-build)
; with the best execution engine (the native JIT, if available, or the

View File

@ -0,0 +1,33 @@
# Run TableGen over the target description to produce the generated
# register/instruction/asm-writer/isel/subtarget tables.
set(LLVM_TARGET_DEFINITIONS NVPTX.td)
tablegen(LLVM NVPTXGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM NVPTXGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM NVPTXGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM NVPTXGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget)
# Aggregate target other components depend on to ensure tablegen ran first.
add_public_tablegen_target(NVPTXCommonTableGen)
# Hand-written sources of the NVPTX code generator library.
set(NVPTXCodeGen_sources
NVPTXFrameLowering.cpp
NVPTXInstrInfo.cpp
NVPTXISelDAGToDAG.cpp
NVPTXISelLowering.cpp
NVPTXRegisterInfo.cpp
NVPTXSubtarget.cpp
NVPTXTargetMachine.cpp
NVPTXSplitBBatBar.cpp
NVPTXLowerAggrCopies.cpp
NVPTXutil.cpp
NVPTXAllocaHoisting.cpp
NVPTXAsmPrinter.cpp
NVPTXUtilities.cpp
VectorElementize.cpp
)
add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
# Sub-libraries: target registration, instruction printer, MC descriptions.
add_subdirectory(TargetInfo)
add_subdirectory(InstPrinter)
add_subdirectory(MCTargetDesc)

View File

@ -0,0 +1,7 @@
# The printer needs private headers from the parent NVPTX directory
# (both source and generated-binary trees).
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMNVPTXAsmPrinter
NVPTXInstPrinter.cpp
)
# Generated .inc tables must exist before this library compiles.
add_dependencies(LLVMNVPTXAsmPrinter NVPTXCommonTableGen)

View File

@ -0,0 +1,23 @@
;===- ./lib/Target/NVPTX/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
; http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;
[component_0]
type = Library
name = NVPTXAsmPrinter
parent = NVPTX
required_libraries = MC Support
add_to_library_groups = NVPTX

View File

@ -0,0 +1,15 @@
##===- lib/Target/NVPTX/InstPrinter/Makefile ---------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
LIBRARYNAME = LLVMNVPTXAsmPrinter
# Hack: we need to include 'main' NVPTX target directory to grab private headers
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common

View File

@ -0,0 +1 @@
// Placeholder

View File

@ -0,0 +1,32 @@
;===- ./lib/Target/NVPTX/LLVMBuild.txt -------------------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
; http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;
[common]
subdirectories = InstPrinter MCTargetDesc TargetInfo
[component_0]
type = TargetGroup
name = NVPTX
parent = Target
has_asmprinter = 1
[component_1]
type = Library
name = NVPTXCodeGen
parent = NVPTX
required_libraries = Analysis AsmPrinter CodeGen Core MC NVPTXDesc NVPTXInfo SelectionDAG Support Target TransformUtils
add_to_library_groups = NVPTX

View File

@ -0,0 +1,9 @@
# MC-layer target descriptions (asm info + target-desc registration).
add_llvm_library(LLVMNVPTXDesc
NVPTXMCAsmInfo.cpp
NVPTXMCTargetDesc.cpp
)
# Generated .inc tables must exist before this library compiles.
add_dependencies(LLVMNVPTXDesc NVPTXCommonTableGen)
# Hack: we need to include 'main' target directory to grab private headers
# (left disabled for now; re-enable if private parent headers are needed)
#include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)

View File

@ -0,0 +1,23 @@
;===- ./lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
; http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;
[component_0]
type = Library
name = NVPTXDesc
parent = NVPTX
required_libraries = MC NVPTXAsmPrinter NVPTXInfo Support
add_to_library_groups = NVPTX

View File

@ -0,0 +1,16 @@
##===- lib/Target/NVPTX/MCTargetDesc/Makefile --------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
LIBRARYNAME = LLVMNVPTXDesc
# Hack: we need to include 'main' target directory to grab private headers
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common

View File

@ -0,0 +1,88 @@
//===-- NVPTXBaseInfo.h - Top-level definitions for NVPTX -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains small standalone helper functions and enum definitions for
// the NVPTX target useful for the compiler back-end and the MC libraries.
// As such, it deliberately does not include references to LLVM core
// code gen types, passes, etc..
//
//===----------------------------------------------------------------------===//
#ifndef NVPTXBASEINFO_H
#define NVPTXBASEINFO_H
namespace llvm {
// Numbered address spaces used by the NVPTX back-end in LLVM IR
// addrspace() qualifiers.
enum AddressSpace {
ADDRESS_SPACE_GENERIC = 0,
ADDRESS_SPACE_GLOBAL = 1,
ADDRESS_SPACE_CONST_NOT_GEN = 2, // Not part of generic space
ADDRESS_SPACE_SHARED = 3,
ADDRESS_SPACE_CONST = 4,
ADDRESS_SPACE_LOCAL = 5,
// NVVM Internal
ADDRESS_SPACE_PARAM = 101
};
// Keys for per-function/per-variable annotations; each value indexes the
// matching entry in PropertyAnnotationNames below — keep both in sync.
enum PropertyAnnotation {
PROPERTY_MAXNTID_X = 0,
PROPERTY_MAXNTID_Y,
PROPERTY_MAXNTID_Z,
PROPERTY_REQNTID_X,
PROPERTY_REQNTID_Y,
PROPERTY_REQNTID_Z,
PROPERTY_MINNCTAPERSM,
PROPERTY_ISTEXTURE,
PROPERTY_ISSURFACE,
PROPERTY_ISSAMPLER,
PROPERTY_ISREADONLY_IMAGE_PARAM,
PROPERTY_ISWRITEONLY_IMAGE_PARAM,
PROPERTY_ISKERNEL_FUNCTION,
PROPERTY_ALIGN,
// last property
PROPERTY_LAST
};
const unsigned AnnotationNameLen = 8; // length of each annotation name
// String names of the annotations, indexed by PropertyAnnotation.
const char
PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
"maxntidx", // PROPERTY_MAXNTID_X
"maxntidy", // PROPERTY_MAXNTID_Y
"maxntidz", // PROPERTY_MAXNTID_Z
"reqntidx", // PROPERTY_REQNTID_X
"reqntidy", // PROPERTY_REQNTID_Y
"reqntidz", // PROPERTY_REQNTID_Z
"minctasm", // PROPERTY_MINNCTAPERSM
"texture", // PROPERTY_ISTEXTURE
"surface", // PROPERTY_ISSURFACE
"sampler", // PROPERTY_ISSAMPLER
"rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM
"wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM
"kernel", // PROPERTY_ISKERNEL_FUNCTION
"align", // PROPERTY_ALIGN
// last property
"proplast", // PROPERTY_LAST
};
// name of named metadata used for global annotations
#if defined(__GNUC__)
// As this is declared to be static but some of the .cpp files that
// include this header do not use this variable, gcc gives a warning when
// compiling those .cpp files, hence __attribute__((unused)).
__attribute__((unused))
#endif
static const char* NamedMDForAnnotations = "nvvm.annotations";
}
#endif

View File

@ -0,0 +1,63 @@
//===-- NVPTXMCAsmInfo.cpp - NVPTX asm properties -------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declarations of the NVPTXMCAsmInfo properties.
//
//===----------------------------------------------------------------------===//
#include "NVPTXMCAsmInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
// Storage for the -debug-compile flag; read below to decide whether the
// asm-info object advertises debug-information support.
bool CompileForDebugging;
// -debug-compile - Command line option to inform opt and llc passes to
// compile for debugging
static cl::opt<bool, true>
Debug("debug-compile", cl::desc("Compile for debugging"), cl::Hidden,
cl::location(CompileForDebugging),
cl::init(false));
void NVPTXMCAsmInfo::anchor() { }
// NVPTXMCAsmInfo - configure MCAsmInfo properties for the PTX assembly
// dialect. T is currently unused; TT is the target triple string and
// selects 32- vs 64-bit pointer size (nvptx vs nvptx64).
NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) {
  Triple TheTriple(TT);
  // nvptx64 uses 8-byte pointers; otherwise the MCAsmInfo default stands.
  if (TheTriple.getArch() == Triple::nvptx64)
    PointerSize = 8;

  CommentString = "//";

  AllowPeriodsInName = false;
  HasSetDirective = false;
  HasSingleParameterDotFile = false;
  InlineAsmStart = " inline asm";
  InlineAsmEnd = " inline asm";

  // Only claim debug-info support when -debug-compile was given.
  SupportsDebugInformation = CompileForDebugging;
  HasDotTypeDotSizeDirective = false;

  // PTX emits data with .b8/.b16/.b32/.b64 directives.
  Data8bitsDirective = " .b8 ";
  Data16bitsDirective = " .b16 ";
  Data32bitsDirective = " .b32 ";
  Data64bitsDirective = " .b64 ";
  // NOTE: an earlier dead assignment of "$L__" was removed; this final
  // (and therefore effective) value has always been the empty string.
  PrivateGlobalPrefix = "";
  ZeroDirective = " .b8";
  AsciiDirective = " .b8";
  AscizDirective = " .b8";

  // @TODO: Can we just disable this?
  GlobalDirective = "\t// .globl\t";
}

View File

@ -0,0 +1,30 @@
//===-- NVPTXMCAsmInfo.h - NVPTX asm properties ----------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declaration of the NVPTXMCAsmInfo class.
//
//===----------------------------------------------------------------------===//
#ifndef NVPTX_MCASM_INFO_H
#define NVPTX_MCASM_INFO_H
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
class Target;
class StringRef;
// MCAsmInfo specialization describing the PTX assembly dialect
// (comment syntax, data directives, supported features).
class NVPTXMCAsmInfo : public MCAsmInfo {
// Out-of-line anchor pins the vtable to NVPTXMCAsmInfo.cpp.
virtual void anchor();
public:
// TT is the target triple; it selects 32- vs 64-bit pointer size.
explicit NVPTXMCAsmInfo(const Target &T, const StringRef &TT);
};
} // namespace llvm
#endif // NVPTX_MCASM_INFO_H

View File

@ -0,0 +1,91 @@
//===-- NVPTXMCTargetDesc.cpp - NVPTX Target Descriptions -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file provides NVPTX specific target descriptions.
//
//===----------------------------------------------------------------------===//
#include "NVPTXMCTargetDesc.h"
#include "NVPTXMCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "NVPTXGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
#include "NVPTXGenSubtargetInfo.inc"
#define GET_REGINFO_MC_DESC
#include "NVPTXGenRegisterInfo.inc"
using namespace llvm;
/// Build the MC instruction info table for NVPTX, populated from the
/// TableGen-generated descriptions. Caller takes ownership.
static MCInstrInfo *createNVPTXMCInstrInfo() {
  MCInstrInfo *Info = new MCInstrInfo();
  InitNVPTXMCInstrInfo(Info);
  return Info;
}
/// Build the MC register info table for NVPTX. Caller takes ownership.
static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) {
  MCRegisterInfo *RegInfo = new MCRegisterInfo();
  // PTX does not have a return address register, hence register 0.
  InitNVPTXMCRegisterInfo(RegInfo, 0);
  return RegInfo;
}
/// Build the subtarget info object for the given triple (TT), CPU name and
/// feature string (FS). Caller takes ownership.
static MCSubtargetInfo *createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
                                                   StringRef FS) {
  MCSubtargetInfo *STI = new MCSubtargetInfo();
  InitNVPTXMCSubtargetInfo(STI, TT, CPU, FS);
  return STI;
}
/// Build the codegen info object carrying the relocation model, code model
/// and optimization level. Caller takes ownership; TT is unused here.
static MCCodeGenInfo *createNVPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM,
                                               CodeModel::Model CM,
                                               CodeGenOpt::Level OL) {
  MCCodeGenInfo *CGI = new MCCodeGenInfo();
  CGI->InitMCCodeGenInfo(RM, CM, OL);
  return CGI;
}
// Force static initialization.
// Register every MC-layer factory for both the 32-bit and 64-bit NVPTX
// targets with the TargetRegistry. Called once at startup.
extern "C" void LLVMInitializeNVPTXTargetMC() {
// Register the MC asm info.
RegisterMCAsmInfo<NVPTXMCAsmInfo> X(TheNVPTXTarget32);
RegisterMCAsmInfo<NVPTXMCAsmInfo> Y(TheNVPTXTarget64);
// Register the MC codegen info.
TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget32,
createNVPTXMCCodeGenInfo);
TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget64,
createNVPTXMCCodeGenInfo);
// Register the MC instruction info.
TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget32, createNVPTXMCInstrInfo);
TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget64, createNVPTXMCInstrInfo);
// Register the MC register info.
TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget32,
createNVPTXMCRegisterInfo);
TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget64,
createNVPTXMCRegisterInfo);
// Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget32,
createNVPTXMCSubtargetInfo);
TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget64,
createNVPTXMCSubtargetInfo);
}

View File

@ -0,0 +1,36 @@
//===-- NVPTXMCTargetDesc.h - NVPTX Target Descriptions ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file provides NVPTX specific target descriptions.
//
//===----------------------------------------------------------------------===//
#ifndef NVPTXMCTARGETDESC_H
#define NVPTXMCTARGETDESC_H
namespace llvm {
class Target;
// Target singletons for the 32-bit and 64-bit PTX variants, defined in
// the TargetInfo library.
extern Target TheNVPTXTarget32;
extern Target TheNVPTXTarget64;
} // End llvm namespace
// Defines symbolic names for PTX registers.
#define GET_REGINFO_ENUM
#include "NVPTXGenRegisterInfo.inc"
// Defines symbolic names for the PTX instructions.
#define GET_INSTRINFO_ENUM
#include "NVPTXGenInstrInfo.inc"
// Defines subtarget feature/CPU enums.
#define GET_SUBTARGETINFO_ENUM
#include "NVPTXGenSubtargetInfo.inc"
#endif

23
lib/Target/NVPTX/Makefile Normal file
View File

@ -0,0 +1,23 @@
##===- lib/Target/NVPTX/Makefile ---------------------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
# Path from this directory back to the LLVM top level.
LEVEL = ../../..
LIBRARYNAME = LLVMNVPTXCodeGen
TARGET = NVPTX
# Make sure that tblgen is run, first thing.
BUILT_SOURCES = NVPTXGenAsmWriter.inc \
NVPTXGenDAGISel.inc \
NVPTXGenInstrInfo.inc \
NVPTXGenRegisterInfo.inc \
NVPTXGenSubtargetInfo.inc
# Subdirectories built alongside the main codegen library.
DIRS = InstPrinter TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common

View File

@ -0,0 +1,49 @@
//===-- ManagedStringPool.h - Managed String Pool ---------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The strings allocated from a managed string pool are owned by the string
// pool and will be deleted together with the managed string pool.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_SUPPORT_MANAGED_STRING_H
#define LLVM_SUPPORT_MANAGED_STRING_H
#include "llvm/ADT/SmallVector.h"
#include <string>
namespace llvm {
/// ManagedStringPool - The strings allocated from a managed string pool are
/// owned by the string pool and will be deleted together with the managed
/// string pool.
/// ManagedStringPool - Owns a collection of heap-allocated std::strings;
/// every string handed out by getManagedString() remains valid until the
/// pool itself is destroyed, at which point all of them are freed.
class ManagedStringPool {
  SmallVector<std::string *, 8> Pool; // owned pointers

public:
  ManagedStringPool() {}

  ~ManagedStringPool() {
    // Release every string still owned by the pool.
    for (unsigned I = 0, E = Pool.size(); I != E; ++I)
      delete Pool[I];
  }

  /// Copy S into a new pool-owned std::string and return it. The pool
  /// retains ownership; callers must not delete the result.
  std::string *getManagedString(const char *S) {
    std::string *Str = new std::string(S);
    Pool.push_back(Str);
    return Str;
  }
};
}
#endif

137
lib/Target/NVPTX/NVPTX.h Normal file
View File

@ -0,0 +1,137 @@
//===-- NVPTX.h - Top-level interface for NVPTX representation --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the entry points for global functions defined in
// the LLVM NVPTX back-end.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TARGET_NVPTX_H
#define LLVM_TARGET_NVPTX_H
#include <cassert>
#include <iosfwd>
#include "llvm/Value.h"
#include "llvm/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
namespace llvm {
class NVPTXTargetMachine;
class FunctionPass;
class formatted_raw_ostream;
namespace NVPTXCC {
// Condition codes used by NVPTX compare/branch handling.
enum CondCodes {
  EQ,
  NE,
  LT,
  LE,
  GT,
  GE
};
}

// Map an NVPTX condition code to its PTX mnemonic suffix.
// All enumerators are listed explicitly (no 'default:') so the compiler can
// warn if a new condition code is added without updating this mapping; the
// original code's 'default: assert(0)' fell through to "ne" in NDEBUG
// builds, so the release-mode fallback below preserves that behavior.
inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
  switch (CC) {
  case NVPTXCC::EQ: return "eq";
  case NVPTXCC::NE: return "ne";
  case NVPTXCC::LT: return "lt";
  case NVPTXCC::LE: return "le";
  case NVPTXCC::GT: return "gt";
  case NVPTXCC::GE: return "ge";
  }
  assert(0 && "Unknown condition code");
  return "ne"; // legacy release-build fallback
}
FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
llvm::CodeGenOpt::Level OptLevel);
FunctionPass *createVectorElementizePass(NVPTXTargetMachine &);
FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);
bool isImageOrSamplerVal(const Value *, const Module *);
extern Target TheNVPTXTarget32;
extern Target TheNVPTXTarget64;
namespace NVPTX
{
// Which driver interface the module is compiled for.
enum DrvInterface {
NVCL,
CUDA,
TEST
};
// A field inside TSFlags needs a shift and a mask. The usage is
// always as follows :
// ((TSFlags & fieldMask) >> fieldShift)
// The enum keeps the mask, the shift, and all valid values of the
// field in one place.
// 4-bit field classifying vector instructions.
enum VecInstType {
VecInstTypeShift = 0,
VecInstTypeMask = 0xF,
VecNOP = 0,
VecLoad = 1,
VecStore = 2,
VecBuild = 3,
VecShuffle = 4,
VecExtract = 5,
VecInsert = 6,
VecDest = 7,
VecOther = 15
};
// 1-bit flag: instruction is a simple register move.
enum SimpleMove {
SimpleMoveMask = 0x10,
SimpleMoveShift = 4
};
// 1-bit flags: instruction is a load / a store.
enum LoadStore {
isLoadMask = 0x20,
isLoadShift = 5,
isStoreMask = 0x40,
isStoreShift = 6
};
// Operand encodings used by PTX load/store instructions.
namespace PTXLdStInstCode {
// Address-space code carried on the memory operand.
enum AddressSpace{
GENERIC = 0,
GLOBAL = 1,
CONSTANT = 2,
SHARED = 3,
PARAM = 4,
LOCAL = 5
};
// Signedness/kind of the loaded or stored value.
enum FromType {
Unsigned = 0,
Signed,
Float
};
// Number of elements accessed by the instruction.
enum VecType {
Scalar = 1,
V2 = 2,
V4 = 4
};
}
}
} // end namespace llvm;
// Defines symbolic names for NVPTX registers. This defines a mapping from
// register name to register number.
#define GET_REGINFO_ENUM
#include "NVPTXGenRegisterInfo.inc"
// Defines symbolic names for the NVPTX instructions.
#define GET_INSTRINFO_ENUM
#include "NVPTXGenInstrInfo.inc"
#endif

44
lib/Target/NVPTX/NVPTX.td Normal file
View File

@ -0,0 +1,44 @@
//===- NVPTX.td - Describe the NVPTX Target Machine -----------*- tblgen -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// This is the top level entry point for the NVPTX target.
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Target-independent interfaces
//===----------------------------------------------------------------------===//
// Pull in the target-independent TableGen interfaces, then the NVPTX
// register and instruction descriptions.
include "llvm/Target/Target.td"
include "NVPTXRegisterInfo.td"
include "NVPTXInstrInfo.td"
//===----------------------------------------------------------------------===//
// Subtarget Features.
// - We use the SM version number instead of explicit feature table.
// - Need at least one feature to avoid generating zero sized array by
// TableGen in NVPTXGenSubtarget.inc.
//===----------------------------------------------------------------------===//
def FeatureDummy : SubtargetFeature<"dummy", "dummy", "true", "">;
//===----------------------------------------------------------------------===//
// NVPTX supported processors.
//===----------------------------------------------------------------------===//
// Processors are identified by SM name only; no scheduling itineraries.
class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, NoItineraries, Features>;
def : Proc<"sm_10", [FeatureDummy]>;
// Instruction set container; instruction definitions come from
// NVPTXInstrInfo.td included above.
def NVPTXInstrInfo : InstrInfo {
}
// Top-level target definition.
def NVPTX : Target {
let InstructionSet = NVPTXInstrInfo;
}

View File

@ -0,0 +1,48 @@
//===-- AllocaHoisting.cpp - Hoist allocas to the entry block ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Hoist the alloca instructions in the non-entry blocks to the entry blocks.
//
//===----------------------------------------------------------------------===//
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Constants.h"
#include "NVPTXAllocaHoisting.h"
namespace llvm {
// Walk every non-entry basic block and move each constant-size alloca up
// in front of the entry block's terminator. Returns true when at least one
// alloca was relocated.
bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
  bool Changed = false;
  Function::iterator BB = function.begin();
  // Remember the entry block's terminator; hoisted allocas land just
  // before it. The iterator is advanced past the entry block so the loop
  // below only visits non-entry blocks.
  TerminatorInst *EntryTerminator = (BB++)->getTerminator();
  for (Function::iterator BBEnd = function.end(); BB != BBEnd; ++BB) {
    BasicBlock::iterator Cur = BB->begin(), End = BB->end();
    while (Cur != End) {
      // Advance before any move so the iterator stays valid.
      AllocaInst *AI = dyn_cast<AllocaInst>(Cur++);
      if (!AI)
        continue;
      // Only hoist allocas whose array size is a compile-time constant.
      if (!isa<ConstantInt>(AI->getArraySize()))
        continue;
      AI->moveBefore(EntryTerminator);
      Changed = true;
    }
  }
  return Changed;
}
// Pass identification; the address of ID is what the pass manager uses.
char NVPTXAllocaHoisting::ID = 1;

// Register the pass under -alloca-hoisting. The description previously
// contained the typo "instructsion"; it is user-visible text.
RegisterPass<NVPTXAllocaHoisting> X("alloca-hoisting",
                                    "Hoisting alloca instructions in non-entry "
                                    "blocks to the entry block");
// Factory used by the NVPTX target to add the pass to its pipeline.
// Caller owns the returned pass.
FunctionPass *createAllocaHoisting() {
return new NVPTXAllocaHoisting();
}
} // end namespace llvm

View File

@ -0,0 +1,49 @@
//===-- AllocaHoisting.h - Hoist allocas to the entry block -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Hoist the alloca instructions in the non-entry blocks to the entry blocks.
//
//===----------------------------------------------------------------------===//
#ifndef NVPTX_ALLOCA_HOISTING_H_
#define NVPTX_ALLOCA_HOISTING_H_
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetData.h"
namespace llvm {
class FunctionPass;
class Function;
// Hoisting the alloca instructions in the non-entry blocks to the entry
// block.
class NVPTXAllocaHoisting : public FunctionPass {
public:
static char ID; // Pass ID
NVPTXAllocaHoisting() : FunctionPass(ID) {}
// Declares a dependency on TargetData and promises to keep the
// MachineFunctionAnalysis valid across the transformation.
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetData>();
AU.addPreserved<MachineFunctionAnalysis>();
}
virtual const char *getPassName() const {
return "NVPTX specific alloca hoisting";
}
// Moves constant-size allocas from non-entry blocks to the entry block;
// returns true if the function was modified.
virtual bool runOnFunction(Function &function);
};
extern FunctionPass *createAllocaHoisting();
} // end namespace llvm
#endif // NVPTX_ALLOCA_HOISTING_H_

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,318 @@
//===-- NVPTXAsmPrinter.h - NVPTX LLVM assembly writer --------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a printer that converts from our internal representation
// of machine-dependent LLVM code to NVPTX assembly language.
//
//===----------------------------------------------------------------------===//
#ifndef NVPTXASMPRINTER_H
#define NVPTXASMPRINTER_H
#include "NVPTX.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXSubtarget.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/Mangler.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include <fstream>
// The ptx syntax and format is very different from that usually seen in a .s
// file,
// therefore we are not able to use the MCAsmStreamer interface here.
//
// We are handcrafting the output method here.
//
// A better approach is to clone the MCAsmStreamer to a MCPTXAsmStreamer
// (subclass of MCStreamer).
// This is defined in AsmPrinter.cpp.
// Used to process the constant expressions in initializers.
namespace nvptx {
const llvm::MCExpr *LowerConstant(const llvm::Constant *CV,
llvm::AsmPrinter &AP) ;
}
namespace llvm {
/// Serves individual source lines out of a file on demand; used when
/// echoing source text into the emitted PTX. Line offsets are cached in
/// lineOffset so repeated lookups stay cheap.
class LineReader {
private:
  unsigned theCurLine;               // most recently reached line number
  std::ifstream fstr;                // stream over theFileName
  char buff[512];                    // NOTE(review): appears to be readLine's
                                     // scratch buffer -- confirm in its
                                     // out-of-line definition
  std::string theFileName;           // path this reader was opened on
  SmallVector<unsigned, 32> lineOffset;
public:
  LineReader(std::string filename)
      : theCurLine(0), theFileName(filename) {
    fstr.open(filename.c_str());
  }
  ~LineReader() { fstr.close(); }
  // Name of the file this reader serves.
  std::string fileName() { return theFileName; }
  // Returns the text of the given 1-based line (defined out of line).
  std::string readLine(unsigned line);
};
// Assembly printer that handcrafts PTX output (see the file comment above:
// the MCAsmStreamer interface is deliberately bypassed).
class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {

  class AggBuffer {
    // Used to buffer the emitted string for initializing global
    // aggregates.
    //
    // Normally an aggregate (array, vector or structure) is emitted
    // as a u8[]. However, if one element/field of the aggregate
    // is a non-NULL address, then the aggregate is emitted as u32[]
    // or u64[].
    //
    // We first layout the aggregate in 'buffer' in bytes, except for
    // those symbol addresses. For the i-th symbol address in the
    // aggregate, its corresponding 4-byte or 8-byte elements in 'buffer'
    // are filled with 0s. symbolPosInBuffer[i-1] records its position
    // in 'buffer', and Symbols[i-1] records the Value*.
    //
    // Once we have this AggBuffer setup, we can choose how to print
    // it out.
  public:
    unsigned size;          // size of the buffer in bytes
    unsigned char *buffer;  // the buffer
    unsigned numSymbols;    // number of symbol addresses
    SmallVector<unsigned, 4> symbolPosInBuffer;
    SmallVector<Value *, 4> Symbols;

  private:
    unsigned curpos;        // next write offset into 'buffer'
    raw_ostream &O;
    NVPTXAsmPrinter &AP;

  public:
    AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP)
      : O(_O), AP(_AP) {
      buffer = new unsigned char[_size];
      size = _size;
      curpos = 0;
      numSymbols = 0;
    }
    ~AggBuffer() {
      delete [] buffer;
    }
    // Copy Num bytes from Ptr, then zero-fill up to Bytes total.
    // Returns the new write position.
    unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) {
      assert((curpos+Num) <= size);
      assert((curpos+Bytes) <= size);
      for ( int i= 0; i < Num; ++i) {
        buffer[curpos] = Ptr[i];
        curpos ++;
      }
      for ( int i=Num; i < Bytes ; ++i) {
        buffer[curpos] = 0;
        curpos ++;
      }
      return curpos;
    }
    // Append Num zero bytes; returns the new write position.
    unsigned addZeros(int Num) {
      assert((curpos+Num) <= size);
      for ( int i= 0; i < Num; ++i) {
        buffer[curpos] = 0;
        curpos ++;
      }
      return curpos;
    }
    // Record that the element at the current position is a symbol address;
    // its bytes stay zero in 'buffer' and the symbol is printed instead.
    void addSymbol(Value *GVar) {
      symbolPosInBuffer.push_back(curpos);
      Symbols.push_back(GVar);
      numSymbols++;
    }
    void print() {
      if (numSymbols == 0) {
        // print out in bytes
        for (unsigned i=0; i<size; i++) {
          if (i)
            O << ", ";
          O << (unsigned int)buffer[i];
        }
      }
      else {
        // print out in 4-bytes or 8-bytes, so symbol addresses fall on
        // element boundaries
        unsigned int pos = 0;
        unsigned int nSym = 0;
        unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
        unsigned int nBytes = 4;
        if (AP.nvptxSubtarget.is64Bit())
          nBytes = 8;
        for (pos=0; pos<size; pos+=nBytes) {
          if (pos)
            O << ", ";
          if (pos == nextSymbolPos) {
            Value *v = Symbols[nSym];
            if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
              MCSymbol *Name = AP.Mang->getSymbol(GVar);
              O << *Name;
            }
            else if (ConstantExpr *Cexpr =
                     dyn_cast<ConstantExpr>(v)) {
              O << *nvptx::LowerConstant(Cexpr, AP);
            }
            else
              assert(0 && "symbol type unknown");
            nSym++;
            if (nSym >= numSymbols)
              nextSymbolPos = size+1;
            else
              nextSymbolPos = symbolPosInBuffer[nSym];
          }
          else
            if (nBytes == 4)
              O << *(unsigned int*)(buffer+pos);
            else
              O << *(unsigned long long*)(buffer+pos);
        }
      }
    }
  };

  friend class AggBuffer;

  virtual void emitSrcInText(StringRef filename, unsigned line);

private:
  virtual const char *getPassName() const {
    return "NVPTX Assembly Printer";
  }

  const Function *F;
  std::string CurrentFnName;

  void EmitFunctionEntryLabel();
  void EmitFunctionBodyStart();
  void EmitFunctionBodyEnd();

  void EmitInstruction(const MachineInstr *);

  // Alignment is emitted as part of each PTX declaration, so the generic
  // alignment directive is deliberately a no-op.
  void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const {}

  void printGlobalVariable(const GlobalVariable *GVar);
  void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
                    const char *Modifier=0);
  void printLdStCode(const MachineInstr *MI, int opNum, raw_ostream &O,
                     const char *Modifier=0);
  void printVecModifiedImmediate(const MachineOperand &MO,
                                 const char *Modifier, raw_ostream &O);
  void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
                       const char *Modifier=0);
  void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const;
  // definition autogenerated.
  void printInstruction(const MachineInstr *MI, raw_ostream &O);
  void printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
                          bool=false);
  void printParamName(int paramIndex, raw_ostream &O);
  void printParamName(Function::const_arg_iterator I, int paramIndex,
                      raw_ostream &O);
  void emitHeader(Module &M, raw_ostream &O);
  void emitKernelFunctionDirectives(const Function& F,
                                    raw_ostream &O) const;
  void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O);
  void emitFunctionExternParamList(const MachineFunction &MF);
  void emitFunctionParamList(const Function *, raw_ostream &O);
  void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O);
  void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF);
  void emitFunctionTempData(const MachineFunction &MF,
                            unsigned &FrameSize);
  bool isImageType(const Type *Ty);
  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                       unsigned AsmVariant, const char *ExtraCode,
                       raw_ostream &);
  bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
                             unsigned AsmVariant, const char *ExtraCode,
                             raw_ostream &);
  void printReturnValStr(const Function *, raw_ostream &O);
  void printReturnValStr(const MachineFunction &MF, raw_ostream &O);

protected:
  bool doInitialization(Module &M);
  bool doFinalization(Module &M);

private:
  std::string CurrentBankselLabelInBasicBlock;

  // This is specific per MachineFunction.
  const MachineRegisterInfo *MRI;
  // The contents are specific for each
  // MachineFunction. But the size of the
  // array is not.
  std::map<unsigned, unsigned> *VRidGlobal2LocalMap;
  // cache the subtarget here.
  const NVPTXSubtarget &nvptxSubtarget;
  // Build the map between type name and ID based on module's type
  // symbol table.
  std::map<const Type *, std::string> TypeNameMap;
  // List of variables demoted to a function scope.
  std::map<const Function *, std::vector<GlobalVariable *> > localDecls;
  // To record filename to ID mapping
  std::map<std::string, unsigned> filenameMap;
  void recordAndEmitFilenames(Module &);

  void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O);
  void emitPTXAddressSpace(unsigned int AddressSpace,
                           raw_ostream &O) const;
  std::string getPTXFundamentalTypeStr(const Type *Ty, bool=true) const ;
  void printScalarConstant(Constant *CPV, raw_ostream &O) ;
  void printFPConstant(const ConstantFP *Fp, raw_ostream &O) ;
  void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer) ;
  void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer) ;
  void printOperandProper(const MachineOperand &MO);

  void emitLinkageDirective(const GlobalValue* V, raw_ostream &O);
  void emitDeclarations(Module &, raw_ostream &O);
  void emitDeclaration(const Function *, raw_ostream &O);

  static const char *getRegisterName(unsigned RegNo);
  void emitDemotedVars(const Function *, raw_ostream &);

  LineReader *reader;          // lazily created by getReader
  LineReader *getReader(std::string);

public:
  NVPTXAsmPrinter(TargetMachine &TM,
                  MCStreamer &Streamer)
    : AsmPrinter(TM, Streamer),
      nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
    CurrentBankselLabelInBasicBlock = "";
    VRidGlobal2LocalMap = NULL;
    reader = NULL;
  }

  ~NVPTXAsmPrinter() {
    // 'delete' on a NULL pointer is a no-op, so no guard is needed here.
    // (The prior 'if (!reader)' guard was inverted: it ran the delete only
    // when the pointer was NULL and leaked any allocated reader.)
    delete reader;
  }

  bool ignoreLoc(const MachineInstr &);

  virtual void getVirtualRegisterName(unsigned, bool, raw_ostream &);

  DebugLoc prevDebugLoc;
  void emitLineNumberAsDotLoc(const MachineInstr &);
};
} // end of namespace
#endif

View File

@ -0,0 +1,76 @@
//=======- NVPTXFrameLowering.cpp - NVPTX Frame Information ---*- C++ -*-=====//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the NVPTX implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//
#include "NVPTXFrameLowering.h"
#include "NVPTX.h"
#include "NVPTXRegisterInfo.h"
#include "NVPTXSubtarget.h"
#include "NVPTXTargetMachine.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
// NVPTX unconditionally reports a frame pointer; the MF parameter is
// ignored.
bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const {
return true;
}
// Emit the NVPTX prologue: initialize the virtual stack pointer register(s)
// from %Depot, but only when the function actually has stack objects.
void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
if (MF.getFrameInfo()->hasStackObjects()) {
MachineBasicBlock &MBB = MF.front();
// Insert "mov.u32 %SP, %Depot"
MachineBasicBlock::iterator MBBI = MBB.begin();
// This instruction really occurs before first instruction
// in the BB, so giving it no debug location.
DebugLoc dl = DebugLoc();
if (tm.getSubtargetImpl()->hasGenericLdSt()) {
// mov %SPL, %depot;
// cvta.local %SP, %SPL;
// Note the insertion order below: the cvta is built first at the head
// of the block, then the mov is inserted *before* it (BuildMI(MBB, MI,
// ...)), producing the two-instruction sequence shown above.
if (is64bit) {
MachineInstr *MI = BuildMI(MBB, MBBI, dl,
tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64),
NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
BuildMI(MBB, MI, dl,
tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrameLocal)
.addReg(NVPTX::VRDepot);
} else {
MachineInstr *MI = BuildMI(MBB, MBBI, dl,
tm.getInstrInfo()->get(NVPTX::cvta_local_yes),
NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
BuildMI(MBB, MI, dl,
tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrameLocal)
.addReg(NVPTX::VRDepot);
}
}
else {
// mov %SP, %depot;
// Without generic ld/st support a single move suffices; pick the
// 32- or 64-bit move by target word size.
if (is64bit)
BuildMI(MBB, MBBI, dl,
tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrame)
.addReg(NVPTX::VRDepot);
else
BuildMI(MBB, MBBI, dl,
tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrame)
.addReg(NVPTX::VRDepot);
}
}
}
// Intentionally empty: no epilogue instructions are emitted for NVPTX.
void NVPTXFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
}

View File

@ -0,0 +1,40 @@
//===--- NVPTXFrameLowering.h - Define frame lowering for NVPTX -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//
//
//===----------------------------------------------------------------------===//
#ifndef NVPTX_FRAMELOWERING_H
#define NVPTX_FRAMELOWERING_H
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
class NVPTXTargetMachine;
// Frame lowering for NVPTX. The frame (local depot) is modeled as growing
// upward with 8-byte stack alignment and a zero local-area offset.
class NVPTXFrameLowering : public TargetFrameLowering {
NVPTXTargetMachine &tm;  // used to reach the subtarget and instr info
bool is64bit;            // true when lowering for the 64-bit target
public:
explicit NVPTXFrameLowering(NVPTXTargetMachine &_tm, bool _is64bit)
: TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0),
tm(_tm), is64bit(_is64bit) {}
// Implementation returns true unconditionally.
virtual bool hasFP(const MachineFunction &MF) const;
virtual void emitPrologue(MachineFunction &MF) const;
// Implementation emits nothing.
virtual void emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const;
};
} // End llvm namespace
#endif

View File

@ -0,0 +1,681 @@
//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the NVPTX target.
//
//===----------------------------------------------------------------------===//
#include "llvm/Instructions.h"
#include "llvm/Support/raw_ostream.h"
#include "NVPTXISelDAGToDAG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/GlobalValue.h"
#undef DEBUG_TYPE
#define DEBUG_TYPE "nvptx-isel"
using namespace llvm;
// Command-line options controlling NVPTX FP contraction and f32 division
// lowering (consumed in the NVPTXDAGToDAGISel constructor below).

// Generate mad.f32 when the target has no fma.f32; off by default.
static cl::opt<bool>
UseFMADInstruction("nvptx-mad-enable",
                   cl::ZeroOrMore,
                   cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
                   cl::init(false));

// FMA contraction level: 0 = off, 1 = on, 2 = aggressive (default).
static cl::opt<int>
FMAContractLevel("nvptx-fma-level",
                 cl::ZeroOrMore,
                 cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
                          " 1: do it 2: do it aggressively"),
                 cl::init(2));

// f32 division precision. Help text fixed: "Specifies" -> "Specific" for
// consistency with the sibling options, and the "avaiable" typo.
static cl::opt<int>
UsePrecDivF32("nvptx-prec-divf32",
              cl::ZeroOrMore,
              cl::desc("NVPTX Specific: 0 use div.approx, 1 use div.full, 2 use"
                       " IEEE Compliant F32 div.rnd if available."),
              cl::init(2));
/// createNVPTXISelDag - This pass converts a legalized DAG into a
/// NVPTX-specific DAG, ready for instruction scheduling.
/// Caller owns the returned pass.
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
llvm::CodeGenOpt::Level OptLevel) {
return new NVPTXDAGToDAGISel(TM, OptLevel);
}
// Constructor: derive the FMA/MAD contraction and f32-division policy
// flags from the optimization level, the subtarget's capabilities and the
// command-line options declared above.
NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
CodeGenOpt::Level OptLevel)
: SelectionDAGISel(tm, OptLevel),
Subtarget(tm.getSubtarget<NVPTXSubtarget>())
{
// Always do fma.f32 fpcontract if the target supports the instruction.
// Always do fma.f64 fpcontract if the target supports the instruction.
// Use mad.f32 if nvptx-mad-enable is specified and the target does not
// support fma.f32.
doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32();
doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() &&
(FMAContractLevel>=1);
doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() &&
(FMAContractLevel>=1);
doFMAF32AGG = (OptLevel > 0) && Subtarget.hasFMAF32() &&
(FMAContractLevel==2);
doFMAF64AGG = (OptLevel > 0) && Subtarget.hasFMAF64() &&
(FMAContractLevel==2);
allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction;
// Wide multiplies are only worthwhile when optimizing.
doMulWide = (OptLevel > 0);
// Decide how to translate f32 div
do_DIVF32_PREC = UsePrecDivF32;
// sm less than sm_20 does not support div.rnd. Use div.full.
if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
do_DIVF32_PREC = 1;
}
/// Select - Top-level selection hook. Loads and stores get custom
/// handling here; every other node is passed to the TableGen'erated
/// matcher (SelectCode).
SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
  // Nodes that already carry a machine opcode need no further selection.
  if (N->isMachineOpcode())
    return NULL;

  unsigned Opc = N->getOpcode();
  SDNode *Custom = NULL;
  if (Opc == ISD::LOAD)
    Custom = SelectLoad(N);
  else if (Opc == ISD::STORE)
    Custom = SelectStore(N);

  // Fall back to the generated selector when custom selection declined.
  return Custom ? Custom : SelectCode(N);
}
// Map a memory node's LLVM pointer address space to the PTXLdStInstCode
// address-space encoding used by the ld/st instructions. Falls back to
// LOCAL when the source value or its address space is unknown.
static unsigned int
getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget)
{
const Value *Src = N->getSrcValue();
if (!Src)
return NVPTX::PTXLdStInstCode::LOCAL;
if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
switch (PT->getAddressSpace()) {
case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
return NVPTX::PTXLdStInstCode::CONSTANT;
case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
case llvm::ADDRESS_SPACE_CONST:
// If the arch supports generic address space, translate it to GLOBAL
// for correctness.
// If the arch does not support generic address space, then the arch
// does not really support ADDRESS_SPACE_CONST, translate it to
// CONSTANT for better performance.
if (Subtarget.hasGenericLdSt())
return NVPTX::PTXLdStInstCode::GLOBAL;
else
return NVPTX::PTXLdStInstCode::CONSTANT;
default: break;
}
}
return NVPTX::PTXLdStInstCode::LOCAL;
}
// Lower an ISD::LOAD to an NVPTX LD machine node. The opcode is chosen by
// the loaded value type and by which addressing form matches the pointer:
// direct address (_avar), symbol+imm (_asi), reg+imm (_ari), or plain
// register (_areg). Returns NULL to defer to table-generated selection.
SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT LoadedVT = LD->getMemoryVT();
SDNode *NVPTXLD= NULL;
// do not support pre/post inc/dec
if (LD->isIndexed())
return NULL;
if (!LoadedVT.isSimple())
return NULL;
// Address Space Setting
unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
// Volatile Setting
// - .volatile is only available for .global and .shared
bool isVolatile = LD->isVolatile();
if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
isVolatile = false;
// Vector Setting: only 2- and 4-element vectors are selectable here.
MVT SimpleVT = LoadedVT.getSimpleVT();
unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
if (SimpleVT.isVector()) {
unsigned num = SimpleVT.getVectorNumElements();
if (num == 2)
vecType = NVPTX::PTXLdStInstCode::V2;
else if (num == 4)
vecType = NVPTX::PTXLdStInstCode::V4;
else
return NULL;
}
// Type Setting: fromType + fromTypeWidth
//
// Sign : ISD::SEXTLOAD
// Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
// type is integer
// Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
MVT ScalarVT = SimpleVT.getScalarType();
unsigned fromTypeWidth = ScalarVT.getSizeInBits();
unsigned int fromType;
if ((LD->getExtensionType() == ISD::SEXTLOAD))
fromType = NVPTX::PTXLdStInstCode::Signed;
else if (ScalarVT.isFloatingPoint())
fromType = NVPTX::PTXLdStInstCode::Float;
else
fromType = NVPTX::PTXLdStInstCode::Unsigned;
// Create the machine instruction DAG
SDValue Chain = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue Addr;
SDValue Offset, Base;
unsigned Opcode;
MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy;
// Arm 1: direct address -> _avar forms.
if (SelectDirectAddr(N1, Addr)) {
switch (TargetVT) {
case MVT::i8: Opcode = NVPTX::LD_i8_avar; break;
case MVT::i16: Opcode = NVPTX::LD_i16_avar; break;
case MVT::i32: Opcode = NVPTX::LD_i32_avar; break;
case MVT::i64: Opcode = NVPTX::LD_i64_avar; break;
case MVT::f32: Opcode = NVPTX::LD_f32_avar; break;
case MVT::f64: Opcode = NVPTX::LD_f64_avar; break;
case MVT::v2i8: Opcode = NVPTX::LD_v2i8_avar; break;
case MVT::v2i16: Opcode = NVPTX::LD_v2i16_avar; break;
case MVT::v2i32: Opcode = NVPTX::LD_v2i32_avar; break;
case MVT::v2i64: Opcode = NVPTX::LD_v2i64_avar; break;
case MVT::v2f32: Opcode = NVPTX::LD_v2f32_avar; break;
case MVT::v2f64: Opcode = NVPTX::LD_v2f64_avar; break;
case MVT::v4i8: Opcode = NVPTX::LD_v4i8_avar; break;
case MVT::v4i16: Opcode = NVPTX::LD_v4i16_avar; break;
case MVT::v4i32: Opcode = NVPTX::LD_v4i32_avar; break;
case MVT::v4f32: Opcode = NVPTX::LD_v4f32_avar; break;
default: return NULL;
}
SDValue Ops[] = { getI32Imm(isVolatile),
getI32Imm(codeAddrSpace),
getI32Imm(vecType),
getI32Imm(fromType),
getI32Imm(fromTypeWidth),
Addr, Chain };
NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
MVT::Other, Ops, 7);
// Arm 2: symbol + immediate offset -> _asi forms (pointer-width aware).
} else if (Subtarget.is64Bit()?
SelectADDRsi64(N1.getNode(), N1, Base, Offset):
SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
switch (TargetVT) {
case MVT::i8: Opcode = NVPTX::LD_i8_asi; break;
case MVT::i16: Opcode = NVPTX::LD_i16_asi; break;
case MVT::i32: Opcode = NVPTX::LD_i32_asi; break;
case MVT::i64: Opcode = NVPTX::LD_i64_asi; break;
case MVT::f32: Opcode = NVPTX::LD_f32_asi; break;
case MVT::f64: Opcode = NVPTX::LD_f64_asi; break;
case MVT::v2i8: Opcode = NVPTX::LD_v2i8_asi; break;
case MVT::v2i16: Opcode = NVPTX::LD_v2i16_asi; break;
case MVT::v2i32: Opcode = NVPTX::LD_v2i32_asi; break;
case MVT::v2i64: Opcode = NVPTX::LD_v2i64_asi; break;
case MVT::v2f32: Opcode = NVPTX::LD_v2f32_asi; break;
case MVT::v2f64: Opcode = NVPTX::LD_v2f64_asi; break;
case MVT::v4i8: Opcode = NVPTX::LD_v4i8_asi; break;
case MVT::v4i16: Opcode = NVPTX::LD_v4i16_asi; break;
case MVT::v4i32: Opcode = NVPTX::LD_v4i32_asi; break;
case MVT::v4f32: Opcode = NVPTX::LD_v4f32_asi; break;
default: return NULL;
}
SDValue Ops[] = { getI32Imm(isVolatile),
getI32Imm(codeAddrSpace),
getI32Imm(vecType),
getI32Imm(fromType),
getI32Imm(fromTypeWidth),
Base, Offset, Chain };
NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
MVT::Other, Ops, 8);
// Arm 3: register + immediate offset -> _ari forms.
} else if (Subtarget.is64Bit()?
SelectADDRri64(N1.getNode(), N1, Base, Offset):
SelectADDRri(N1.getNode(), N1, Base, Offset)) {
switch (TargetVT) {
case MVT::i8: Opcode = NVPTX::LD_i8_ari; break;
case MVT::i16: Opcode = NVPTX::LD_i16_ari; break;
case MVT::i32: Opcode = NVPTX::LD_i32_ari; break;
case MVT::i64: Opcode = NVPTX::LD_i64_ari; break;
case MVT::f32: Opcode = NVPTX::LD_f32_ari; break;
case MVT::f64: Opcode = NVPTX::LD_f64_ari; break;
case MVT::v2i8: Opcode = NVPTX::LD_v2i8_ari; break;
case MVT::v2i16: Opcode = NVPTX::LD_v2i16_ari; break;
case MVT::v2i32: Opcode = NVPTX::LD_v2i32_ari; break;
case MVT::v2i64: Opcode = NVPTX::LD_v2i64_ari; break;
case MVT::v2f32: Opcode = NVPTX::LD_v2f32_ari; break;
case MVT::v2f64: Opcode = NVPTX::LD_v2f64_ari; break;
case MVT::v4i8: Opcode = NVPTX::LD_v4i8_ari; break;
case MVT::v4i16: Opcode = NVPTX::LD_v4i16_ari; break;
case MVT::v4i32: Opcode = NVPTX::LD_v4i32_ari; break;
case MVT::v4f32: Opcode = NVPTX::LD_v4f32_ari; break;
default: return NULL;
}
SDValue Ops[] = { getI32Imm(isVolatile),
getI32Imm(codeAddrSpace),
getI32Imm(vecType),
getI32Imm(fromType),
getI32Imm(fromTypeWidth),
Base, Offset, Chain };
NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
MVT::Other, Ops, 8);
}
// Arm 4 (fallback): plain register address -> _areg forms.
else {
switch (TargetVT) {
case MVT::i8: Opcode = NVPTX::LD_i8_areg; break;
case MVT::i16: Opcode = NVPTX::LD_i16_areg; break;
case MVT::i32: Opcode = NVPTX::LD_i32_areg; break;
case MVT::i64: Opcode = NVPTX::LD_i64_areg; break;
case MVT::f32: Opcode = NVPTX::LD_f32_areg; break;
case MVT::f64: Opcode = NVPTX::LD_f64_areg; break;
case MVT::v2i8: Opcode = NVPTX::LD_v2i8_areg; break;
case MVT::v2i16: Opcode = NVPTX::LD_v2i16_areg; break;
case MVT::v2i32: Opcode = NVPTX::LD_v2i32_areg; break;
case MVT::v2i64: Opcode = NVPTX::LD_v2i64_areg; break;
case MVT::v2f32: Opcode = NVPTX::LD_v2f32_areg; break;
case MVT::v2f64: Opcode = NVPTX::LD_v2f64_areg; break;
case MVT::v4i8: Opcode = NVPTX::LD_v4i8_areg; break;
case MVT::v4i16: Opcode = NVPTX::LD_v4i16_areg; break;
case MVT::v4i32: Opcode = NVPTX::LD_v4i32_areg; break;
case MVT::v4f32: Opcode = NVPTX::LD_v4f32_areg; break;
default: return NULL;
}
SDValue Ops[] = { getI32Imm(isVolatile),
getI32Imm(codeAddrSpace),
getI32Imm(vecType),
getI32Imm(fromType),
getI32Imm(fromTypeWidth),
N1, Chain };
NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
MVT::Other, Ops, 7);
}
// Attach the original memory operand so later passes keep alias info.
if (NVPTXLD != NULL) {
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
}
return NVPTXLD;
}
// SelectStore - Select a PTX st instruction for store node N.  The opcode is
// chosen along two axes: the addressing mode of the address operand
// (direct symbol _avar, symbol+imm _asi, reg+imm _ari, plain register _areg)
// and the simple value type of the stored value.  Returns NULL when the
// store cannot be matched here (indexed store, non-simple VT, or an
// unsupported vector width), leaving it to other selection paths.
SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
  DebugLoc dl = N->getDebugLoc();
  StoreSDNode *ST = cast<StoreSDNode>(N);
  EVT StoreVT = ST->getMemoryVT();
  SDNode *NVPTXST = NULL;

  // do not support pre/post inc/dec
  if (ST->isIndexed())
    return NULL;

  if (!StoreVT.isSimple())
    return NULL;

  // Address Space Setting
  unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  //   (and the generic space, which may resolve to either).
  bool isVolatile = ST->isVolatile();
  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Vector Setting: PTX has only v2 and v4 ld/st forms.
  MVT SimpleVT = StoreVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    unsigned num = SimpleVT.getVectorNumElements();
    if (num == 2)
      vecType = NVPTX::PTXLdStInstCode::V2;
    else if (num == 4)
      vecType = NVPTX::PTXLdStInstCode::V4;
    else
      return NULL;
  }

  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  MVT ScalarVT = SimpleVT.getScalarType();
  unsigned toTypeWidth = ScalarVT.getSizeInBits();
  unsigned int toType;
  if (ScalarVT.isFloatingPoint())
    toType = NVPTX::PTXLdStInstCode::Float;
  else
    toType = NVPTX::PTXLdStInstCode::Unsigned;

  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);  // value to store
  SDValue N2 = N->getOperand(2);  // address
  SDValue Addr;
  SDValue Offset, Base;
  unsigned Opcode;
  // Opcode is keyed on the type of the stored value, not the memory VT.
  MVT::SimpleValueType SourceVT =
    N1.getNode()->getValueType(0).getSimpleVT().SimpleTy;

  if (SelectDirectAddr(N2, Addr)) {
    // Direct symbol address: _avar form.
    switch (SourceVT) {
    case MVT::i8:    Opcode = NVPTX::ST_i8_avar; break;
    case MVT::i16:   Opcode = NVPTX::ST_i16_avar; break;
    case MVT::i32:   Opcode = NVPTX::ST_i32_avar; break;
    case MVT::i64:   Opcode = NVPTX::ST_i64_avar; break;
    case MVT::f32:   Opcode = NVPTX::ST_f32_avar; break;
    case MVT::f64:   Opcode = NVPTX::ST_f64_avar; break;
    case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_avar; break;
    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_avar; break;
    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_avar; break;
    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_avar; break;
    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_avar; break;
    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_avar; break;
    case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_avar; break;
    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_avar; break;
    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_avar; break;
    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_avar; break;
    default: return NULL;
    }
    SDValue Ops[] = { N1,
                      getI32Imm(isVolatile),
                      getI32Imm(codeAddrSpace),
                      getI32Imm(vecType),
                      getI32Imm(toType),
                      getI32Imm(toTypeWidth),
                      Addr, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
                                     MVT::Other, Ops, 8);
  } else if (Subtarget.is64Bit()?
      SelectADDRsi64(N2.getNode(), N2, Base, Offset):
        SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
    // Symbol + immediate offset: _asi form.
    switch (SourceVT) {
    case MVT::i8:    Opcode = NVPTX::ST_i8_asi; break;
    case MVT::i16:   Opcode = NVPTX::ST_i16_asi; break;
    case MVT::i32:   Opcode = NVPTX::ST_i32_asi; break;
    case MVT::i64:   Opcode = NVPTX::ST_i64_asi; break;
    case MVT::f32:   Opcode = NVPTX::ST_f32_asi; break;
    case MVT::f64:   Opcode = NVPTX::ST_f64_asi; break;
    case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_asi; break;
    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_asi; break;
    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_asi; break;
    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_asi; break;
    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_asi; break;
    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_asi; break;
    case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_asi; break;
    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_asi; break;
    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_asi; break;
    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_asi; break;
    default: return NULL;
    }
    SDValue Ops[] = { N1,
                      getI32Imm(isVolatile),
                      getI32Imm(codeAddrSpace),
                      getI32Imm(vecType),
                      getI32Imm(toType),
                      getI32Imm(toTypeWidth),
                      Base, Offset, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
                                     MVT::Other, Ops, 9);
  } else if (Subtarget.is64Bit()?
      SelectADDRri64(N2.getNode(), N2, Base, Offset):
        SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    // Register + immediate offset: _ari form.
    switch (SourceVT) {
    case MVT::i8:    Opcode = NVPTX::ST_i8_ari; break;
    case MVT::i16:   Opcode = NVPTX::ST_i16_ari; break;
    case MVT::i32:   Opcode = NVPTX::ST_i32_ari; break;
    case MVT::i64:   Opcode = NVPTX::ST_i64_ari; break;
    case MVT::f32:   Opcode = NVPTX::ST_f32_ari; break;
    case MVT::f64:   Opcode = NVPTX::ST_f64_ari; break;
    case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_ari; break;
    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_ari; break;
    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_ari; break;
    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_ari; break;
    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_ari; break;
    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_ari; break;
    case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_ari; break;
    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_ari; break;
    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_ari; break;
    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_ari; break;
    default: return NULL;
    }
    SDValue Ops[] = { N1,
                      getI32Imm(isVolatile),
                      getI32Imm(codeAddrSpace),
                      getI32Imm(vecType),
                      getI32Imm(toType),
                      getI32Imm(toTypeWidth),
                      Base, Offset, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
                                     MVT::Other, Ops, 9);
  } else {
    // Fallback: address lives entirely in a register (_areg form).
    switch (SourceVT) {
    case MVT::i8:    Opcode = NVPTX::ST_i8_areg; break;
    case MVT::i16:   Opcode = NVPTX::ST_i16_areg; break;
    case MVT::i32:   Opcode = NVPTX::ST_i32_areg; break;
    case MVT::i64:   Opcode = NVPTX::ST_i64_areg; break;
    case MVT::f32:   Opcode = NVPTX::ST_f32_areg; break;
    case MVT::f64:   Opcode = NVPTX::ST_f64_areg; break;
    case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_areg; break;
    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_areg; break;
    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_areg; break;
    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_areg; break;
    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_areg; break;
    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_areg; break;
    case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_areg; break;
    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_areg; break;
    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_areg; break;
    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_areg; break;
    default: return NULL;
    }
    SDValue Ops[] = { N1,
                      getI32Imm(isVolatile),
                      getI32Imm(codeAddrSpace),
                      getI32Imm(vecType),
                      getI32Imm(toType),
                      getI32Imm(toTypeWidth),
                      N2, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
                                     MVT::Other, Ops, 8);
  }

  // Attach the memory operand so later passes see the memory reference.
  if (NVPTXST != NULL) {
    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
  }

  return NVPTXST;
}
// SelectDirectAddr - Match a direct address for DAG.
// A direct address could be a globaladdress or externalsymbol.
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
  switch (N.getOpcode()) {
  case ISD::TargetGlobalAddress:
  case ISD::TargetExternalSymbol:
    // The node itself is already a symbol reference.
    Address = N;
    return true;
  case NVPTXISD::Wrapper:
    // Peel off the wrapper; its operand is the symbol.
    Address = N.getOperand(0);
    return true;
  case ISD::INTRINSIC_WO_CHAIN: {
    // Look through nvvm_ptr_gen_to_param(MoveParam(sym)).
    unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
    if (IID == Intrinsic::nvvm_ptr_gen_to_param) {
      SDValue Arg = N.getOperand(1);
      if (Arg.getOpcode() == NVPTXISD::MoveParam)
        return SelectDirectAddr(Arg.getOperand(0), Address);
    }
    return false;
  }
  default:
    return false;
  }
}
// Match a [symbol + immediate] address: the ADD's left operand must be a
// direct (symbol) address and the right operand a constant, which is
// folded into a target constant of type 'mvt'.
bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr,
                                         SDValue &Base, SDValue &Offset,
                                         MVT mvt) {
  if (Addr.getOpcode() != ISD::ADD)
    return false;
  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
  if (!CN)
    return false;
  if (!SelectDirectAddr(Addr.getOperand(0), Base))
    return false;
  Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
  return true;
}
// symbol+offset, 32-bit offset constant (32-bit addressing).
bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
                                     SDValue &Base, SDValue &Offset) {
  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
}
// symbol+offset, 64-bit offset constant (64-bit addressing).
bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
                                       SDValue &Base, SDValue &Offset) {
  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
}
// register+offset
// Matches a [register + immediate] address: a bare frame index (offset 0),
// or an ADD whose right operand is a constant.  Symbol bases are rejected
// so they take the symbol+offset (_asi) path instead.
bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
                                         SDValue &Base, SDValue &Offset,
                                         MVT mvt) {
  // A plain frame index is reg+0.
  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
    Offset = CurDAG->getTargetConstant(0, mvt);
    return true;
  }
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress)
    return false;  // direct calls.
  if (Addr.getOpcode() == ISD::ADD) {
    // symbol+imm is not reg+imm.  NOTE(review): on a match this call writes
    // into the local Addr copy, which is harmless only because we return
    // immediately — confirm before reordering.
    if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
      return false;
    }
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
      if (FrameIndexSDNode *FIN =
          dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
        // Constant offset from frame ref.
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
      else
        Base = Addr.getOperand(0);
      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
      return true;
    }
  }
  return false;
}
// register+offset, 32-bit offset constant (32-bit addressing).
bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
                                     SDValue &Base, SDValue &Offset) {
  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
}
// register+offset, 64-bit offset constant (64-bit addressing).
bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
                                       SDValue &Base, SDValue &Offset) {
  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
}
// ChkMemSDNodeAddressSpace - Return true if the source Value attached to
// memory node N is a pointer into LLVM address space 'spN'.  Returns false
// when no source Value is recorded on the node.
bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
                                                 unsigned int spN) const {
  const Value *Src = NULL;
  // Even though MemIntrinsicSDNode is a subclass of MemSDNode,
  // the classof() for MemSDNode does not include MemIntrinsicSDNode
  // (See SelectionDAGNodes.h). So we need to check for both.
  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
    Src = mN->getSrcValue();
  }
  else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
    Src = mN->getSrcValue();
  }
  if (!Src)
    return false;
  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
    return (PT->getAddressSpace() == spN);
  return false;
}
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
char ConstraintCode,
std::vector<SDValue> &OutOps) {
SDValue Op0, Op1;
switch (ConstraintCode) {
default: return true;
case 'm': // memory
if (SelectDirectAddr(Op, Op0)) {
OutOps.push_back(Op0);
OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
return false;
}
if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
OutOps.push_back(Op0);
OutOps.push_back(Op1);
return false;
}
break;
}
return true;
}
// Return true if N is an undef or a constant.
// An undef produces (i8imm 0) in Retval; a constant is re-emitted as i8imm.
// Note: The conversion to i8imm is required, otherwise the pattern matcher
// inserts a bunch of IMOVi8rr to convert the imm to i8imm, and this causes
// instruction selection to fail.
bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N,
                                   SDValue &Retval) {
  switch (N.getOpcode()) {
  case ISD::UNDEF:
    // Undef lanes are materialized as zero.
    Retval = CurDAG->getTargetConstant(0, MVT::i8);
    return true;
  case ISD::Constant: {
    unsigned Imm = cast<ConstantSDNode>(N.getNode())->getZExtValue();
    Retval = CurDAG->getTargetConstant(Imm, MVT::i8);
    return true;
  }
  default:
    return false;
  }
}

View File

@ -0,0 +1,105 @@
//===-- NVPTXISelDAGToDAG.h - A dag to dag inst selector for NVPTX --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the NVPTX target.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "nvptx-isel"
#include "NVPTX.h"
#include "NVPTXISelLowering.h"
#include "NVPTXRegisterInfo.h"
#include "NVPTXTargetMachine.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Intrinsics.h"
using namespace llvm;
namespace {
// NVPTXDAGToDAGISel - SelectionDAG instruction selector for NVPTX.
// Holds per-module selection policy flags (FMA contraction, f32 division
// precision, flush-to-zero, mul.wide formation) consulted during selection.
class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
  // If true, generate corresponding FPCONTRACT. This is
  // language dependent (i.e. CUDA and OpenCL works differently).
  bool doFMADF32;
  bool doFMAF64;
  bool doFMAF32;
  bool doFMAF64AGG;
  bool doFMAF32AGG;
  bool allowFMA;

  // Precision policy for f32 division:
  // 0: use div.approx
  // 1: use div.full
  // 2: For sm_20 and later, ieee-compliant div.rnd.f32 can be generated;
  //    Otherwise, use div.full
  int do_DIVF32_PREC;

  // If true, add .ftz to f32 instructions.
  // This is only meaningful for sm_20 and later, as the default
  // is not ftz.
  // For sm earlier than sm_20, f32 denorms are always ftz by the
  // hardware.
  // We always add the .ftz modifier regardless of the sm value
  // when Use32FTZ is true.
  bool UseF32FTZ;

  // If true, generate mul.wide from sext and mul
  bool doMulWide;

public:
  explicit NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
                             CodeGenOpt::Level OptLevel);

  // Pass Name
  virtual const char *getPassName() const {
    return "NVPTX DAG->DAG Pattern Instruction Selection";
  }

  const NVPTXSubtarget &Subtarget;

  virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                            char ConstraintCode,
                                            std::vector<SDValue> &OutOps);
private:
  // Include the pieces autogenerated from the target description.
#include "NVPTXGenDAGISel.inc"

  // Top-level selection entry point plus manual handlers for ld/st.
  SDNode *Select(SDNode *N);
  SDNode* SelectLoad(SDNode *N);
  SDNode* SelectStore(SDNode *N);

  // Convenience: wrap an immediate as a target constant i32.
  inline SDValue getI32Imm(unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, MVT::i32);
  }

  // Match direct address complex pattern.
  bool SelectDirectAddr(SDValue N, SDValue &Address);

  // register+offset address forms (32- and 64-bit variants).
  bool SelectADDRri_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
                        SDValue &Offset, MVT mvt);
  bool SelectADDRri(SDNode *OpNode, SDValue Addr, SDValue &Base,
                    SDValue &Offset);
  bool SelectADDRri64(SDNode *OpNode, SDValue Addr, SDValue &Base,
                      SDValue &Offset);

  // symbol+offset address forms (32- and 64-bit variants).
  bool SelectADDRsi_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
                        SDValue &Offset, MVT mvt);
  bool SelectADDRsi(SDNode *OpNode, SDValue Addr, SDValue &Base,
                    SDValue &Offset);
  bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base,
                      SDValue &Offset);

  bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;

  bool UndefOrImm(SDValue Op, SDValue N, SDValue &Retval);
};
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,153 @@
//===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#ifndef NVPTXISELLOWERING_H
#define NVPTXISELLOWERING_H
#include "NVPTX.h"
#include "NVPTXSubtarget.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
namespace llvm {
namespace NVPTXISD {
// NVPTX-specific SelectionDAG node opcodes, mostly for modelling the PTX
// calling convention (parameter declaration/movement, call sequencing).
enum NodeType {
  // Start the numbering from where ISD NodeType finishes.
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  Wrapper,
  CALL,
  RET_FLAG,
  LOAD_PARAM,
  NVBuiltin,
  DeclareParam,
  DeclareScalarParam,
  DeclareRetParam,
  DeclareRet,
  DeclareScalarRet,
  LoadParam,
  StoreParam,
  StoreParamS32, // to sext and store a <32bit value, not used currently
  StoreParamU32, // to zext and store a <32bit value, not used currently
  MoveToParam,
  PrintCall,
  PrintCallUni,
  CallArgBegin,
  CallArg,
  LastCallArg,
  CallArgEnd,
  CallVoid,
  CallVal,
  CallSymbol,
  Prototype,
  MoveParam,
  MoveRetval,
  MoveToRetval,
  StoreRetval,
  PseudoUseParam,
  RETURN,
  CallSeqBegin,
  CallSeqEnd,
  Dummy
};
}
//===--------------------------------------------------------------------===//
// TargetLowering Implementation
//===--------------------------------------------------------------------===//
// NVPTXTargetLowering - NVPTX-specific lowering of LLVM IR into SelectionDAG
// operations, including the PTX parameter-passing call sequence.
class NVPTXTargetLowering : public TargetLowering {
public:
  explicit NVPTXTargetLowering(NVPTXTargetMachine &TM);

  // Entry point for operations marked Custom in the constructor.
  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(const GlobalValue *GV, int64_t Offset,
                             SelectionDAG &DAG) const;

  // Names for the NVPTXISD node opcodes (debug printing).
  virtual const char *getTargetNodeName(unsigned Opcode) const;

  bool isTypeSupportedInIntrinsic(MVT VT) const;

  bool getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
                          unsigned Intrinsic) const;

  /// isLegalAddressingMode - Return true if the addressing mode represented
  /// by AM is legal for this target, for a load/store of the specified type
  /// Used to guide target specific optimizations, like loop strength
  /// reduction (LoopStrengthReduce.cpp) and memory optimization for
  /// address mode (CodeGenPrepare.cpp)
  virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;

  /// getFunctionAlignment - Return the Log2 alignment of this function.
  virtual unsigned getFunctionAlignment(const Function *F) const;

  // Comparisons yield i1 in PTX.
  virtual EVT getSetCCResultType(EVT VT) const {
    return MVT::i1;
  }

  ConstraintType getConstraintType(const std::string &Constraint) const;
  std::pair<unsigned, const TargetRegisterClass*>
  getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;

  virtual SDValue
  LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                       const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl,
                       SelectionDAG &DAG,
                       SmallVectorImpl<SDValue> &InVals) const;

  // This will be re-added once the necessary changes to LowerCallTo are
  // upstreamed.
  // virtual SDValue
  // LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
  //           bool isVarArg, bool doesNotRet, bool &isTailCall,
  //           const SmallVectorImpl<ISD::OutputArg> &Outs,
  //           const SmallVectorImpl<SDValue> &OutVals,
  //           const SmallVectorImpl<ISD::InputArg> &Ins,
  //           DebugLoc dl, SelectionDAG &DAG,
  //           SmallVectorImpl<SDValue> &InVals,
  //           Type *retTy, const ArgListTy &Args) const;

  // Build the textual .func prototype string used for indirect calls.
  std::string getPrototype(Type *, const ArgListTy &,
                           const SmallVectorImpl<ISD::OutputArg> &,
                           unsigned retAlignment) const;

  virtual SDValue
  LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
              const SmallVectorImpl<ISD::OutputArg> &Outs,
              const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl,
              SelectionDAG &DAG) const;

  virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                            std::vector<SDValue> &Ops,
                                            SelectionDAG &DAG) const;

  NVPTXTargetMachine *nvTM;

  // PTX always uses 32-bit shift amounts
  virtual MVT getShiftAmountTy(EVT LHSTy) const {
    return MVT::i32;
  }

private:
  const NVPTXSubtarget &nvptxSubtarget;  // cache the subtarget here

  SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, EVT =
                     MVT::i32) const;
  SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT = MVT::i32) const;
  SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);

  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
};
} // namespace llvm
#endif // NVPTXISELLOWERING_H

View File

@ -0,0 +1,43 @@
//===- NVPTXInstrFormats.td - NVPTX Instruction Formats-------*- tblgen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Describe NVPTX instructions format
//
//===----------------------------------------------------------------------===//
// Vector instruction type enum: a 4-bit tag recorded in TSFlags{3-0}
// of every NVPTXInst to classify vector instructions.
class VecInstTypeEnum<bits<4> val> {
  bits<4> Value=val;
}
// Default tag: not a vector instruction.
def VecNOP : VecInstTypeEnum<0>;
// Generic NVPTX Format: base class for all NVPTX instructions.  Packs the
// vector-type tag and the simple-move/load/store flags into TSFlags so
// NVPTXInstrInfo can decode them (see SimpleMoveMask/isLoadMask/isStoreMask).
class NVPTXInst<dag outs, dag ins, string asmstr, list<dag> pattern>
  : Instruction {
  field bits<14> Inst;

  let Namespace = "NVPTX";
  dag OutOperandList = outs;
  dag InOperandList = ins;
  let AsmString = asmstr;
  let Pattern = pattern;

  // TSFlagFields
  bits<4> VecInstType = VecNOP.Value;
  bit IsSimpleMove = 0;   // register-to-register move (see isMoveInstr)
  bit IsLoad = 0;         // ld instruction (see isLoadInstr)
  bit IsStore = 0;        // st instruction (see isStoreInstr)
  let TSFlags{3-0} = VecInstType;
  let TSFlags{4-4} = IsSimpleMove;
  let TSFlags{5-5} = IsLoad;
  let TSFlags{6-6} = IsStore;
}

View File

@ -0,0 +1,326 @@
//===- NVPTXInstrInfo.cpp - NVPTX Instruction Information -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the NVPTX implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
#include "NVPTX.h"
#include "NVPTXInstrInfo.h"
#include "NVPTXTargetMachine.h"
#define GET_INSTRINFO_CTOR
#include "NVPTXGenInstrInfo.inc"
#include "llvm/Function.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include <cstdio>
using namespace llvm;
// FIXME: Add the subtarget support on this constructor.
// Constructor: initializes the tblgen'd base and builds the register-info
// object against the target machine's subtarget.
NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm)
  : NVPTXGenInstrInfo(),
    TM(tm),
    RegInfo(*this, *TM.getSubtargetImpl()) {}
// copyPhysReg - Emit a register-to-register copy between DestReg and SrcReg.
// Both registers must belong to the same register class; the class selects
// the matching mov opcode.  Asserts on any cross-class or unknown copy.
void NVPTXInstrInfo::copyPhysReg (MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I, DebugLoc DL,
                                  unsigned DestReg, unsigned SrcReg,
                                  bool KillSrc) const {
  if (NVPTX::Int32RegsRegClass.contains(DestReg) &&
      NVPTX::Int32RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg)
    .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::Int8RegsRegClass.contains(DestReg) &&
      NVPTX::Int8RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::IMOV8rr), DestReg)
    .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::Int1RegsRegClass.contains(DestReg) &&
      NVPTX::Int1RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg)
    .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::Float32RegsRegClass.contains(DestReg) &&
      NVPTX::Float32RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg)
    .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::Int16RegsRegClass.contains(DestReg) &&
      NVPTX::Int16RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg)
    .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::Int64RegsRegClass.contains(DestReg) &&
      NVPTX::Int64RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg)
    .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::Float64RegsRegClass.contains(DestReg) &&
      NVPTX::Float64RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg)
    .addReg(SrcReg, getKillRegState(KillSrc));
  // Vector register classes use pseudo vector-move opcodes.
  else if (NVPTX::V4F32RegsRegClass.contains(DestReg) &&
      NVPTX::V4F32RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V4f32Mov), DestReg)
    .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V4I32RegsRegClass.contains(DestReg) &&
      NVPTX::V4I32RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V4i32Mov), DestReg)
    .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V2F32RegsRegClass.contains(DestReg) &&
      NVPTX::V2F32RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V2f32Mov), DestReg)
    .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V2I32RegsRegClass.contains(DestReg) &&
      NVPTX::V2I32RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V2i32Mov), DestReg)
    .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V4I8RegsRegClass.contains(DestReg) &&
      NVPTX::V4I8RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V4i8Mov), DestReg)
    .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V2I8RegsRegClass.contains(DestReg) &&
      NVPTX::V2I8RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V2i8Mov), DestReg)
    .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V4I16RegsRegClass.contains(DestReg) &&
      NVPTX::V4I16RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V4i16Mov), DestReg)
    .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V2I16RegsRegClass.contains(DestReg) &&
      NVPTX::V2I16RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V2i16Mov), DestReg)
    .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V2I64RegsRegClass.contains(DestReg) &&
      NVPTX::V2I64RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V2i64Mov), DestReg)
    .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V2F64RegsRegClass.contains(DestReg) &&
      NVPTX::V2F64RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V2f64Mov), DestReg)
    .addReg(SrcReg, getKillRegState(KillSrc));
  else {
    assert(0 && "Don't know how to copy a register");
  }
}
bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI,
unsigned &SrcReg,
unsigned &DestReg) const {
// Look for the appropriate part of TSFlags
bool isMove = false;
unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >>
NVPTX::SimpleMoveShift;
isMove = (TSFlags == 1);
if (isMove) {
MachineOperand dest = MI.getOperand(0);
MachineOperand src = MI.getOperand(1);
assert(dest.isReg() && "dest of a movrr is not a reg");
assert(src.isReg() && "src of a movrr is not a reg");
SrcReg = src.getReg();
DestReg = dest.getReg();
return true;
}
return false;
}
// isReadSpecialReg - True if MI reads one of the PTX special registers:
// thread id, CTA id, block/grid dimensions, or warp size.
bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const
{
  switch (MI.getOpcode()) {
  default: return false;
  case NVPTX::INT_PTX_SREG_NTID_X:
  case NVPTX::INT_PTX_SREG_NTID_Y:
  case NVPTX::INT_PTX_SREG_NTID_Z:
  case NVPTX::INT_PTX_SREG_TID_X:
  case NVPTX::INT_PTX_SREG_TID_Y:
  case NVPTX::INT_PTX_SREG_TID_Z:
  case NVPTX::INT_PTX_SREG_CTAID_X:
  case NVPTX::INT_PTX_SREG_CTAID_Y:
  case NVPTX::INT_PTX_SREG_CTAID_Z:
  case NVPTX::INT_PTX_SREG_NCTAID_X:
  case NVPTX::INT_PTX_SREG_NCTAID_Y:
  case NVPTX::INT_PTX_SREG_NCTAID_Z:
  case NVPTX::INT_PTX_SREG_WARPSIZE:
    return true;
  }
}
// isLoadInstr - True if MI carries the IsLoad TSFlags bit.  On success the
// instruction's ld/st address-space code is returned in AddrSpace.
bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI,
                                 unsigned &AddrSpace) const {
  unsigned LoadBit = (MI.getDesc().TSFlags & NVPTX::isLoadMask) >>
                     NVPTX::isLoadShift;
  if (LoadBit != 1)
    return false;
  AddrSpace = getLdStCodeAddrSpace(MI);
  return true;
}
// isStoreInstr - True if MI carries the IsStore TSFlags bit.  On success the
// instruction's ld/st address-space code is returned in AddrSpace.
bool NVPTXInstrInfo::isStoreInstr(const MachineInstr &MI,
                                  unsigned &AddrSpace) const {
  unsigned StoreBit = (MI.getDesc().TSFlags & NVPTX::isStoreMask) >>
                      NVPTX::isStoreShift;
  if (StoreBit != 1)
    return false;
  AddrSpace = getLdStCodeAddrSpace(MI);
  return true;
}
// CanTailMerge - Barriers and shared-memory loads/stores must not be
// tail-merged; every other instruction may be.
bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
  if (MI->getOpcode() == NVPTX::INT_CUDA_SYNCTHREADS)
    return false;
  unsigned AS = 0;
  if ((isLoadInstr(*MI, AS) || isStoreInstr(*MI, AS)) &&
      AS == NVPTX::PTXLdStInstCode::SHARED)
    return false;
  return true;
}
/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
/// implemented for a target). Upon success, this returns false and returns
/// with the following information in various cases:
///
/// 1. If this block ends with no branches (it just falls through to its succ)
/// just return false, leaving TBB/FBB null.
/// 2. If this block ends with only an unconditional branch, it sets TBB to be
/// the destination block.
/// 3. If this block ends with an conditional branch and it falls through to
/// an successor block, it sets TBB to be the branch destination block and a
/// list of operands that evaluate the condition. These
/// operands can be passed to other TargetInstrInfo methods to create new
/// branches.
/// 4. If this block ends with an conditional branch and an unconditional
/// block, it returns the 'true' destination in TBB, the 'false' destination
/// in FBB, and a list of operands that evaluate the condition. These
/// operands can be passed to other TargetInstrInfo methods to create new
/// branches.
///
/// Note that RemoveBranch and InsertBranch must be implemented to support
/// cases where this method returns success.
///
bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                   MachineBasicBlock *&TBB,
                                   MachineBasicBlock *&FBB,
                                   SmallVectorImpl<MachineOperand> &Cond,
                                   bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (LastInst->getOpcode() == NVPTX::GOTO) {
      // Unconditional branch (case 2 of the contract above).
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    } else if (LastInst->getOpcode() == NVPTX::CBranch) {
      // Block ends with fall-through condbranch (case 3).
      TBB = LastInst->getOperand(1).getMBB();
      Cond.push_back(LastInst->getOperand(0));
      return false;
    }
    // Otherwise, don't know what this is.
    return true;
  }

  // Get the instruction before it if it's a terminator.
  MachineInstr *SecondLastInst = I;

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() &&
      isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with NVPTX::GOTO and NVPTX:CBranch, handle it (case 4).
  if (SecondLastInst->getOpcode() == NVPTX::CBranch &&
      LastInst->getOpcode() == NVPTX::GOTO) {
    TBB =  SecondLastInst->getOperand(1).getMBB();
    Cond.push_back(SecondLastInst->getOperand(0));
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two NVPTX:GOTOs, handle it.  The second one is not
  // executed, so remove it.
  if (SecondLastInst->getOpcode() == NVPTX::GOTO &&
      LastInst->getOpcode() == NVPTX::GOTO) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
// RemoveBranch - Erase the branch instructions at the end of MBB: the final
// GOTO/CBranch, plus a CBranch immediately preceding it if present.
// Returns the number of instructions removed (0, 1, or 2).
unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  unsigned Removed = 0;

  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return Removed;
  --I;
  unsigned Opc = I->getOpcode();
  if (Opc != NVPTX::GOTO && Opc != NVPTX::CBranch)
    return Removed;
  // Remove the trailing branch.
  I->eraseFromParent();
  ++Removed;

  // A conditional branch may precede the one just removed.
  I = MBB.end();
  if (I == MBB.begin())
    return Removed;
  --I;
  if (I->getOpcode() == NVPTX::CBranch) {
    I->eraseFromParent();
    ++Removed;
  }
  return Removed;
}
// InsertBranch - Append branch instructions realizing the (TBB, FBB, Cond)
// triple produced by AnalyzeBranch.  Returns the number of instructions
// inserted (1 or 2).
unsigned
NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                             MachineBasicBlock *FBB,
                             const SmallVectorImpl<MachineOperand> &Cond,
                             DebugLoc DL) const {
  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 1 || Cond.size() == 0) &&
         "NVPTX branch conditions have two components!");

  // Two-way conditional branch: CBranch to TBB then GOTO to FBB.
  if (FBB) {
    BuildMI(&MBB, DL, get(NVPTX::CBranch))
      .addReg(Cond[0].getReg()).addMBB(TBB);
    BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB);
    return 2;
  }

  // One-way branch: plain GOTO or a lone conditional branch.
  if (Cond.empty())
    BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB);
  else
    BuildMI(&MBB, DL, get(NVPTX::CBranch))
      .addReg(Cond[0].getReg()).addMBB(TBB);
  return 1;
}

View File

@ -0,0 +1,83 @@
//===- NVPTXInstrInfo.h - NVPTX Instruction Information----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the NVPTX implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
#ifndef NVPTXINSTRUCTIONINFO_H
#define NVPTXINSTRUCTIONINFO_H
#include "NVPTX.h"
#include "NVPTXRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "NVPTXGenInstrInfo.inc"
namespace llvm {

// NVPTXInstrInfo - NVPTX implementation of TargetInstrInfo: register copies,
// TSFlags-based instruction classification, and branch analysis/insertion.
class NVPTXInstrInfo : public NVPTXGenInstrInfo
{
  NVPTXTargetMachine &TM;
  const NVPTXRegisterInfo RegInfo;
public:
  explicit NVPTXInstrInfo(NVPTXTargetMachine &TM);

  virtual const NVPTXRegisterInfo &getRegisterInfo() const { return RegInfo; }

  /* The following virtual functions are used in register allocation.
   * They are not implemented because the existing interface and the logic
   * at the caller side do not work for the elementized vector load and store.
   *
   * virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
   *                                      int &FrameIndex) const;
   * virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
   *                                     int &FrameIndex) const;
   * virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
   *                                  MachineBasicBlock::iterator MBBI,
   *                                 unsigned SrcReg, bool isKill, int FrameIndex,
   *                                  const TargetRegisterClass *RC) const;
   * virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
   *                                   MachineBasicBlock::iterator MBBI,
   *                                   unsigned DestReg, int FrameIndex,
   *                                   const TargetRegisterClass *RC) const;
   */

  virtual void copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator I, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const ;
  virtual bool isMoveInstr(const MachineInstr &MI,
                           unsigned &SrcReg,
                           unsigned &DestReg) const;
  // TSFlags-based classification helpers (see NVPTXInstrFormats.td).
  bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
  bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
  bool isReadSpecialReg(MachineInstr &MI) const;

  virtual bool CanTailMerge(const MachineInstr *MI) const ;
  // Branch analysis.
  virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const;
  virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
  virtual unsigned InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
                                MachineBasicBlock *FBB,
                                const SmallVectorImpl<MachineOperand> &Cond,
                                DebugLoc DL) const;

  // Decode the address-space operand shared by all ld/st instructions.
  unsigned getLdStCodeAddrSpace(const MachineInstr &MI) const {
    return MI.getOperand(2).getImm();
  }
};

} // namespace llvm
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,208 @@
//===- NVPTXLowerAggrCopies.cpp - ------------------------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// Lower aggregate copies, memset, memcpy, memmov intrinsics into loops when
// the size is large or is not a compile-time constant.
//
//===----------------------------------------------------------------------===//
#include "llvm/Function.h"
#include "llvm/Constants.h"
#include "llvm/Module.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/IRBuilder.h"
#include "NVPTXLowerAggrCopies.h"
#include "llvm/Target/TargetData.h"
#include "llvm/LLVMContext.h"
using namespace llvm;
namespace llvm {
FunctionPass *createLowerAggrCopies();
}
char NVPTXLowerAggrCopies::ID = 0;
// Lower MemTransferInst or load-store pair to loop
// Lower a memory transfer (memcpy/memmove, or an aggregate load-store pair)
// into an explicit byte-copy loop inserted just before 'splitAt':
//   origBB -> loopBB (copies one byte per iteration) -> newBB
//
// splitAt     - instruction before which the loop is inserted
// srcAddr     - source pointer; must be of pointer type
// dstAddr     - destination pointer; must be of pointer type
// len         - number of bytes to copy; its type drives the loop index
// srcVolatile - emit volatile loads from srcAddr
// dstVolatile - emit volatile stores to dstAddr
static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr,
                                  Value *dstAddr, Value *len,
                                  //unsigned numLoads,
                                  bool srcVolatile, bool dstVolatile,
                                  LLVMContext &Context, Function &F) {
  Type *indType = len->getType();

  BasicBlock *origBB = splitAt->getParent();
  BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split");
  BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB);

  // splitBasicBlock left origBB ending in an unconditional branch to newBB;
  // retarget it to the loop body.
  origBB->getTerminator()->setSuccessor(0, loopBB);
  IRBuilder<> builder(origBB, origBB->getTerminator());

  // srcAddr and dstAddr are expected to be pointer types.  Check that
  // explicitly instead of dereferencing a possibly-null dyn_cast result.
  PointerType *srcPtrTy = dyn_cast<PointerType>(srcAddr->getType());
  PointerType *dstPtrTy = dyn_cast<PointerType>(dstAddr->getType());
  assert(srcPtrTy && dstPtrTy && "transfer operands must be pointers");
  unsigned srcAS = srcPtrTy->getAddressSpace();
  unsigned dstAS = dstPtrTy->getAddressSpace();

  // Cast pointers to (char *), preserving their address spaces.
  srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS));
  dstAddr = builder.CreateBitCast(dstAddr, Type::getInt8PtrTy(Context, dstAS));

  IRBuilder<> loop(loopBB);
  // The loop index (ind) is a phi node: 0 on entry from origBB, ind+1 from
  // the backedge.
  PHINode *ind = loop.CreatePHI(indType, 0);
  ind->addIncoming(ConstantInt::get(indType, 0), origBB);

  // load from srcAddr+ind
  Value *val = loop.CreateLoad(loop.CreateGEP(srcAddr, ind), srcVolatile);
  // store at dstAddr+ind
  loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), dstVolatile);

  // The value for ind coming from the backedge is (ind + 1).
  Value *newind = loop.CreateAdd(ind, ConstantInt::get(indType, 1));
  ind->addIncoming(newind, loopBB);

  // Continue while (ind + 1) < len, else fall through to newBB.
  loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB);
}
// Lower MemSetInst to loop
// Lower a MemSetInst into an explicit store loop inserted just before
// 'splitAt':  origBB -> loopBB (one store of 'val' per iteration) -> newBB.
//
// splitAt - instruction before which the loop is inserted
// dstAddr - destination pointer; must be of pointer type
// len     - number of elements to store; its type drives the loop index
// val     - value stored at each position; dstAddr is re-cast to its type
static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
                                Value *len, Value *val, LLVMContext &Context,
                                Function &F) {
  BasicBlock *origBB = splitAt->getParent();
  BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split");
  BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB);

  // Redirect origBB's fall-through branch (created by splitBasicBlock) into
  // the loop body.
  origBB->getTerminator()->setSuccessor(0, loopBB);
  IRBuilder<> builder(origBB, origBB->getTerminator());

  // dstAddr is expected to be a pointer type; check instead of blindly
  // dereferencing a possibly-null dyn_cast result.
  PointerType *dstPtrTy = dyn_cast<PointerType>(dstAddr->getType());
  assert(dstPtrTy && "memset destination must be a pointer");
  unsigned dstAS = dstPtrTy->getAddressSpace();

  // Cast pointer to the type of value getting stored, keeping the address
  // space.
  dstAddr = builder.CreateBitCast(dstAddr,
                                  PointerType::get(val->getType(), dstAS));

  IRBuilder<> loop(loopBB);
  // Loop index phi: 0 on entry, ind+1 from the backedge.
  PHINode *ind = loop.CreatePHI(len->getType(), 0);
  ind->addIncoming(ConstantInt::get(len->getType(), 0), origBB);

  loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), false);

  Value *newind = loop.CreateAdd(ind, ConstantInt::get(len->getType(), 1));
  ind->addIncoming(newind, loopBB);

  // Continue while (ind + 1) < len, else fall through to newBB.
  loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB);
}
// Scan F for aggregate copies that should become explicit loops:
//  - a single-use load feeding a store, where the loaded type's store size
//    is at least MaxAggrCopySize bytes,
//  - memcpy/memmove intrinsics whose length is a constant >= MaxAggrCopySize
//    or not a compile-time constant at all,
//  - memset intrinsics under the same length conditions.
// Each collected instruction is then rewritten into a loop (see
// convertTransferToLoop / convertMemSetToLoop) and erased.
// Returns true iff the function was modified.
bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
  SmallVector<LoadInst *, 4> aggrLoads;
  SmallVector<MemTransferInst *, 4> aggrMemcpys;
  SmallVector<MemSetInst *, 4> aggrMemsets;

  TargetData *TD = &getAnalysis<TargetData>();
  LLVMContext &Context = F.getParent()->getContext();

  //
  // Collect all the aggrLoads, aggrMemcpys and addrMemsets.
  //
  //const BasicBlock *firstBB = &F.front(); // first BB in F
  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
    //BasicBlock *bb = BI;
    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
        ++II) {
      if (LoadInst * load = dyn_cast<LoadInst>(II)) {
        // Only a load whose sole user is a store of the loaded value (an
        // aggregate copy) is of interest.
        if (load->hasOneUse() == false) continue;

        // Small copies are left for normal instruction selection.
        if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize) continue;

        User *use = *(load->use_begin());
        if (StoreInst * store = dyn_cast<StoreInst>(use)) {
          if (store->getOperand(0) != load) //getValueOperand
            continue;
          aggrLoads.push_back(load);
        }
      } else if (MemTransferInst * intr = dyn_cast<MemTransferInst>(II)) {
        Value *len = intr->getLength();
        // If the number of elements being copied is greater
        // than MaxAggrCopySize, lower it to a loop
        if (ConstantInt * len_int = dyn_cast < ConstantInt > (len)) {
          if (len_int->getZExtValue() >= MaxAggrCopySize) {
            aggrMemcpys.push_back(intr);
          }
        } else {
          // turn variable length memcpy/memmov into loop
          aggrMemcpys.push_back(intr);
        }
      } else if (MemSetInst * memsetintr = dyn_cast<MemSetInst>(II)) {
        Value *len = memsetintr->getLength();
        if (ConstantInt * len_int = dyn_cast<ConstantInt>(len)) {
          if (len_int->getZExtValue() >= MaxAggrCopySize) {
            aggrMemsets.push_back(memsetintr);
          }
        } else {
          // turn variable length memset into loop
          aggrMemsets.push_back(memsetintr);
        }
      }
    }
  }
  // Nothing collected: the function is unchanged.
  if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0)
      && (aggrMemsets.size() == 0)) return false;

  //
  // Do the transformation of an aggr load/copy/set to a loop
  //
  for (unsigned i = 0, e = aggrLoads.size(); i != e; ++i) {
    LoadInst *load = aggrLoads[i];
    // The single user was verified to be a store during collection.
    StoreInst *store = dyn_cast<StoreInst>(*load->use_begin());
    Value *srcAddr = load->getOperand(0);
    Value *dstAddr = store->getOperand(1);
    unsigned numLoads = TD->getTypeStoreSize(load->getType());
    Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads);

    convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(),
                          store->isVolatile(), Context, F);

    // The loop replaces the pair; drop the store first (it uses the load).
    store->eraseFromParent();
    load->eraseFromParent();
  }

  for (unsigned i = 0, e = aggrMemcpys.size(); i != e; ++i) {
    MemTransferInst *cpy = aggrMemcpys[i];
    Value *len = cpy->getLength();
    // llvm 2.7 version of memcpy does not have volatile
    // operand yet. So always making it non-volatile
    // optimistically, so that we don't see unnecessary
    // st.volatile in ptx
    convertTransferToLoop(cpy, cpy->getSource(), cpy->getDest(), len, false,
                          false, Context, F);
    cpy->eraseFromParent();
  }

  for (unsigned i = 0, e = aggrMemsets.size(); i != e; ++i) {
    MemSetInst *memsetinst = aggrMemsets[i];
    Value *len = memsetinst->getLength();
    Value *val = memsetinst->getValue();
    convertMemSetToLoop(memsetinst, memsetinst->getDest(), len, val, Context,
                        F);
    memsetinst->eraseFromParent();
  }

  return true;
}
// Factory routine: create an instance of the aggregate-copy lowering pass.
FunctionPass *llvm::createLowerAggrCopies() {
  NVPTXLowerAggrCopies *Lowering = new NVPTXLowerAggrCopies();
  return Lowering;
}

View File

@ -0,0 +1,47 @@
//===-- llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declaration of the NVIDIA specific lowering of
// aggregate copies
//
//===----------------------------------------------------------------------===//
#ifndef NVPTX_LOWER_AGGR_COPIES_H
#define NVPTX_LOWER_AGGR_COPIES_H
#include "llvm/Pass.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/Target/TargetData.h"
namespace llvm {
// actual analysis class, which is a functionpass
// NVPTXLowerAggrCopies - FunctionPass that rewrites large or variable-length
// aggregate copy / memcpy / memset operations into explicit loops (see the
// corresponding .cpp for the transformation itself).
struct NVPTXLowerAggrCopies : public FunctionPass {
  static char ID; // Pass identification.

  NVPTXLowerAggrCopies() : FunctionPass(ID) {}

  // Needs TargetData (for type store sizes); keeps the machine function
  // analysis valid.
  void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<TargetData>();
    AU.addPreserved<MachineFunctionAnalysis>();
  }

  virtual bool runOnFunction(Function &F);

  // Byte threshold: copies smaller than this are left to instruction
  // selection.
  static const unsigned MaxAggrCopySize = 128;

  virtual const char *getPassName() const {
    return "Lower aggregate copies/intrinsics into loops";
  }
};
extern FunctionPass *createLowerAggrCopies();
}
#endif

View File

@ -0,0 +1,20 @@
//===-- NVPTXNumRegisters.h - PTX Register Info ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef NVPTX_NUM_REGISTERS_H
#define NVPTX_NUM_REGISTERS_H
namespace llvm {
// Number of virtual registers modeled by the NVPTX register file.
const unsigned NVPTXNumRegisters = 396;
}
#endif

View File

@ -0,0 +1,332 @@
//===- NVPTXRegisterInfo.cpp - NVPTX Register Information -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the NVPTX implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "nvptx-reg-info"
#include "NVPTX.h"
#include "NVPTXRegisterInfo.h"
#include "NVPTXSubtarget.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
namespace llvm
{
// Map a register class to the PTX type suffix used when declaring registers
// of that class (".f32", ".pred", ...).  Unknown classes yield "INTERNAL".
std::string getNVPTXRegClassName (TargetRegisterClass const *RC) {
  // Scalar classes.
  if (RC == &NVPTX::Float32RegsRegClass)
    return ".f32";
  if (RC == &NVPTX::Float64RegsRegClass)
    return ".f64";
  if (RC == &NVPTX::Int64RegsRegClass)
    return ".s64";
  if (RC == &NVPTX::Int32RegsRegClass)
    return ".s32";
  if (RC == &NVPTX::Int16RegsRegClass)
    return ".s16";
  // Int8Regs become 16-bit registers in PTX.
  if (RC == &NVPTX::Int8RegsRegClass)
    return ".s16";
  if (RC == &NVPTX::Int1RegsRegClass)
    return ".pred";
  if (RC == &NVPTX::SpecialRegsRegClass)
    return "!Special!";
  // Vector classes; 8-bit element vectors also widen to 16-bit elements.
  if (RC == &NVPTX::V2F32RegsRegClass)
    return ".v2.f32";
  if (RC == &NVPTX::V4F32RegsRegClass)
    return ".v4.f32";
  if (RC == &NVPTX::V2I32RegsRegClass)
    return ".v2.s32";
  if (RC == &NVPTX::V4I32RegsRegClass)
    return ".v4.s32";
  if (RC == &NVPTX::V2F64RegsRegClass)
    return ".v2.f64";
  if (RC == &NVPTX::V2I64RegsRegClass)
    return ".v2.s64";
  if (RC == &NVPTX::V2I16RegsRegClass)
    return ".v2.s16";
  if (RC == &NVPTX::V4I16RegsRegClass)
    return ".v4.s16";
  if (RC == &NVPTX::V2I8RegsRegClass)
    return ".v2.s16";
  if (RC == &NVPTX::V4I8RegsRegClass)
    return ".v4.s16";
  // Not a class we know how to print.
  return "INTERNAL";
}
// Map a register class to the textual prefix used for its virtual register
// names in the emitted PTX ("%f", "%rd", ...).  Unknown classes yield
// "INTERNAL".
std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) {
  // Scalar classes.
  if (RC == &NVPTX::Float32RegsRegClass)
    return "%f";
  if (RC == &NVPTX::Float64RegsRegClass)
    return "%fd";
  if (RC == &NVPTX::Int64RegsRegClass)
    return "%rd";
  if (RC == &NVPTX::Int32RegsRegClass)
    return "%r";
  if (RC == &NVPTX::Int16RegsRegClass)
    return "%rs";
  if (RC == &NVPTX::Int8RegsRegClass)
    return "%rc";
  if (RC == &NVPTX::Int1RegsRegClass)
    return "%p";
  if (RC == &NVPTX::SpecialRegsRegClass)
    return "!Special!";
  // Vector classes.
  if (RC == &NVPTX::V2F32RegsRegClass)
    return "%v2f";
  if (RC == &NVPTX::V4F32RegsRegClass)
    return "%v4f";
  if (RC == &NVPTX::V2I32RegsRegClass)
    return "%v2r";
  if (RC == &NVPTX::V4I32RegsRegClass)
    return "%v4r";
  if (RC == &NVPTX::V2F64RegsRegClass)
    return "%v2fd";
  if (RC == &NVPTX::V2I64RegsRegClass)
    return "%v2rd";
  if (RC == &NVPTX::V2I16RegsRegClass)
    return "%v2s";
  if (RC == &NVPTX::V4I16RegsRegClass)
    return "%v4rs";
  if (RC == &NVPTX::V2I8RegsRegClass)
    return "%v2rc";
  if (RC == &NVPTX::V4I8RegsRegClass)
    return "%v4rc";
  // Not a class we know how to print.
  return "INTERNAL";
}
// Return true iff RC is one of the NVPTX vector register classes.
bool isNVPTXVectorRegClass(TargetRegisterClass const *RC) {
  switch (RC->getID()) {
  case NVPTX::V2F32RegsRegClassID:
  case NVPTX::V2F64RegsRegClassID:
  case NVPTX::V2I16RegsRegClassID:
  case NVPTX::V2I32RegsRegClassID:
  case NVPTX::V2I64RegsRegClassID:
  case NVPTX::V2I8RegsRegClassID:
  case NVPTX::V4F32RegsRegClassID:
  case NVPTX::V4I16RegsRegClassID:
  case NVPTX::V4I32RegsRegClassID:
  case NVPTX::V4I8RegsRegClassID:
    return true;
  default:
    return false;
  }
}
// For a vector register class, return the PTX type name of its element
// class.  Asserts when RC is not a vector class.
std::string getNVPTXElemClassName(TargetRegisterClass const *RC) {
  switch (RC->getID()) {
  case NVPTX::V2F32RegsRegClassID:
  case NVPTX::V4F32RegsRegClassID:
    return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass);
  case NVPTX::V2F64RegsRegClassID:
    return getNVPTXRegClassName(&NVPTX::Float64RegsRegClass);
  case NVPTX::V2I16RegsRegClassID:
  case NVPTX::V4I16RegsRegClassID:
    return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass);
  case NVPTX::V2I32RegsRegClassID:
  case NVPTX::V4I32RegsRegClassID:
    return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass);
  case NVPTX::V2I64RegsRegClassID:
    return getNVPTXRegClassName(&NVPTX::Int64RegsRegClass);
  case NVPTX::V2I8RegsRegClassID:
  case NVPTX::V4I8RegsRegClassID:
    return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass);
  default:
    assert(0 && "Not a vector register class");
    return "Unsupported";
  }
}
// For a vector register class, return the register class of its elements.
// Asserts when RC is not a vector class.
const TargetRegisterClass *getNVPTXElemClass(TargetRegisterClass const *RC) {
  switch (RC->getID()) {
  case NVPTX::V2F32RegsRegClassID:
  case NVPTX::V4F32RegsRegClassID:
    return (&NVPTX::Float32RegsRegClass);
  case NVPTX::V2F64RegsRegClassID:
    return (&NVPTX::Float64RegsRegClass);
  case NVPTX::V2I16RegsRegClassID:
  case NVPTX::V4I16RegsRegClassID:
    return (&NVPTX::Int16RegsRegClass);
  case NVPTX::V2I32RegsRegClassID:
  case NVPTX::V4I32RegsRegClassID:
    return (&NVPTX::Int32RegsRegClass);
  case NVPTX::V2I64RegsRegClassID:
    return (&NVPTX::Int64RegsRegClass);
  case NVPTX::V2I8RegsRegClassID:
  case NVPTX::V4I8RegsRegClassID:
    return (&NVPTX::Int8RegsRegClass);
  default:
    assert(0 && "Not a vector register class");
    return 0;
  }
}
// Return the element count (2 or 4) of a vector register class.  Asserts
// and returns -1 when RC is not a vector class.
int getNVPTXVectorSize(TargetRegisterClass const *RC) {
  switch (RC->getID()) {
  case NVPTX::V2F32RegsRegClassID:
  case NVPTX::V2F64RegsRegClassID:
  case NVPTX::V2I16RegsRegClassID:
  case NVPTX::V2I32RegsRegClassID:
  case NVPTX::V2I64RegsRegClassID:
  case NVPTX::V2I8RegsRegClassID:
    return 2;
  case NVPTX::V4F32RegsRegClassID:
  case NVPTX::V4I16RegsRegClassID:
  case NVPTX::V4I32RegsRegClassID:
  case NVPTX::V4I8RegsRegClassID:
    return 4;
  default:
    assert(0 && "Not a vector register class");
    return -1;
  }
}
}
// Construct the NVPTX register info; the 64-bit flag is cached from the
// subtarget.  (Member order matches the declaration order in the header.)
NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii,
                                     const NVPTXSubtarget &st)
  : NVPTXGenRegisterInfo(0), TII(tii), ST(st), Is64Bit(st.is64Bit()) {}
#define GET_REGINFO_TARGET_DESC
#include "NVPTXGenRegisterInfo.inc"
// NVPTX callee-saved registers: there are none, so the list is just the
// zero terminator.
const uint16_t *
NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  static const uint16_t SavedRegs[] = { 0 };
  return SavedRegs;
}
// NVPTX callee-saved register classes: likewise empty, just a null
// terminator.
const TargetRegisterClass* const*
NVPTXRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
  static const TargetRegisterClass *const SavedRegClasses[] = { 0 };
  return SavedRegClasses;
}
// No registers are reserved on NVPTX: return an all-clear bit vector sized
// to the register file.
BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  return BitVector(getNumRegs());
}
// Rewrite the frame-index operand of *II into a (frame register, offset)
// pair: the FI operand becomes VRFrame and the following immediate operand
// is folded with the object's frame offset.
void NVPTXRegisterInfo::
eliminateFrameIndex(MachineBasicBlock::iterator II,
                    int SPAdj,
                    RegScavenger *RS) const {
  assert(SPAdj == 0 && "Unexpected");

  // Locate the frame-index operand; the immediate offset is assumed to be
  // the operand right after it.
  unsigned i = 0;
  MachineInstr &MI = *II;
  while (!MI.getOperand(i).isFI()) {
    ++i;
    assert(i < MI.getNumOperands() &&
           "Instr doesn't have FrameIndex operand!");
  }

  int FrameIndex = MI.getOperand(i).getIndex();

  MachineFunction &MF = *MI.getParent()->getParent();
  // Total offset = object's frame offset + the instruction's own offset.
  int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
      MI.getOperand(i+1).getImm();

  // Using I0 as the frame pointer
  MI.getOperand(i).ChangeToRegister(NVPTX::VRFrame, false);
  MI.getOperand(i+1).ChangeToImmediate(Offset);
}
// NVPTX does not emit DWARF debug info; every register maps to 0.
int NVPTXRegisterInfo::
getDwarfRegNum(unsigned RegNum, bool isEH) const {
  return 0;
}
// The virtual frame register serves as the frame pointer for all functions.
unsigned NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  return NVPTX::VRFrame;
}
// There is no return-address register in PTX; report 0.
unsigned NVPTXRegisterInfo::getRARegister() const {
  return 0;
}
// This function eliminates ADJCALLSTACKDOWN,
// ADJCALLSTACKUP pseudo instructions
void NVPTXRegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  // Simply discard ADJCALLSTACKDOWN,
  // ADJCALLSTACKUP instructions.  No stack adjustment is required in PTX.
  MBB.erase(I);
}

View File

@ -0,0 +1,94 @@
//===- NVPTXRegisterInfo.h - NVPTX Register Information Impl ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the NVPTX implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//
#ifndef NVPTXREGISTERINFO_H
#define NVPTXREGISTERINFO_H
#include "ManagedStringPool.h"
#include "llvm/Target/TargetRegisterInfo.h"
#define GET_REGINFO_HEADER
#include "NVPTXGenRegisterInfo.inc"
#include "llvm/Target/TargetRegisterInfo.h"
#include <sstream>
namespace llvm {
// Forward Declarations.
class TargetInstrInfo;
class NVPTXSubtarget;
// NVPTXRegisterInfo - NVPTX-specific register information, layered on the
// TableGen-generated NVPTXGenRegisterInfo.
class NVPTXRegisterInfo : public NVPTXGenRegisterInfo {
private:
  const TargetInstrInfo &TII;   // Owning target's instruction info.
  const NVPTXSubtarget &ST;     // Subtarget this register info serves.
  bool Is64Bit;                 // Cached from the subtarget's is64Bit().

  // Hold Strings that can be free'd all together with NVPTXRegisterInfo
  ManagedStringPool ManagedStrPool;

public:
  NVPTXRegisterInfo(const TargetInstrInfo &tii,
                    const NVPTXSubtarget &st);

  //------------------------------------------------------
  // Pure virtual functions from TargetRegisterInfo
  //------------------------------------------------------

  // NVPTX callee saved registers
  virtual const uint16_t*
  getCalleeSavedRegs(const MachineFunction *MF = 0) const;

  // NVPTX callee saved register classes
  virtual const TargetRegisterClass* const *
  getCalleeSavedRegClasses(const MachineFunction *MF) const;

  virtual BitVector getReservedRegs(const MachineFunction &MF) const;

  // Rewrite a frame-index operand into (frame register, offset).
  virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                   int SPAdj,
                                   RegScavenger *RS=NULL) const;

  // Discards ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudos.
  void eliminateCallFramePseudoInstr(MachineFunction &MF,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator I) const;

  virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const;
  virtual unsigned getFrameRegister(const MachineFunction &MF) const;
  virtual unsigned getRARegister() const;

  // Pool for register-name strings; kept mutable-through-const so name
  // lookups on a const object still work.
  ManagedStringPool *getStrPool() const {
    return const_cast<ManagedStringPool *>(&ManagedStrPool);
  }

  // Build the printable name "reg<N>"; the returned pointer stays valid as
  // long as this object (and its string pool) lives.
  const char *getName(unsigned RegNo) const {
    std::stringstream O;
    O << "reg" << RegNo;
    return getStrPool()->getManagedString(O.str().c_str())->c_str();
  }
};
std::string getNVPTXRegClassName (const TargetRegisterClass *RC);
std::string getNVPTXRegClassStr (const TargetRegisterClass *RC);
bool isNVPTXVectorRegClass (const TargetRegisterClass *RC);
std::string getNVPTXElemClassName (const TargetRegisterClass *RC);
int getNVPTXVectorSize (const TargetRegisterClass *RC);
const TargetRegisterClass *getNVPTXElemClass(const TargetRegisterClass *RC);
} // end namespace llvm
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,45 @@
//===- NVPTXSection.h - NVPTX-specific section representation -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the NVPTXSection class.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_NVPTXSECTION_H
#define LLVM_NVPTXSECTION_H
#include "llvm/MC/MCSection.h"
#include "llvm/GlobalVariable.h"
#include <vector>
namespace llvm {
/// NVPTXSection - Represents a section in PTX
/// PTX does not have sections. We create this class in order to use
/// the ASMPrint interface.
///
// NVPTXSection - a stub MCSection: PTX has no real sections, but the asm
// printer infrastructure requires section objects, so this class provides
// no-op implementations.
class NVPTXSection : public MCSection {
public:
  NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K) {}
  ~NVPTXSection() {};

  /// Override this as NVPTX has its own way of printing switching
  /// to a section.  (Intentionally prints nothing.)
  virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
                                    raw_ostream &OS) const {}

  /// Base address of PTX sections is zero.
  virtual bool isBaseAddressKnownZero() const { return true; }
  virtual bool UseCodeAlign() const { return false; }
  virtual bool isVirtualSection() const { return false; }
};
} // end namespace llvm
#endif

View File

@ -0,0 +1,77 @@
//===- NVPTXSplitBBatBar.cpp - Split BB at Barrier --*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// Split basic blocks so that a basic block that contains a barrier instruction
// only contains the barrier instruction.
//
//===----------------------------------------------------------------------===//
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Support/InstIterator.h"
#include "NVPTXUtilities.h"
#include "NVPTXSplitBBatBar.h"
using namespace llvm;
namespace llvm {
FunctionPass *createSplitBBatBarPass();
}
char NVPTXSplitBBatBar::ID = 0;
// Split every basic block containing a barrier intrinsic so that the barrier
// ends up (nearly) alone in its block: a split is recorded before the
// barrier unless it is already first in its block, and after it unless the
// next instruction is a terminator.  Returns true if anything was split.
bool NVPTXSplitBBatBar::runOnFunction(Function &F) {
  SmallVector<Instruction *, 4> SplitPoints;
  bool changed = false;

  // Collect all the split points in SplitPoints
  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
    BasicBlock::iterator IB = BI->begin();
    BasicBlock::iterator II = IB;
    BasicBlock::iterator IE = BI->end();

    // Skip the first instruction. No splitting is needed at this
    // point even if this is a bar.
    while (II != IE) {
      if (IntrinsicInst *inst = dyn_cast<IntrinsicInst>(II)) {
        Intrinsic::ID id = inst->getIntrinsicID();
        // If this is a barrier, split at this instruction
        // and the next instruction.
        if (llvm::isBarrierIntrinsic(id)) {
          if (II != IB)
            SplitPoints.push_back(II);
          II++;
          if ((II != IE) && (!II->isTerminator())) {
            SplitPoints.push_back(II);
            II++;
          }
          continue;
        }
      }
      II++;
    }
  }

  // Perform all splits after the scan so iterators above stay valid.
  for (unsigned i = 0; i != SplitPoints.size(); i++) {
    changed = true;
    Instruction *inst = SplitPoints[i];
    inst->getParent()->splitBasicBlock(inst, "bar_split");
  }

  return changed;
}
// This interface will most likely not be necessary, because this pass will
// not be invoked by the driver, but will be used as a prerequisite to
// another pass.
FunctionPass *llvm::createSplitBBatBarPass() {
  FunctionPass *Splitter = new NVPTXSplitBBatBar();
  return Splitter;
}

View File

@ -0,0 +1,41 @@
//===-- llvm/lib/Target/NVPTX/NVPTXSplitBBatBar.h ---------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declaration of the NVIDIA specific declarations
// for splitting basic blocks at barrier instructions.
//
//===----------------------------------------------------------------------===//
#ifndef NVPTX_SPLIT_BB_AT_BAR_H
#define NVPTX_SPLIT_BB_AT_BAR_H
#include "llvm/Pass.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
namespace llvm {
// actual analysis class, which is a functionpass
// NVPTXSplitBBatBar - FunctionPass that splits basic blocks around barrier
// intrinsics (see the corresponding .cpp for the transformation).
struct NVPTXSplitBBatBar : public FunctionPass {
  static char ID; // Pass identification.

  NVPTXSplitBBatBar() : FunctionPass(ID) {}

  // Keeps the machine function analysis valid; requires nothing.
  void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addPreserved<MachineFunctionAnalysis>();
  }
  virtual bool runOnFunction(Function &F);

  virtual const char *getPassName() const {
    return "Split basic blocks at barrier";
  }
};
extern FunctionPass *createSplitBBatBarPass();
}
#endif //NVPTX_SPLIT_BB_AT_BAR_H

View File

@ -0,0 +1,57 @@
//===- NVPTXSubtarget.cpp - NVPTX Subtarget Information -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the NVPTX specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
#include "NVPTXSubtarget.h"
#define GET_SUBTARGETINFO_ENUM
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "NVPTXGenSubtargetInfo.inc"
using namespace llvm;
// Select Driver Interface
#include "llvm/Support/CommandLine.h"
namespace {
// Command-line switch selecting which driver interface the generated PTX
// targets; defaults to the NVCL (OpenCL) flavor.
cl::opt<NVPTX::DrvInterface>
DriverInterface(cl::desc("Choose driver interface:"),
                cl::values(
                    clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"),
                    clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"),
                    clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"),
                    clEnumValEnd),
                cl::init(NVPTX::NVCL));
}
// Construct the NVPTX subtarget.  The target name (e.g. "sm_20") comes from
// CPU, falling back to "sm_10" when both CPU and FS are empty; the numeric
// SM version is then parsed from the characters after the "sm_" prefix.
NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
                               const std::string &FS, bool is64Bit)
:NVPTXGenSubtargetInfo(TT, "", FS), // Don't pass CPU to subtarget,
                                    // because we don't register all
                                    // nvptx targets.
 Is64Bit(is64Bit) {

  drvInterface = DriverInterface;

  // Provide the default CPU if none
  std::string defCPU = "sm_10";

  // Get the TargetName from the FS if available
  if (FS.empty() && CPU.empty())
    TargetName = defCPU;
  else if (!CPU.empty())
    TargetName = CPU;
  else
    llvm_unreachable("we are not using FeatureStr");

  // Set up the SmVersion.  The "+3" skips the "sm_" prefix; assumes
  // TargetName always has that form.
  SmVersion = atoi(TargetName.c_str()+3);
}

View File

@ -0,0 +1,92 @@
//=====-- NVPTXSubtarget.h - Define Subtarget for the NVPTX ---*- C++ -*--====//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the NVPTX specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
#ifndef NVPTXSUBTARGET_H
#define NVPTXSUBTARGET_H
#include "llvm/Target/TargetSubtargetInfo.h"
#include "NVPTX.h"
#define GET_SUBTARGETINFO_HEADER
#include "NVPTXGenSubtargetInfo.inc"
#include <string>
namespace llvm {
// NVPTXSubtarget - describes the targeted SM architecture and exposes
// feature predicates keyed off the numeric SM version.
class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
  unsigned int SmVersion;           // e.g. 20 for sm_20.
  std::string TargetName;           // e.g. "sm_20".
  NVPTX::DrvInterface drvInterface; // NVCL / CUDA / TEST.
  bool dummy; // For the 'dummy' feature, see NVPTX.td
  bool Is64Bit;                     // Targeting 64-bit PTX?

public:
  /// This constructor initializes the data members to match that
  /// of the specified module.
  ///
  NVPTXSubtarget(const std::string &TT, const std::string &CPU,
                 const std::string &FS, bool is64Bit);

  // Feature predicates by minimum SM version.
  bool hasBrkPt() const { return SmVersion >= 11; }
  bool hasAtomRedG32() const { return SmVersion >= 11; }
  bool hasAtomRedS32() const { return SmVersion >= 12; }
  bool hasAtomRedG64() const { return SmVersion >= 12; }
  bool hasAtomRedS64() const { return SmVersion >= 20; }
  bool hasAtomRedGen32() const { return SmVersion >= 20; }
  bool hasAtomRedGen64() const { return SmVersion >= 20; }
  bool hasAtomAddF32() const { return SmVersion >= 20; }
  bool hasVote() const { return SmVersion >= 12; }
  bool hasDouble() const { return SmVersion >= 13; }
  bool reqPTX20() const { return SmVersion >= 20; }
  bool hasF32FTZ() const { return SmVersion >= 20; }
  bool hasFMAF32() const { return SmVersion >= 20; }
  bool hasFMAF64() const { return SmVersion >= 13; }
  bool hasLDU() const { return SmVersion >= 20; }
  bool hasGenericLdSt() const { return SmVersion >= 20; }
  // Rotates: no hardware rotate on any SM; 32-bit rotate is always emulated.
  inline bool hasHWROT32() const { return false; }
  inline bool hasSWROT32() const {
    return true;
  }
  inline bool hasROT32() const { return hasHWROT32() || hasSWROT32() ; }
  inline bool hasROT64() const { return SmVersion >= 20; }

  bool is64Bit() const { return Is64Bit; }

  unsigned int getSmVersion() const { return SmVersion; }
  NVPTX::DrvInterface getDrvInterface() const { return drvInterface; }
  std::string getTargetName() const { return TargetName; }

  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);

  // Data-layout string: identical for 32- and 64-bit except for the pointer
  // size.
  std::string getDataLayout() const {
    const char *p;
    if (is64Bit())
      p = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-"
          "f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-"
          "n16:32:64";
    else
      p = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-"
          "f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-"
          "n16:32:64";

    return std::string(p);
  }

};
} // End llvm namespace
#endif // NVPTXSUBTARGET_H

View File

@ -0,0 +1,133 @@
//===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Top-level implementation for the NVPTX target.
//
//===----------------------------------------------------------------------===//
#include "NVPTXTargetMachine.h"
#include "NVPTX.h"
#include "NVPTXSplitBBatBar.h"
#include "NVPTXLowerAggrCopies.h"
#include "MCTargetDesc/NVPTXMCAsmInfo.h"
#include "NVPTXAllocaHoisting.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
// Hook called by the target registry: registers both NVPTX target machines
// (32- and 64-bit) and their MC asm-info objects.
extern "C" void LLVMInitializeNVPTXTarget() {
  // Register the target.
  RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32);
  RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64);

  RegisterMCAsmInfo<NVPTXMCAsmInfo> A(TheNVPTXTarget32);
  RegisterMCAsmInfo<NVPTXMCAsmInfo> B(TheNVPTXTarget64);
}
// Common constructor for both pointer widths; 'is64bit' selects the data
// layout and frame lowering flavor.
NVPTXTargetMachine::NVPTXTargetMachine(const Target &T,
                                       StringRef TT,
                                       StringRef CPU,
                                       StringRef FS,
                                       const TargetOptions& Options,
                                       Reloc::Model RM,
                                       CodeModel::Model CM,
                                       CodeGenOpt::Level OL,
                                       bool is64bit)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
  Subtarget(TT, CPU, FS, is64bit),
  // Data layout string comes from the subtarget (pointer-width dependent).
  DataLayout(Subtarget.getDataLayout()),
  InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit)
/*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {
}
// Out-of-line virtual method to pin NVPTXTargetMachine32's vtable here.
void NVPTXTargetMachine32::anchor() {}

// 32-bit variant: forwards to the common constructor with is64bit = false.
NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, StringRef TT,
                                           StringRef CPU, StringRef FS,
                                           const TargetOptions &Options,
                                           Reloc::Model RM, CodeModel::Model CM,
                                           CodeGenOpt::Level OL)
  : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
}

// Out-of-line virtual method to pin NVPTXTargetMachine64's vtable here.
void NVPTXTargetMachine64::anchor() {}

// 64-bit variant: forwards to the common constructor with is64bit = true.
NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, StringRef TT,
                                           StringRef CPU, StringRef FS,
                                           const TargetOptions &Options,
                                           Reloc::Model RM, CodeModel::Model CM,
                                           CodeGenOpt::Level OL)
  : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
}
namespace llvm {
/// NVPTXPassConfig - Customizes the pass pipeline for NVPTX code
/// generation; overrides the instruction-selection and pre-register-
/// allocation hooks.
class NVPTXPassConfig : public TargetPassConfig {
public:
  NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {}

  // Convenience accessor that downcasts to the NVPTX target machine.
  NVPTXTargetMachine &getNVPTXTargetMachine() const {
    return getTM<NVPTXTargetMachine>();
  }

  virtual bool addInstSelector();
  virtual bool addPreRegAlloc();
};
}
// Create the NVPTX-specific pass configuration; the caller takes ownership.
TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
  return new NVPTXPassConfig(this, PM);
}
// addInstSelector - Install the NVPTX instruction selector plus the
// IR-level preparation passes that run just before it and the vector
// elementization pass that runs just after.
// NOTE(review): the pass ordering here looks deliberate (lower aggregate
// copies / split blocks at barriers / hoist allocas before ISel) — confirm
// before reordering.
bool NVPTXPassConfig::addInstSelector() {
  PM->add(createLowerAggrCopies());
  PM->add(createSplitBBatBarPass());
  PM->add(createAllocaHoisting());
  PM->add(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
  PM->add(createVectorElementizePass(getNVPTXTargetMachine()));
  return false;
}

// addPreRegAlloc - NVPTX adds no passes before register allocation.
bool NVPTXPassConfig::addPreRegAlloc() {
  return false;
}

View File

@ -0,0 +1,131 @@
//===-- NVPTXTargetMachine.h - Define TargetMachine for NVPTX ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the NVPTX specific subclass of TargetMachine.
//
//===----------------------------------------------------------------------===//
#ifndef NVPTX_TARGETMACHINE_H
#define NVPTX_TARGETMACHINE_H
#include "NVPTXInstrInfo.h"
#include "NVPTXISelLowering.h"
#include "NVPTXRegisterInfo.h"
#include "NVPTXSubtarget.h"
#include "NVPTXFrameLowering.h"
#include "ManagedStringPool.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
namespace llvm {
/// NVPTXTargetMachine
///
/// NVPTXTargetMachine - Common base for the 32-bit and 64-bit PTX target
/// machines.  Owns the subtarget, data layout, instruction/lowering/DAG
/// info objects, and a string pool whose lifetime matches the machine.
class NVPTXTargetMachine : public LLVMTargetMachine {
  NVPTXSubtarget Subtarget;
  const TargetData DataLayout; // Calculates type size & alignment
  NVPTXInstrInfo InstrInfo;
  NVPTXTargetLowering TLInfo;
  TargetSelectionDAGInfo TSInfo;

  // NVPTX does not have any call stack frame, but need a NVPTX specific
  // FrameLowering class because TargetFrameLowering is abstract.
  NVPTXFrameLowering FrameLowering;

  // Hold Strings that can be free'd all together with NVPTXTargetMachine
  ManagedStringPool ManagedStrPool;

  //bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
  //                            bool DisableVerify, MCContext *&OutCtx);

public:
  //virtual bool addPassesToEmitFile(PassManagerBase &PM,
  //                                 formatted_raw_ostream &Out,
  //                                 CodeGenFileType FileType,
  //                                 CodeGenOpt::Level OptLevel,
  //                                 bool DisableVerify = true) ;

  // Construct a target machine; is64bit selects 32- vs 64-bit PTX.
  NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU,
                     StringRef FS, const TargetOptions &Options,
                     Reloc::Model RM, CodeModel::Model CM,
                     CodeGenOpt::Level OP,
                     bool is64bit);

  // Accessors returning the target-specific info objects owned above.
  virtual const TargetFrameLowering *getFrameLowering() const {
    return &FrameLowering;
  }
  virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
  virtual const TargetData *getTargetData() const { return &DataLayout;}
  virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget;}

  // Register info lives inside the instruction info object.
  virtual const NVPTXRegisterInfo *getRegisterInfo() const {
    return &(InstrInfo.getRegisterInfo());
  }

  virtual NVPTXTargetLowering *getTargetLowering() const {
    return const_cast<NVPTXTargetLowering*>(&TLInfo);
  }

  virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const {
    return &TSInfo;
  }

  //virtual bool addInstSelector(PassManagerBase &PM,
  //                             CodeGenOpt::Level OptLevel);
  //virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level);

  // Pool for strings that must live as long as this target machine.
  ManagedStringPool *getManagedStrPool() const {
    return const_cast<ManagedStringPool*>(&ManagedStrPool);
  }

  virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);

  // Emission of machine code through JITCodeEmitter is not supported.
  virtual bool addPassesToEmitMachineCode(PassManagerBase &,
                                          JITCodeEmitter &,
                                          bool = true) {
    return true;
  }

  // Emission of machine code through MCJIT is not supported.
  virtual bool addPassesToEmitMC(PassManagerBase &,
                                 MCContext *&,
                                 raw_ostream &,
                                 bool = true) {
    return true;
  }

}; // NVPTXTargetMachine.
/// NVPTXTargetMachine32 - Target machine for 32-bit PTX.
class NVPTXTargetMachine32 : public NVPTXTargetMachine {
  virtual void anchor(); // out-of-line definition pins the vtable
public:
  NVPTXTargetMachine32(const Target &T, StringRef TT, StringRef CPU,
                       StringRef FS, const TargetOptions &Options,
                       Reloc::Model RM, CodeModel::Model CM,
                       CodeGenOpt::Level OL);
};

/// NVPTXTargetMachine64 - Target machine for 64-bit PTX.
class NVPTXTargetMachine64 : public NVPTXTargetMachine {
  virtual void anchor(); // out-of-line definition pins the vtable
public:
  NVPTXTargetMachine64(const Target &T, StringRef TT, StringRef CPU,
                       StringRef FS, const TargetOptions &Options,
                       Reloc::Model RM, CodeModel::Model CM,
                       CodeGenOpt::Level OL);
};
} // end namespace llvm
#endif

View File

@ -0,0 +1,105 @@
//===-- NVPTXTargetObjectFile.h - NVPTX Object Info -------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TARGET_NVPTX_TARGETOBJECTFILE_H
#define LLVM_TARGET_NVPTX_TARGETOBJECTFILE_H
#include "NVPTXSection.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include <string>
namespace llvm {
class GlobalVariable;
class Module;
/// NVPTXTargetObjectFile - Section configuration for NVPTX.  PTX has no
/// real object-file sections, so Initialize() fills every section slot
/// with a placeholder NVPTXSection, and the destructor frees them.
/// (Fix: removed the extraneous ';' after each member-function body —
/// ill-formed in C++03 and a -Wextra-semi warning in C++11.)
class NVPTXTargetObjectFile : public TargetLoweringObjectFile {
public:
  NVPTXTargetObjectFile() {}

  // Free the placeholder sections allocated in Initialize().
  // NOTE(review): if Initialize() is never called, these pointers hold
  // whatever the base class left in them — confirm the base constructor
  // zero-initializes all section members.
  ~NVPTXTargetObjectFile() {
    delete TextSection;
    delete DataSection;
    delete BSSSection;
    delete ReadOnlySection;

    delete StaticCtorSection;
    delete StaticDtorSection;
    delete LSDASection;
    delete EHFrameSection;
    delete DwarfAbbrevSection;
    delete DwarfInfoSection;
    delete DwarfLineSection;
    delete DwarfFrameSection;
    delete DwarfPubTypesSection;
    delete DwarfDebugInlineSection;
    delete DwarfStrSection;
    delete DwarfLocSection;
    delete DwarfARangesSection;
    delete DwarfRangesSection;
    delete DwarfMacroInfoSection;
  }

  // Allocate a placeholder NVPTXSection for every section kind the
  // compiler may query; PTX emission never writes real sections.
  virtual void Initialize(MCContext &ctx, const TargetMachine &TM) {
    TextSection = new NVPTXSection(MCSection::SV_ELF,
                                   SectionKind::getText());
    DataSection = new NVPTXSection(MCSection::SV_ELF,
                                   SectionKind::getDataRel());
    BSSSection = new NVPTXSection(MCSection::SV_ELF,
                                  SectionKind::getBSS());
    ReadOnlySection = new NVPTXSection(MCSection::SV_ELF,
                                       SectionKind::getReadOnly());

    StaticCtorSection = new NVPTXSection(MCSection::SV_ELF,
                                         SectionKind::getMetadata());
    StaticDtorSection = new NVPTXSection(MCSection::SV_ELF,
                                         SectionKind::getMetadata());
    LSDASection = new NVPTXSection(MCSection::SV_ELF,
                                   SectionKind::getMetadata());
    EHFrameSection = new NVPTXSection(MCSection::SV_ELF,
                                      SectionKind::getMetadata());
    DwarfAbbrevSection = new NVPTXSection(MCSection::SV_ELF,
                                          SectionKind::getMetadata());
    DwarfInfoSection = new NVPTXSection(MCSection::SV_ELF,
                                        SectionKind::getMetadata());
    DwarfLineSection = new NVPTXSection(MCSection::SV_ELF,
                                        SectionKind::getMetadata());
    DwarfFrameSection = new NVPTXSection(MCSection::SV_ELF,
                                         SectionKind::getMetadata());
    DwarfPubTypesSection = new NVPTXSection(MCSection::SV_ELF,
                                            SectionKind::getMetadata());
    DwarfDebugInlineSection = new NVPTXSection(MCSection::SV_ELF,
                                               SectionKind::getMetadata());
    DwarfStrSection = new NVPTXSection(MCSection::SV_ELF,
                                       SectionKind::getMetadata());
    DwarfLocSection = new NVPTXSection(MCSection::SV_ELF,
                                       SectionKind::getMetadata());
    DwarfARangesSection = new NVPTXSection(MCSection::SV_ELF,
                                           SectionKind::getMetadata());
    DwarfRangesSection = new NVPTXSection(MCSection::SV_ELF,
                                          SectionKind::getMetadata());
    DwarfMacroInfoSection = new NVPTXSection(MCSection::SV_ELF,
                                             SectionKind::getMetadata());
  }

  // All constants are placed in the read-only placeholder section.
  virtual const MCSection *getSectionForConstant(SectionKind Kind) const {
    return ReadOnlySection;
  }

  // Globals with explicit sections all map onto the data placeholder.
  virtual const MCSection *
  getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
                           Mangler *Mang,
                           const TargetMachine &TM) const {
    return DataSection;
  }

};
} // end namespace llvm
#endif

View File

@ -0,0 +1,514 @@
//===- NVPTXUtilities.cpp - Utility Functions -----------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains miscellaneous utility functions
//===----------------------------------------------------------------------===//
#include "NVPTXUtilities.h"
#include "NVPTX.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Module.h"
#include "llvm/Constants.h"
#include "llvm/Operator.h"
#include <algorithm>
#include <cstring>
#include <map>
#include <string>
#include <vector>
//#include <iostream>
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/InstIterator.h"
using namespace llvm;
// Cache of NVVM annotations, keyed as:
//   module -> global value -> (property name -> list of values).
// Filled lazily by cacheAnnotationFromMD().
typedef std::map<std::string, std::vector<unsigned> > key_val_pair_t;
typedef std::map<const GlobalValue *, key_val_pair_t> global_val_annot_t;
typedef std::map<const Module *, global_val_annot_t> per_module_annot_t;

// File-local cache.  'static' gives the ManagedStatic internal linkage so
// the symbol cannot collide with or leak into other translation units.
static ManagedStatic<per_module_annot_t> annotationCache;
static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
assert(md && "Invalid mdnode for annotation");
assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands");
// start index = 1, to skip the global variable key
// increment = 2, to skip the value for each property-value pairs
for (unsigned i = 1, e = md->getNumOperands(); i != e; i += 2) {
// property
const MDString *prop = dyn_cast<MDString>(md->getOperand(i));
assert(prop && "Annotation property not a string");
// value
ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i+1));
assert(Val && "Value operand not a constant int");
std::string keyname = prop->getString().str();
if (retval.find(keyname) != retval.end())
retval[keyname].push_back(Val->getZExtValue());
else {
std::vector<unsigned> tmp;
tmp.push_back(Val->getZExtValue());
retval[keyname] = tmp;
}
}
}
static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
NamedMDNode *NMD = m->getNamedMetadata(llvm::NamedMDForAnnotations);
if (!NMD)
return;
key_val_pair_t tmp;
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
const MDNode *elem = NMD->getOperand(i);
Value *entity = elem->getOperand(0);
// entity may be null due to DCE
if (!entity)
continue;
if (entity != gv)
continue;
// accumulate annotations for entity in tmp
cacheAnnotationFromMD(elem, tmp);
}
if (tmp.empty()) // no annotations for this gv
return;
if ((*annotationCache).find(m) != (*annotationCache).end())
(*annotationCache)[m][gv] = tmp;
else {
global_val_annot_t tmp1;
tmp1[gv] = tmp;
(*annotationCache)[m] = tmp1;
}
}
// findOneNVVMAnnotation - Look up annotation 'prop' on 'gv' and return its
// first recorded value in 'retval'.  Lazily populates the annotation cache
// for the module/global on first query.  Returns false if the property is
// not present.
bool llvm::findOneNVVMAnnotation(const GlobalValue *gv, std::string prop,
                                 unsigned &retval) {
  const Module *m = gv->getParent();
  // Fill the cache if this module or this global has not been seen yet.
  if ((*annotationCache).find(m) == (*annotationCache).end())
    cacheAnnotationFromMD(m, gv);
  else if ((*annotationCache)[m].find(gv) == (*annotationCache)[m].end())
    cacheAnnotationFromMD(m, gv);
  if ((*annotationCache)[m][gv].find(prop) == (*annotationCache)[m][gv].end())
    return false;
  retval = (*annotationCache)[m][gv][prop][0];
  return true;
}

// findAllNVVMAnnotation - Same lookup as above, but returns every value
// recorded for 'prop' on 'gv' in 'retval'.
bool llvm::findAllNVVMAnnotation(const GlobalValue *gv, std::string prop,
                                 std::vector<unsigned> &retval) {
  const Module *m = gv->getParent();
  // Fill the cache if this module or this global has not been seen yet.
  if ((*annotationCache).find(m) == (*annotationCache).end())
    cacheAnnotationFromMD(m, gv);
  else if ((*annotationCache)[m].find(gv) == (*annotationCache)[m].end())
    cacheAnnotationFromMD(m, gv);
  if ((*annotationCache)[m][gv].find(prop) == (*annotationCache)[m][gv].end())
    return false;
  retval = (*annotationCache)[m][gv][prop];
  return true;
}
// isTexture - True if 'val' is a global annotated as a texture symbol.
bool llvm::isTexture(const llvm::Value &val) {
  if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
    unsigned annot;
    if (llvm::findOneNVVMAnnotation(gv,
        llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE],
        annot)) {
      assert((annot == 1) && "Unexpected annotation on a texture symbol");
      return true;
    }
  }
  return false;
}

// isSurface - True if 'val' is a global annotated as a surface symbol.
bool llvm::isSurface(const llvm::Value &val) {
  if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
    unsigned annot;
    if (llvm::findOneNVVMAnnotation(gv,
        llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE],
        annot)) {
      assert((annot == 1) && "Unexpected annotation on a surface symbol");
      return true;
    }
  }
  return false;
}

// isSampler - True if 'val' is a global annotated as a sampler, or a
// function argument whose index appears in the function's sampler
// annotation list.
bool llvm::isSampler(const llvm::Value &val) {
  if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
    unsigned annot;
    if (llvm::findOneNVVMAnnotation(gv,
        llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
        annot)) {
      assert((annot == 1) && "Unexpected annotation on a sampler symbol");
      return true;
    }
  }
  if (const Argument *arg = dyn_cast<Argument>(&val)) {
    const Function *func = arg->getParent();
    std::vector<unsigned> annot;
    if (llvm::findAllNVVMAnnotation(func,
        llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
        annot)) {
      // The annotation stores the indices of sampler arguments.
      if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
        return true;
    }
  }
  return false;
}
// Shared helper: true when 'val' is a function argument whose index is
// listed under the function annotation named 'annotationName'.
static bool argHasImageAnnotation(const llvm::Value &val,
                                  const std::string &annotationName) {
  const Argument *arg = dyn_cast<Argument>(&val);
  if (!arg)
    return false;
  const Function *func = arg->getParent();
  std::vector<unsigned> annot;
  if (!llvm::findAllNVVMAnnotation(func, annotationName, annot))
    return false;
  return std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end();
}

// isImageReadOnly - True if 'val' is an argument annotated as a read-only
// image parameter.
bool llvm::isImageReadOnly(const llvm::Value &val) {
  return argHasImageAnnotation(val,
      llvm::PropertyAnnotationNames[llvm::PROPERTY_ISREADONLY_IMAGE_PARAM]);
}

// isImageWriteOnly - True if 'val' is an argument annotated as a
// write-only image parameter.
bool llvm::isImageWriteOnly(const llvm::Value &val) {
  return argHasImageAnnotation(val,
      llvm::PropertyAnnotationNames[llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM]);
}

// isImage - True if 'val' is an image parameter of either kind.
bool llvm::isImage(const llvm::Value &val) {
  return llvm::isImageReadOnly(val) || llvm::isImageWriteOnly(val);
}
// getTextureName - Return the symbol name of a texture value.
std::string llvm::getTextureName(const llvm::Value &val) {
  assert(val.hasName() && "Found texture variable with no name");
  return val.getName().str();
}

// getSurfaceName - Return the symbol name of a surface value.
std::string llvm::getSurfaceName(const llvm::Value &val) {
  assert(val.hasName() && "Found surface variable with no name");
  return val.getName().str();
}

// getSamplerName - Return the symbol name of a sampler value.
std::string llvm::getSamplerName(const llvm::Value &val) {
  assert(val.hasName() && "Found sampler variable with no name");
  return val.getName().str();
}
// Launch-bound accessors: each reads one NVVM annotation off function F,
// returning true and the value when present.

// Maximum thread-block dimensions (maxntid x/y/z).
bool llvm::getMaxNTIDx(const Function &F, unsigned &x) {
  return (llvm::findOneNVVMAnnotation(&F,
      llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X],
      x));
}

bool llvm::getMaxNTIDy(const Function &F, unsigned &y) {
  return (llvm::findOneNVVMAnnotation(&F,
      llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y],
      y));
}

bool llvm::getMaxNTIDz(const Function &F, unsigned &z) {
  return (llvm::findOneNVVMAnnotation(&F,
      llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z],
      z));
}

// Required thread-block dimensions (reqntid x/y/z).
bool llvm::getReqNTIDx(const Function &F, unsigned &x) {
  return (llvm::findOneNVVMAnnotation(&F,
      llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X],
      x));
}

bool llvm::getReqNTIDy(const Function &F, unsigned &y) {
  return (llvm::findOneNVVMAnnotation(&F,
      llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y],
      y));
}

bool llvm::getReqNTIDz(const Function &F, unsigned &z) {
  return (llvm::findOneNVVMAnnotation(&F,
      llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z],
      z));
}

// Minimum number of CTAs per SM (minnctapersm).
bool llvm::getMinCTASm(const Function &F, unsigned &x) {
  return (llvm::findOneNVVMAnnotation(&F,
      llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM],
      x));
}
// isKernelFunction - True if F is a kernel entry point, determined by the
// NVVM "kernel" annotation or, absent any metadata, by the PTX_Kernel
// calling convention.
bool llvm::isKernelFunction(const Function &F) {
  unsigned annotationValue = 0;
  if (!llvm::findOneNVVMAnnotation(&F,
          llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION],
          annotationValue))
    // No NVVM metadata: fall back to the calling convention.
    return F.getCallingConv() == llvm::CallingConv::PTX_Kernel;
  return annotationValue == 1;
}
bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
std::vector<unsigned> Vs;
bool retval = llvm::findAllNVVMAnnotation(&F,
llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN],
Vs);
if (retval == false)
return false;
for (int i=0, e=Vs.size(); i<e; i++) {
unsigned v = Vs[i];
if ( (v >> 16) == index ) {
align = v & 0xFFFF;
return true;
}
}
return false;
}
// getAlign - Fetch the alignment for operand 'index' of call I from its
// "callalign" metadata.  Entries pack (index << 16) | alignment.
// NOTE(review): the early return when (v >> 16) > index assumes the
// metadata entries are sorted by index — confirm with the producer.
bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) {
  if (MDNode *alignNode = I.getMetadata("callalign")) {
    for (int i=0, n = alignNode->getNumOperands();
        i<n; i++) {
      if (const ConstantInt *CI =
          dyn_cast<ConstantInt>(alignNode->getOperand(i))) {
        unsigned v = CI->getZExtValue();
        // Matching index: extract the alignment from the low 16 bits.
        if ( (v>>16) == index ) {
          align = v & 0xFFFF;
          return true;
        }
        // Passed the requested index without a match: give up early.
        if ( (v>>16) > index ) {
          return false;
        }
      }
    }
  }
  return false;
}
// isBarrierIntrinsic - True for the NVVM/CUDA thread-barrier intrinsics.
bool llvm::isBarrierIntrinsic(Intrinsic::ID id) {
  switch (id) {
  case Intrinsic::nvvm_barrier0:
  case Intrinsic::nvvm_barrier0_popc:
  case Intrinsic::nvvm_barrier0_and:
  case Intrinsic::nvvm_barrier0_or:
  case Intrinsic::cuda_syncthreads:
    return true;
  default:
    return false;
  }
}

// isMemorySpaceTransferIntrinsic - True for every intrinsic that converts
// a pointer between the generic address space and a specific one.
bool llvm::isMemorySpaceTransferIntrinsic(Intrinsic::ID id) {
  switch (id) {
  case Intrinsic::nvvm_ptr_local_to_gen:
  case Intrinsic::nvvm_ptr_shared_to_gen:
  case Intrinsic::nvvm_ptr_global_to_gen:
  case Intrinsic::nvvm_ptr_constant_to_gen:
  case Intrinsic::nvvm_ptr_gen_to_global:
  case Intrinsic::nvvm_ptr_gen_to_shared:
  case Intrinsic::nvvm_ptr_gen_to_local:
  case Intrinsic::nvvm_ptr_gen_to_constant:
  case Intrinsic::nvvm_ptr_gen_to_param:
    return true;
  default:
    return false;
  }
}
// skipPointerTransfer - Strip pointer casts, also walking through NVVM
// memory-space transfer intrinsics.  When ignore_GEP_indices is set, GEPs
// are skipped too, yielding just the base address — useful for simple
// alias disambiguation.
const Value *llvm::skipPointerTransfer(const Value *V,
                                       bool ignore_GEP_indices) {
  V = V->stripPointerCasts();
  while (true) {
    if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
      if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) {
        // Step through the intrinsic to the pointer being transferred.
        V = IS->getArgOperand(0)->stripPointerCasts();
        continue;
      }
    } else if (ignore_GEP_indices)
      if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
        V = GEP->getPointerOperand()->stripPointerCasts();
        continue;
      }
    break;
  }
  return V;
}
// skipPointerTransfer - As above, but always ignores GEP indices and also
// walks through PHI nodes.  'processed' records visited values so cycles
// terminate.  Returns:
//   - NULL when a cycle is detected,
//   - the PHI itself when its incoming values have no single common base,
//   - otherwise the common base value.
const Value *llvm::skipPointerTransfer(const Value *V,
                                       std::set<const Value *> &processed) {
  // Bail out if we have already visited this value (cycle).
  if (processed.find(V) != processed.end())
    return NULL;
  processed.insert(V);

  const Value *V2 = V->stripPointerCasts();
  if (V2 != V && processed.find(V2) != processed.end())
    return NULL;
  processed.insert(V2);

  V = V2;
  while (true) {
    if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
      if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) {
        V = IS->getArgOperand(0)->stripPointerCasts();
        continue;
      }
    } else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
      V = GEP->getPointerOperand()->stripPointerCasts();
      continue;
    } else if (const PHINode *PN = dyn_cast<PHINode>(V)) {
      if (V != V2 && processed.find(V) != processed.end())
        return NULL;
      processed.insert(PN);
      // Recurse into every incoming value looking for one shared base.
      const Value *common = 0;
      for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
        const Value *pv = PN->getIncomingValue(i);
        const Value *base = skipPointerTransfer(pv, processed);
        if (base) {
          if (common == 0)
            common = base;
          else if (common != base)
            return PN; // bases disagree: the PHI is the best answer
        }
      }
      if (common == 0)
        return PN;
      V = common;
    }
    break;
  }
  return V;
}
// The following are some useful utilities for debugging.

// getParentBlock - Basic block containing (or equal to) v, or 0.
BasicBlock *llvm::getParentBlock(Value *v) {
  if (BasicBlock *blk = dyn_cast<BasicBlock>(v))
    return blk;

  if (Instruction *inst = dyn_cast<Instruction>(v))
    return inst->getParent();

  return 0;
}

// getParentFunction - Function containing (or equal to) v, or 0.
Function *llvm::getParentFunction(Value *v) {
  if (Function *fn = dyn_cast<Function>(v))
    return fn;

  if (Instruction *inst = dyn_cast<Instruction>(v))
    return inst->getParent()->getParent();

  if (BasicBlock *blk = dyn_cast<BasicBlock>(v))
    return blk->getParent();

  return 0;
}
// dumpBlock - Debug helper: dump the basic block named 'blockName' inside
// the function containing v, if any.
// Fix: compare names with StringRef::equals instead of strcmp on
// StringRef::data() — the data pointer is not guaranteed to be
// null-terminated.
void llvm::dumpBlock(Value *v, char *blockName) {
  Function *F = getParentFunction(v);
  if (F == 0)
    return;
  for (Function::iterator it = F->begin(), ie = F->end(); it != ie; ++it) {
    BasicBlock *B = it;
    if (B->getName().equals(blockName)) {
      B->dump();
      return;
    }
  }
}
// getInst - Debug helper: find the instruction named 'instName' inside the
// function containing 'base'; returns 0 when not found.
// Fix: compare names with StringRef::equals instead of strcmp on
// StringRef::data() — the data pointer is not guaranteed to be
// null-terminated.
Instruction *llvm::getInst(Value *base, char *instName) {
  Function *F = getParentFunction(base);
  if (F == 0)
    return 0;
  for (inst_iterator it = inst_begin(F), ie = inst_end(F); it != ie; ++it) {
    Instruction *I = &*it;
    if (I->getName().equals(instName)) {
      return I;
    }
  }
  return 0;
}
// dumpInst - Debug helper: dump the instruction named 'instName', if found.
void llvm::dumpInst(Value *base, char *instName) {
  if (Instruction *inst = getInst(base, instName))
    inst->dump();
}
// dumpInstRec - Debug helper: dump an instruction and, before it, every
// instruction it transitively depends on (operands first).
void llvm::dumpInstRec(Value *v, std::set<Instruction *> *visited) {
  Instruction *inst = dyn_cast<Instruction>(v);
  if (!inst)
    return;
  // insert().second is false when the instruction was already visited.
  if (!visited->insert(inst).second)
    return;

  for (unsigned op = 0, numOps = inst->getNumOperands(); op != numOps; ++op)
    dumpInstRec(inst->getOperand(op), visited);

  inst->dump();
}

// dumpInstRec - Convenience overload that owns the visited set.
void llvm::dumpInstRec(Value *v) {
  std::set<Instruction *> seen;
  dumpInstRec(v, &seen);
}
// dumpParent - Debug helper: dump the container of v — the enclosing block
// for an instruction, the enclosing function for a block, or the enclosing
// module for a function.
void llvm::dumpParent(Value *v) {
  if (Instruction *inst = dyn_cast<Instruction>(v)) {
    inst->getParent()->dump();
  } else if (BasicBlock *blk = dyn_cast<BasicBlock>(v)) {
    blk->getParent()->dump();
  } else if (Function *fn = dyn_cast<Function>(v)) {
    fn->getParent()->dump();
  }
}

View File

@ -0,0 +1,94 @@
//===-- NVPTXUtilities - Utilities -----------------------------*- C++ -*-====//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declaration of the NVVM specific utility functions.
//
//===----------------------------------------------------------------------===//
#ifndef NVPTXUTILITIES_H
#define NVPTXUTILITIES_H
#include "llvm/Value.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include <cstdarg>
#include <set>
#include <string>
#include <vector>
namespace llvm
{
#define NVCL_IMAGE2D_READONLY_FUNCNAME "__is_image2D_readonly"
#define NVCL_IMAGE3D_READONLY_FUNCNAME "__is_image3D_readonly"
bool findOneNVVMAnnotation(const llvm::GlobalValue *, std::string, unsigned &);
bool findAllNVVMAnnotation(const llvm::GlobalValue *, std::string,
std::vector<unsigned> &);
bool isTexture(const llvm::Value &);
bool isSurface(const llvm::Value &);
bool isSampler(const llvm::Value &);
bool isImage(const llvm::Value &);
bool isImageReadOnly(const llvm::Value &);
bool isImageWriteOnly(const llvm::Value &);
std::string getTextureName(const llvm::Value &);
std::string getSurfaceName(const llvm::Value &);
std::string getSamplerName(const llvm::Value &);
bool getMaxNTIDx(const llvm::Function &, unsigned &);
bool getMaxNTIDy(const llvm::Function &, unsigned &);
bool getMaxNTIDz(const llvm::Function &, unsigned &);
bool getReqNTIDx(const llvm::Function &, unsigned &);
bool getReqNTIDy(const llvm::Function &, unsigned &);
bool getReqNTIDz(const llvm::Function &, unsigned &);
bool getMinCTASm(const llvm::Function &, unsigned &);
bool isKernelFunction(const llvm::Function &);
bool getAlign(const llvm::Function &, unsigned index, unsigned &);
bool getAlign(const llvm::CallInst &, unsigned index, unsigned &);
bool isBarrierIntrinsic(llvm::Intrinsic::ID);
/// make_vector - Helper function which is useful for building temporary
/// vectors to pass into type construction of CallInst ctors.  This turns a
/// null terminated list of pointers (or other value types) into a real
/// live vector.
///
/// WARNING: the loop stops at the first argument that converts to false
/// (e.g. a null pointer or 0), so T must be a pointer-like or integral
/// type, the call must end with a 0/null sentinel, and no real element may
/// compare equal to it.
template<typename T>
inline std::vector<T> make_vector(T A, ...) {
  va_list Args;
  va_start(Args, A);
  std::vector<T> Result;
  Result.push_back(A);
  // Consume arguments until the terminating "false" sentinel.
  while (T Val = va_arg(Args, T))
    Result.push_back(Val);
  va_end(Args);
  return Result;
}
bool isMemorySpaceTransferIntrinsic(Intrinsic::ID id);
const Value *skipPointerTransfer(const Value *V, bool ignore_GEP_indices);
const Value *skipPointerTransfer(const Value *V,
std::set<const Value *> &processed);
BasicBlock *getParentBlock(Value *v);
Function *getParentFunction(Value *v);
void dumpBlock(Value *v, char *blockName);
Instruction *getInst(Value *base, char *instName);
void dumpInst(Value *base, char *instName);
void dumpInstRec(Value *v, std::set<Instruction *> *visited);
void dumpInstRec(Value *v);
void dumpParent(Value *v);
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,91 @@
//===-- NVPTXutil.cpp - Functions exported to CodeGen --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the functions that can be used in CodeGen.
//
//===----------------------------------------------------------------------===//
#include "NVPTXutil.h"
#include "NVPTX.h"
using namespace llvm;
namespace llvm {
bool isParamLoad(const MachineInstr *MI)
{
if ((MI->getOpcode() != NVPTX::LD_i32_avar) &&
(MI->getOpcode() != NVPTX::LD_i64_avar))
return false;
if (MI->getOperand(2).isImm() == false)
return false;
if (MI->getOperand(2).getImm() != NVPTX::PTXLdStInstCode::PARAM)
return false;
return true;
}
#define DATA_MASK 0x7f
#define DIGIT_WIDTH 7
#define MORE_BYTES 0x80

// Encode val as ULEB128 into 'space' (capacity 'splen' bytes).  On success
// stores the byte count in *nbytes and returns 0; returns 1 when the
// buffer is too small.
static int encode_leb128(uint64_t val, int *nbytes,
                         char *space, int splen)
{
  char *cursor = space;
  char *limit = space + splen;
  do {
    if (cursor >= limit)
      return 1;
    // Emit the low 7 bits; set the continuation bit if more remain.
    unsigned char digit = val & DATA_MASK;
    val >>= DIGIT_WIDTH;
    if (val != 0)
      digit |= MORE_BYTES;
    *cursor = digit;
    ++cursor;
  } while (val);
  *nbytes = cursor - space;
  return 0;
}
#undef DATA_MASK
#undef DIGIT_WIDTH
#undef MORE_BYTES
// encode_leb128 - Pack the register-name string 'str' into a uint64_t:
// the bytes of str are reversed into an integer, ULEB128-encoded, and the
// encoded bytes are packed back into a uint64_t.
// Fix: guard against names longer than 8 characters, which previously
// overran the 8-byte union; also keep 'retval' referenced so NDEBUG
// builds do not warn about an unused variable.
// NOTE(review): the union byte-packing makes the result endian-dependent;
// confirm this is only consumed on little-endian hosts.
uint64_t encode_leb128(const char *str)
{
  union { uint64_t x; char a[8]; } temp64;

  temp64.x = 0;

  unsigned len = strlen(str);
  assert(len <= 8 && "Register name too long for leb128 encoding");

  for (unsigned i = 0, e = len; i != e; ++i)
    temp64.a[i] = str[e - 1 - i];

  char encoded[16];
  int nbytes;

  int retval = encode_leb128(temp64.x, &nbytes, encoded, 16);
  (void)retval;

  assert(retval == 0 &&
         "Encoding to leb128 failed");

  assert(nbytes <= 8 &&
         "Cannot support register names with leb128 encoding > 8 bytes");

  temp64.x = 0;
  for (int i = 0; i < nbytes; ++i)
    temp64.a[i] = encoded[i];

  return temp64.x;
}
} // end namespace llvm

View File

@ -0,0 +1,25 @@
//===-- NVPTXutil.h - Functions exported to CodeGen --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the functions that can be used in CodeGen.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TARGET_NVPTX_UTIL_H
#define LLVM_TARGET_NVPTX_UTIL_H
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
namespace llvm {
bool isParamLoad(const MachineInstr *);
uint64_t encode_leb128(const char *str);
}
#endif

View File

@ -0,0 +1,7 @@
# Hack: include the parent source and binary directories so this library
# can see the NVPTX target's private headers and TableGen'd files.
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )

add_llvm_library(LLVMNVPTXInfo
  NVPTXTargetInfo.cpp
  )

# Make sure the TableGen outputs exist before this library is built.
add_dependencies(LLVMNVPTXInfo NVPTXCommonTableGen)

View File

@ -0,0 +1,23 @@
;===- ./lib/Target/NVPTX/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
; http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;
[component_0]
type = Library
name = NVPTXInfo
parent = NVPTX
required_libraries = MC Support Target
add_to_library_groups = NVPTX

View File

@ -0,0 +1,15 @@
##===- lib/Target/NVPTX/TargetInfo/Makefile ----------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
LIBRARYNAME = LLVMNVPTXInfo
# Hack: we need to include 'main' target directory to grab private headers
CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common

View File

@ -0,0 +1,23 @@
//===-- NVPTXTargetInfo.cpp - NVPTX Target Implementation -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "NVPTX.h"
#include "llvm/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
Target llvm::TheNVPTXTarget32;
Target llvm::TheNVPTXTarget64;
extern "C" void LLVMInitializeNVPTXTargetInfo() {
RegisterTarget<Triple::nvptx> X(TheNVPTXTarget32, "nvptx",
"NVIDIA PTX 32-bit");
RegisterTarget<Triple::nvptx64> Y(TheNVPTXTarget64, "nvptx64",
"NVIDIA PTX 64-bit");
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,125 @@
#ifndef __CL_COMMON_DEFINES_H__
#define __CL_COMMON_DEFINES_H__
// This file includes defines that are common to both kernel code and
// the NVPTX back-end.
//
// Common defines for Image intrinsics
// Channel order
// Channel order codes shared between kernel code and the NVPTX back-end.
enum {
  CLK_R = 0x10B0,
  CLK_A = 0x10B1,
  CLK_RG = 0x10B2,
  CLK_RA = 0x10B3,
  CLK_RGB = 0x10B4,
  CLK_RGBA = 0x10B5,
  CLK_BGRA = 0x10B6,
  CLK_ARGB = 0x10B7,

#if (__NV_CL_C_VERSION == __NV_CL_C_VERSION_1_0)
  // Alias used by the 1.0 headers for the same code as CLK_ARGB.
  CLK_xRGB = 0x10B7,
#endif

  CLK_INTENSITY = 0x10B8,
  CLK_LUMINANCE = 0x10B9

#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
  ,
  // Channel orders only available from version 1.1 on.
  CLK_Rx = 0x10BA,
  CLK_RGx = 0x10BB,
  CLK_RGBx = 0x10BC
#endif
};
// Channel data-type codes for image formats.
typedef enum clk_channel_type {
  // valid formats for float return types
  CLK_SNORM_INT8 = 0x10D0, // four channel RGBA unorm8
  CLK_SNORM_INT16 = 0x10D1, // four channel RGBA unorm16
  CLK_UNORM_INT8 = 0x10D2, // four channel RGBA unorm8
  CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16
  CLK_HALF_FLOAT = 0x10DD, // four channel RGBA half
  CLK_FLOAT = 0x10DE, // four channel RGBA float

#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
  // Packed formats only available from version 1.1 on.
  CLK_UNORM_SHORT_565 = 0x10D4,
  CLK_UNORM_SHORT_555 = 0x10D5,
  CLK_UNORM_INT_101010 = 0x10D6,
#endif

  // valid only for integer return types
  CLK_SIGNED_INT8 = 0x10D7,
  CLK_SIGNED_INT16 = 0x10D8,
  CLK_SIGNED_INT32 = 0x10D9,
  CLK_UNSIGNED_INT8 = 0x10DA,
  CLK_UNSIGNED_INT16 = 0x10DB,
  CLK_UNSIGNED_INT32 = 0x10DC,

  // CI SPI for CPU
  __CLK_UNORM_INT8888 , // four channel ARGB unorm8
  __CLK_UNORM_INT8888R, // four channel BGRA unorm8

  __CLK_VALID_IMAGE_TYPE_COUNT,
  __CLK_INVALID_IMAGE_TYPE = __CLK_VALID_IMAGE_TYPE_COUNT,
  __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to
                                        // represent any image type
  __CLK_VALID_IMAGE_TYPE_MASK = ( 1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS ) - 1
}clk_channel_type;
typedef enum clk_sampler_type {
__CLK_ADDRESS_BASE = 0,
CLK_ADDRESS_NONE = 0 << __CLK_ADDRESS_BASE,
CLK_ADDRESS_CLAMP = 1 << __CLK_ADDRESS_BASE,
CLK_ADDRESS_CLAMP_TO_EDGE = 2 << __CLK_ADDRESS_BASE,
CLK_ADDRESS_REPEAT = 3 << __CLK_ADDRESS_BASE,
CLK_ADDRESS_MIRROR = 4 << __CLK_ADDRESS_BASE,
#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR,
#endif
__CLK_ADDRESS_MASK = CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP |
CLK_ADDRESS_CLAMP_TO_EDGE |
CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR,
__CLK_ADDRESS_BITS = 3, // number of bits required to
// represent address info
__CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS,
CLK_NORMALIZED_COORDS_FALSE = 0,
CLK_NORMALIZED_COORDS_TRUE = 1 << __CLK_NORMALIZED_BASE,
__CLK_NORMALIZED_MASK = CLK_NORMALIZED_COORDS_FALSE |
CLK_NORMALIZED_COORDS_TRUE,
__CLK_NORMALIZED_BITS = 1, // number of bits required to
// represent normalization
__CLK_FILTER_BASE = __CLK_NORMALIZED_BASE +
__CLK_NORMALIZED_BITS,
CLK_FILTER_NEAREST = 0 << __CLK_FILTER_BASE,
CLK_FILTER_LINEAR = 1 << __CLK_FILTER_BASE,
CLK_FILTER_ANISOTROPIC = 2 << __CLK_FILTER_BASE,
__CLK_FILTER_MASK = CLK_FILTER_NEAREST | CLK_FILTER_LINEAR |
CLK_FILTER_ANISOTROPIC,
__CLK_FILTER_BITS = 2, // number of bits required to
// represent address info
__CLK_MIP_BASE = __CLK_FILTER_BASE + __CLK_FILTER_BITS,
CLK_MIP_NEAREST = 0 << __CLK_MIP_BASE,
CLK_MIP_LINEAR = 1 << __CLK_MIP_BASE,
CLK_MIP_ANISOTROPIC = 2 << __CLK_MIP_BASE,
__CLK_MIP_MASK = CLK_MIP_NEAREST | CLK_MIP_LINEAR |
CLK_MIP_ANISOTROPIC,
__CLK_MIP_BITS = 2,
__CLK_SAMPLER_BITS = __CLK_MIP_BASE + __CLK_MIP_BITS,
__CLK_SAMPLER_MASK = __CLK_MIP_MASK | __CLK_FILTER_MASK |
__CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK,
__CLK_ANISOTROPIC_RATIO_BITS = 5,
__CLK_ANISOTROPIC_RATIO_MASK = (int) 0x80000000 >>
(__CLK_ANISOTROPIC_RATIO_BITS-1)
} clk_sampler_type;
// Memory synchronization
#define CLK_LOCAL_MEM_FENCE (1 << 0)
#define CLK_GLOBAL_MEM_FENCE (1 << 1)
#endif // __CL_COMMON_DEFINES_H__

View File

@ -0,0 +1,202 @@
#!/usr/bin/env python
"""Generate the NVPTX back-end register descriptions.

Writes two files into the current directory:
  NVPTXRegisterInfo.td -- TableGen register and register-class definitions
  NVPTXNumRegisters.h  -- C++ header with the per-class register count

PTX is a virtual ISA with an effectively unbounded register set, so the
back-end simply models every register class as a fixed pool of
``num_regs`` virtual registers.
"""

num_regs = 396  # size of every virtual register pool


def write_pool(out, fmt):
    """Emit one TableGen ``def`` line per register in the pool.

    ``fmt`` is a printf-style template taking the register index twice
    (once for the def name, once for the PTX register string).
    """
    for i in range(num_regs):
        out.write(fmt % (i, i))


# (banner line, per-register def template) for each scalar register file.
scalar_sections = [
    ('//===--- Predicate --------------------------------------------------------===//',
     'def P%d : NVPTXReg<"%%p%d">;\n'),
    ('//===--- 8-bit ------------------------------------------------------------===//',
     'def RC%d : NVPTXReg<"%%rc%d">;\n'),
    ('//===--- 16-bit -----------------------------------------------------------===//',
     'def RS%d : NVPTXReg<"%%rs%d">;\n'),
    ('//===--- 32-bit -----------------------------------------------------------===//',
     'def R%d : NVPTXReg<"%%r%d">;\n'),
    ('//===--- 64-bit -----------------------------------------------------------===//',
     'def RL%d : NVPTXReg<"%%rl%d">;\n'),
    ('//===--- 32-bit float -----------------------------------------------------===//',
     'def F%d : NVPTXReg<"%%f%d">;\n'),
    ('//===--- 64-bit float -----------------------------------------------------===//',
     'def FL%d : NVPTXReg<"%%fl%d">;\n'),
]

# (class name, value type, alignment, register-name prefix) for each
# scalar and argument register class.
scalar_classes = [
    ('Int1Regs',       'i1',  8,  'P'),
    ('Int8Regs',       'i8',  8,  'RC'),
    ('Int16Regs',      'i16', 16, 'RS'),
    ('Int32Regs',      'i32', 32, 'R'),
    ('Int64Regs',      'i64', 64, 'RL'),
    ('Float32Regs',    'f32', 32, 'F'),
    ('Float64Regs',    'f64', 64, 'FL'),
    ('Int32ArgRegs',   'i32', 32, 'ia'),
    ('Int64ArgRegs',   'i64', 64, 'la'),
    ('Float32ArgRegs', 'f32', 32, 'fa'),
    ('Float64ArgRegs', 'f64', 64, 'da'),
]

# (class name, vector type, alignment, register prefix, scalar class,
# element count, PTX type suffix) for each vector register class.
vector_classes = [
    ('V2F32Regs', 'v2f32', 64,  'v2b32', 'Float32Regs', 2, '.v2.f32'),
    ('V4F32Regs', 'v4f32', 128, 'v4b32', 'Float32Regs', 4, '.v4.f32'),
    ('V2I32Regs', 'v2i32', 64,  'v2b32', 'Int32Regs',   2, '.v2.u32'),
    ('V4I32Regs', 'v4i32', 128, 'v4b32', 'Int32Regs',   4, '.v4.u32'),
    ('V2F64Regs', 'v2f64', 128, 'v2b64', 'Float64Regs', 2, '.v2.f64'),
    ('V2I64Regs', 'v2i64', 128, 'v2b64', 'Int64Regs',   2, '.v2.u64'),
    ('V2I16Regs', 'v2i16', 32,  'v2b16', 'Int16Regs',   2, '.v2.u16'),
    ('V4I16Regs', 'v4i16', 64,  'v4b16', 'Int16Regs',   4, '.v4.u16'),
    ('V2I8Regs',  'v2i8',  16,  'v2b8',  'Int8Regs',    2, '.v2.u8'),
    ('V4I8Regs',  'v4i8',  32,  'v4b8',  'Int8Regs',    4, '.v4.u8'),
]

last = num_regs - 1  # upper bound used in TableGen (sequence ...) ranges

# `with` guarantees the handle is closed even if a write raises.
with open('NVPTXRegisterInfo.td', 'w') as out:
    out.write('''
//===-- NVPTXRegisterInfo.td - NVPTX Register defs ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Declarations that describe the PTX register file
//===----------------------------------------------------------------------===//
class NVPTXReg<string n> : Register<n> {
let Namespace = "NVPTX";
}
class NVPTXRegClass<list<ValueType> regTypes, int alignment, dag regList>
: RegisterClass <"NVPTX", regTypes, alignment, regList>;
//===----------------------------------------------------------------------===//
// Registers
//===----------------------------------------------------------------------===//
// Special Registers used as stack pointer
def VRFrame : NVPTXReg<"%SP">;
def VRFrameLocal : NVPTXReg<"%SPL">;
// Special Registers used as the stack
def VRDepot : NVPTXReg<"%Depot">;
''')

    # Scalar register pools (predicate, int 8/16/32/64, float 32/64).
    for banner, fmt in scalar_sections:
        out.write('\n%s\n' % banner)
        write_pool(out, fmt)

    # Vector register pools: 2- and 4-element groupings of 8/16/32/64-bit
    # registers (no v4 x 64-bit pool).
    out.write('\n//===--- Vector -----------------------------------------------------------===//\n')
    for pfx in ('v2b8', 'v2b16', 'v2b32', 'v2b64', 'v4b8', 'v4b16', 'v4b32'):
        write_pool(out, 'def ' + pfx + '_%d : NVPTXReg<"%%' + pfx + '_%d">;\n')

    # Argument register pools (int 32/64, float 32/64).
    out.write('\n//===--- Arguments --------------------------------------------------------===//\n')
    for pfx in ('ia', 'la', 'fa', 'da'):
        write_pool(out, 'def ' + pfx + '%d : NVPTXReg<"%%' + pfx + '%d">;\n')

    out.write('''
//===----------------------------------------------------------------------===//
// Register classes
//===----------------------------------------------------------------------===//
''')
    for name, ty, align, pfx in scalar_classes:
        out.write('def %s : NVPTXRegClass<[%s], %d, (add (sequence "%s%%u", 0, %d))>;\n'
                  % (name, ty, align, pfx, last))
    out.write('''
// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
''')

    # Vector register classes carry extra fields (scalar class, element
    # count, PTX name suffix) consumed by the back-end.
    out.write('''
class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
NVPTXRegClass sClass,
int e,
string n>
: NVPTXRegClass<regTypes, alignment, regList>
{
NVPTXRegClass scalarClass=sClass;
int elems=e;
string name=n;
}
''')
    for name, vt, align, pfx, scalar, elems, sfx in vector_classes:
        out.write('def %s\n : NVPTXVecRegClass<[%s], %d, (add (sequence "%s_%%u", 0, %d)),\n %s, %d, "%s">;\n'
                  % (name, vt, align, pfx, last, scalar, elems, sfx))

# Companion header exposing the pool size to C++ code.
with open('NVPTXNumRegisters.h', 'w') as out:
    out.write('''
//===-- NVPTXNumRegisters.h - PTX Register Info ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef NVPTX_NUM_REGISTERS_H
#define NVPTX_NUM_REGISTERS_H
namespace llvm {
const unsigned NVPTXNumRegisters = %d;
}
#endif
''' % num_regs)

View File

@ -310,6 +310,7 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
hexagon-*) llvm_cv_target_arch="Hexagon" ;;
mblaze-*) llvm_cv_target_arch="MBlaze" ;;
ptx-*) llvm_cv_target_arch="PTX" ;;
nvptx-*) llvm_cv_target_arch="NVPTX" ;;
*) llvm_cv_target_arch="Unknown" ;;
esac])
@ -457,6 +458,7 @@ else
Hexagon) AC_SUBST(TARGET_HAS_JIT,0) ;;
MBlaze) AC_SUBST(TARGET_HAS_JIT,0) ;;
PTX) AC_SUBST(TARGET_HAS_JIT,0) ;;
NVPTX) AC_SUBST(TARGET_HAS_JIT,0) ;;
*) AC_SUBST(TARGET_HAS_JIT,0) ;;
esac
fi
@ -567,13 +569,13 @@ TARGETS_TO_BUILD=""
AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
[Build specific host targets: all or target1,target2,... Valid targets are:
host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon,
xcore, msp430, ptx, cbe, and cpp (default=all)]),,
xcore, msp430, ptx, nvptx, cbe, and cpp (default=all)]),,
enableval=all)
if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze PTX" ;;
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze PTX NVPTX" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@ -589,6 +591,7 @@ case "$enableval" in
cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
host) case "$llvm_cv_target_arch" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@ -602,6 +605,7 @@ case "$enableval" in
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
NVPTX) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
*) AC_MSG_ERROR([Can not set target to build]) ;;
esac ;;
*) AC_MSG_ERROR([Unrecognized target $a_target]) ;;

View File

@ -1402,7 +1402,8 @@ Optional Features:
--enable-targets Build specific host targets: all or
target1,target2,... Valid targets are: host, x86,
x86_64, sparc, powerpc, arm, mips, spu, hexagon,
xcore, msp430, ptx, cbe, and cpp (default=all)
xcore, msp430, ptx, nvptx, cbe, and cpp
(default=all)
--enable-bindings Build specific language bindings:
all,auto,none,{binding-name} (default=auto)
--enable-libffi Check for the presence of libffi (default is NO)
@ -3846,6 +3847,7 @@ else
hexagon-*) llvm_cv_target_arch="Hexagon" ;;
mblaze-*) llvm_cv_target_arch="MBlaze" ;;
ptx-*) llvm_cv_target_arch="PTX" ;;
nvptx-*) llvm_cv_target_arch="NVPTX" ;;
*) llvm_cv_target_arch="Unknown" ;;
esac
fi
@ -5069,6 +5071,8 @@ else
MBlaze) TARGET_HAS_JIT=0
;;
PTX) TARGET_HAS_JIT=0
;;
NVPTX) TARGET_HAS_JIT=0
;;
*) TARGET_HAS_JIT=0
;;
@ -5254,7 +5258,7 @@ if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze PTX" ;;
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze PTX NVPTX" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@ -5270,6 +5274,7 @@ case "$enableval" in
cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
host) case "$llvm_cv_target_arch" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@ -5283,6 +5288,7 @@ case "$enableval" in
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
NVPTX) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
*) { { echo "$as_me:$LINENO: error: Can not set target to build" >&5
echo "$as_me: error: Can not set target to build" >&2;}
{ (exit 1); exit 1; }; } ;;
@ -10307,7 +10313,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
#line 10303 "configure"
#line 10316 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H

View File

@ -0,0 +1,55 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
; Checks that entries in the !nvvm.annotations module metadata are
; translated into PTX directives: annotated globals become
; .texref/.surfref, annotated functions become kernels (.entry) and get
; launch-bound directives (.maxntid/.reqntid/.minnctapersm).
@texture = internal addrspace(1) global i64 0, align 8
; CHECK: .global .texref texture
@surface = internal addrspace(1) global i64 0, align 8
; CHECK: .global .surfref surface
; Kernel marked via !1; launch bounds come from !2.
; CHECK: .entry kernel_func_maxntid
define void @kernel_func_maxntid(float* %a) {
; CHECK: .maxntid 10, 20, 30
; CHECK: ret
ret void
}
; Kernel marked via !3; required ntid comes from !4.
; CHECK: .entry kernel_func_reqntid
define void @kernel_func_reqntid(float* %a) {
; CHECK: .reqntid 11, 22, 33
; CHECK: ret
ret void
}
; Kernel marked via !5; "minctasm" annotation (!6) emits .minnctapersm.
; CHECK: .entry kernel_func_minctasm
define void @kernel_func_minctasm(float* %a) {
; CHECK: .minnctapersm 42
; CHECK: ret
ret void
}
!nvvm.annotations = !{!1, !2, !3, !4, !5, !6, !7, !8}
!1 = metadata !{void (float*)* @kernel_func_maxntid, metadata !"kernel", i32 1}
!2 = metadata !{void (float*)* @kernel_func_maxntid,
metadata !"maxntidx", i32 10,
metadata !"maxntidy", i32 20,
metadata !"maxntidz", i32 30}
!3 = metadata !{void (float*)* @kernel_func_reqntid, metadata !"kernel", i32 1}
!4 = metadata !{void (float*)* @kernel_func_reqntid,
metadata !"reqntidx", i32 11,
metadata !"reqntidy", i32 22,
metadata !"reqntidz", i32 33}
!5 = metadata !{void (float*)* @kernel_func_minctasm, metadata !"kernel", i32 1}
!6 = metadata !{void (float*)* @kernel_func_minctasm,
metadata !"minctasm", i32 42}
!7 = metadata !{i64 addrspace(1)* @texture, metadata !"texture", i32 1}
!8 = metadata !{i64 addrspace(1)* @surface, metadata !"surface", i32 1}

View File

@ -0,0 +1,72 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
; Basic f32/f64 arithmetic instruction selection at -mcpu=sm_10.
; %f registers are 32-bit float, %fl registers are 64-bit float.
; Note the sm_10-specific expectation: f32 division selects
; div.full.f32 (the non-IEEE full-range divide); f64 uses div.rn.f64.
;; These tests should run for all targets
;;===-- Basic instruction selection tests ---------------------------------===;;
;;; f64
define double @fadd_f64(double %a, double %b) {
; CHECK: add.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
; CHECK: ret
%ret = fadd double %a, %b
ret double %ret
}
define double @fsub_f64(double %a, double %b) {
; CHECK: sub.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
; CHECK: ret
%ret = fsub double %a, %b
ret double %ret
}
define double @fmul_f64(double %a, double %b) {
; CHECK: mul.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
; CHECK: ret
%ret = fmul double %a, %b
ret double %ret
}
define double @fdiv_f64(double %a, double %b) {
; CHECK: div.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
; CHECK: ret
%ret = fdiv double %a, %b
ret double %ret
}
;; PTX does not have a floating-point rem instruction
;;; f32
define float @fadd_f32(float %a, float %b) {
; CHECK: add.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
; CHECK: ret
%ret = fadd float %a, %b
ret float %ret
}
define float @fsub_f32(float %a, float %b) {
; CHECK: sub.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
; CHECK: ret
%ret = fsub float %a, %b
ret float %ret
}
define float @fmul_f32(float %a, float %b) {
; CHECK: mul.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
; CHECK: ret
%ret = fmul float %a, %b
ret float %ret
}
; sm_10: expect the full-range (non-IEEE) single-precision divide.
define float @fdiv_f32(float %a, float %b) {
; CHECK: div.full.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
; CHECK: ret
%ret = fdiv float %a, %b
ret float %ret
}
;; PTX does not have a floating-point rem instruction

View File

@ -0,0 +1,72 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
; Basic f32/f64 arithmetic instruction selection at -mcpu=sm_20.
; %f registers are 32-bit float, %fl registers are 64-bit float.
; Note the sm_20-specific expectation: f32 division selects the
; IEEE round-to-nearest divide div.rn.f32 (not div.full.f32 as on sm_10).
;; These tests should run for all targets
;;===-- Basic instruction selection tests ---------------------------------===;;
;;; f64
define double @fadd_f64(double %a, double %b) {
; CHECK: add.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
; CHECK: ret
%ret = fadd double %a, %b
ret double %ret
}
define double @fsub_f64(double %a, double %b) {
; CHECK: sub.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
; CHECK: ret
%ret = fsub double %a, %b
ret double %ret
}
define double @fmul_f64(double %a, double %b) {
; CHECK: mul.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
; CHECK: ret
%ret = fmul double %a, %b
ret double %ret
}
define double @fdiv_f64(double %a, double %b) {
; CHECK: div.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
; CHECK: ret
%ret = fdiv double %a, %b
ret double %ret
}
;; PTX does not have a floating-point rem instruction
;;; f32
define float @fadd_f32(float %a, float %b) {
; CHECK: add.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
; CHECK: ret
%ret = fadd float %a, %b
ret float %ret
}
define float @fsub_f32(float %a, float %b) {
; CHECK: sub.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
; CHECK: ret
%ret = fsub float %a, %b
ret float %ret
}
define float @fmul_f32(float %a, float %b) {
; CHECK: mul.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
; CHECK: ret
%ret = fmul float %a, %b
ret float %ret
}
; sm_20: expect the IEEE round-to-nearest single-precision divide.
define float @fdiv_f32(float %a, float %b) {
; CHECK: div.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
; CHECK: ret
%ret = fdiv float %a, %b
ret float %ret
}
;; PTX does not have a floating-point rem instruction

View File

@ -0,0 +1,295 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
; Exercises basic integer arithmetic, logical, and shift instruction
; selection at each width.  The CHECK patterns pin both the PTX opcode
; (e.g. add.s64, rem.u32, shl.b16) and the register class used for each
; width: %rl = 64-bit, %r = 32-bit, %rs = 16-bit virtual registers.
;; These tests should run for all targets
;;===-- Basic instruction selection tests ---------------------------------===;;
;;; i64
define i64 @add_i64(i64 %a, i64 %b) {
; CHECK: add.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
; CHECK: ret
%ret = add i64 %a, %b
ret i64 %ret
}
define i64 @sub_i64(i64 %a, i64 %b) {
; CHECK: sub.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
; CHECK: ret
%ret = sub i64 %a, %b
ret i64 %ret
}
define i64 @mul_i64(i64 %a, i64 %b) {
; CHECK: mul.lo.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
; CHECK: ret
%ret = mul i64 %a, %b
ret i64 %ret
}
define i64 @sdiv_i64(i64 %a, i64 %b) {
; CHECK: div.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
; CHECK: ret
%ret = sdiv i64 %a, %b
ret i64 %ret
}
define i64 @udiv_i64(i64 %a, i64 %b) {
; CHECK: div.u64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
; CHECK: ret
%ret = udiv i64 %a, %b
ret i64 %ret
}
define i64 @srem_i64(i64 %a, i64 %b) {
; CHECK: rem.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
; CHECK: ret
%ret = srem i64 %a, %b
ret i64 %ret
}
define i64 @urem_i64(i64 %a, i64 %b) {
; CHECK: rem.u64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
; CHECK: ret
%ret = urem i64 %a, %b
ret i64 %ret
}
define i64 @and_i64(i64 %a, i64 %b) {
; CHECK: and.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
; CHECK: ret
%ret = and i64 %a, %b
ret i64 %ret
}
define i64 @or_i64(i64 %a, i64 %b) {
; CHECK: or.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
; CHECK: ret
%ret = or i64 %a, %b
ret i64 %ret
}
define i64 @xor_i64(i64 %a, i64 %b) {
; CHECK: xor.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
; CHECK: ret
%ret = xor i64 %a, %b
ret i64 %ret
}
; Shifts: note the amount operand is a 32-bit %r register at all widths.
define i64 @shl_i64(i64 %a, i64 %b) {
; PTX requires 32-bit shift amount
; CHECK: shl.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%ret = shl i64 %a, %b
ret i64 %ret
}
define i64 @ashr_i64(i64 %a, i64 %b) {
; PTX requires 32-bit shift amount
; CHECK: shr.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%ret = ashr i64 %a, %b
ret i64 %ret
}
define i64 @lshr_i64(i64 %a, i64 %b) {
; PTX requires 32-bit shift amount
; CHECK: shr.u64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%ret = lshr i64 %a, %b
ret i64 %ret
}
;;; i32
define i32 @add_i32(i32 %a, i32 %b) {
; CHECK: add.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%ret = add i32 %a, %b
ret i32 %ret
}
define i32 @sub_i32(i32 %a, i32 %b) {
; CHECK: sub.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%ret = sub i32 %a, %b
ret i32 %ret
}
define i32 @mul_i32(i32 %a, i32 %b) {
; CHECK: mul.lo.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%ret = mul i32 %a, %b
ret i32 %ret
}
define i32 @sdiv_i32(i32 %a, i32 %b) {
; CHECK: div.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%ret = sdiv i32 %a, %b
ret i32 %ret
}
define i32 @udiv_i32(i32 %a, i32 %b) {
; CHECK: div.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%ret = udiv i32 %a, %b
ret i32 %ret
}
define i32 @srem_i32(i32 %a, i32 %b) {
; CHECK: rem.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%ret = srem i32 %a, %b
ret i32 %ret
}
define i32 @urem_i32(i32 %a, i32 %b) {
; CHECK: rem.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%ret = urem i32 %a, %b
ret i32 %ret
}
define i32 @and_i32(i32 %a, i32 %b) {
; CHECK: and.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%ret = and i32 %a, %b
ret i32 %ret
}
define i32 @or_i32(i32 %a, i32 %b) {
; CHECK: or.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%ret = or i32 %a, %b
ret i32 %ret
}
define i32 @xor_i32(i32 %a, i32 %b) {
; CHECK: xor.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%ret = xor i32 %a, %b
ret i32 %ret
}
define i32 @shl_i32(i32 %a, i32 %b) {
; CHECK: shl.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%ret = shl i32 %a, %b
ret i32 %ret
}
define i32 @ashr_i32(i32 %a, i32 %b) {
; CHECK: shr.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%ret = ashr i32 %a, %b
ret i32 %ret
}
define i32 @lshr_i32(i32 %a, i32 %b) {
; CHECK: shr.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%ret = lshr i32 %a, %b
ret i32 %ret
}
;;; i16
define i16 @add_i16(i16 %a, i16 %b) {
; CHECK: add.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: ret
%ret = add i16 %a, %b
ret i16 %ret
}
define i16 @sub_i16(i16 %a, i16 %b) {
; CHECK: sub.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: ret
%ret = sub i16 %a, %b
ret i16 %ret
}
define i16 @mul_i16(i16 %a, i16 %b) {
; CHECK: mul.lo.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: ret
%ret = mul i16 %a, %b
ret i16 %ret
}
define i16 @sdiv_i16(i16 %a, i16 %b) {
; CHECK: div.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: ret
%ret = sdiv i16 %a, %b
ret i16 %ret
}
define i16 @udiv_i16(i16 %a, i16 %b) {
; CHECK: div.u16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: ret
%ret = udiv i16 %a, %b
ret i16 %ret
}
define i16 @srem_i16(i16 %a, i16 %b) {
; CHECK: rem.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: ret
%ret = srem i16 %a, %b
ret i16 %ret
}
define i16 @urem_i16(i16 %a, i16 %b) {
; CHECK: rem.u16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: ret
%ret = urem i16 %a, %b
ret i16 %ret
}
define i16 @and_i16(i16 %a, i16 %b) {
; CHECK: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: ret
%ret = and i16 %a, %b
ret i16 %ret
}
define i16 @or_i16(i16 %a, i16 %b) {
; CHECK: or.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: ret
%ret = or i16 %a, %b
ret i16 %ret
}
define i16 @xor_i16(i16 %a, i16 %b) {
; CHECK: xor.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: ret
%ret = xor i16 %a, %b
ret i16 %ret
}
define i16 @shl_i16(i16 %a, i16 %b) {
; PTX requires 32-bit shift amount
; CHECK: shl.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%ret = shl i16 %a, %b
ret i16 %ret
}
define i16 @ashr_i16(i16 %a, i16 %b) {
; PTX requires 32-bit shift amount
; CHECK: shr.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%ret = ashr i16 %a, %b
ret i16 %ret
}
define i16 @lshr_i16(i16 %a, i16 %b) {
; PTX requires 32-bit shift amount
; CHECK: shr.u16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%ret = lshr i16 %a, %b
ret i16 %ret
}

View File

@ -0,0 +1,32 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
; Checks the two ways a function becomes a PTX kernel (.entry):
; either the ptx_kernel calling convention, or a "kernel" entry in the
; !nvvm.annotations module metadata.  Everything else is a device
; function (.func).
;; Kernel function using ptx_kernel calling conv
; CHECK: .entry kernel_func
define ptx_kernel void @kernel_func(float* %a) {
; CHECK: ret
ret void
}
;; Device function
; CHECK: .func device_func
define void @device_func(float* %a) {
; CHECK: ret
ret void
}
;; Kernel function using NVVM metadata
; CHECK: .entry metadata_kernel
define void @metadata_kernel(float* %a) {
; CHECK: ret
ret void
}
; !1 below marks @metadata_kernel as a kernel.
!nvvm.annotations = !{!1}
!1 = metadata !{void (float*)* @metadata_kernel, metadata !"kernel", i32 1}

View File

@ -0,0 +1,389 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
;; These tests should run for all targets
;;===-- Basic instruction selection tests ---------------------------------===;;
;;; i64
define i64 @icmp_eq_i64(i64 %a, i64 %b) {
; CHECK: setp.eq.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp eq i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_ne_i64(i64 %a, i64 %b) {
; CHECK: setp.ne.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp ne i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_ugt_i64(i64 %a, i64 %b) {
; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp ugt i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_uge_i64(i64 %a, i64 %b) {
; CHECK: setp.ge.u64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp uge i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_ult_i64(i64 %a, i64 %b) {
; CHECK: setp.lt.u64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp ult i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_ule_i64(i64 %a, i64 %b) {
; CHECK: setp.le.u64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp ule i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_sgt_i64(i64 %a, i64 %b) {
; CHECK: setp.gt.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp sgt i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_sge_i64(i64 %a, i64 %b) {
; CHECK: setp.ge.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp sge i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_slt_i64(i64 %a, i64 %b) {
; CHECK: setp.lt.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp slt i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_sle_i64(i64 %a, i64 %b) {
; CHECK: setp.le.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp sle i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
;;; i32
define i32 @icmp_eq_i32(i32 %a, i32 %b) {
; CHECK: setp.eq.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp eq i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_ne_i32(i32 %a, i32 %b) {
; CHECK: setp.ne.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp ne i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_ugt_i32(i32 %a, i32 %b) {
; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp ugt i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_uge_i32(i32 %a, i32 %b) {
; CHECK: setp.ge.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp uge i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_ult_i32(i32 %a, i32 %b) {
; CHECK: setp.lt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp ult i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_ule_i32(i32 %a, i32 %b) {
; CHECK: setp.le.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp ule i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_sgt_i32(i32 %a, i32 %b) {
; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp sgt i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_sge_i32(i32 %a, i32 %b) {
; CHECK: setp.ge.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp sge i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_slt_i32(i32 %a, i32 %b) {
; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp slt i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_sle_i32(i32 %a, i32 %b) {
; CHECK: setp.le.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp sle i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
;;; i16
define i16 @icmp_eq_i16(i16 %a, i16 %b) {
; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp eq i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_ne_i16(i16 %a, i16 %b) {
; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp ne i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_ugt_i16(i16 %a, i16 %b) {
; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp ugt i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_uge_i16(i16 %a, i16 %b) {
; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp uge i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_ult_i16(i16 %a, i16 %b) {
; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp ult i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_ule_i16(i16 %a, i16 %b) {
; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp ule i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_sgt_i16(i16 %a, i16 %b) {
; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp sgt i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_sge_i16(i16 %a, i16 %b) {
; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp sge i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_slt_i16(i16 %a, i16 %b) {
; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp slt i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_sle_i16(i16 %a, i16 %b) {
; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp sle i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
;;; i8
define i8 @icmp_eq_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp eq i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
}
define i8 @icmp_ne_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp ne i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
}
define i8 @icmp_ugt_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp ugt i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
}
define i8 @icmp_uge_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp uge i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
}
define i8 @icmp_ult_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp ult i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
}
define i8 @icmp_ule_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp ule i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
}
define i8 @icmp_sgt_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp sgt i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
}
define i8 @icmp_sge_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp sge i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
}
define i8 @icmp_slt_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp slt i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
}
define i8 @icmp_sle_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp sle i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
}

View File

@ -0,0 +1,146 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s

; Floating-point conversion tests. Each fptoui/uitofp/sitofp/fptrunc/fpext
; is expected to lower to a single PTX cvt instruction with the rounding
; mode pinned by the CHECK line (rzi = round-toward-zero integer,
; rn = round-to-nearest). Register classes: %rs = i16, %r = i32, %rl = i64,
; %f = f32, %fl = f64.

; fptoui: float/double -> unsigned integer, cvt.rzi.uNN.fNN.
define i16 @cvt_i16_f32(float %x) {
; CHECK: cvt.rzi.u16.f32 %rs{{[0-9]+}}, %f{{[0-9]+}};
; CHECK: ret;
%a = fptoui float %x to i16
ret i16 %a
}
define i16 @cvt_i16_f64(double %x) {
; CHECK: cvt.rzi.u16.f64 %rs{{[0-9]+}}, %fl{{[0-9]+}};
; CHECK: ret;
%a = fptoui double %x to i16
ret i16 %a
}
define i32 @cvt_i32_f32(float %x) {
; CHECK: cvt.rzi.u32.f32 %r{{[0-9]+}}, %f{{[0-9]+}};
; CHECK: ret;
%a = fptoui float %x to i32
ret i32 %a
}
define i32 @cvt_i32_f64(double %x) {
; CHECK: cvt.rzi.u32.f64 %r{{[0-9]+}}, %fl{{[0-9]+}};
; CHECK: ret;
%a = fptoui double %x to i32
ret i32 %a
}
define i64 @cvt_i64_f32(float %x) {
; CHECK: cvt.rzi.u64.f32 %rl{{[0-9]+}}, %f{{[0-9]+}};
; CHECK: ret;
%a = fptoui float %x to i64
ret i64 %a
}
define i64 @cvt_i64_f64(double %x) {
; CHECK: cvt.rzi.u64.f64 %rl{{[0-9]+}}, %fl{{[0-9]+}};
; CHECK: ret;
%a = fptoui double %x to i64
ret i64 %a
}

; uitofp to float: cvt.rn.f32.uNN.
define float @cvt_f32_i16(i16 %x) {
; CHECK: cvt.rn.f32.u16 %f{{[0-9]+}}, %rs{{[0-9]+}};
; CHECK: ret;
%a = uitofp i16 %x to float
ret float %a
}
define float @cvt_f32_i32(i32 %x) {
; CHECK: cvt.rn.f32.u32 %f{{[0-9]+}}, %r{{[0-9]+}};
; CHECK: ret;
%a = uitofp i32 %x to float
ret float %a
}
define float @cvt_f32_i64(i64 %x) {
; CHECK: cvt.rn.f32.u64 %f{{[0-9]+}}, %rl{{[0-9]+}};
; CHECK: ret;
%a = uitofp i64 %x to float
ret float %a
}
; fptrunc double -> float rounds (cvt.rn.f32.f64).
define float @cvt_f32_f64(double %x) {
; CHECK: cvt.rn.f32.f64 %f{{[0-9]+}}, %fl{{[0-9]+}};
; CHECK: ret;
%a = fptrunc double %x to float
ret float %a
}
; sitofp to float: cvt.rn.f32.sNN.
define float @cvt_f32_s16(i16 %x) {
; CHECK: cvt.rn.f32.s16 %f{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: ret
%a = sitofp i16 %x to float
ret float %a
}
define float @cvt_f32_s32(i32 %x) {
; CHECK: cvt.rn.f32.s32 %f{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%a = sitofp i32 %x to float
ret float %a
}
define float @cvt_f32_s64(i64 %x) {
; CHECK: cvt.rn.f32.s64 %f{{[0-9]+}}, %rl{{[0-9]+}}
; CHECK: ret
%a = sitofp i64 %x to float
ret float %a
}

; uitofp to double: cvt.rn.f64.uNN.
define double @cvt_f64_i16(i16 %x) {
; CHECK: cvt.rn.f64.u16 %fl{{[0-9]+}}, %rs{{[0-9]+}};
; CHECK: ret;
%a = uitofp i16 %x to double
ret double %a
}
define double @cvt_f64_i32(i32 %x) {
; CHECK: cvt.rn.f64.u32 %fl{{[0-9]+}}, %r{{[0-9]+}};
; CHECK: ret;
%a = uitofp i32 %x to double
ret double %a
}
define double @cvt_f64_i64(i64 %x) {
; CHECK: cvt.rn.f64.u64 %fl{{[0-9]+}}, %rl{{[0-9]+}};
; CHECK: ret;
%a = uitofp i64 %x to double
ret double %a
}
; fpext float -> double is exact, so no rounding-mode suffix is expected.
define double @cvt_f64_f32(float %x) {
; CHECK: cvt.f64.f32 %fl{{[0-9]+}}, %f{{[0-9]+}};
; CHECK: ret;
%a = fpext float %x to double
ret double %a
}
; sitofp to double: cvt.rn.f64.sNN.
define double @cvt_f64_s16(i16 %x) {
; CHECK: cvt.rn.f64.s16 %fl{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: ret
%a = sitofp i16 %x to double
ret double %a
}
define double @cvt_f64_s32(i32 %x) {
; CHECK: cvt.rn.f64.s32 %fl{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%a = sitofp i32 %x to double
ret double %a
}
define double @cvt_f64_s64(i64 %x) {
; CHECK: cvt.rn.f64.s64 %fl{{[0-9]+}}, %rl{{[0-9]+}}
; CHECK: ret
%a = sitofp i64 %x to double
ret double %a
}

View File

@ -0,0 +1,55 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s

; Integer width-conversion tests for sm_10: trunc and zext are expected to
; lower to explicit cvt.uNN.uMM instructions between the register classes
; (%rs = i16, %r = i32, %rl = i64).

; i16
define i16 @cvt_i16_i32(i32 %x) {
; CHECK: cvt.u16.u32 %rs{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%a = trunc i32 %x to i16
ret i16 %a
}
define i16 @cvt_i16_i64(i64 %x) {
; CHECK: cvt.u16.u64 %rs{{[0-9]+}}, %rl{{[0-9]+}}
; CHECK: ret
%a = trunc i64 %x to i16
ret i16 %a
}
; i32
define i32 @cvt_i32_i16(i16 %x) {
; CHECK: cvt.u32.u16 %r{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: ret
%a = zext i16 %x to i32
ret i32 %a
}
define i32 @cvt_i32_i64(i64 %x) {
; CHECK: cvt.u32.u64 %r{{[0-9]+}}, %rl{{[0-9]+}}
; CHECK: ret
%a = trunc i64 %x to i32
ret i32 %a
}
; i64
define i64 @cvt_i64_i16(i16 %x) {
; CHECK: cvt.u64.u16 %rl{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: ret
%a = zext i16 %x to i64
ret i64 %a
}
define i64 @cvt_i64_i32(i32 %x) {
; CHECK: cvt.u64.u32 %rl{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%a = zext i32 %x to i64
ret i64 %a
}

View File

@ -0,0 +1,64 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s

;; Integer conversions happen implicitly by loading/storing the proper types
; On sm_20, no explicit cvt is expected for trunc/zext: the CHECK lines pin
; a parameter load at the narrower/destination width (ld.param.uNN) followed
; directly by the return-value store (st.param.bNN).

; i16
define i16 @cvt_i16_i32(i32 %x) {
; CHECK: ld.param.u16 %rs[[R0:[0-9]+]], [cvt_i16_i32_param_{{[0-9]+}}]
; CHECK: st.param.b16 [func_retval{{[0-9]+}}+0], %rs[[R0]]
; CHECK: ret
%a = trunc i32 %x to i16
ret i16 %a
}
define i16 @cvt_i16_i64(i64 %x) {
; CHECK: ld.param.u16 %rs[[R0:[0-9]+]], [cvt_i16_i64_param_{{[0-9]+}}]
; CHECK: st.param.b16 [func_retval{{[0-9]+}}+0], %rs[[R0]]
; CHECK: ret
%a = trunc i64 %x to i16
ret i16 %a
}
; i32
define i32 @cvt_i32_i16(i16 %x) {
; CHECK: ld.param.u16 %r[[R0:[0-9]+]], [cvt_i32_i16_param_{{[0-9]+}}]
; CHECK: st.param.b32 [func_retval{{[0-9]+}}+0], %r[[R0]]
; CHECK: ret
%a = zext i16 %x to i32
ret i32 %a
}
define i32 @cvt_i32_i64(i64 %x) {
; CHECK: ld.param.u32 %r[[R0:[0-9]+]], [cvt_i32_i64_param_{{[0-9]+}}]
; CHECK: st.param.b32 [func_retval{{[0-9]+}}+0], %r[[R0]]
; CHECK: ret
%a = trunc i64 %x to i32
ret i32 %a
}
; i64
define i64 @cvt_i64_i16(i16 %x) {
; CHECK: ld.param.u16 %rl[[R0:[0-9]+]], [cvt_i64_i16_param_{{[0-9]+}}]
; CHECK: st.param.b64 [func_retval{{[0-9]+}}+0], %rl[[R0]]
; CHECK: ret
%a = zext i16 %x to i64
ret i64 %a
}
define i64 @cvt_i64_i32(i32 %x) {
; CHECK: ld.param.u32 %rl[[R0:[0-9]+]], [cvt_i64_i32_param_{{[0-9]+}}]
; CHECK: st.param.b64 [func_retval{{[0-9]+}}+0], %rl[[R0]]
; CHECK: ret
%a = zext i32 %x to i64
ret i64 %a
}

View File

@ -0,0 +1,24 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-fma-level=1 | FileCheck %s -check-prefix=FMA
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-fma-level=0 | FileCheck %s -check-prefix=MUL
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-fma-level=1 | FileCheck %s -check-prefix=FMA
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-fma-level=0 | FileCheck %s -check-prefix=MUL

; Tests for the -nvptx-fma-level backend option: with level 1 an fmul+fadd
; pair is expected to contract into a single fma.rn instruction (FMA
; prefix); with level 0 the two operations are expected to stay separate
; mul.rn/add.rn instructions (MUL prefix).
define ptx_device float @test_mul_add_f(float %x, float %y, float %z) {
entry:
; FMA: fma.rn.f32
; MUL: mul.rn.f32
; MUL: add.rn.f32
%a = fmul float %x, %y
%b = fadd float %a, %z
ret float %b
}
define ptx_device double @test_mul_add_d(double %x, double %y, double %z) {
entry:
; FMA: fma.rn.f64
; MUL: mul.rn.f64
; MUL: add.rn.f64
%a = fmul double %x, %y
%b = fadd double %a, %z
ret double %b
}

17
test/CodeGen/NVPTX/fma.ll Normal file
View File

@ -0,0 +1,17 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s

; Default FMA formation on sm_20: an fmul followed by an fadd of the
; product is expected to be emitted as a single fma.rn instruction for
; both f32 and f64.
define ptx_device float @t1_f32(float %x, float %y, float %z) {
; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
; CHECK: ret;
%a = fmul float %x, %y
%b = fadd float %a, %z
ret float %b
}
define ptx_device double @t1_f64(double %x, double %y, double %z) {
; CHECK: fma.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}};
; CHECK: ret;
%a = fmul double %x, %y
%b = fadd double %a, %z
ret double %b
}

View File

@ -0,0 +1,284 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s

; Tests for the llvm.ptx.read.* special-register intrinsics and
; llvm.ptx.bar.sync. Each read intrinsic is expected to lower to a single
; "mov.u32 %r0, %<sreg>;" (mov.u64 for clock64); bar.sync lowers to the
; bar.sync instruction. Declarations for all intrinsics are at the bottom
; of the file.
define ptx_device i32 @test_tid_x() {
; CHECK: mov.u32 %r0, %tid.x;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.tid.x()
ret i32 %x
}
define ptx_device i32 @test_tid_y() {
; CHECK: mov.u32 %r0, %tid.y;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.tid.y()
ret i32 %x
}
define ptx_device i32 @test_tid_z() {
; CHECK: mov.u32 %r0, %tid.z;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.tid.z()
ret i32 %x
}
define ptx_device i32 @test_tid_w() {
; CHECK: mov.u32 %r0, %tid.w;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.tid.w()
ret i32 %x
}
define ptx_device i32 @test_ntid_x() {
; CHECK: mov.u32 %r0, %ntid.x;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ntid.x()
ret i32 %x
}
define ptx_device i32 @test_ntid_y() {
; CHECK: mov.u32 %r0, %ntid.y;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ntid.y()
ret i32 %x
}
define ptx_device i32 @test_ntid_z() {
; CHECK: mov.u32 %r0, %ntid.z;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ntid.z()
ret i32 %x
}
define ptx_device i32 @test_ntid_w() {
; CHECK: mov.u32 %r0, %ntid.w;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ntid.w()
ret i32 %x
}
define ptx_device i32 @test_laneid() {
; CHECK: mov.u32 %r0, %laneid;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.laneid()
ret i32 %x
}
define ptx_device i32 @test_warpid() {
; CHECK: mov.u32 %r0, %warpid;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.warpid()
ret i32 %x
}
define ptx_device i32 @test_nwarpid() {
; CHECK: mov.u32 %r0, %nwarpid;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nwarpid()
ret i32 %x
}
define ptx_device i32 @test_ctaid_x() {
; CHECK: mov.u32 %r0, %ctaid.x;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ctaid.x()
ret i32 %x
}
define ptx_device i32 @test_ctaid_y() {
; CHECK: mov.u32 %r0, %ctaid.y;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ctaid.y()
ret i32 %x
}
define ptx_device i32 @test_ctaid_z() {
; CHECK: mov.u32 %r0, %ctaid.z;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ctaid.z()
ret i32 %x
}
define ptx_device i32 @test_ctaid_w() {
; CHECK: mov.u32 %r0, %ctaid.w;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ctaid.w()
ret i32 %x
}
define ptx_device i32 @test_nctaid_x() {
; CHECK: mov.u32 %r0, %nctaid.x;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nctaid.x()
ret i32 %x
}
define ptx_device i32 @test_nctaid_y() {
; CHECK: mov.u32 %r0, %nctaid.y;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nctaid.y()
ret i32 %x
}
define ptx_device i32 @test_nctaid_z() {
; CHECK: mov.u32 %r0, %nctaid.z;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nctaid.z()
ret i32 %x
}
define ptx_device i32 @test_nctaid_w() {
; CHECK: mov.u32 %r0, %nctaid.w;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nctaid.w()
ret i32 %x
}
define ptx_device i32 @test_smid() {
; CHECK: mov.u32 %r0, %smid;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.smid()
ret i32 %x
}
define ptx_device i32 @test_nsmid() {
; CHECK: mov.u32 %r0, %nsmid;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nsmid()
ret i32 %x
}
define ptx_device i32 @test_gridid() {
; CHECK: mov.u32 %r0, %gridid;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.gridid()
ret i32 %x
}
define ptx_device i32 @test_lanemask_eq() {
; CHECK: mov.u32 %r0, %lanemask_eq;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.lanemask.eq()
ret i32 %x
}
define ptx_device i32 @test_lanemask_le() {
; CHECK: mov.u32 %r0, %lanemask_le;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.lanemask.le()
ret i32 %x
}
define ptx_device i32 @test_lanemask_lt() {
; CHECK: mov.u32 %r0, %lanemask_lt;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.lanemask.lt()
ret i32 %x
}
define ptx_device i32 @test_lanemask_ge() {
; CHECK: mov.u32 %r0, %lanemask_ge;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.lanemask.ge()
ret i32 %x
}
define ptx_device i32 @test_lanemask_gt() {
; CHECK: mov.u32 %r0, %lanemask_gt;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.lanemask.gt()
ret i32 %x
}
define ptx_device i32 @test_clock() {
; CHECK: mov.u32 %r0, %clock;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.clock()
ret i32 %x
}
; clock64 is the only 64-bit special register read here (mov.u64 / %rl0).
define ptx_device i64 @test_clock64() {
; CHECK: mov.u64 %rl0, %clock64;
; CHECK: ret;
%x = call i64 @llvm.ptx.read.clock64()
ret i64 %x
}
define ptx_device i32 @test_pm0() {
; CHECK: mov.u32 %r0, %pm0;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.pm0()
ret i32 %x
}
define ptx_device i32 @test_pm1() {
; CHECK: mov.u32 %r0, %pm1;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.pm1()
ret i32 %x
}
define ptx_device i32 @test_pm2() {
; CHECK: mov.u32 %r0, %pm2;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.pm2()
ret i32 %x
}
define ptx_device i32 @test_pm3() {
; CHECK: mov.u32 %r0, %pm3;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.pm3()
ret i32 %x
}
define ptx_device void @test_bar_sync() {
; CHECK: bar.sync 0
; CHECK: ret;
call void @llvm.ptx.bar.sync(i32 0)
ret void
}

; Intrinsic declarations referenced by the tests above.
declare i32 @llvm.ptx.read.tid.x()
declare i32 @llvm.ptx.read.tid.y()
declare i32 @llvm.ptx.read.tid.z()
declare i32 @llvm.ptx.read.tid.w()
declare i32 @llvm.ptx.read.ntid.x()
declare i32 @llvm.ptx.read.ntid.y()
declare i32 @llvm.ptx.read.ntid.z()
declare i32 @llvm.ptx.read.ntid.w()
declare i32 @llvm.ptx.read.laneid()
declare i32 @llvm.ptx.read.warpid()
declare i32 @llvm.ptx.read.nwarpid()
declare i32 @llvm.ptx.read.ctaid.x()
declare i32 @llvm.ptx.read.ctaid.y()
declare i32 @llvm.ptx.read.ctaid.z()
declare i32 @llvm.ptx.read.ctaid.w()
declare i32 @llvm.ptx.read.nctaid.x()
declare i32 @llvm.ptx.read.nctaid.y()
declare i32 @llvm.ptx.read.nctaid.z()
declare i32 @llvm.ptx.read.nctaid.w()
declare i32 @llvm.ptx.read.smid()
declare i32 @llvm.ptx.read.nsmid()
declare i32 @llvm.ptx.read.gridid()
declare i32 @llvm.ptx.read.lanemask.eq()
declare i32 @llvm.ptx.read.lanemask.le()
declare i32 @llvm.ptx.read.lanemask.lt()
declare i32 @llvm.ptx.read.lanemask.ge()
declare i32 @llvm.ptx.read.lanemask.gt()
declare i32 @llvm.ptx.read.clock()
declare i64 @llvm.ptx.read.clock64()
declare i32 @llvm.ptx.read.pm0()
declare i32 @llvm.ptx.read.pm1()
declare i32 @llvm.ptx.read.pm2()
declare i32 @llvm.ptx.read.pm3()
declare void @llvm.ptx.bar.sync(i32 %i)

View File

@ -0,0 +1,173 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s --check-prefix=PTX64
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64

; Load lowering for specific address spaces: addrspace(1) -> ld.global,
; addrspace(3) -> ld.shared, addrspace(5) -> ld.local, for every scalar
; type. Under PTX32 the address register is %r (32-bit); under PTX64 it
; is %rl (64-bit). Value register classes: %rc = i8, %rs = i16, %r = i32,
; %rl = i64, %f = f32, %fl = f64.

;; i8
define i8 @ld_global_i8(i8 addrspace(1)* %ptr) {
; PTX32: ld.global.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load i8 addrspace(1)* %ptr
ret i8 %a
}
define i8 @ld_shared_i8(i8 addrspace(3)* %ptr) {
; PTX32: ld.shared.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load i8 addrspace(3)* %ptr
ret i8 %a
}
define i8 @ld_local_i8(i8 addrspace(5)* %ptr) {
; PTX32: ld.local.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load i8 addrspace(5)* %ptr
ret i8 %a
}

;; i16
define i16 @ld_global_i16(i16 addrspace(1)* %ptr) {
; PTX32: ld.global.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load i16 addrspace(1)* %ptr
ret i16 %a
}
define i16 @ld_shared_i16(i16 addrspace(3)* %ptr) {
; PTX32: ld.shared.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load i16 addrspace(3)* %ptr
ret i16 %a
}
define i16 @ld_local_i16(i16 addrspace(5)* %ptr) {
; PTX32: ld.local.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load i16 addrspace(5)* %ptr
ret i16 %a
}

;; i32
define i32 @ld_global_i32(i32 addrspace(1)* %ptr) {
; PTX32: ld.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load i32 addrspace(1)* %ptr
ret i32 %a
}
define i32 @ld_shared_i32(i32 addrspace(3)* %ptr) {
; PTX32: ld.shared.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load i32 addrspace(3)* %ptr
ret i32 %a
}
define i32 @ld_local_i32(i32 addrspace(5)* %ptr) {
; PTX32: ld.local.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load i32 addrspace(5)* %ptr
ret i32 %a
}

;; i64
define i64 @ld_global_i64(i64 addrspace(1)* %ptr) {
; PTX32: ld.global.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load i64 addrspace(1)* %ptr
ret i64 %a
}
define i64 @ld_shared_i64(i64 addrspace(3)* %ptr) {
; PTX32: ld.shared.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load i64 addrspace(3)* %ptr
ret i64 %a
}
define i64 @ld_local_i64(i64 addrspace(5)* %ptr) {
; PTX32: ld.local.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load i64 addrspace(5)* %ptr
ret i64 %a
}

;; f32
define float @ld_global_f32(float addrspace(1)* %ptr) {
; PTX32: ld.global.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load float addrspace(1)* %ptr
ret float %a
}
define float @ld_shared_f32(float addrspace(3)* %ptr) {
; PTX32: ld.shared.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load float addrspace(3)* %ptr
ret float %a
}
define float @ld_local_f32(float addrspace(5)* %ptr) {
; PTX32: ld.local.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load float addrspace(5)* %ptr
ret float %a
}

;; f64
define double @ld_global_f64(double addrspace(1)* %ptr) {
; PTX32: ld.global.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.global.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load double addrspace(1)* %ptr
ret double %a
}
define double @ld_shared_f64(double addrspace(3)* %ptr) {
; PTX32: ld.shared.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.shared.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load double addrspace(3)* %ptr
ret double %a
}
define double @ld_local_f64(double addrspace(5)* %ptr) {
; PTX32: ld.local.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.local.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load double addrspace(5)* %ptr
ret double %a
}

View File

@ -0,0 +1,63 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64

; Loads through the generic address space (addrspace(0)) should emit a
; plain "ld" with no state-space qualifier, for every scalar type.
; PTX32 addresses live in %r registers, PTX64 addresses in %rl registers.

;; i8
define i8 @ld_global_i8(i8 addrspace(0)* %ptr) {
; PTX32: ld.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load i8 addrspace(0)* %ptr
ret i8 %a
}

;; i16
define i16 @ld_global_i16(i16 addrspace(0)* %ptr) {
; PTX32: ld.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load i16 addrspace(0)* %ptr
ret i16 %a
}

;; i32
define i32 @ld_global_i32(i32 addrspace(0)* %ptr) {
; PTX32: ld.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load i32 addrspace(0)* %ptr
ret i32 %a
}

;; i64
define i64 @ld_global_i64(i64 addrspace(0)* %ptr) {
; PTX32: ld.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load i64 addrspace(0)* %ptr
ret i64 %a
}

;; f32
define float @ld_global_f32(float addrspace(0)* %ptr) {
; PTX32: ld.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load float addrspace(0)* %ptr
ret float %a
}

;; f64
define double @ld_global_f64(double addrspace(0)* %ptr) {
; PTX32: ld.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
; PTX64: ld.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load double addrspace(0)* %ptr
ret double %a
}

View File

@ -0,0 +1,5 @@
# Lit configuration for the NVPTX CodeGen test directory.
# Only .ll/.c/.cpp files in this directory are treated as tests, and the
# whole directory is marked unsupported when the NVPTX target was not
# enabled in this LLVM build.
config.suffixes = ['.ll', '.c', '.cpp']

# targets_to_build is a whitespace-separated list of enabled backend names.
targets = set(config.root.targets_to_build.split())
if 'NVPTX' not in targets:
    config.unsupported = True

View File

@ -0,0 +1,179 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s --check-prefix=PTX64
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64

; Store lowering for specific address spaces: addrspace(1) -> st.global,
; addrspace(3) -> st.shared, addrspace(5) -> st.local, for every scalar
; type. PTX32 address registers are %r, PTX64 address registers are %rl.
; Value register classes: %rc = i8, %rs = i16, %r = i32, %rl = i64,
; %f = f32, %fl = f64.

;; i8
define void @st_global_i8(i8 addrspace(1)* %ptr, i8 %a) {
; PTX32: st.global.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
; PTX32: ret
; PTX64: st.global.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
; PTX64: ret
store i8 %a, i8 addrspace(1)* %ptr
ret void
}
define void @st_shared_i8(i8 addrspace(3)* %ptr, i8 %a) {
; PTX32: st.shared.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
; PTX32: ret
; PTX64: st.shared.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
; PTX64: ret
store i8 %a, i8 addrspace(3)* %ptr
ret void
}
define void @st_local_i8(i8 addrspace(5)* %ptr, i8 %a) {
; PTX32: st.local.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
; PTX32: ret
; PTX64: st.local.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
; PTX64: ret
store i8 %a, i8 addrspace(5)* %ptr
ret void
}

;; i16
define void @st_global_i16(i16 addrspace(1)* %ptr, i16 %a) {
; PTX32: st.global.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; PTX32: ret
; PTX64: st.global.u16 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
; PTX64: ret
store i16 %a, i16 addrspace(1)* %ptr
ret void
}
define void @st_shared_i16(i16 addrspace(3)* %ptr, i16 %a) {
; PTX32: st.shared.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; PTX32: ret
; PTX64: st.shared.u16 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
; PTX64: ret
store i16 %a, i16 addrspace(3)* %ptr
ret void
}
define void @st_local_i16(i16 addrspace(5)* %ptr, i16 %a) {
; PTX32: st.local.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; PTX32: ret
; PTX64: st.local.u16 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
; PTX64: ret
store i16 %a, i16 addrspace(5)* %ptr
ret void
}

;; i32
define void @st_global_i32(i32 addrspace(1)* %ptr, i32 %a) {
; PTX32: st.global.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
; PTX32: ret
; PTX64: st.global.u32 [%rl{{[0-9]+}}], %r{{[0-9]+}}
; PTX64: ret
store i32 %a, i32 addrspace(1)* %ptr
ret void
}
define void @st_shared_i32(i32 addrspace(3)* %ptr, i32 %a) {
; PTX32: st.shared.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
; PTX32: ret
; PTX64: st.shared.u32 [%rl{{[0-9]+}}], %r{{[0-9]+}}
; PTX64: ret
store i32 %a, i32 addrspace(3)* %ptr
ret void
}
define void @st_local_i32(i32 addrspace(5)* %ptr, i32 %a) {
; PTX32: st.local.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
; PTX32: ret
; PTX64: st.local.u32 [%rl{{[0-9]+}}], %r{{[0-9]+}}
; PTX64: ret
store i32 %a, i32 addrspace(5)* %ptr
ret void
}

;; i64
define void @st_global_i64(i64 addrspace(1)* %ptr, i64 %a) {
; PTX32: st.global.u64 [%r{{[0-9]+}}], %rl{{[0-9]+}}
; PTX32: ret
; PTX64: st.global.u64 [%rl{{[0-9]+}}], %rl{{[0-9]+}}
; PTX64: ret
store i64 %a, i64 addrspace(1)* %ptr
ret void
}
define void @st_shared_i64(i64 addrspace(3)* %ptr, i64 %a) {
; PTX32: st.shared.u64 [%r{{[0-9]+}}], %rl{{[0-9]+}}
; PTX32: ret
; PTX64: st.shared.u64 [%rl{{[0-9]+}}], %rl{{[0-9]+}}
; PTX64: ret
store i64 %a, i64 addrspace(3)* %ptr
ret void
}
define void @st_local_i64(i64 addrspace(5)* %ptr, i64 %a) {
; PTX32: st.local.u64 [%r{{[0-9]+}}], %rl{{[0-9]+}}
; PTX32: ret
; PTX64: st.local.u64 [%rl{{[0-9]+}}], %rl{{[0-9]+}}
; PTX64: ret
store i64 %a, i64 addrspace(5)* %ptr
ret void
}

;; f32
define void @st_global_f32(float addrspace(1)* %ptr, float %a) {
; PTX32: st.global.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
; PTX32: ret
; PTX64: st.global.f32 [%rl{{[0-9]+}}], %f{{[0-9]+}}
; PTX64: ret
store float %a, float addrspace(1)* %ptr
ret void
}
define void @st_shared_f32(float addrspace(3)* %ptr, float %a) {
; PTX32: st.shared.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
; PTX32: ret
; PTX64: st.shared.f32 [%rl{{[0-9]+}}], %f{{[0-9]+}}
; PTX64: ret
store float %a, float addrspace(3)* %ptr
ret void
}
define void @st_local_f32(float addrspace(5)* %ptr, float %a) {
; PTX32: st.local.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
; PTX32: ret
; PTX64: st.local.f32 [%rl{{[0-9]+}}], %f{{[0-9]+}}
; PTX64: ret
store float %a, float addrspace(5)* %ptr
ret void
}

;; f64
define void @st_global_f64(double addrspace(1)* %ptr, double %a) {
; PTX32: st.global.f64 [%r{{[0-9]+}}], %fl{{[0-9]+}}
; PTX32: ret
; PTX64: st.global.f64 [%rl{{[0-9]+}}], %fl{{[0-9]+}}
; PTX64: ret
store double %a, double addrspace(1)* %ptr
ret void
}
define void @st_shared_f64(double addrspace(3)* %ptr, double %a) {
; PTX32: st.shared.f64 [%r{{[0-9]+}}], %fl{{[0-9]+}}
; PTX32: ret
; PTX64: st.shared.f64 [%rl{{[0-9]+}}], %fl{{[0-9]+}}
; PTX64: ret
store double %a, double addrspace(3)* %ptr
ret void
}
define void @st_local_f64(double addrspace(5)* %ptr, double %a) {
; PTX32: st.local.f64 [%r{{[0-9]+}}], %fl{{[0-9]+}}
; PTX32: ret
; PTX64: st.local.f64 [%rl{{[0-9]+}}], %fl{{[0-9]+}}
; PTX64: ret
store double %a, double addrspace(5)* %ptr
ret void
}

View File

@ -0,0 +1,69 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64

; Stores through the generic address space (addrspace(0)) should emit a
; plain "st" with no state-space qualifier, for every scalar type.
; PTX32 address registers are %r, PTX64 address registers are %rl.

;; i8
define void @st_global_i8(i8 addrspace(0)* %ptr, i8 %a) {
; PTX32: st.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
; PTX32: ret
; PTX64: st.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
; PTX64: ret
store i8 %a, i8 addrspace(0)* %ptr
ret void
}

;; i16
define void @st_global_i16(i16 addrspace(0)* %ptr, i16 %a) {
; PTX32: st.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; PTX32: ret
; PTX64: st.u16 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
; PTX64: ret
store i16 %a, i16 addrspace(0)* %ptr
ret void
}

;; i32
define void @st_global_i32(i32 addrspace(0)* %ptr, i32 %a) {
; PTX32: st.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
; PTX32: ret
; PTX64: st.u32 [%rl{{[0-9]+}}], %r{{[0-9]+}}
; PTX64: ret
store i32 %a, i32 addrspace(0)* %ptr
ret void
}

;; i64
define void @st_global_i64(i64 addrspace(0)* %ptr, i64 %a) {
; PTX32: st.u64 [%r{{[0-9]+}}], %rl{{[0-9]+}}
; PTX32: ret
; PTX64: st.u64 [%rl{{[0-9]+}}], %rl{{[0-9]+}}
; PTX64: ret
store i64 %a, i64 addrspace(0)* %ptr
ret void
}

;; f32
define void @st_global_f32(float addrspace(0)* %ptr, float %a) {
; PTX32: st.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
; PTX32: ret
; PTX64: st.f32 [%rl{{[0-9]+}}], %f{{[0-9]+}}
; PTX64: ret
store float %a, float addrspace(0)* %ptr
ret void
}

;; f64
define void @st_global_f64(double addrspace(0)* %ptr, double %a) {
; PTX32: st.f64 [%r{{[0-9]+}}], %fl{{[0-9]+}}
; PTX32: ret
; PTX64: st.f64 [%rl{{[0-9]+}}], %fl{{[0-9]+}}
; PTX64: ret
store double %a, double addrspace(0)* %ptr
ret void
}