mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
This patch adds a new NVPTX back-end to LLVM which supports code generation for NVIDIA PTX 3.0. This back-end will (eventually) replace the current PTX back-end, while maintaining compatibility with it.
The new target machines are: nvptx (old ptx32) => 32-bit PTX nvptx64 (old ptx64) => 64-bit PTX The sources are based on the internal NVIDIA NVPTX back-end, and contain more functionality than the current PTX back-end currently provides. NV_CONTRIB llvm-svn: 156196
This commit is contained in:
parent
2b868d474e
commit
4ca961430f
@ -78,6 +78,7 @@ set(LLVM_ALL_TARGETS
|
||||
Mips
|
||||
MBlaze
|
||||
MSP430
|
||||
NVPTX
|
||||
PowerPC
|
||||
PTX
|
||||
Sparc
|
||||
|
@ -370,6 +370,7 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
|
||||
hexagon-*) llvm_cv_target_arch="Hexagon" ;;
|
||||
mblaze-*) llvm_cv_target_arch="MBlaze" ;;
|
||||
ptx-*) llvm_cv_target_arch="PTX" ;;
|
||||
nvptx-*) llvm_cv_target_arch="NVPTX" ;;
|
||||
*) llvm_cv_target_arch="Unknown" ;;
|
||||
esac])
|
||||
|
||||
@ -517,6 +518,7 @@ else
|
||||
Hexagon) AC_SUBST(TARGET_HAS_JIT,0) ;;
|
||||
MBlaze) AC_SUBST(TARGET_HAS_JIT,0) ;;
|
||||
PTX) AC_SUBST(TARGET_HAS_JIT,0) ;;
|
||||
NVPTX) AC_SUBST(TARGET_HAS_JIT,0) ;;
|
||||
*) AC_SUBST(TARGET_HAS_JIT,0) ;;
|
||||
esac
|
||||
fi
|
||||
@ -628,13 +630,13 @@ TARGETS_TO_BUILD=""
|
||||
AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
|
||||
[Build specific host targets: all or target1,target2,... Valid targets are:
|
||||
host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon,
|
||||
xcore, msp430, ptx, and cpp (default=all)]),,
|
||||
xcore, msp430, ptx, nvptx, and cpp (default=all)]),,
|
||||
enableval=all)
|
||||
if test "$enableval" = host-only ; then
|
||||
enableval=host
|
||||
fi
|
||||
case "$enableval" in
|
||||
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX Hexagon" ;;
|
||||
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX NVPTX Hexagon" ;;
|
||||
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
|
||||
case "$a_target" in
|
||||
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
|
||||
@ -651,6 +653,7 @@ case "$enableval" in
|
||||
hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
|
||||
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
|
||||
ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
|
||||
nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
|
||||
host) case "$llvm_cv_target_arch" in
|
||||
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
|
||||
x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
|
||||
@ -664,6 +667,7 @@ case "$enableval" in
|
||||
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
|
||||
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
|
||||
PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
|
||||
NVPTX) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
|
||||
*) AC_MSG_ERROR([Can not set target to build]) ;;
|
||||
esac ;;
|
||||
*) AC_MSG_ERROR([Unrecognized target $a_target]) ;;
|
||||
|
11
configure
vendored
11
configure
vendored
@ -1420,7 +1420,7 @@ Optional Features:
|
||||
--enable-targets Build specific host targets: all or
|
||||
target1,target2,... Valid targets are: host, x86,
|
||||
x86_64, sparc, powerpc, arm, mips, spu, hexagon,
|
||||
xcore, msp430, ptx, and cpp (default=all)
|
||||
xcore, msp430, ptx, nvptx, and cpp (default=all)
|
||||
--enable-bindings Build specific language bindings:
|
||||
all,auto,none,{binding-name} (default=auto)
|
||||
--enable-libffi Check for the presence of libffi (default is NO)
|
||||
@ -3903,6 +3903,7 @@ else
|
||||
hexagon-*) llvm_cv_target_arch="Hexagon" ;;
|
||||
mblaze-*) llvm_cv_target_arch="MBlaze" ;;
|
||||
ptx-*) llvm_cv_target_arch="PTX" ;;
|
||||
nvptx-*) llvm_cv_target_arch="NVPTX" ;;
|
||||
*) llvm_cv_target_arch="Unknown" ;;
|
||||
esac
|
||||
fi
|
||||
@ -5125,6 +5126,8 @@ else
|
||||
MBlaze) TARGET_HAS_JIT=0
|
||||
;;
|
||||
PTX) TARGET_HAS_JIT=0
|
||||
;;
|
||||
NVPTX) TARGET_HAS_JIT=0
|
||||
;;
|
||||
*) TARGET_HAS_JIT=0
|
||||
;;
|
||||
@ -5310,7 +5313,7 @@ if test "$enableval" = host-only ; then
|
||||
enableval=host
|
||||
fi
|
||||
case "$enableval" in
|
||||
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX Hexagon" ;;
|
||||
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX NVPTX Hexagon" ;;
|
||||
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
|
||||
case "$a_target" in
|
||||
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
|
||||
@ -5327,6 +5330,7 @@ case "$enableval" in
|
||||
hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
|
||||
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
|
||||
ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
|
||||
nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
|
||||
host) case "$llvm_cv_target_arch" in
|
||||
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
|
||||
x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
|
||||
@ -5340,6 +5344,7 @@ case "$enableval" in
|
||||
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
|
||||
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
|
||||
PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
|
||||
NVPTX) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
|
||||
*) { { echo "$as_me:$LINENO: error: Can not set target to build" >&5
|
||||
echo "$as_me: error: Can not set target to build" >&2;}
|
||||
{ (exit 1); exit 1; }; } ;;
|
||||
@ -10401,7 +10406,7 @@ else
|
||||
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
|
||||
lt_status=$lt_dlunknown
|
||||
cat > conftest.$ac_ext <<EOF
|
||||
#line 10404 "configure"
|
||||
#line 10409 "configure"
|
||||
#include "confdefs.h"
|
||||
|
||||
#if HAVE_DLFCN_H
|
||||
|
@ -64,6 +64,8 @@ public:
|
||||
mblaze, // MBlaze: mblaze
|
||||
ptx32, // PTX: ptx (32-bit)
|
||||
ptx64, // PTX: ptx (64-bit)
|
||||
nvptx, // NVPTX: 32-bit
|
||||
nvptx64, // NVPTX: 64-bit
|
||||
le32, // le32: generic little-endian 32-bit CPU (PNaCl / Emscripten)
|
||||
amdil // amdil: amd IL
|
||||
};
|
||||
|
@ -441,3 +441,4 @@ include "llvm/IntrinsicsCellSPU.td"
|
||||
include "llvm/IntrinsicsXCore.td"
|
||||
include "llvm/IntrinsicsPTX.td"
|
||||
include "llvm/IntrinsicsHexagon.td"
|
||||
include "llvm/IntrinsicsNVVM.td"
|
||||
|
872
include/llvm/IntrinsicsNVVM.td
Normal file
872
include/llvm/IntrinsicsNVVM.td
Normal file
@ -0,0 +1,872 @@
|
||||
//===- IntrinsicsNVVM.td - Defines NVVM intrinsics ---------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines all of the NVVM-specific intrinsics for use with NVPTX.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def llvm_anyi64ptr_ty : LLVMAnyPointerType<llvm_i64_ty>; // (space)i64*
|
||||
|
||||
//
|
||||
// MISC
|
||||
//
|
||||
|
||||
def int_nvvm_clz_i : GCCBuiltin<"__nvvm_clz_i">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_nvvm_clz_ll : GCCBuiltin<"__nvvm_clz_ll">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_popc_i : GCCBuiltin<"__nvvm_popc_i">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_nvvm_popc_ll : GCCBuiltin<"__nvvm_popc_ll">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_prmt : GCCBuiltin<"__nvvm_prmt">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
//
|
||||
// Min Max
|
||||
//
|
||||
|
||||
def int_nvvm_min_i : GCCBuiltin<"__nvvm_min_i">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_min_ui : GCCBuiltin<"__nvvm_min_ui">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
def int_nvvm_min_ll : GCCBuiltin<"__nvvm_min_ll">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_min_ull : GCCBuiltin<"__nvvm_min_ull">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
def int_nvvm_max_i : GCCBuiltin<"__nvvm_max_i">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_max_ui : GCCBuiltin<"__nvvm_max_ui">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
def int_nvvm_max_ll : GCCBuiltin<"__nvvm_max_ll">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_max_ull : GCCBuiltin<"__nvvm_max_ull">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
def int_nvvm_fmin_f : GCCBuiltin<"__nvvm_fmin_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_fmin_ftz_f : GCCBuiltin<"__nvvm_fmin_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
def int_nvvm_fmax_f : GCCBuiltin<"__nvvm_fmax_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]
|
||||
, [IntrNoMem, Commutative]>;
|
||||
def int_nvvm_fmax_ftz_f : GCCBuiltin<"__nvvm_fmax_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
def int_nvvm_fmin_d : GCCBuiltin<"__nvvm_fmin_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_fmax_d : GCCBuiltin<"__nvvm_fmax_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
//
|
||||
// Multiplication
|
||||
//
|
||||
|
||||
def int_nvvm_mulhi_i : GCCBuiltin<"__nvvm_mulhi_i">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_mulhi_ui : GCCBuiltin<"__nvvm_mulhi_ui">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
def int_nvvm_mulhi_ll : GCCBuiltin<"__nvvm_mulhi_ll">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_mulhi_ull : GCCBuiltin<"__nvvm_mulhi_ull">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
def int_nvvm_mul_rn_ftz_f : GCCBuiltin<"__nvvm_mul_rn_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_mul_rn_f : GCCBuiltin<"__nvvm_mul_rn_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_mul_rz_ftz_f : GCCBuiltin<"__nvvm_mul_rz_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_mul_rz_f : GCCBuiltin<"__nvvm_mul_rz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_mul_rm_ftz_f : GCCBuiltin<"__nvvm_mul_rm_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_mul_rm_f : GCCBuiltin<"__nvvm_mul_rm_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_mul_rp_ftz_f : GCCBuiltin<"__nvvm_mul_rp_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_mul_rp_f : GCCBuiltin<"__nvvm_mul_rp_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
def int_nvvm_mul_rn_d : GCCBuiltin<"__nvvm_mul_rn_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_mul_rz_d : GCCBuiltin<"__nvvm_mul_rz_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_mul_rm_d : GCCBuiltin<"__nvvm_mul_rm_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_mul_rp_d : GCCBuiltin<"__nvvm_mul_rp_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
def int_nvvm_mul24_i : GCCBuiltin<"__nvvm_mul24_i">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_mul24_ui : GCCBuiltin<"__nvvm_mul24_ui">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
//
|
||||
// Div
|
||||
//
|
||||
|
||||
def int_nvvm_div_approx_ftz_f : GCCBuiltin<"__nvvm_div_approx_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_div_approx_f : GCCBuiltin<"__nvvm_div_approx_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
def int_nvvm_div_rn_ftz_f : GCCBuiltin<"__nvvm_div_rn_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_div_rn_f : GCCBuiltin<"__nvvm_div_rn_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
def int_nvvm_div_rz_ftz_f : GCCBuiltin<"__nvvm_div_rz_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_div_rz_f : GCCBuiltin<"__nvvm_div_rz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
def int_nvvm_div_rm_ftz_f : GCCBuiltin<"__nvvm_div_rm_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_div_rm_f : GCCBuiltin<"__nvvm_div_rm_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
def int_nvvm_div_rp_ftz_f : GCCBuiltin<"__nvvm_div_rp_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_div_rp_f : GCCBuiltin<"__nvvm_div_rp_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
def int_nvvm_div_rn_d : GCCBuiltin<"__nvvm_div_rn_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_div_rz_d : GCCBuiltin<"__nvvm_div_rz_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_div_rm_d : GCCBuiltin<"__nvvm_div_rm_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_div_rp_d : GCCBuiltin<"__nvvm_div_rp_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
//
|
||||
// Brev
|
||||
//
|
||||
|
||||
def int_nvvm_brev32 : GCCBuiltin<"__nvvm_brev32">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_nvvm_brev64 : GCCBuiltin<"__nvvm_brev64">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
|
||||
//
|
||||
// Sad
|
||||
//
|
||||
|
||||
def int_nvvm_sad_i : GCCBuiltin<"__nvvm_sad_i">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_sad_ui : GCCBuiltin<"__nvvm_sad_ui">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
//
|
||||
// Floor Ceil
|
||||
//
|
||||
|
||||
def int_nvvm_floor_ftz_f : GCCBuiltin<"__nvvm_floor_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_floor_f : GCCBuiltin<"__nvvm_floor_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_floor_d : GCCBuiltin<"__nvvm_floor_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_ceil_ftz_f : GCCBuiltin<"__nvvm_ceil_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_ceil_f : GCCBuiltin<"__nvvm_ceil_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_ceil_d : GCCBuiltin<"__nvvm_ceil_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
|
||||
//
|
||||
// Abs
|
||||
//
|
||||
|
||||
def int_nvvm_abs_i : GCCBuiltin<"__nvvm_abs_i">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_nvvm_abs_ll : GCCBuiltin<"__nvvm_abs_ll">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_fabs_ftz_f : GCCBuiltin<"__nvvm_fabs_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_fabs_f : GCCBuiltin<"__nvvm_fabs_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_fabs_d : GCCBuiltin<"__nvvm_fabs_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
|
||||
//
|
||||
// Round
|
||||
//
|
||||
|
||||
def int_nvvm_round_ftz_f : GCCBuiltin<"__nvvm_round_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_round_f : GCCBuiltin<"__nvvm_round_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_round_d : GCCBuiltin<"__nvvm_round_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
|
||||
//
|
||||
// Trunc
|
||||
//
|
||||
|
||||
def int_nvvm_trunc_ftz_f : GCCBuiltin<"__nvvm_trunc_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_trunc_f : GCCBuiltin<"__nvvm_trunc_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_trunc_d : GCCBuiltin<"__nvvm_trunc_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
|
||||
//
|
||||
// Saturate
|
||||
//
|
||||
|
||||
def int_nvvm_saturate_ftz_f : GCCBuiltin<"__nvvm_saturate_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_saturate_f : GCCBuiltin<"__nvvm_saturate_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_saturate_d : GCCBuiltin<"__nvvm_saturate_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
|
||||
//
|
||||
// Exp2 Log2
|
||||
//
|
||||
|
||||
def int_nvvm_ex2_approx_ftz_f : GCCBuiltin<"__nvvm_ex2_approx_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_ex2_approx_f : GCCBuiltin<"__nvvm_ex2_approx_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_ex2_approx_d : GCCBuiltin<"__nvvm_ex2_approx_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_lg2_approx_ftz_f : GCCBuiltin<"__nvvm_lg2_approx_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_lg2_approx_f : GCCBuiltin<"__nvvm_lg2_approx_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_lg2_approx_d : GCCBuiltin<"__nvvm_lg2_approx_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
|
||||
//
|
||||
// Sin Cos
|
||||
//
|
||||
|
||||
def int_nvvm_sin_approx_ftz_f : GCCBuiltin<"__nvvm_sin_approx_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_sin_approx_f : GCCBuiltin<"__nvvm_sin_approx_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_cos_approx_ftz_f : GCCBuiltin<"__nvvm_cos_approx_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_cos_approx_f : GCCBuiltin<"__nvvm_cos_approx_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
|
||||
//
|
||||
// Fma
|
||||
//
|
||||
|
||||
def int_nvvm_fma_rn_ftz_f : GCCBuiltin<"__nvvm_fma_rn_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_fma_rn_f : GCCBuiltin<"__nvvm_fma_rn_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_fma_rz_ftz_f : GCCBuiltin<"__nvvm_fma_rz_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_fma_rz_f : GCCBuiltin<"__nvvm_fma_rz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_fma_rm_ftz_f : GCCBuiltin<"__nvvm_fma_rm_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_fma_rm_f : GCCBuiltin<"__nvvm_fma_rm_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_fma_rp_ftz_f : GCCBuiltin<"__nvvm_fma_rp_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_fma_rp_f : GCCBuiltin<"__nvvm_fma_rp_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
def int_nvvm_fma_rn_d : GCCBuiltin<"__nvvm_fma_rn_d">,
|
||||
Intrinsic<[llvm_double_ty],
|
||||
[llvm_double_ty, llvm_double_ty, llvm_double_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_fma_rz_d : GCCBuiltin<"__nvvm_fma_rz_d">,
|
||||
Intrinsic<[llvm_double_ty],
|
||||
[llvm_double_ty, llvm_double_ty, llvm_double_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_fma_rm_d : GCCBuiltin<"__nvvm_fma_rm_d">,
|
||||
Intrinsic<[llvm_double_ty],
|
||||
[llvm_double_ty, llvm_double_ty, llvm_double_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_fma_rp_d : GCCBuiltin<"__nvvm_fma_rp_d">,
|
||||
Intrinsic<[llvm_double_ty],
|
||||
[llvm_double_ty, llvm_double_ty, llvm_double_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
//
|
||||
// Rcp
|
||||
//
|
||||
|
||||
def int_nvvm_rcp_rn_ftz_f : GCCBuiltin<"__nvvm_rcp_rn_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_rcp_rn_f : GCCBuiltin<"__nvvm_rcp_rn_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_rcp_rz_ftz_f : GCCBuiltin<"__nvvm_rcp_rz_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_rcp_rz_f : GCCBuiltin<"__nvvm_rcp_rz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_rcp_rm_ftz_f : GCCBuiltin<"__nvvm_rcp_rm_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_rcp_rm_f : GCCBuiltin<"__nvvm_rcp_rm_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_rcp_rp_ftz_f : GCCBuiltin<"__nvvm_rcp_rp_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_rcp_rp_f : GCCBuiltin<"__nvvm_rcp_rp_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_rcp_rn_d : GCCBuiltin<"__nvvm_rcp_rn_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_rcp_rz_d : GCCBuiltin<"__nvvm_rcp_rz_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_rcp_rm_d : GCCBuiltin<"__nvvm_rcp_rm_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_rcp_rp_d : GCCBuiltin<"__nvvm_rcp_rp_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_rcp_approx_ftz_d : GCCBuiltin<"__nvvm_rcp_approx_ftz_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
|
||||
//
|
||||
// Sqrt
|
||||
//
|
||||
|
||||
def int_nvvm_sqrt_rn_ftz_f : GCCBuiltin<"__nvvm_sqrt_rn_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_sqrt_rn_f : GCCBuiltin<"__nvvm_sqrt_rn_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_sqrt_rz_ftz_f : GCCBuiltin<"__nvvm_sqrt_rz_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_sqrt_rz_f : GCCBuiltin<"__nvvm_sqrt_rz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_sqrt_rm_ftz_f : GCCBuiltin<"__nvvm_sqrt_rm_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_sqrt_rm_f : GCCBuiltin<"__nvvm_sqrt_rm_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_sqrt_rp_ftz_f : GCCBuiltin<"__nvvm_sqrt_rp_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_sqrt_rp_f : GCCBuiltin<"__nvvm_sqrt_rp_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_sqrt_approx_ftz_f : GCCBuiltin<"__nvvm_sqrt_approx_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_sqrt_approx_f : GCCBuiltin<"__nvvm_sqrt_approx_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_sqrt_rn_d : GCCBuiltin<"__nvvm_sqrt_rn_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_sqrt_rz_d : GCCBuiltin<"__nvvm_sqrt_rz_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_sqrt_rm_d : GCCBuiltin<"__nvvm_sqrt_rm_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_sqrt_rp_d : GCCBuiltin<"__nvvm_sqrt_rp_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
|
||||
//
|
||||
// Rsqrt
|
||||
//
|
||||
|
||||
def int_nvvm_rsqrt_approx_ftz_f : GCCBuiltin<"__nvvm_rsqrt_approx_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_rsqrt_approx_f : GCCBuiltin<"__nvvm_rsqrt_approx_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_rsqrt_approx_d : GCCBuiltin<"__nvvm_rsqrt_approx_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
|
||||
//
|
||||
// Add
|
||||
//
|
||||
|
||||
def int_nvvm_add_rn_ftz_f : GCCBuiltin<"__nvvm_add_rn_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_add_rn_f : GCCBuiltin<"__nvvm_add_rn_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_add_rz_ftz_f : GCCBuiltin<"__nvvm_add_rz_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_add_rz_f : GCCBuiltin<"__nvvm_add_rz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_add_rm_ftz_f : GCCBuiltin<"__nvvm_add_rm_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_add_rm_f : GCCBuiltin<"__nvvm_add_rm_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_add_rp_ftz_f : GCCBuiltin<"__nvvm_add_rp_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_add_rp_f : GCCBuiltin<"__nvvm_add_rp_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
def int_nvvm_add_rn_d : GCCBuiltin<"__nvvm_add_rn_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_add_rz_d : GCCBuiltin<"__nvvm_add_rz_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_add_rm_d : GCCBuiltin<"__nvvm_add_rm_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_add_rp_d : GCCBuiltin<"__nvvm_add_rp_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
//
|
||||
// Convert
|
||||
//
|
||||
|
||||
def int_nvvm_d2f_rn_ftz : GCCBuiltin<"__nvvm_d2f_rn_ftz">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_d2f_rn : GCCBuiltin<"__nvvm_d2f_rn">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_d2f_rz_ftz : GCCBuiltin<"__nvvm_d2f_rz_ftz">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_d2f_rz : GCCBuiltin<"__nvvm_d2f_rz">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_d2f_rm_ftz : GCCBuiltin<"__nvvm_d2f_rm_ftz">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_d2f_rm : GCCBuiltin<"__nvvm_d2f_rm">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_d2f_rp_ftz : GCCBuiltin<"__nvvm_d2f_rp_ftz">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_d2f_rp : GCCBuiltin<"__nvvm_d2f_rp">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_d2i_rn : GCCBuiltin<"__nvvm_d2i_rn">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_d2i_rz : GCCBuiltin<"__nvvm_d2i_rz">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_d2i_rm : GCCBuiltin<"__nvvm_d2i_rm">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_d2i_rp : GCCBuiltin<"__nvvm_d2i_rp">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_d2ui_rn : GCCBuiltin<"__nvvm_d2ui_rn">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_d2ui_rz : GCCBuiltin<"__nvvm_d2ui_rz">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_d2ui_rm : GCCBuiltin<"__nvvm_d2ui_rm">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_d2ui_rp : GCCBuiltin<"__nvvm_d2ui_rp">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_i2d_rn : GCCBuiltin<"__nvvm_i2d_rn">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_nvvm_i2d_rz : GCCBuiltin<"__nvvm_i2d_rz">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_nvvm_i2d_rm : GCCBuiltin<"__nvvm_i2d_rm">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_nvvm_i2d_rp : GCCBuiltin<"__nvvm_i2d_rp">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_ui2d_rn : GCCBuiltin<"__nvvm_ui2d_rn">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_nvvm_ui2d_rz : GCCBuiltin<"__nvvm_ui2d_rz">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_nvvm_ui2d_rm : GCCBuiltin<"__nvvm_ui2d_rm">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_nvvm_ui2d_rp : GCCBuiltin<"__nvvm_ui2d_rp">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_f2i_rn_ftz : GCCBuiltin<"__nvvm_f2i_rn_ftz">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2i_rn : GCCBuiltin<"__nvvm_f2i_rn">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2i_rz_ftz : GCCBuiltin<"__nvvm_f2i_rz_ftz">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2i_rz : GCCBuiltin<"__nvvm_f2i_rz">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2i_rm_ftz : GCCBuiltin<"__nvvm_f2i_rm_ftz">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2i_rm : GCCBuiltin<"__nvvm_f2i_rm">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2i_rp_ftz : GCCBuiltin<"__nvvm_f2i_rp_ftz">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2i_rp : GCCBuiltin<"__nvvm_f2i_rp">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_f2ui_rn_ftz : GCCBuiltin<"__nvvm_f2ui_rn_ftz">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2ui_rn : GCCBuiltin<"__nvvm_f2ui_rn">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2ui_rz_ftz : GCCBuiltin<"__nvvm_f2ui_rz_ftz">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2ui_rz : GCCBuiltin<"__nvvm_f2ui_rz">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2ui_rm_ftz : GCCBuiltin<"__nvvm_f2ui_rm_ftz">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2ui_rm : GCCBuiltin<"__nvvm_f2ui_rm">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2ui_rp_ftz : GCCBuiltin<"__nvvm_f2ui_rp_ftz">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2ui_rp : GCCBuiltin<"__nvvm_f2ui_rp">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_i2f_rn : GCCBuiltin<"__nvvm_i2f_rn">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_nvvm_i2f_rz : GCCBuiltin<"__nvvm_i2f_rz">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_nvvm_i2f_rm : GCCBuiltin<"__nvvm_i2f_rm">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_nvvm_i2f_rp : GCCBuiltin<"__nvvm_i2f_rp">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_ui2f_rn : GCCBuiltin<"__nvvm_ui2f_rn">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_nvvm_ui2f_rz : GCCBuiltin<"__nvvm_ui2f_rz">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_nvvm_ui2f_rm : GCCBuiltin<"__nvvm_ui2f_rm">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_nvvm_ui2f_rp : GCCBuiltin<"__nvvm_ui2f_rp">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_lohi_i2d : GCCBuiltin<"__nvvm_lohi_i2d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
def int_nvvm_d2i_lo : GCCBuiltin<"__nvvm_d2i_lo">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_d2i_hi : GCCBuiltin<"__nvvm_d2i_hi">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_f2ll_rn_ftz : GCCBuiltin<"__nvvm_f2ll_rn_ftz">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2ll_rn : GCCBuiltin<"__nvvm_f2ll_rn">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2ll_rz_ftz : GCCBuiltin<"__nvvm_f2ll_rz_ftz">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2ll_rz : GCCBuiltin<"__nvvm_f2ll_rz">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2ll_rm_ftz : GCCBuiltin<"__nvvm_f2ll_rm_ftz">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2ll_rm : GCCBuiltin<"__nvvm_f2ll_rm">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2ll_rp_ftz : GCCBuiltin<"__nvvm_f2ll_rp_ftz">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2ll_rp : GCCBuiltin<"__nvvm_f2ll_rp">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_f2ull_rn_ftz : GCCBuiltin<"__nvvm_f2ull_rn_ftz">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2ull_rn : GCCBuiltin<"__nvvm_f2ull_rn">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2ull_rz_ftz : GCCBuiltin<"__nvvm_f2ull_rz_ftz">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2ull_rz : GCCBuiltin<"__nvvm_f2ull_rz">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2ull_rm_ftz : GCCBuiltin<"__nvvm_f2ull_rm_ftz">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2ull_rm : GCCBuiltin<"__nvvm_f2ull_rm">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2ull_rp_ftz : GCCBuiltin<"__nvvm_f2ull_rp_ftz">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2ull_rp : GCCBuiltin<"__nvvm_f2ull_rp">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_d2ll_rn : GCCBuiltin<"__nvvm_d2ll_rn">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_d2ll_rz : GCCBuiltin<"__nvvm_d2ll_rz">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_d2ll_rm : GCCBuiltin<"__nvvm_d2ll_rm">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_d2ll_rp : GCCBuiltin<"__nvvm_d2ll_rp">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_d2ull_rn : GCCBuiltin<"__nvvm_d2ull_rn">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_d2ull_rz : GCCBuiltin<"__nvvm_d2ull_rz">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_d2ull_rm : GCCBuiltin<"__nvvm_d2ull_rm">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
def int_nvvm_d2ull_rp : GCCBuiltin<"__nvvm_d2ull_rp">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_ll2f_rn : GCCBuiltin<"__nvvm_ll2f_rn">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_nvvm_ll2f_rz : GCCBuiltin<"__nvvm_ll2f_rz">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_nvvm_ll2f_rm : GCCBuiltin<"__nvvm_ll2f_rm">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_nvvm_ll2f_rp : GCCBuiltin<"__nvvm_ll2f_rp">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_nvvm_ull2f_rn : GCCBuiltin<"__nvvm_ull2f_rn">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_nvvm_ull2f_rz : GCCBuiltin<"__nvvm_ull2f_rz">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_nvvm_ull2f_rm : GCCBuiltin<"__nvvm_ull2f_rm">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_nvvm_ull2f_rp : GCCBuiltin<"__nvvm_ull2f_rp">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_ll2d_rn : GCCBuiltin<"__nvvm_ll2d_rn">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_nvvm_ll2d_rz : GCCBuiltin<"__nvvm_ll2d_rz">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_nvvm_ll2d_rm : GCCBuiltin<"__nvvm_ll2d_rm">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_nvvm_ll2d_rp : GCCBuiltin<"__nvvm_ll2d_rp">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_nvvm_ull2d_rn : GCCBuiltin<"__nvvm_ull2d_rn">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_nvvm_ull2d_rz : GCCBuiltin<"__nvvm_ull2d_rz">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_nvvm_ull2d_rm : GCCBuiltin<"__nvvm_ull2d_rm">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_nvvm_ull2d_rp : GCCBuiltin<"__nvvm_ull2d_rp">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_f2h_rn_ftz : GCCBuiltin<"__nvvm_f2h_rn_ftz">,
|
||||
Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_f2h_rn : GCCBuiltin<"__nvvm_f2h_rn">,
|
||||
Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_h2f : GCCBuiltin<"__nvvm_h2f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_i16_ty], [IntrNoMem]>;
|
||||
|
||||
//
|
||||
// Bitcast
|
||||
//
|
||||
|
||||
def int_nvvm_bitcast_f2i : GCCBuiltin<"__nvvm_bitcast_f2i">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_bitcast_i2f : GCCBuiltin<"__nvvm_bitcast_i2f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_bitcast_ll2d : GCCBuiltin<"__nvvm_bitcast_ll2d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_nvvm_bitcast_d2ll : GCCBuiltin<"__nvvm_bitcast_d2ll">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
|
||||
|
||||
// Atomic not available as an llvm intrinsic.
|
||||
def int_nvvm_atomic_load_add_f32 : Intrinsic<[llvm_float_ty],
|
||||
[LLVMAnyPointerType<llvm_float_ty>, llvm_float_ty],
|
||||
[IntrReadWriteArgMem, NoCapture<0>]>;
|
||||
def int_nvvm_atomic_load_inc_32 : Intrinsic<[llvm_i32_ty],
|
||||
[LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
|
||||
[IntrReadWriteArgMem, NoCapture<0>]>;
|
||||
def int_nvvm_atomic_load_dec_32 : Intrinsic<[llvm_i32_ty],
|
||||
[LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
|
||||
[IntrReadWriteArgMem, NoCapture<0>]>;
|
||||
|
||||
// Bar.Sync
|
||||
def int_cuda_syncthreads : GCCBuiltin<"__syncthreads">,
|
||||
Intrinsic<[], [], []>;
|
||||
def int_nvvm_barrier0 : GCCBuiltin<"__nvvm_bar0">,
|
||||
Intrinsic<[], [], []>;
|
||||
def int_nvvm_barrier0_popc : GCCBuiltin<"__nvvm_bar0_popc">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
|
||||
def int_nvvm_barrier0_and : GCCBuiltin<"__nvvm_bar0_and">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
|
||||
def int_nvvm_barrier0_or : GCCBuiltin<"__nvvm_bar0_or">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
|
||||
|
||||
// Membar
|
||||
def int_nvvm_membar_cta : GCCBuiltin<"__nvvm_membar_cta">,
|
||||
Intrinsic<[], [], []>;
|
||||
def int_nvvm_membar_gl : GCCBuiltin<"__nvvm_membar_gl">,
|
||||
Intrinsic<[], [], []>;
|
||||
def int_nvvm_membar_sys : GCCBuiltin<"__nvvm_membar_sys">,
|
||||
Intrinsic<[], [], []>;
|
||||
|
||||
|
||||
// Accessing special registers
|
||||
def int_nvvm_read_ptx_sreg_tid_x :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_tid_x">;
|
||||
def int_nvvm_read_ptx_sreg_tid_y :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_tid_y">;
|
||||
def int_nvvm_read_ptx_sreg_tid_z :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_tid_z">;
|
||||
|
||||
def int_nvvm_read_ptx_sreg_ntid_x :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_x">;
|
||||
def int_nvvm_read_ptx_sreg_ntid_y :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_y">;
|
||||
def int_nvvm_read_ptx_sreg_ntid_z :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_z">;
|
||||
|
||||
def int_nvvm_read_ptx_sreg_ctaid_x :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_x">;
|
||||
def int_nvvm_read_ptx_sreg_ctaid_y :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_y">;
|
||||
def int_nvvm_read_ptx_sreg_ctaid_z :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_z">;
|
||||
|
||||
def int_nvvm_read_ptx_sreg_nctaid_x :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_x">;
|
||||
def int_nvvm_read_ptx_sreg_nctaid_y :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_y">;
|
||||
def int_nvvm_read_ptx_sreg_nctaid_z :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_z">;
|
||||
|
||||
def int_nvvm_read_ptx_sreg_warpsize :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_warpsize">;
|
||||
|
||||
|
||||
// Generated within nvvm. Use for ldu on sm_20 or later
|
||||
// @TODO: Revisit this, Changed LLVMAnyPointerType to LLVMPointerType
|
||||
def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
|
||||
[LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
|
||||
"llvm.nvvm.ldu.global.i">;
|
||||
def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
|
||||
[LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
|
||||
"llvm.nvvm.ldu.global.f">;
|
||||
def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
|
||||
[LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
|
||||
"llvm.nvvm.ldu.global.p">;
|
||||
|
||||
|
||||
// Use for generic pointers
|
||||
// - These intrinsics are used to convert address spaces.
|
||||
// - The input pointer and output pointer must have the same type, except for
|
||||
// the address-space. (This restriction is not enforced here as there is
|
||||
// currently no way to describe it).
|
||||
// - This complements the llvm bitcast, which can be used to cast one type
|
||||
// of pointer to another type of pointer, while the address space remains
|
||||
// the same.
|
||||
def int_nvvm_ptr_local_to_gen: Intrinsic<[llvm_anyptr_ty],
|
||||
[llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
|
||||
"llvm.nvvm.ptr.local.to.gen">;
|
||||
def int_nvvm_ptr_shared_to_gen: Intrinsic<[llvm_anyptr_ty],
|
||||
[llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
|
||||
"llvm.nvvm.ptr.shared.to.gen">;
|
||||
def int_nvvm_ptr_global_to_gen: Intrinsic<[llvm_anyptr_ty],
|
||||
[llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
|
||||
"llvm.nvvm.ptr.global.to.gen">;
|
||||
def int_nvvm_ptr_constant_to_gen: Intrinsic<[llvm_anyptr_ty],
|
||||
[llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
|
||||
"llvm.nvvm.ptr.constant.to.gen">;
|
||||
|
||||
def int_nvvm_ptr_gen_to_global: Intrinsic<[llvm_anyptr_ty],
|
||||
[llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
|
||||
"llvm.nvvm.ptr.gen.to.global">;
|
||||
def int_nvvm_ptr_gen_to_shared: Intrinsic<[llvm_anyptr_ty],
|
||||
[llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
|
||||
"llvm.nvvm.ptr.gen.to.shared">;
|
||||
def int_nvvm_ptr_gen_to_local: Intrinsic<[llvm_anyptr_ty],
|
||||
[llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
|
||||
"llvm.nvvm.ptr.gen.to.local">;
|
||||
def int_nvvm_ptr_gen_to_constant: Intrinsic<[llvm_anyptr_ty],
|
||||
[llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
|
||||
"llvm.nvvm.ptr.gen.to.constant">;
|
||||
|
||||
// Used in nvvm internally to help address space opt and ptx code generation
|
||||
// This is for params that are passed to kernel functions by pointer by-val.
|
||||
def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty],
|
||||
[llvm_anyptr_ty],
|
||||
[IntrNoMem, NoCapture<0>],
|
||||
"llvm.nvvm.ptr.gen.to.param">;
|
||||
|
||||
// Move intrinsics, used in nvvm internally
|
||||
|
||||
def int_nvvm_move_i8 : Intrinsic<[llvm_i8_ty], [llvm_i8_ty], [IntrNoMem],
|
||||
"llvm.nvvm.move.i8">;
|
||||
def int_nvvm_move_i16 : Intrinsic<[llvm_i16_ty], [llvm_i16_ty], [IntrNoMem],
|
||||
"llvm.nvvm.move.i16">;
|
||||
def int_nvvm_move_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem],
|
||||
"llvm.nvvm.move.i32">;
|
||||
def int_nvvm_move_i64 : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem],
|
||||
"llvm.nvvm.move.i64">;
|
||||
def int_nvvm_move_float : Intrinsic<[llvm_float_ty], [llvm_float_ty],
|
||||
[IntrNoMem], "llvm.nvvm.move.float">;
|
||||
def int_nvvm_move_double : Intrinsic<[llvm_double_ty], [llvm_double_ty],
|
||||
[IntrNoMem], "llvm.nvvm.move.double">;
|
||||
def int_nvvm_move_ptr : Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty],
|
||||
[IntrNoMem, NoCapture<0>], "llvm.nvvm.move.ptr">;
|
||||
|
||||
|
||||
/// Error / Warn
|
||||
def int_nvvm_compiler_error :
|
||||
Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.error">;
|
||||
def int_nvvm_compiler_warn :
|
||||
Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.warn">;
|
@ -40,6 +40,8 @@ const char *Triple::getArchTypeName(ArchType Kind) {
|
||||
case mblaze: return "mblaze";
|
||||
case ptx32: return "ptx32";
|
||||
case ptx64: return "ptx64";
|
||||
case nvptx: return "nvptx";
|
||||
case nvptx64: return "nvptx64";
|
||||
case le32: return "le32";
|
||||
case amdil: return "amdil";
|
||||
}
|
||||
@ -76,6 +78,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
|
||||
|
||||
case ptx32: return "ptx";
|
||||
case ptx64: return "ptx";
|
||||
case nvptx: return "nvptx";
|
||||
case nvptx64: return "nvptx";
|
||||
case le32: return "le32";
|
||||
case amdil: return "amdil";
|
||||
}
|
||||
@ -162,6 +166,8 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
|
||||
.Case("xcore", xcore)
|
||||
.Case("ptx32", ptx32)
|
||||
.Case("ptx64", ptx64)
|
||||
.Case("nvptx", nvptx)
|
||||
.Case("nvptx64", nvptx64)
|
||||
.Case("le32", le32)
|
||||
.Case("amdil", amdil)
|
||||
.Default(UnknownArch);
|
||||
@ -194,6 +200,8 @@ Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) {
|
||||
.Case("r600", Triple::r600)
|
||||
.Case("ptx32", Triple::ptx32)
|
||||
.Case("ptx64", Triple::ptx64)
|
||||
.Case("nvptx", Triple::nvptx)
|
||||
.Case("nvptx64", Triple::nvptx64)
|
||||
.Case("amdil", Triple::amdil)
|
||||
.Default(Triple::UnknownArch);
|
||||
}
|
||||
@ -217,6 +225,8 @@ const char *Triple::getArchNameForAssembler() {
|
||||
.Case("r600", "r600")
|
||||
.Case("ptx32", "ptx32")
|
||||
.Case("ptx64", "ptx64")
|
||||
.Case("nvptx", "nvptx")
|
||||
.Case("nvptx64", "nvptx64")
|
||||
.Case("le32", "le32")
|
||||
.Case("amdil", "amdil")
|
||||
.Default(NULL);
|
||||
@ -251,6 +261,8 @@ static Triple::ArchType parseArch(StringRef ArchName) {
|
||||
.Case("xcore", Triple::xcore)
|
||||
.Case("ptx32", Triple::ptx32)
|
||||
.Case("ptx64", Triple::ptx64)
|
||||
.Case("nvptx", Triple::nvptx)
|
||||
.Case("nvptx64", Triple::nvptx64)
|
||||
.Case("le32", Triple::le32)
|
||||
.Case("amdil", Triple::amdil)
|
||||
.Default(Triple::UnknownArch);
|
||||
@ -652,6 +664,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
|
||||
case llvm::Triple::mblaze:
|
||||
case llvm::Triple::mips:
|
||||
case llvm::Triple::mipsel:
|
||||
case llvm::Triple::nvptx:
|
||||
case llvm::Triple::ppc:
|
||||
case llvm::Triple::ptx32:
|
||||
case llvm::Triple::r600:
|
||||
@ -664,6 +677,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
|
||||
|
||||
case llvm::Triple::mips64:
|
||||
case llvm::Triple::mips64el:
|
||||
case llvm::Triple::nvptx64:
|
||||
case llvm::Triple::ppc64:
|
||||
case llvm::Triple::ptx64:
|
||||
case llvm::Triple::sparcv9:
|
||||
@ -701,6 +715,7 @@ Triple Triple::get32BitArchVariant() const {
|
||||
case Triple::mblaze:
|
||||
case Triple::mips:
|
||||
case Triple::mipsel:
|
||||
case Triple::nvptx:
|
||||
case Triple::ppc:
|
||||
case Triple::ptx32:
|
||||
case Triple::r600:
|
||||
@ -714,6 +729,7 @@ Triple Triple::get32BitArchVariant() const {
|
||||
|
||||
case Triple::mips64: T.setArch(Triple::mips); break;
|
||||
case Triple::mips64el: T.setArch(Triple::mipsel); break;
|
||||
case Triple::nvptx64: T.setArch(Triple::nvptx); break;
|
||||
case Triple::ppc64: T.setArch(Triple::ppc); break;
|
||||
case Triple::ptx64: T.setArch(Triple::ptx32); break;
|
||||
case Triple::sparcv9: T.setArch(Triple::sparc); break;
|
||||
@ -742,6 +758,7 @@ Triple Triple::get64BitArchVariant() const {
|
||||
|
||||
case Triple::mips64:
|
||||
case Triple::mips64el:
|
||||
case Triple::nvptx64:
|
||||
case Triple::ppc64:
|
||||
case Triple::ptx64:
|
||||
case Triple::sparcv9:
|
||||
@ -751,6 +768,7 @@ Triple Triple::get64BitArchVariant() const {
|
||||
|
||||
case Triple::mips: T.setArch(Triple::mips64); break;
|
||||
case Triple::mipsel: T.setArch(Triple::mips64el); break;
|
||||
case Triple::nvptx: T.setArch(Triple::nvptx64); break;
|
||||
case Triple::ppc: T.setArch(Triple::ppc64); break;
|
||||
case Triple::ptx32: T.setArch(Triple::ptx64); break;
|
||||
case Triple::sparc: T.setArch(Triple::sparcv9); break;
|
||||
|
@ -16,7 +16,7 @@
|
||||
;===------------------------------------------------------------------------===;
|
||||
|
||||
[common]
|
||||
subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 Mips PTX PowerPC Sparc X86 XCore
|
||||
subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PTX PowerPC Sparc X86 XCore
|
||||
|
||||
; This is a special group whose required libraries are extended (by llvm-build)
|
||||
; with the best execution engine (the native JIT, if available, or the
|
||||
|
33
lib/Target/NVPTX/CMakeLists.txt
Normal file
33
lib/Target/NVPTX/CMakeLists.txt
Normal file
@ -0,0 +1,33 @@
|
||||
set(LLVM_TARGET_DEFINITIONS NVPTX.td)
|
||||
|
||||
|
||||
tablegen(LLVM NVPTXGenRegisterInfo.inc -gen-register-info)
|
||||
tablegen(LLVM NVPTXGenInstrInfo.inc -gen-instr-info)
|
||||
tablegen(LLVM NVPTXGenAsmWriter.inc -gen-asm-writer)
|
||||
tablegen(LLVM NVPTXGenDAGISel.inc -gen-dag-isel)
|
||||
tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget)
|
||||
add_public_tablegen_target(NVPTXCommonTableGen)
|
||||
|
||||
set(NVPTXCodeGen_sources
|
||||
NVPTXFrameLowering.cpp
|
||||
NVPTXInstrInfo.cpp
|
||||
NVPTXISelDAGToDAG.cpp
|
||||
NVPTXISelLowering.cpp
|
||||
NVPTXRegisterInfo.cpp
|
||||
NVPTXSubtarget.cpp
|
||||
NVPTXTargetMachine.cpp
|
||||
NVPTXSplitBBatBar.cpp
|
||||
NVPTXLowerAggrCopies.cpp
|
||||
NVPTXutil.cpp
|
||||
NVPTXAllocaHoisting.cpp
|
||||
NVPTXAsmPrinter.cpp
|
||||
NVPTXUtilities.cpp
|
||||
VectorElementize.cpp
|
||||
)
|
||||
|
||||
add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
|
||||
|
||||
|
||||
add_subdirectory(TargetInfo)
|
||||
add_subdirectory(InstPrinter)
|
||||
add_subdirectory(MCTargetDesc)
|
7
lib/Target/NVPTX/InstPrinter/CMakeLists.txt
Normal file
7
lib/Target/NVPTX/InstPrinter/CMakeLists.txt
Normal file
@ -0,0 +1,7 @@
|
||||
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
|
||||
|
||||
add_llvm_library(LLVMNVPTXAsmPrinter
|
||||
NVPTXInstPrinter.cpp
|
||||
)
|
||||
|
||||
add_dependencies(LLVMNVPTXAsmPrinter NVPTXCommonTableGen)
|
23
lib/Target/NVPTX/InstPrinter/LLVMBuild.txt
Normal file
23
lib/Target/NVPTX/InstPrinter/LLVMBuild.txt
Normal file
@ -0,0 +1,23 @@
|
||||
;===- ./lib/Target/NVPTX/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===;
|
||||
;
|
||||
; The LLVM Compiler Infrastructure
|
||||
;
|
||||
; This file is distributed under the University of Illinois Open Source
|
||||
; License. See LICENSE.TXT for details.
|
||||
;
|
||||
;===------------------------------------------------------------------------===;
|
||||
;
|
||||
; This is an LLVMBuild description file for the components in this subdirectory.
|
||||
;
|
||||
; For more information on the LLVMBuild system, please see:
|
||||
;
|
||||
; http://llvm.org/docs/LLVMBuild.html
|
||||
;
|
||||
;===------------------------------------------------------------------------===;
|
||||
|
||||
[component_0]
|
||||
type = Library
|
||||
name = NVPTXAsmPrinter
|
||||
parent = NVPTX
|
||||
required_libraries = MC Support
|
||||
add_to_library_groups = NVPTX
|
15
lib/Target/NVPTX/InstPrinter/Makefile
Normal file
15
lib/Target/NVPTX/InstPrinter/Makefile
Normal file
@ -0,0 +1,15 @@
|
||||
##===- lib/Target/NVPTX/AsmPrinter/Makefile ----------------*- Makefile -*-===##
|
||||
#
|
||||
# The LLVM Compiler Infrastructure
|
||||
#
|
||||
# This file is distributed under the University of Illinois Open Source
|
||||
# License. See LICENSE.TXT for details.
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
LEVEL = ../../../..
|
||||
LIBRARYNAME = LLVMNVPTXAsmPrinter
|
||||
|
||||
# Hack: we need to include 'main' ptx target directory to grab private headers
|
||||
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
|
||||
|
||||
include $(LEVEL)/Makefile.common
|
1
lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
Normal file
1
lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
Normal file
@ -0,0 +1 @@
|
||||
// Placeholder
|
32
lib/Target/NVPTX/LLVMBuild.txt
Normal file
32
lib/Target/NVPTX/LLVMBuild.txt
Normal file
@ -0,0 +1,32 @@
|
||||
;===- ./lib/Target/NVPTX/LLVMBuild.txt -------------------------*- Conf -*--===;
|
||||
;
|
||||
; The LLVM Compiler Infrastructure
|
||||
;
|
||||
; This file is distributed under the University of Illinois Open Source
|
||||
; License. See LICENSE.TXT for details.
|
||||
;
|
||||
;===------------------------------------------------------------------------===;
|
||||
;
|
||||
; This is an LLVMBuild description file for the components in this subdirectory.
|
||||
;
|
||||
; For more information on the LLVMBuild system, please see:
|
||||
;
|
||||
; http://llvm.org/docs/LLVMBuild.html
|
||||
;
|
||||
;===------------------------------------------------------------------------===;
|
||||
|
||||
[common]
|
||||
subdirectories = InstPrinter MCTargetDesc TargetInfo
|
||||
|
||||
[component_0]
|
||||
type = TargetGroup
|
||||
name = NVPTX
|
||||
parent = Target
|
||||
has_asmprinter = 1
|
||||
|
||||
[component_1]
|
||||
type = Library
|
||||
name = NVPTXCodeGen
|
||||
parent = NVPTX
|
||||
required_libraries = Analysis AsmPrinter CodeGen Core MC NVPTXDesc NVPTXInfo SelectionDAG Support Target TransformUtils
|
||||
add_to_library_groups = NVPTX
|
9
lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt
Normal file
9
lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt
Normal file
@ -0,0 +1,9 @@
|
||||
add_llvm_library(LLVMNVPTXDesc
|
||||
NVPTXMCAsmInfo.cpp
|
||||
NVPTXMCTargetDesc.cpp
|
||||
)
|
||||
|
||||
add_dependencies(LLVMNVPTXDesc NVPTXCommonTableGen)
|
||||
|
||||
# Hack: we need to include 'main' target directory to grab private headers
|
||||
#include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
|
23
lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt
Normal file
23
lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt
Normal file
@ -0,0 +1,23 @@
|
||||
;===- ./lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
|
||||
;
|
||||
; The LLVM Compiler Infrastructure
|
||||
;
|
||||
; This file is distributed under the University of Illinois Open Source
|
||||
; License. See LICENSE.TXT for details.
|
||||
;
|
||||
;===------------------------------------------------------------------------===;
|
||||
;
|
||||
; This is an LLVMBuild description file for the components in this subdirectory.
|
||||
;
|
||||
; For more information on the LLVMBuild system, please see:
|
||||
;
|
||||
; http://llvm.org/docs/LLVMBuild.html
|
||||
;
|
||||
;===------------------------------------------------------------------------===;
|
||||
|
||||
[component_0]
|
||||
type = Library
|
||||
name = NVPTXDesc
|
||||
parent = NVPTX
|
||||
required_libraries = MC NVPTXAsmPrinter NVPTXInfo Support
|
||||
add_to_library_groups = NVPTX
|
16
lib/Target/NVPTX/MCTargetDesc/Makefile
Normal file
16
lib/Target/NVPTX/MCTargetDesc/Makefile
Normal file
@ -0,0 +1,16 @@
|
||||
##===- lib/Target/NVPTX/TargetDesc/Makefile ----------------*- Makefile -*-===##
|
||||
#
|
||||
# The LLVM Compiler Infrastructure
|
||||
#
|
||||
# This file is distributed under the University of Illinois Open Source
|
||||
# License. See LICENSE.TXT for details.
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
|
||||
LEVEL = ../../../..
|
||||
LIBRARYNAME = LLVMNVPTXDesc
|
||||
|
||||
# Hack: we need to include 'main' target directory to grab private headers
|
||||
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
|
||||
|
||||
include $(LEVEL)/Makefile.common
|
88
lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
Normal file
88
lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
Normal file
@ -0,0 +1,88 @@
|
||||
//===-- NVPTXBaseInfo.h - Top-level definitions for NVPTX -------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains small standalone helper functions and enum definitions for
|
||||
// the NVPTX target useful for the compiler back-end and the MC libraries.
|
||||
// As such, it deliberately does not include references to LLVM core
|
||||
// code gen types, passes, etc..
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef NVPTXBASEINFO_H
|
||||
#define NVPTXBASEINFO_H
|
||||
|
||||
namespace llvm {
|
||||
|
||||
enum AddressSpace {
|
||||
ADDRESS_SPACE_GENERIC = 0,
|
||||
ADDRESS_SPACE_GLOBAL = 1,
|
||||
ADDRESS_SPACE_CONST_NOT_GEN = 2, // Not part of generic space
|
||||
ADDRESS_SPACE_SHARED = 3,
|
||||
ADDRESS_SPACE_CONST = 4,
|
||||
ADDRESS_SPACE_LOCAL = 5,
|
||||
|
||||
// NVVM Internal
|
||||
ADDRESS_SPACE_PARAM = 101
|
||||
};
|
||||
|
||||
enum PropertyAnnotation {
|
||||
PROPERTY_MAXNTID_X = 0,
|
||||
PROPERTY_MAXNTID_Y,
|
||||
PROPERTY_MAXNTID_Z,
|
||||
PROPERTY_REQNTID_X,
|
||||
PROPERTY_REQNTID_Y,
|
||||
PROPERTY_REQNTID_Z,
|
||||
PROPERTY_MINNCTAPERSM,
|
||||
PROPERTY_ISTEXTURE,
|
||||
PROPERTY_ISSURFACE,
|
||||
PROPERTY_ISSAMPLER,
|
||||
PROPERTY_ISREADONLY_IMAGE_PARAM,
|
||||
PROPERTY_ISWRITEONLY_IMAGE_PARAM,
|
||||
PROPERTY_ISKERNEL_FUNCTION,
|
||||
PROPERTY_ALIGN,
|
||||
|
||||
// last property
|
||||
PROPERTY_LAST
|
||||
};
|
||||
|
||||
const unsigned AnnotationNameLen = 8; // length of each annotation name
|
||||
const char
|
||||
PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
|
||||
"maxntidx", // PROPERTY_MAXNTID_X
|
||||
"maxntidy", // PROPERTY_MAXNTID_Y
|
||||
"maxntidz", // PROPERTY_MAXNTID_Z
|
||||
"reqntidx", // PROPERTY_REQNTID_X
|
||||
"reqntidy", // PROPERTY_REQNTID_Y
|
||||
"reqntidz", // PROPERTY_REQNTID_Z
|
||||
"minctasm", // PROPERTY_MINNCTAPERSM
|
||||
"texture", // PROPERTY_ISTEXTURE
|
||||
"surface", // PROPERTY_ISSURFACE
|
||||
"sampler", // PROPERTY_ISSAMPLER
|
||||
"rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM
|
||||
"wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM
|
||||
"kernel", // PROPERTY_ISKERNEL_FUNCTION
|
||||
"align", // PROPERTY_ALIGN
|
||||
|
||||
// last property
|
||||
"proplast", // PROPERTY_LAST
|
||||
};
|
||||
|
||||
// name of named metadata used for global annotations
|
||||
#if defined(__GNUC__)
|
||||
// As this is declared to be static but some of the .cpp files that
|
||||
// include NVVM.h do not use this array, gcc gives a warning when
|
||||
// compiling those .cpp files, hence __attribute__((unused)).
|
||||
__attribute__((unused))
|
||||
#endif
|
||||
static const char* NamedMDForAnnotations = "nvvm.annotations";
|
||||
|
||||
}
|
||||
|
||||
|
||||
#endif
|
63
lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
Normal file
63
lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
Normal file
@ -0,0 +1,63 @@
|
||||
//===-- NVPTXMCAsmInfo.cpp - NVPTX asm properties -------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the declarations of the NVPTXMCAsmInfo properties.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "NVPTXMCAsmInfo.h"
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Backing storage for the -debug-compile option below.  Written by cl::opt
// via cl::location; read when constructing NVPTXMCAsmInfo (it feeds
// SupportsDebugInformation).
bool CompileForDebugging;

// -debug-compile - Command line option to inform opt and llc passes to
// compile for debugging
static cl::opt<bool, true>
Debug("debug-compile", cl::desc("Compile for debugging"), cl::Hidden,
      cl::location(CompileForDebugging),
      cl::init(false));
|
||||
|
||||
// Out-of-line virtual method anchor: pins NVPTXMCAsmInfo's vtable to this
// translation unit so it is emitted exactly once.
void NVPTXMCAsmInfo::anchor() { }
|
||||
|
||||
// Configure MCAsmInfo for PTX output: PTX uses C++-style comments, has no
// ELF-style directives (.set/.file/.type/.size), and spells its data
// directives as .b8/.b16/.b32/.b64.  On the nvptx64 triple the pointer size
// is widened to 8 bytes.
NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) {
  Triple TheTriple(TT);
  // 64-bit PTX variant uses 8-byte pointers.
  if (TheTriple.getArch() == Triple::nvptx64)
    PointerSize = 8;

  CommentString = "//";         // PTX comment syntax

  AllowPeriodsInName = false;   // '.' is not legal inside PTX identifiers

  // PTX has no .set or single-parameter .file directives.
  HasSetDirective = false;
  HasSingleParameterDotFile = false;

  // Markers emitted (as comments) around inline-asm blocks.
  InlineAsmStart = " inline asm";
  InlineAsmEnd = " inline asm";

  // Debug info is emitted only when -debug-compile was given (see the
  // CompileForDebugging flag above).
  SupportsDebugInformation = CompileForDebugging;
  HasDotTypeDotSizeDirective = false;

  // PTX data directives.  There is no dedicated zero/ascii form; all byte
  // data goes out via .b8.
  Data8bitsDirective = " .b8 ";
  Data16bitsDirective = " .b16 ";
  Data32bitsDirective = " .b32 ";
  Data64bitsDirective = " .b64 ";
  // NOTE: the original code first assigned "$L__" here and then immediately
  // overwrote it with "" below; the dead store has been removed and the
  // effective value ("") kept.
  PrivateGlobalPrefix = "";
  ZeroDirective = " .b8";
  AsciiDirective = " .b8";
  AscizDirective = " .b8";

  // @TODO: Can we just disable this?
  GlobalDirective = "\t// .globl\t";
}
|
30
lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
Normal file
30
lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
Normal file
@ -0,0 +1,30 @@
|
||||
//===-- NVPTXMCAsmInfo.h - NVPTX asm properties ----------------*- C++ -*--===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the declaration of the NVPTXMCAsmInfo class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef NVPTX_MCASM_INFO_H
#define NVPTX_MCASM_INFO_H

#include "llvm/MC/MCAsmInfo.h"

namespace llvm {
class Target;
class StringRef;

// Assembly-writer configuration for the NVPTX targets (comment string,
// data directives, directive availability).  The actual settings are made
// in the constructor, defined in NVPTXMCAsmInfo.cpp.
class NVPTXMCAsmInfo : public MCAsmInfo {
  virtual void anchor(); // out-of-line anchor pinning the vtable
public:
  explicit NVPTXMCAsmInfo(const Target &T, const StringRef &TT);
};
} // namespace llvm

#endif // NVPTX_MCASM_INFO_H
|
91
lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
Normal file
91
lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
Normal file
@ -0,0 +1,91 @@
|
||||
//===-- NVPTXMCTargetDesc.cpp - NVPTX Target Descriptions -------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file provides NVPTX specific target descriptions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "NVPTXMCTargetDesc.h"
|
||||
#include "NVPTXMCAsmInfo.h"
|
||||
#include "llvm/MC/MCCodeGenInfo.h"
|
||||
#include "llvm/MC/MCInstrInfo.h"
|
||||
#include "llvm/MC/MCRegisterInfo.h"
|
||||
#include "llvm/MC/MCSubtargetInfo.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
|
||||
#define GET_INSTRINFO_MC_DESC
|
||||
#include "NVPTXGenInstrInfo.inc"
|
||||
|
||||
#define GET_SUBTARGETINFO_MC_DESC
|
||||
#include "NVPTXGenSubtargetInfo.inc"
|
||||
|
||||
#define GET_REGINFO_MC_DESC
|
||||
#include "NVPTXGenRegisterInfo.inc"
|
||||
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Build the MC-layer instruction-info table for NVPTX.  The table data is
// TableGen-generated; InitNVPTXMCInstrInfo (from NVPTXGenInstrInfo.inc)
// populates the freshly allocated object.
static MCInstrInfo *createNVPTXMCInstrInfo() {
  MCInstrInfo *Info = new MCInstrInfo();
  InitNVPTXMCInstrInfo(Info);
  return Info;
}
|
||||
|
||||
// Build the MC-layer register-info table for NVPTX.
static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) {
  MCRegisterInfo *RegInfo = new MCRegisterInfo();
  // PTX does not have a return address register, hence RA = 0.
  InitNVPTXMCRegisterInfo(RegInfo, 0);
  return RegInfo;
}
|
||||
|
||||
// Build the MC-layer subtarget description from the triple, CPU name and
// feature string.
static MCSubtargetInfo *createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
                                                   StringRef FS) {
  MCSubtargetInfo *STI = new MCSubtargetInfo();
  InitNVPTXMCSubtargetInfo(STI, TT, CPU, FS);
  return STI;
}
|
||||
|
||||
// Build the MC-layer codegen configuration (relocation model, code model,
// optimization level) for NVPTX.
static MCCodeGenInfo *createNVPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM,
                                               CodeModel::Model CM,
                                               CodeGenOpt::Level OL) {
  MCCodeGenInfo *CGI = new MCCodeGenInfo();
  CGI->InitMCCodeGenInfo(RM, CM, OL);
  return CGI;
}
|
||||
|
||||
|
||||
// Force static initialization.
// Entry point invoked by the target registry; registers every MC-layer
// component (asm info, codegen info, instruction/register/subtarget info)
// for both the 32-bit and 64-bit NVPTX targets.
extern "C" void LLVMInitializeNVPTXTargetMC() {
  // Register the MC asm info.
  RegisterMCAsmInfo<NVPTXMCAsmInfo> X(TheNVPTXTarget32);
  RegisterMCAsmInfo<NVPTXMCAsmInfo> Y(TheNVPTXTarget64);

  // Register the MC codegen info.
  TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget32,
                                        createNVPTXMCCodeGenInfo);
  TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget64,
                                        createNVPTXMCCodeGenInfo);

  // Register the MC instruction info.
  TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget32, createNVPTXMCInstrInfo);
  TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget64, createNVPTXMCInstrInfo);

  // Register the MC register info.
  TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget32,
                                    createNVPTXMCRegisterInfo);
  TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget64,
                                    createNVPTXMCRegisterInfo);

  // Register the MC subtarget info.
  TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget32,
                                          createNVPTXMCSubtargetInfo);
  TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget64,
                                          createNVPTXMCSubtargetInfo);

}
|
36
lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
Normal file
36
lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
Normal file
@ -0,0 +1,36 @@
|
||||
//===-- NVPTXMCTargetDesc.h - NVPTX Target Descriptions ---------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file provides NVPTX specific target descriptions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef NVPTXMCTARGETDESC_H
#define NVPTXMCTARGETDESC_H

namespace llvm {
class Target;

// The two registered NVPTX targets (32-bit and 64-bit PTX), defined in the
// TargetInfo library.
extern Target TheNVPTXTarget32;
extern Target TheNVPTXTarget64;

} // End llvm namespace

// Defines symbolic names for PTX registers.
#define GET_REGINFO_ENUM
#include "NVPTXGenRegisterInfo.inc"

// Defines symbolic names for the PTX instructions.
#define GET_INSTRINFO_ENUM
#include "NVPTXGenInstrInfo.inc"

// Defines symbolic names for the subtarget features.
#define GET_SUBTARGETINFO_ENUM
#include "NVPTXGenSubtargetInfo.inc"

#endif
|
23
lib/Target/NVPTX/Makefile
Normal file
23
lib/Target/NVPTX/Makefile
Normal file
@ -0,0 +1,23 @@
|
||||
##===- lib/Target/NVPTX/Makefile ---------------------------*- Makefile -*-===##
|
||||
#
|
||||
# The LLVM Compiler Infrastructure
|
||||
#
|
||||
# This file is distributed under the University of Illinois Open Source
|
||||
# License. See LICENSE.TXT for details.
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
|
||||
# Path back to the LLVM top-level directory.
LEVEL = ../../..
LIBRARYNAME = LLVMNVPTXCodeGen
TARGET = NVPTX

# Make sure that tblgen is run, first thing.
BUILT_SOURCES = NVPTXGenAsmWriter.inc \
                NVPTXGenDAGISel.inc \
                NVPTXGenInstrInfo.inc \
                NVPTXGenRegisterInfo.inc \
                NVPTXGenSubtargetInfo.inc

# Sub-libraries built alongside the main codegen library.
DIRS = InstPrinter TargetInfo MCTargetDesc

include $(LEVEL)/Makefile.common
|
49
lib/Target/NVPTX/ManagedStringPool.h
Normal file
49
lib/Target/NVPTX/ManagedStringPool.h
Normal file
@ -0,0 +1,49 @@
|
||||
//===-- ManagedStringPool.h - Managed String Pool ---------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// The strings allocated from a managed string pool are owned by the string
|
||||
// pool and will be deleted together with the managed string pool.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
#ifndef LLVM_SUPPORT_MANAGED_STRING_H
|
||||
#define LLVM_SUPPORT_MANAGED_STRING_H
|
||||
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include <string>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
/// ManagedStringPool - The strings allocated from a managed string pool are
|
||||
/// owned by the string pool and will be deleted together with the managed
|
||||
/// string pool.
|
||||
class ManagedStringPool {
|
||||
SmallVector<std::string *, 8> Pool;
|
||||
|
||||
public:
|
||||
ManagedStringPool() {}
|
||||
~ManagedStringPool() {
|
||||
SmallVector<std::string *, 8>::iterator Current = Pool.begin();
|
||||
while (Current != Pool.end()) {
|
||||
delete *Current;
|
||||
Current++;
|
||||
}
|
||||
}
|
||||
|
||||
std::string *getManagedString(const char *S) {
|
||||
std::string *Str = new std::string(S);
|
||||
Pool.push_back(Str);
|
||||
return Str;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
137
lib/Target/NVPTX/NVPTX.h
Normal file
137
lib/Target/NVPTX/NVPTX.h
Normal file
@ -0,0 +1,137 @@
|
||||
//===-- NVPTX.h - Top-level interface for NVPTX representation --*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the entry points for global functions defined in
|
||||
// the LLVM NVPTX back-end.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TARGET_NVPTX_H
|
||||
#define LLVM_TARGET_NVPTX_H
|
||||
|
||||
#include <cassert>
|
||||
#include <iosfwd>
|
||||
#include "llvm/Value.h"
|
||||
#include "llvm/Module.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
#include "MCTargetDesc/NVPTXBaseInfo.h"
|
||||
|
||||
namespace llvm {
|
||||
class NVPTXTargetMachine;
|
||||
class FunctionPass;
|
||||
class formatted_raw_ostream;
|
||||
|
||||
namespace NVPTXCC {
// Condition codes used by NVPTX compare/branch instructions.
enum CondCodes {
  EQ,
  NE,
  LT,
  LE,
  GT,
  GE
};
}

// Map an NVPTX condition code to its PTX spelling (e.g. NVPTXCC::LT -> "lt").
// An out-of-range value is a contract violation: it asserts in debug builds
// and returns "" in release builds.  (The original 'default:' label fell
// through into the NE case, silently yielding "ne" when assertions were
// disabled.)
inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
  switch (CC) {
  case NVPTXCC::EQ: return "eq";
  case NVPTXCC::NE: return "ne";
  case NVPTXCC::LT: return "lt";
  case NVPTXCC::LE: return "le";
  case NVPTXCC::GT: return "gt";
  case NVPTXCC::GE: return "ge";
  }
  assert(0 && "Unknown condition code");
  return "";
}
|
||||
|
||||
// Factory functions for the NVPTX codegen passes; each is defined in its
// own .cpp file within this backend.
FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
                                 llvm::CodeGenOpt::Level OptLevel);
FunctionPass *createVectorElementizePass(NVPTXTargetMachine &);
FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);

// Predicate over values; presumably true for image/sampler kernel arguments.
// Implementation lives elsewhere in the backend — confirm semantics there.
bool isImageOrSamplerVal(const Value *, const Module *);

// The two registered NVPTX targets: 32-bit and 64-bit PTX.
extern Target TheNVPTXTarget32;
extern Target TheNVPTXTarget64;
|
||||
|
||||
namespace NVPTX
{
// Which driver interface the generated PTX targets.
enum DrvInterface {
  NVCL,
  CUDA,
  TEST
};

// A field inside TSFlags needs a shift and a mask. The usage is
// always as follows :
// ((TSFlags & fieldMask) >> fieldShift)
// The enum keeps the mask, the shift, and all valid values of the
// field in one place.
enum VecInstType {
  VecInstTypeShift = 0,
  VecInstTypeMask = 0xF,

  VecNOP = 0,
  VecLoad = 1,
  VecStore = 2,
  VecBuild = 3,
  VecShuffle = 4,
  VecExtract = 5,
  VecInsert = 6,
  VecDest = 7,
  VecOther = 15
};

// Single-bit TSFlags field (bit 4) marking simple move instructions.
enum SimpleMove {
  SimpleMoveMask = 0x10,
  SimpleMoveShift = 4
};
// Single-bit TSFlags fields (bits 5 and 6) marking loads and stores.
enum LoadStore {
  isLoadMask = 0x20,
  isLoadShift = 5,
  isStoreMask = 0x40,
  isStoreShift = 6
};

// Operand encodings used by the PTX load/store instructions.
namespace PTXLdStInstCode {
// PTX state space the access targets.
enum AddressSpace{
  GENERIC = 0,
  GLOBAL = 1,
  CONSTANT = 2,
  SHARED = 3,
  PARAM = 4,
  LOCAL = 5
};
// How the accessed value is interpreted.
enum FromType {
  Unsigned = 0,
  Signed,
  Float
};
// Number of elements accessed per operation.
enum VecType {
  Scalar = 1,
  V2 = 2,
  V4 = 4
};
}
}
|
||||
} // end namespace llvm;
|
||||
|
||||
// Defines symbolic names for NVPTX registers. This defines a mapping from
|
||||
// register name to register number.
|
||||
#define GET_REGINFO_ENUM
|
||||
#include "NVPTXGenRegisterInfo.inc"
|
||||
|
||||
// Defines symbolic names for the NVPTX instructions.
|
||||
#define GET_INSTRINFO_ENUM
|
||||
#include "NVPTXGenInstrInfo.inc"
|
||||
|
||||
#endif
|
44
lib/Target/NVPTX/NVPTX.td
Normal file
44
lib/Target/NVPTX/NVPTX.td
Normal file
@ -0,0 +1,44 @@
|
||||
//===- NVPTX.td - Describe the NVPTX Target Machine -----------*- tblgen -*-==//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// This is the top level entry point for the NVPTX target.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Target-independent interfaces
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
include "llvm/Target/Target.td"

include "NVPTXRegisterInfo.td"
include "NVPTXInstrInfo.td"

//===----------------------------------------------------------------------===//
// Subtarget Features.
// - We use the SM version number instead of explicit feature table.
// - Need at least one feature to avoid generating zero sized array by
//   TableGen in NVPTXGenSubtarget.inc.
//===----------------------------------------------------------------------===//
def FeatureDummy : SubtargetFeature<"dummy", "dummy", "true", "">;

//===----------------------------------------------------------------------===//
// NVPTX supported processors.
//===----------------------------------------------------------------------===//

class Proc<string Name, list<SubtargetFeature> Features>
 : Processor<Name, NoItineraries, Features>;

// sm_10 is the only processor modeled here; it carries the dummy feature
// required to keep TableGen from emitting a zero-sized array (see above).
def : Proc<"sm_10", [FeatureDummy]>;


def NVPTXInstrInfo : InstrInfo {
}

// Top-level target definition tying the instruction set to the NVPTX target.
def NVPTX : Target {
  let InstructionSet = NVPTXInstrInfo;
}
|
48
lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
Normal file
48
lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
Normal file
@ -0,0 +1,48 @@
|
||||
//===-- AllocaHoisting.cpp - Hoist allocas to the entry block ---*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Hoist the alloca instructions in the non-entry blocks to the entry blocks.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Function.h"
|
||||
#include "llvm/Instructions.h"
|
||||
#include "llvm/Constants.h"
|
||||
#include "NVPTXAllocaHoisting.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
// Move every statically-sized alloca found in a non-entry block up to the
// entry block, inserting it just before the entry block's terminator.
// Returns true if anything was moved.
bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
  bool functionModified = false;
  Function::iterator I = function.begin();
  // Hoisted allocas are inserted immediately before the entry block's
  // terminator.  The post-increment also advances I so the loop below
  // starts at the second basic block.
  TerminatorInst *firstTerminatorInst = (I++)->getTerminator();

  for (Function::iterator E = function.end(); I != E; ++I) {
    for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
      // BI is advanced before any move so the iterator stays valid when the
      // alloca is unlinked from this block.
      AllocaInst *allocaInst = dyn_cast<AllocaInst>(BI++);
      // Only allocas with a constant array size are hoisted; dynamically
      // sized ones stay where they are.
      if (allocaInst && isa<ConstantInt>(allocaInst->getArraySize())) {
        allocaInst->moveBefore(firstTerminatorInst);
        functionModified = true;
      }
    }
  }

  return functionModified;
}
|
||||
|
||||
char NVPTXAllocaHoisting::ID = 1;
|
||||
RegisterPass<NVPTXAllocaHoisting> X("alloca-hoisting",
|
||||
"Hoisting alloca instructsion in non-entry "
|
||||
"blocks to the entry block");
|
||||
|
||||
FunctionPass *createAllocaHoisting() {
|
||||
return new NVPTXAllocaHoisting();
|
||||
}
|
||||
|
||||
} // end namespace llvm
|
49
lib/Target/NVPTX/NVPTXAllocaHoisting.h
Normal file
49
lib/Target/NVPTX/NVPTXAllocaHoisting.h
Normal file
@ -0,0 +1,49 @@
|
||||
//===-- AllocaHoisting.h - Hoist allocas to the entry block -----*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Hoist the alloca instructions in the non-entry blocks to the entry blocks.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef NVPTX_ALLOCA_HOISTING_H_
#define NVPTX_ALLOCA_HOISTING_H_

#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetData.h"

namespace llvm {

class FunctionPass;
class Function;

// Hoisting the alloca instructions in the non-entry blocks to the entry
// block.
class NVPTXAllocaHoisting : public FunctionPass {
public:
  static char ID; // Pass ID
  NVPTXAllocaHoisting() : FunctionPass(ID) {}

  // Requires TargetData; preserves the machine-function analysis.
  void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<TargetData>();
    AU.addPreserved<MachineFunctionAnalysis>();
  }

  virtual const char *getPassName() const {
    return "NVPTX specific alloca hoisting";
  }

  // Defined in NVPTXAllocaHoisting.cpp; returns true if any alloca was
  // moved.
  virtual bool runOnFunction(Function &function);
};

// Factory for the pass.
extern FunctionPass *createAllocaHoisting();

} // end namespace llvm

#endif // NVPTX_ALLOCA_HOISTING_H_
|
2068
lib/Target/NVPTX/NVPTXAsmPrinter.cpp
Normal file
2068
lib/Target/NVPTX/NVPTXAsmPrinter.cpp
Normal file
File diff suppressed because it is too large
Load Diff
318
lib/Target/NVPTX/NVPTXAsmPrinter.h
Normal file
318
lib/Target/NVPTX/NVPTXAsmPrinter.h
Normal file
@ -0,0 +1,318 @@
|
||||
//===-- NVPTXAsmPrinter.h - NVPTX LLVM assembly writer --------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains a printer that converts from our internal representation
|
||||
// of machine-dependent LLVM code to NVPTX assembly language.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef NVPTXASMPRINTER_H
|
||||
#define NVPTXASMPRINTER_H
|
||||
|
||||
#include "NVPTX.h"
|
||||
#include "NVPTXTargetMachine.h"
|
||||
#include "NVPTXSubtarget.h"
|
||||
#include "llvm/Function.h"
|
||||
#include "llvm/CodeGen/AsmPrinter.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/MC/MCAsmInfo.h"
|
||||
#include "llvm/MC/MCExpr.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
#include "llvm/MC/MCSymbol.h"
|
||||
#include "llvm/Support/FormattedStream.h"
|
||||
#include "llvm/Target/Mangler.h"
|
||||
#include "llvm/ADT/SmallString.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include <fstream>
|
||||
|
||||
// The PTX syntax and format are very different from those usually seen in a
// .s file, therefore we are not able to use the MCAsmStreamer interface here.
|
||||
//
|
||||
// We are handcrafting the output method here.
|
||||
//
|
||||
// A better approach is to clone the MCAsmStreamer to a MCPTXAsmStreamer
|
||||
// (subclass of MCStreamer).
|
||||
|
||||
// This is defined in AsmPrinter.cpp.
|
||||
// Used to process the constant expressions in initializers.
|
||||
namespace nvptx {
|
||||
const llvm::MCExpr *LowerConstant(const llvm::Constant *CV,
|
||||
llvm::AsmPrinter &AP) ;
|
||||
}
|
||||
|
||||
namespace llvm {
|
||||
|
||||
// Reads individual lines, by line number, from a source file.  Used by the
// asm printer to interleave original source text into the emitted PTX.
// readLine is implemented in NVPTXAsmPrinter.cpp.
class LineReader {
private:
  unsigned theCurLine ;            // line counter maintained by readLine
  std::ifstream fstr;              // stream over the file being read
  char buff[512];                  // scratch buffer — presumably used by readLine
  std::string theFileName;
  SmallVector<unsigned, 32> lineOffset;
public:
  // Opens 'filename'.  Note the open is not checked here; behavior on a
  // failed open depends on readLine's implementation in the .cpp.
  LineReader(std::string filename) {
    theCurLine = 0;
    fstr.open(filename.c_str());
    theFileName = filename;
  }
  std::string fileName() { return theFileName; }
  ~LineReader() {
    fstr.close();
  }
  // Return the text of line number 'line' (defined in NVPTXAsmPrinter.cpp).
  std::string readLine(unsigned line);
};
|
||||
|
||||
|
||||
|
||||
// NVPTXAsmPrinter - Converts machine-level IR into textual PTX.  Because PTX
// differs heavily from ordinary .s output, most emission is handcrafted here
// rather than going through MCAsmStreamer.
class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {

  // AggBuffer - Buffers the emitted initializer of a global aggregate.
  //
  // Normally an aggregate (array, vector or structure) is emitted as a
  // u8[].  However, if one element/field of the aggregate is a non-NULL
  // address, then the aggregate is emitted as u32[] or u64[].
  //
  // We first lay out the aggregate in 'buffer' in bytes, except for those
  // symbol addresses.  For the i-th symbol address in the aggregate, its
  // corresponding 4-byte or 8-byte elements in 'buffer' are filled with 0s.
  // symbolPosInBuffer[i-1] records its position in 'buffer', and
  // Symbols[i-1] records the Value*.
  //
  // Once we have this AggBuffer set up, we can choose how to print it out.
  class AggBuffer {
  public:
    unsigned size;                 // size of the buffer in bytes
    unsigned char *buffer;         // the buffer
    unsigned numSymbols;           // number of symbol addresses
    SmallVector<unsigned, 4> symbolPosInBuffer;
    SmallVector<Value *, 4> Symbols;

  private:
    unsigned curpos;               // next write offset into 'buffer'
    raw_ostream &O;
    NVPTXAsmPrinter &AP;

  public:
    AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP)
      : O(_O), AP(_AP) {
      buffer = new unsigned char[_size];
      size = _size;
      curpos = 0;
      numSymbols = 0;
    }
    ~AggBuffer() {
      delete [] buffer;
    }
    // Append Num bytes from Ptr, then zero-pad up to Bytes total.
    // Returns the new write position.
    unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) {
      assert((curpos + Num) <= size);
      assert((curpos + Bytes) <= size);
      for (int i = 0; i < Num; ++i) {
        buffer[curpos] = Ptr[i];
        curpos++;
      }
      for (int i = Num; i < Bytes; ++i) {
        buffer[curpos] = 0;
        curpos++;
      }
      return curpos;
    }
    // Append Num zero bytes; returns the new write position.
    unsigned addZeros(int Num) {
      assert((curpos + Num) <= size);
      for (int i = 0; i < Num; ++i) {
        buffer[curpos] = 0;
        curpos++;
      }
      return curpos;
    }
    // Record that a symbol address lives at the current write position.
    void addSymbol(Value *GVar) {
      symbolPosInBuffer.push_back(curpos);
      Symbols.push_back(GVar);
      numSymbols++;
    }
    // Emit the buffered initializer: a plain byte list when no symbols are
    // present, otherwise pointer-sized words with the recorded symbols
    // printed at their positions.
    void print() {
      if (numSymbols == 0) {
        // print out in bytes
        for (unsigned i = 0; i < size; i++) {
          if (i)
            O << ", ";
          O << (unsigned int)buffer[i];
        }
      } else {
        // print out in 4-bytes or 8-bytes, depending on pointer size
        unsigned int pos = 0;
        unsigned int nSym = 0;
        unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
        unsigned int nBytes = 4;
        if (AP.nvptxSubtarget.is64Bit())
          nBytes = 8;
        for (pos = 0; pos < size; pos += nBytes) {
          if (pos)
            O << ", ";
          if (pos == nextSymbolPos) {
            Value *v = Symbols[nSym];
            if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
              MCSymbol *Name = AP.Mang->getSymbol(GVar);
              O << *Name;
            } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
              O << *nvptx::LowerConstant(Cexpr, AP);
            } else
              assert(0 && "symbol type unknown");
            nSym++;
            if (nSym >= numSymbols)
              nextSymbolPos = size + 1; // past the end: no more symbols
            else
              nextSymbolPos = symbolPosInBuffer[nSym];
          } else if (nBytes == 4)
            O << *(unsigned int*)(buffer + pos);
          else
            O << *(unsigned long long*)(buffer + pos);
        }
      }
    }
  };

  friend class AggBuffer;

  virtual void emitSrcInText(StringRef filename, unsigned line);

private:
  virtual const char *getPassName() const {
    return "NVPTX Assembly Printer";
  }

  const Function *F;               // function currently being emitted
  std::string CurrentFnName;

  void EmitFunctionEntryLabel();
  void EmitFunctionBodyStart();
  void EmitFunctionBodyEnd();

  void EmitInstruction(const MachineInstr *);

  // Alignment is handled in the PTX declarations themselves, so the generic
  // alignment hook is deliberately a no-op.
  void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const {}

  void printGlobalVariable(const GlobalVariable *GVar);
  void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
                    const char *Modifier = 0);
  void printLdStCode(const MachineInstr *MI, int opNum, raw_ostream &O,
                     const char *Modifier = 0);
  void printVecModifiedImmediate(const MachineOperand &MO,
                                 const char *Modifier, raw_ostream &O);
  void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
                       const char *Modifier = 0);
  void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const;
  // definition autogenerated (TableGen).
  void printInstruction(const MachineInstr *MI, raw_ostream &O);
  void printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
                          bool = false);
  void printParamName(int paramIndex, raw_ostream &O);
  void printParamName(Function::const_arg_iterator I, int paramIndex,
                      raw_ostream &O);
  void emitHeader(Module &M, raw_ostream &O);
  void emitKernelFunctionDirectives(const Function &F,
                                    raw_ostream &O) const;
  void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O);
  void emitFunctionExternParamList(const MachineFunction &MF);
  void emitFunctionParamList(const Function *, raw_ostream &O);
  void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O);
  void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF);
  void emitFunctionTempData(const MachineFunction &MF,
                            unsigned &FrameSize);
  bool isImageType(const Type *Ty);
  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                       unsigned AsmVariant, const char *ExtraCode,
                       raw_ostream &);
  bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
                             unsigned AsmVariant, const char *ExtraCode,
                             raw_ostream &);
  void printReturnValStr(const Function *, raw_ostream &O);
  void printReturnValStr(const MachineFunction &MF, raw_ostream &O);

protected:
  bool doInitialization(Module &M);
  bool doFinalization(Module &M);

private:
  std::string CurrentBankselLabelInBasicBlock;

  // This is specific per MachineFunction.
  const MachineRegisterInfo *MRI;
  // The contents are specific for each MachineFunction.  But the size of
  // the array is not.
  std::map<unsigned, unsigned> *VRidGlobal2LocalMap;
  // cache the subtarget here.
  const NVPTXSubtarget &nvptxSubtarget;
  // Build the map between type name and ID based on module's type
  // symbol table.
  std::map<const Type *, std::string> TypeNameMap;

  // List of variables demoted to a function scope.
  std::map<const Function *, std::vector<GlobalVariable *> > localDecls;

  // To record filename to ID mapping.
  std::map<std::string, unsigned> filenameMap;
  void recordAndEmitFilenames(Module &);

  void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O);
  void emitPTXAddressSpace(unsigned int AddressSpace,
                           raw_ostream &O) const;
  std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const;
  void printScalarConstant(Constant *CPV, raw_ostream &O);
  void printFPConstant(const ConstantFP *Fp, raw_ostream &O);
  void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer);
  void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer);

  void printOperandProper(const MachineOperand &MO);

  void emitLinkageDirective(const GlobalValue *V, raw_ostream &O);
  void emitDeclarations(Module &, raw_ostream &O);
  void emitDeclaration(const Function *, raw_ostream &O);

  static const char *getRegisterName(unsigned RegNo);
  void emitDemotedVars(const Function *, raw_ostream &);

  LineReader *reader;              // lazily created by getReader()
  LineReader *getReader(std::string);

public:
  NVPTXAsmPrinter(TargetMachine &TM,
                  MCStreamer &Streamer)
    : AsmPrinter(TM, Streamer),
      nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
    CurrentBankselLabelInBasicBlock = "";
    VRidGlobal2LocalMap = NULL;
    reader = NULL;
  }

  ~NVPTXAsmPrinter() {
    // BUGFIX: the original guarded this with 'if (!reader)', which deleted
    // the pointer only when it was null and therefore leaked any LineReader
    // that was actually created.  'delete' on a null pointer is a no-op, so
    // delete unconditionally.
    delete reader;
  }

  bool ignoreLoc(const MachineInstr &);

  virtual void getVirtualRegisterName(unsigned, bool, raw_ostream &);

  DebugLoc prevDebugLoc;
  void emitLineNumberAsDotLoc(const MachineInstr &);
};
|
||||
} // end of namespace
|
||||
|
||||
#endif
|
76
lib/Target/NVPTX/NVPTXFrameLowering.cpp
Normal file
76
lib/Target/NVPTX/NVPTXFrameLowering.cpp
Normal file
@ -0,0 +1,76 @@
|
||||
//=======- NVPTXFrameLowering.cpp - NVPTX Frame Information ---*- C++ -*-=====//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the NVPTX implementation of TargetFrameLowering class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "NVPTXFrameLowering.h"
|
||||
#include "NVPTX.h"
|
||||
#include "NVPTXRegisterInfo.h"
|
||||
#include "NVPTXSubtarget.h"
|
||||
#include "NVPTXTargetMachine.h"
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/MC/MachineLocation.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// NVPTX always reports a frame pointer: frames are addressed through the
// virtual %SP/%Depot registers set up in emitPrologue rather than a real
// hardware stack register.
bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const {
  return true;
}
|
||||
|
||||
// Emit the NVPTX prologue: when the function owns stack objects,
// materialize the virtual frame pointer(s) from %Depot at the top of the
// entry block.  Functions without stack objects need no prologue.
void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
  if (!MF.getFrameInfo()->hasStackObjects())
    return;

  MachineBasicBlock &MBB = MF.front();
  MachineBasicBlock::iterator MBBI = MBB.begin();
  // These instructions conceptually precede the first real instruction
  // of the block, so give them no debug location.
  DebugLoc dl = DebugLoc();

  unsigned MovOpc = is64bit ? NVPTX::IMOV64rr : NVPTX::IMOV32rr;

  if (tm.getSubtargetImpl()->hasGenericLdSt()) {
    // Generic addressing available; emit:
    //   mov        %SPL, %depot;
    //   cvta.local %SP,  %SPL;
    unsigned CvtaOpc = is64bit ? NVPTX::cvta_local_yes_64
                               : NVPTX::cvta_local_yes;
    MachineInstr *MI = BuildMI(MBB, MBBI, dl,
                               tm.getInstrInfo()->get(CvtaOpc),
                               NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
    // Insert the mov ahead of the cvta so %SPL is defined first.
    BuildMI(MBB, MI, dl,
            tm.getInstrInfo()->get(MovOpc), NVPTX::VRFrameLocal)
      .addReg(NVPTX::VRDepot);
  } else {
    // No generic ld/st; emit:  mov %SP, %depot;
    BuildMI(MBB, MBBI, dl,
            tm.getInstrInfo()->get(MovOpc), NVPTX::VRFrame)
      .addReg(NVPTX::VRDepot);
  }
}
|
||||
|
||||
// Intentionally empty: the prologue only defines virtual frame
// registers, so there is nothing to tear down on function return.
void NVPTXFrameLowering::emitEpilogue(MachineFunction &MF,
                                      MachineBasicBlock &MBB) const {
}
|
40
lib/Target/NVPTX/NVPTXFrameLowering.h
Normal file
40
lib/Target/NVPTX/NVPTXFrameLowering.h
Normal file
@ -0,0 +1,40 @@
|
||||
//===--- NVPTXFrameLowering.h - Define frame lowering for NVPTX -*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef NVPTX_FRAMELOWERING_H
|
||||
#define NVPTX_FRAMELOWERING_H
|
||||
|
||||
#include "llvm/Target/TargetFrameLowering.h"
|
||||
|
||||
|
||||
namespace llvm {
|
||||
class NVPTXTargetMachine;
|
||||
|
||||
// Frame lowering for NVPTX.  PTX uses a virtual frame model: hasFP is
// always true and the prologue copies %Depot into the virtual frame
// register(s); no epilogue code is required.
class NVPTXFrameLowering : public TargetFrameLowering {
  NVPTXTargetMachine &tm;  // owning target machine (source of instr info/subtarget)
  bool is64bit;            // true when generating 64-bit PTX

public:
  // Stack grows up in the PTX virtual frame model; 8-byte alignment,
  // zero local-area offset.
  explicit NVPTXFrameLowering(NVPTXTargetMachine &_tm, bool _is64bit)
  : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0),
    tm(_tm), is64bit(_is64bit) {}

  virtual bool hasFP(const MachineFunction &MF) const;
  virtual void emitPrologue(MachineFunction &MF) const;
  virtual void emitEpilogue(MachineFunction &MF,
                            MachineBasicBlock &MBB) const;
};
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
#endif
|
681
lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
Normal file
681
lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
Normal file
@ -0,0 +1,681 @@
|
||||
//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines an instruction selector for the NVPTX target.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
#include "llvm/Instructions.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "NVPTXISelDAGToDAG.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Target/TargetIntrinsicInfo.h"
|
||||
#include "llvm/GlobalValue.h"
|
||||
|
||||
#undef DEBUG_TYPE
|
||||
#define DEBUG_TYPE "nvptx-isel"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
|
||||
// Command-line knobs controlling NVPTX FP contraction and f32 division.
// Fix: corrected user-visible help-string typos ("avaiable" ->
// "available") and made the prefix consistent with the sibling options
// ("NVPTX Specifies:" -> "NVPTX Specific:").
static cl::opt<bool>
UseFMADInstruction("nvptx-mad-enable",
                   cl::ZeroOrMore,
                   cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
                   cl::init(false));

static cl::opt<int>
FMAContractLevel("nvptx-fma-level",
                 cl::ZeroOrMore,
                 cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
                          " 1: do it 2: do it aggressively"),
                 cl::init(2));

static cl::opt<int>
UsePrecDivF32("nvptx-prec-divf32",
              cl::ZeroOrMore,
              cl::desc("NVPTX Specific: 0 use div.approx, 1 use div.full, 2 use"
                       " IEEE Compliant F32 div.rnd if available."),
              cl::init(2));
|
||||
|
||||
/// createNVPTXISelDag - This pass converts a legalized DAG into a
|
||||
/// NVPTX-specific DAG, ready for instruction scheduling.
|
||||
/// createNVPTXISelDag - This pass converts a legalized DAG into a
/// NVPTX-specific DAG, ready for instruction scheduling.
/// OptLevel feeds the FMA/mul-wide contraction decisions made in the
/// NVPTXDAGToDAGISel constructor.
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
                                       llvm::CodeGenOpt::Level OptLevel) {
  return new NVPTXDAGToDAGISel(TM, OptLevel);
}
|
||||
|
||||
|
||||
NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
                                     CodeGenOpt::Level OptLevel)
  : SelectionDAGISel(tm, OptLevel),
    Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
  // FP contraction policy:
  //  * fma.f32 / fma.f64 whenever the target supports the instruction,
  //    we are optimizing, and -nvptx-fma-level allows it.
  //  * mad.f32 only when -nvptx-mad-enable is given and the target does
  //    not support fma.f32.
  const bool Optimize = (OptLevel > 0);
  const bool HasFMA32 = Subtarget.hasFMAF32();
  const bool HasFMA64 = Subtarget.hasFMAF64();

  doFMADF32   = Optimize && UseFMADInstruction && !HasFMA32;
  doFMAF32    = Optimize && HasFMA32 && (FMAContractLevel >= 1);
  doFMAF64    = Optimize && HasFMA64 && (FMAContractLevel >= 1);
  doFMAF32AGG = Optimize && HasFMA32 && (FMAContractLevel == 2);
  doFMAF64AGG = Optimize && HasFMA64 && (FMAContractLevel == 2);

  allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction;

  // mul.wide formation is an optimization-only transform.
  doMulWide = Optimize;

  // f32 division: start from the requested precision level, but
  // div.rnd (level 2) is unavailable before sm_20/PTX 2.0, so fall
  // back to div.full (level 1) there.
  do_DIVF32_PREC = UsePrecDivF32;
  if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
    do_DIVF32_PREC = 1;
}
|
||||
|
||||
/// Select - Select instructions not customized! Used for
|
||||
/// expanded, promoted and normal instructions.
|
||||
/// Select - Select instructions not customized!  Used for expanded,
/// promoted and normal instructions.  Loads and stores get a shot at
/// custom selection first; everything else (and any load/store the
/// custom path declines) goes through tablegen-generated SelectCode.
SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
  if (N->isMachineOpcode())
    return NULL;   // Already selected.

  switch (N->getOpcode()) {
  case ISD::LOAD:
    if (SDNode *ResNode = SelectLoad(N))
      return ResNode;
    break;
  case ISD::STORE:
    if (SDNode *ResNode = SelectStore(N))
      return ResNode;
    break;
  }
  return SelectCode(N);
}
|
||||
|
||||
|
||||
static unsigned int
|
||||
getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget)
|
||||
{
|
||||
const Value *Src = N->getSrcValue();
|
||||
if (!Src)
|
||||
return NVPTX::PTXLdStInstCode::LOCAL;
|
||||
|
||||
if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
|
||||
switch (PT->getAddressSpace()) {
|
||||
case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
|
||||
case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
|
||||
case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
|
||||
case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
|
||||
return NVPTX::PTXLdStInstCode::CONSTANT;
|
||||
case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
|
||||
case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
|
||||
case llvm::ADDRESS_SPACE_CONST:
|
||||
// If the arch supports generic address space, translate it to GLOBAL
|
||||
// for correctness.
|
||||
// If the arch does not support generic address space, then the arch
|
||||
// does not really support ADDRESS_SPACE_CONST, translate it to
|
||||
// to CONSTANT for better performance.
|
||||
if (Subtarget.hasGenericLdSt())
|
||||
return NVPTX::PTXLdStInstCode::GLOBAL;
|
||||
else
|
||||
return NVPTX::PTXLdStInstCode::CONSTANT;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
return NVPTX::PTXLdStInstCode::LOCAL;
|
||||
}
|
||||
|
||||
|
||||
// SelectLoad - Custom-select an ISD::LOAD into one of the NVPTX LD_*
// machine opcodes.  The machine-node operands encode, in order:
// volatility, address-space code, vector arity, value-type class
// (signed/unsigned/float), scalar width in bits, then the address
// operand(s) and the chain.  Returns NULL to fall back to tablegen
// selection (indexed loads, non-simple VTs, unhandled types).
SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
  DebugLoc dl = N->getDebugLoc();
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  SDNode *NVPTXLD= NULL;

  // do not support pre/post inc/dec
  if (LD->isIndexed())
    return NULL;

  if (!LoadedVT.isSimple())
    return NULL;

  // Address Space Setting
  unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool isVolatile = LD->isVolatile();
  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Vector Setting
  // Only 2- and 4-element vectors map to PTX ld.v2/ld.v4; anything else
  // falls back to default selection.
  MVT SimpleVT = LoadedVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    unsigned num = SimpleVT.getVectorNumElements();
    if (num == 2)
      vecType = NVPTX::PTXLdStInstCode::V2;
    else if (num == 4)
      vecType = NVPTX::PTXLdStInstCode::V4;
    else
      return NULL;
  }

  // Type Setting: fromType + fromTypeWidth
  //
  // Sign   : ISD::SEXTLOAD
  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  //          type is integer
  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT ScalarVT = SimpleVT.getScalarType();
  unsigned fromTypeWidth = ScalarVT.getSizeInBits();
  unsigned int fromType;
  if ((LD->getExtensionType() == ISD::SEXTLOAD))
    fromType = NVPTX::PTXLdStInstCode::Signed;
  else if (ScalarVT.isFloatingPoint())
    fromType = NVPTX::PTXLdStInstCode::Float;
  else
    fromType = NVPTX::PTXLdStInstCode::Unsigned;

  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue Addr;
  SDValue Offset, Base;
  unsigned Opcode;
  MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy;

  // Four addressing forms, tried most- to least-specific:
  //   avar (direct symbol), asi (symbol+imm), ari (reg+imm), areg (reg).
  if (SelectDirectAddr(N1, Addr)) {
    switch (TargetVT) {
    case MVT::i8:    Opcode = NVPTX::LD_i8_avar; break;
    case MVT::i16:   Opcode = NVPTX::LD_i16_avar; break;
    case MVT::i32:   Opcode = NVPTX::LD_i32_avar; break;
    case MVT::i64:   Opcode = NVPTX::LD_i64_avar; break;
    case MVT::f32:   Opcode = NVPTX::LD_f32_avar; break;
    case MVT::f64:   Opcode = NVPTX::LD_f64_avar; break;
    case MVT::v2i8:  Opcode = NVPTX::LD_v2i8_avar; break;
    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_avar; break;
    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_avar; break;
    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_avar; break;
    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_avar; break;
    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_avar; break;
    case MVT::v4i8:  Opcode = NVPTX::LD_v4i8_avar; break;
    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_avar; break;
    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_avar; break;
    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_avar; break;
    default: return NULL;
    }
    SDValue Ops[] = { getI32Imm(isVolatile),
                      getI32Imm(codeAddrSpace),
                      getI32Imm(vecType),
                      getI32Imm(fromType),
                      getI32Imm(fromTypeWidth),
                      Addr, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
                                     MVT::Other, Ops, 7);
  } else if (Subtarget.is64Bit()?
             SelectADDRsi64(N1.getNode(), N1, Base, Offset):
             SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
    switch (TargetVT) {
    case MVT::i8:    Opcode = NVPTX::LD_i8_asi; break;
    case MVT::i16:   Opcode = NVPTX::LD_i16_asi; break;
    case MVT::i32:   Opcode = NVPTX::LD_i32_asi; break;
    case MVT::i64:   Opcode = NVPTX::LD_i64_asi; break;
    case MVT::f32:   Opcode = NVPTX::LD_f32_asi; break;
    case MVT::f64:   Opcode = NVPTX::LD_f64_asi; break;
    case MVT::v2i8:  Opcode = NVPTX::LD_v2i8_asi; break;
    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_asi; break;
    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_asi; break;
    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_asi; break;
    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_asi; break;
    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_asi; break;
    case MVT::v4i8:  Opcode = NVPTX::LD_v4i8_asi; break;
    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_asi; break;
    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_asi; break;
    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_asi; break;
    default: return NULL;
    }
    SDValue Ops[] = { getI32Imm(isVolatile),
                      getI32Imm(codeAddrSpace),
                      getI32Imm(vecType),
                      getI32Imm(fromType),
                      getI32Imm(fromTypeWidth),
                      Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
                                     MVT::Other, Ops, 8);
  } else if (Subtarget.is64Bit()?
             SelectADDRri64(N1.getNode(), N1, Base, Offset):
             SelectADDRri(N1.getNode(), N1, Base, Offset)) {
    switch (TargetVT) {
    case MVT::i8:    Opcode = NVPTX::LD_i8_ari; break;
    case MVT::i16:   Opcode = NVPTX::LD_i16_ari; break;
    case MVT::i32:   Opcode = NVPTX::LD_i32_ari; break;
    case MVT::i64:   Opcode = NVPTX::LD_i64_ari; break;
    case MVT::f32:   Opcode = NVPTX::LD_f32_ari; break;
    case MVT::f64:   Opcode = NVPTX::LD_f64_ari; break;
    case MVT::v2i8:  Opcode = NVPTX::LD_v2i8_ari; break;
    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_ari; break;
    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_ari; break;
    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_ari; break;
    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_ari; break;
    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_ari; break;
    case MVT::v4i8:  Opcode = NVPTX::LD_v4i8_ari; break;
    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_ari; break;
    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_ari; break;
    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_ari; break;
    default: return NULL;
    }
    SDValue Ops[] = { getI32Imm(isVolatile),
                      getI32Imm(codeAddrSpace),
                      getI32Imm(vecType),
                      getI32Imm(fromType),
                      getI32Imm(fromTypeWidth),
                      Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
                                     MVT::Other, Ops, 8);
  }
  else {
    // Fallback: plain register addressing.
    switch (TargetVT) {
    case MVT::i8:    Opcode = NVPTX::LD_i8_areg; break;
    case MVT::i16:   Opcode = NVPTX::LD_i16_areg; break;
    case MVT::i32:   Opcode = NVPTX::LD_i32_areg; break;
    case MVT::i64:   Opcode = NVPTX::LD_i64_areg; break;
    case MVT::f32:   Opcode = NVPTX::LD_f32_areg; break;
    case MVT::f64:   Opcode = NVPTX::LD_f64_areg; break;
    case MVT::v2i8:  Opcode = NVPTX::LD_v2i8_areg; break;
    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_areg; break;
    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_areg; break;
    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_areg; break;
    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_areg; break;
    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_areg; break;
    case MVT::v4i8:  Opcode = NVPTX::LD_v4i8_areg; break;
    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_areg; break;
    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_areg; break;
    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_areg; break;
    default: return NULL;
    }
    SDValue Ops[] = { getI32Imm(isVolatile),
                      getI32Imm(codeAddrSpace),
                      getI32Imm(vecType),
                      getI32Imm(fromType),
                      getI32Imm(fromTypeWidth),
                      N1, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
                                     MVT::Other, Ops, 7);
  }

  // Transfer the memory operand to the machine node so later passes
  // retain aliasing/volatility information.
  if (NVPTXLD != NULL) {
    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
  }

  return NVPTXLD;
}
|
||||
|
||||
// SelectStore - Custom-select an ISD::STORE into one of the NVPTX ST_*
// machine opcodes.  Mirrors SelectLoad: operands are the stored value,
// volatility, address-space code, vector arity, value-type class,
// scalar width, then the address operand(s) and the chain.  Returns
// NULL to fall back to tablegen selection.
SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
  DebugLoc dl = N->getDebugLoc();
  StoreSDNode *ST = cast<StoreSDNode>(N);
  EVT StoreVT = ST->getMemoryVT();
  SDNode *NVPTXST = NULL;

  // do not support pre/post inc/dec
  if (ST->isIndexed())
    return NULL;

  if (!StoreVT.isSimple())
    return NULL;

  // Address Space Setting
  unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool isVolatile = ST->isVolatile();
  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Vector Setting
  // Only 2- and 4-element vectors map to PTX st.v2/st.v4.
  MVT SimpleVT = StoreVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    unsigned num = SimpleVT.getVectorNumElements();
    if (num == 2)
      vecType = NVPTX::PTXLdStInstCode::V2;
    else if (num == 4)
      vecType = NVPTX::PTXLdStInstCode::V4;
    else
      return NULL;
  }

  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  //
  MVT ScalarVT = SimpleVT.getScalarType();
  unsigned toTypeWidth = ScalarVT.getSizeInBits();
  unsigned int toType;
  if (ScalarVT.isFloatingPoint())
    toType = NVPTX::PTXLdStInstCode::Float;
  else
    toType = NVPTX::PTXLdStInstCode::Unsigned;

  // Create the machine instruction DAG
  // N1 is the value being stored, N2 is the address.
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDValue Addr;
  SDValue Offset, Base;
  unsigned Opcode;
  MVT::SimpleValueType SourceVT =
    N1.getNode()->getValueType(0).getSimpleVT().SimpleTy;

  // Four addressing forms, tried most- to least-specific:
  //   avar (direct symbol), asi (symbol+imm), ari (reg+imm), areg (reg).
  if (SelectDirectAddr(N2, Addr)) {
    switch (SourceVT) {
    case MVT::i8:    Opcode = NVPTX::ST_i8_avar; break;
    case MVT::i16:   Opcode = NVPTX::ST_i16_avar; break;
    case MVT::i32:   Opcode = NVPTX::ST_i32_avar; break;
    case MVT::i64:   Opcode = NVPTX::ST_i64_avar; break;
    case MVT::f32:   Opcode = NVPTX::ST_f32_avar; break;
    case MVT::f64:   Opcode = NVPTX::ST_f64_avar; break;
    case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_avar; break;
    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_avar; break;
    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_avar; break;
    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_avar; break;
    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_avar; break;
    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_avar; break;
    case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_avar; break;
    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_avar; break;
    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_avar; break;
    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_avar; break;
    default: return NULL;
    }
    SDValue Ops[] = { N1,
                      getI32Imm(isVolatile),
                      getI32Imm(codeAddrSpace),
                      getI32Imm(vecType),
                      getI32Imm(toType),
                      getI32Imm(toTypeWidth),
                      Addr, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
                                     MVT::Other, Ops, 8);
  } else if (Subtarget.is64Bit()?
             SelectADDRsi64(N2.getNode(), N2, Base, Offset):
             SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
    switch (SourceVT) {
    case MVT::i8:    Opcode = NVPTX::ST_i8_asi; break;
    case MVT::i16:   Opcode = NVPTX::ST_i16_asi; break;
    case MVT::i32:   Opcode = NVPTX::ST_i32_asi; break;
    case MVT::i64:   Opcode = NVPTX::ST_i64_asi; break;
    case MVT::f32:   Opcode = NVPTX::ST_f32_asi; break;
    case MVT::f64:   Opcode = NVPTX::ST_f64_asi; break;
    case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_asi; break;
    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_asi; break;
    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_asi; break;
    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_asi; break;
    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_asi; break;
    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_asi; break;
    case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_asi; break;
    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_asi; break;
    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_asi; break;
    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_asi; break;
    default: return NULL;
    }
    SDValue Ops[] = { N1,
                      getI32Imm(isVolatile),
                      getI32Imm(codeAddrSpace),
                      getI32Imm(vecType),
                      getI32Imm(toType),
                      getI32Imm(toTypeWidth),
                      Base, Offset, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
                                     MVT::Other, Ops, 9);
  } else if (Subtarget.is64Bit()?
             SelectADDRri64(N2.getNode(), N2, Base, Offset):
             SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    switch (SourceVT) {
    case MVT::i8:    Opcode = NVPTX::ST_i8_ari; break;
    case MVT::i16:   Opcode = NVPTX::ST_i16_ari; break;
    case MVT::i32:   Opcode = NVPTX::ST_i32_ari; break;
    case MVT::i64:   Opcode = NVPTX::ST_i64_ari; break;
    case MVT::f32:   Opcode = NVPTX::ST_f32_ari; break;
    case MVT::f64:   Opcode = NVPTX::ST_f64_ari; break;
    case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_ari; break;
    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_ari; break;
    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_ari; break;
    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_ari; break;
    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_ari; break;
    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_ari; break;
    case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_ari; break;
    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_ari; break;
    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_ari; break;
    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_ari; break;
    default: return NULL;
    }
    SDValue Ops[] = { N1,
                      getI32Imm(isVolatile),
                      getI32Imm(codeAddrSpace),
                      getI32Imm(vecType),
                      getI32Imm(toType),
                      getI32Imm(toTypeWidth),
                      Base, Offset, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
                                     MVT::Other, Ops, 9);
  } else {
    // Fallback: plain register addressing.
    switch (SourceVT) {
    case MVT::i8:    Opcode = NVPTX::ST_i8_areg; break;
    case MVT::i16:   Opcode = NVPTX::ST_i16_areg; break;
    case MVT::i32:   Opcode = NVPTX::ST_i32_areg; break;
    case MVT::i64:   Opcode = NVPTX::ST_i64_areg; break;
    case MVT::f32:   Opcode = NVPTX::ST_f32_areg; break;
    case MVT::f64:   Opcode = NVPTX::ST_f64_areg; break;
    case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_areg; break;
    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_areg; break;
    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_areg; break;
    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_areg; break;
    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_areg; break;
    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_areg; break;
    case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_areg; break;
    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_areg; break;
    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_areg; break;
    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_areg; break;
    default: return NULL;
    }
    SDValue Ops[] = { N1,
                      getI32Imm(isVolatile),
                      getI32Imm(codeAddrSpace),
                      getI32Imm(vecType),
                      getI32Imm(toType),
                      getI32Imm(toTypeWidth),
                      N2, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
                                     MVT::Other, Ops, 8);
  }

  // Transfer the memory operand to the machine node so later passes
  // retain aliasing/volatility information.
  if (NVPTXST != NULL) {
    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
  }

  return NVPTXST;
}
|
||||
|
||||
// SelectDirectAddr - Match a direct address for DAG.
|
||||
// A direct address could be a globaladdress or externalsymbol.
|
||||
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
|
||||
// Return true if TGA or ES.
|
||||
if (N.getOpcode() == ISD::TargetGlobalAddress
|
||||
|| N.getOpcode() == ISD::TargetExternalSymbol) {
|
||||
Address = N;
|
||||
return true;
|
||||
}
|
||||
if (N.getOpcode() == NVPTXISD::Wrapper) {
|
||||
Address = N.getOperand(0);
|
||||
return true;
|
||||
}
|
||||
if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
|
||||
unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
|
||||
if (IID == Intrinsic::nvvm_ptr_gen_to_param)
|
||||
if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
|
||||
return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// symbol+offset
|
||||
bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr,
|
||||
SDValue &Base, SDValue &Offset,
|
||||
MVT mvt) {
|
||||
if (Addr.getOpcode() == ISD::ADD) {
|
||||
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
|
||||
SDValue base=Addr.getOperand(0);
|
||||
if (SelectDirectAddr(base, Base)) {
|
||||
Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// symbol+offset
|
||||
// symbol+offset
// 32-bit variant: offset is emitted as an i32 target constant.
bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
                                     SDValue &Base, SDValue &Offset) {
  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
}
|
||||
|
||||
// symbol+offset
|
||||
// symbol+offset
// 64-bit variant: offset is emitted as an i64 target constant.
bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
                                       SDValue &Base, SDValue &Offset) {
  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
}
|
||||
|
||||
// register+offset
|
||||
bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
|
||||
SDValue &Base, SDValue &Offset,
|
||||
MVT mvt) {
|
||||
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
|
||||
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
|
||||
Offset = CurDAG->getTargetConstant(0, mvt);
|
||||
return true;
|
||||
}
|
||||
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
|
||||
Addr.getOpcode() == ISD::TargetGlobalAddress)
|
||||
return false; // direct calls.
|
||||
|
||||
if (Addr.getOpcode() == ISD::ADD) {
|
||||
if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
|
||||
return false;
|
||||
}
|
||||
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
|
||||
if (FrameIndexSDNode *FIN =
|
||||
dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
|
||||
// Constant offset from frame ref.
|
||||
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
|
||||
else
|
||||
Base = Addr.getOperand(0);
|
||||
Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// register+offset
|
||||
// register+offset
// 32-bit variant: offsets/frame indices are emitted with MVT::i32.
bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
                                     SDValue &Base, SDValue &Offset) {
  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
}
|
||||
|
||||
// register+offset
|
||||
// register+offset
// 64-bit variant: offsets/frame indices are emitted with MVT::i64.
bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
                                       SDValue &Base, SDValue &Offset) {
  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
}
|
||||
|
||||
// Return true if the memory node N accesses a pointer whose address
// space equals spN; false when no source value / pointer type is
// available.
bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
                                                 unsigned int spN) const {
  const Value *Src = NULL;
  // Even though MemIntrinsicSDNode is a subclass of MemSDNode,
  // the classof() for MemSDNode does not include MemIntrinsicSDNode
  // (See SelectionDAGNodes.h). So we need to check for both.
  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
    Src = mN->getSrcValue();
  }
  else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
    Src = mN->getSrcValue();
  }
  if (!Src)
    return false;
  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
    return (PT->getAddressSpace() == spN);
  return false;
}
|
||||
|
||||
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
|
||||
/// inline asm expressions.
|
||||
bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
|
||||
char ConstraintCode,
|
||||
std::vector<SDValue> &OutOps) {
|
||||
SDValue Op0, Op1;
|
||||
switch (ConstraintCode) {
|
||||
default: return true;
|
||||
case 'm': // memory
|
||||
if (SelectDirectAddr(Op, Op0)) {
|
||||
OutOps.push_back(Op0);
|
||||
OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
|
||||
return false;
|
||||
}
|
||||
if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
|
||||
OutOps.push_back(Op0);
|
||||
OutOps.push_back(Op1);
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Return true if N is a undef or a constant.
|
||||
// If N was undef, return a (i8imm 0) in Retval
|
||||
// If N was imm, convert it to i8imm and return in Retval
|
||||
// Note: The convert to i8imm is required, otherwise the
|
||||
// pattern matcher inserts a bunch of IMOVi8rr to convert
|
||||
// the imm to i8imm, and this causes instruction selection
|
||||
// to fail.
|
||||
// Return true if N is an undef or a constant.
// Undef is materialized as (i8imm 0); a constant is re-issued as an
// i8imm target constant.  The conversion to i8imm is required —
// otherwise the pattern matcher inserts a bunch of IMOVi8rr to convert
// the imm, and that causes instruction selection to fail.
bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N,
                                   SDValue &Retval) {
  switch (N.getOpcode()) {
  case ISD::UNDEF:
    Retval = CurDAG->getTargetConstant(0, MVT::i8);
    return true;
  case ISD::Constant: {
    ConstantSDNode *CN = cast<ConstantSDNode>(N.getNode());
    Retval = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i8);
    return true;
  }
  default:
    return false;
  }
}
|
105
lib/Target/NVPTX/NVPTXISelDAGToDAG.h
Normal file
105
lib/Target/NVPTX/NVPTXISelDAGToDAG.h
Normal file
@ -0,0 +1,105 @@
|
||||
//===-- NVPTXISelDAGToDAG.h - A dag to dag inst selector for NVPTX --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the NVPTX target.
//
//===----------------------------------------------------------------------===//

// NOTE(review): this header defines DEBUG_TYPE, emits a using-directive and
// declares the selector in an anonymous namespace, so it is intended to be
// included exactly once (by NVPTXISelDAGToDAG.cpp).  It previously had no
// include guard at all; the guard below protects against accidental double
// inclusion and matches the convention of the sibling NVPTX headers.
#ifndef NVPTXISELDAGTODAG_H
#define NVPTXISELDAGTODAG_H

#define DEBUG_TYPE "nvptx-isel"

#include "NVPTX.h"
#include "NVPTXISelLowering.h"
#include "NVPTXRegisterInfo.h"
#include "NVPTXTargetMachine.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Intrinsics.h"
using namespace llvm;

namespace {

/// SelectionDAG instruction selector for NVPTX.  The boolean knobs below are
/// initialized from command-line / subtarget options in the constructor and
/// steer which PTX instruction forms the generated matcher may emit.
class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {

  // If true, generate corresponding FPCONTRACT. This is
  // language dependent (i.e. CUDA and OpenCL works differently).
  bool doFMADF32;
  bool doFMAF64;
  bool doFMAF32;
  bool doFMAF64AGG;
  bool doFMAF32AGG;
  bool allowFMA;

  // 0: use div.approx
  // 1: use div.full
  // 2: For sm_20 and later, ieee-compliant div.rnd.f32 can be generated;
  //    Otherwise, use div.full
  int do_DIVF32_PREC;

  // If true, add .ftz to f32 instructions.
  // This is only meaningful for sm_20 and later, as the default
  // is not ftz.
  // For sm earlier than sm_20, f32 denorms are always ftz by the
  // hardware.
  // We always add the .ftz modifier regardless of the sm value
  // when Use32FTZ is true.
  bool UseF32FTZ;

  // If true, generate mul.wide from sext and mul
  bool doMulWide;

public:
  explicit NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
                             CodeGenOpt::Level OptLevel);

  // Pass Name
  virtual const char *getPassName() const {
    return "NVPTX DAG->DAG Pattern Instruction Selection";
  }

  const NVPTXSubtarget &Subtarget;

  virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                            char ConstraintCode,
                                            std::vector<SDValue> &OutOps);
private:
  // Include the pieces autogenerated from the target description.
#include "NVPTXGenDAGISel.inc"

  SDNode *Select(SDNode *N);
  SDNode* SelectLoad(SDNode *N);
  SDNode* SelectStore(SDNode *N);

  inline SDValue getI32Imm(unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, MVT::i32);
  }

  // Match direct address complex pattern.
  bool SelectDirectAddr(SDValue N, SDValue &Address);

  // Register+immediate addressing (reg, imm offset).
  bool SelectADDRri_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
                        SDValue &Offset, MVT mvt);
  bool SelectADDRri(SDNode *OpNode, SDValue Addr, SDValue &Base,
                    SDValue &Offset);
  bool SelectADDRri64(SDNode *OpNode, SDValue Addr, SDValue &Base,
                      SDValue &Offset);

  // Symbol+immediate addressing (symbol, imm offset).
  bool SelectADDRsi_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
                        SDValue &Offset, MVT mvt);
  bool SelectADDRsi(SDNode *OpNode, SDValue Addr, SDValue &Base,
                    SDValue &Offset);
  bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base,
                      SDValue &Offset);

  // True if memory node N lives in address space spN.
  bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;

  bool UndefOrImm(SDValue Op, SDValue N, SDValue &Retval);

};
}

#endif // NVPTXISELDAGTODAG_H
|
1294
lib/Target/NVPTX/NVPTXISelLowering.cpp
Normal file
1294
lib/Target/NVPTX/NVPTXISelLowering.cpp
Normal file
File diff suppressed because it is too large
Load Diff
153
lib/Target/NVPTX/NVPTXISelLowering.h
Normal file
153
lib/Target/NVPTX/NVPTXISelLowering.h
Normal file
@ -0,0 +1,153 @@
|
||||
//===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef NVPTXISELLOWERING_H
#define NVPTXISELLOWERING_H

#include "NVPTX.h"
#include "NVPTXSubtarget.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"

namespace llvm {
namespace NVPTXISD {
// Target-specific SelectionDAG node kinds.  Most of these model pieces of
// the PTX calling sequence (param declaration, argument marshalling, the
// printed call itself and retrieval of return values).
enum NodeType {
  // Start the numbering from where ISD NodeType finishes.
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  Wrapper,
  CALL,
  RET_FLAG,
  LOAD_PARAM,
  NVBuiltin,
  DeclareParam,
  DeclareScalarParam,
  DeclareRetParam,
  DeclareRet,
  DeclareScalarRet,
  LoadParam,
  StoreParam,
  StoreParamS32, // to sext and store a <32bit value, not used currently
  StoreParamU32, // to zext and store a <32bit value, not used currently
  MoveToParam,
  PrintCall,
  PrintCallUni,
  CallArgBegin,
  CallArg,
  LastCallArg,
  CallArgEnd,
  CallVoid,
  CallVal,
  CallSymbol,
  Prototype,
  MoveParam,
  MoveRetval,
  MoveToRetval,
  StoreRetval,
  PseudoUseParam,
  RETURN,
  CallSeqBegin,
  CallSeqEnd,
  Dummy
};
}

//===--------------------------------------------------------------------===//
// TargetLowering Implementation
//===--------------------------------------------------------------------===//
class NVPTXTargetLowering : public TargetLowering {
public:
  explicit NVPTXTargetLowering(NVPTXTargetMachine &TM);
  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(const GlobalValue *GV, int64_t Offset,
                             SelectionDAG &DAG) const;

  virtual const char *getTargetNodeName(unsigned Opcode) const;

  bool isTypeSupportedInIntrinsic(MVT VT) const;

  bool getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
                          unsigned Intrinsic) const;

  /// isLegalAddressingMode - Return true if the addressing mode represented
  /// by AM is legal for this target, for a load/store of the specified type
  /// Used to guide target specific optimizations, like loop strength
  /// reduction (LoopStrengthReduce.cpp) and memory optimization for
  /// address mode (CodeGenPrepare.cpp)
  virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;

  /// getFunctionAlignment - Return the Log2 alignment of this function.
  virtual unsigned getFunctionAlignment(const Function *F) const;

  // Comparisons produce a predicate (i1) register in PTX.
  virtual EVT getSetCCResultType(EVT VT) const {
    return MVT::i1;
  }

  ConstraintType getConstraintType(const std::string &Constraint) const;
  std::pair<unsigned, const TargetRegisterClass*>
  getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;

  virtual SDValue
  LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                       const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl,
                       SelectionDAG &DAG,
                       SmallVectorImpl<SDValue> &InVals) const;

  // This will be re-added once the necessary changes to LowerCallTo are
  // upstreamed.
  // virtual SDValue
  // LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
  //           bool isVarArg, bool doesNotRet, bool &isTailCall,
  //           const SmallVectorImpl<ISD::OutputArg> &Outs,
  //           const SmallVectorImpl<SDValue> &OutVals,
  //           const SmallVectorImpl<ISD::InputArg> &Ins,
  //           DebugLoc dl, SelectionDAG &DAG,
  //           SmallVectorImpl<SDValue> &InVals,
  //           Type *retTy, const ArgListTy &Args) const;

  // Build the textual PTX prototype string emitted for an indirect call.
  std::string getPrototype(Type *, const ArgListTy &,
                           const SmallVectorImpl<ISD::OutputArg> &,
                           unsigned retAlignment) const;

  virtual SDValue
  LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
              const SmallVectorImpl<ISD::OutputArg> &Outs,
              const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl,
              SelectionDAG &DAG) const;

  virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                            std::vector<SDValue> &Ops,
                                            SelectionDAG &DAG) const;

  NVPTXTargetMachine *nvTM;

  // PTX always uses 32-bit shift amounts
  virtual MVT getShiftAmountTy(EVT LHSTy) const {
    return MVT::i32;
  }

private:
  const NVPTXSubtarget &nvptxSubtarget;  // cache the subtarget here

  SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, EVT =
                     MVT::i32) const;
  SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT = MVT::i32) const;
  SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);

  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
};
} // namespace llvm

#endif // NVPTXISELLOWERING_H
|
43
lib/Target/NVPTX/NVPTXInstrFormats.td
Normal file
43
lib/Target/NVPTX/NVPTXInstrFormats.td
Normal file
@ -0,0 +1,43 @@
|
||||
//===- NVPTXInstrFormats.td - NVPTX Instruction Formats-------*- tblgen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Describe NVPTX instructions format
//
//===----------------------------------------------------------------------===//

// Vector instruction type enum
class VecInstTypeEnum<bits<4> val> {
  bits<4> Value=val;
}
def VecNOP : VecInstTypeEnum<0>;

// Generic NVPTX Format

class NVPTXInst<dag outs, dag ins, string asmstr, list<dag> pattern>
  : Instruction {
  field bits<14> Inst;

  let Namespace = "NVPTX";
  dag OutOperandList = outs;
  dag InOperandList = ins;
  let AsmString = asmstr;
  let Pattern = pattern;

  // TSFlagFields
  // These bits are decoded at runtime by NVPTXInstrInfo (isMoveInstr,
  // isLoadInstr, isStoreInstr) via the corresponding mask/shift constants,
  // so the layout below must stay in sync with those constants.
  bits<4> VecInstType = VecNOP.Value;
  bit IsSimpleMove = 0;
  bit IsLoad = 0;
  bit IsStore = 0;

  let TSFlags{3-0} = VecInstType;   // vector instruction kind
  let TSFlags{4-4} = IsSimpleMove;  // plain reg-to-reg move
  let TSFlags{5-5} = IsLoad;        // memory load
  let TSFlags{6-6} = IsStore;       // memory store
}
|
326
lib/Target/NVPTX/NVPTXInstrInfo.cpp
Normal file
326
lib/Target/NVPTX/NVPTXInstrInfo.cpp
Normal file
@ -0,0 +1,326 @@
|
||||
//===- NVPTXInstrInfo.cpp - NVPTX Instruction Information -----------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the NVPTX implementation of the TargetInstrInfo class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "NVPTX.h"
|
||||
#include "NVPTXInstrInfo.h"
|
||||
#include "NVPTXTargetMachine.h"
|
||||
#define GET_INSTRINFO_CTOR
|
||||
#include "NVPTXGenInstrInfo.inc"
|
||||
#include "llvm/Function.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include <cstdio>
|
||||
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// FIXME: Add the subtarget support on this constructor.
// Constructs the target instruction info; RegInfo is built eagerly from the
// target machine's current subtarget.
NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm)
: NVPTXGenInstrInfo(),
  TM(tm),
  RegInfo(*this, *TM.getSubtargetImpl()) {}
|
||||
|
||||
|
||||
// Emit a register-to-register copy by dispatching on the register class of
// the destination/source pair and selecting the class-specific move opcode.
// Both registers must belong to the same class; any unknown pairing asserts.
void NVPTXInstrInfo::copyPhysReg (MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I, DebugLoc DL,
                                  unsigned DestReg, unsigned SrcReg,
                                  bool KillSrc) const {
  // Scalar classes.
  if (NVPTX::Int32RegsRegClass.contains(DestReg) &&
      NVPTX::Int32RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::Int8RegsRegClass.contains(DestReg) &&
           NVPTX::Int8RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::IMOV8rr), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::Int1RegsRegClass.contains(DestReg) &&
           NVPTX::Int1RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::Float32RegsRegClass.contains(DestReg) &&
           NVPTX::Float32RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::Int16RegsRegClass.contains(DestReg) &&
           NVPTX::Int16RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::Int64RegsRegClass.contains(DestReg) &&
           NVPTX::Int64RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::Float64RegsRegClass.contains(DestReg) &&
           NVPTX::Float64RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  // Vector classes (elementized vectors use dedicated move pseudos).
  else if (NVPTX::V4F32RegsRegClass.contains(DestReg) &&
           NVPTX::V4F32RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V4f32Mov), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V4I32RegsRegClass.contains(DestReg) &&
           NVPTX::V4I32RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V4i32Mov), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V2F32RegsRegClass.contains(DestReg) &&
           NVPTX::V2F32RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V2f32Mov), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V2I32RegsRegClass.contains(DestReg) &&
           NVPTX::V2I32RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V2i32Mov), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V4I8RegsRegClass.contains(DestReg) &&
           NVPTX::V4I8RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V4i8Mov), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V2I8RegsRegClass.contains(DestReg) &&
           NVPTX::V2I8RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V2i8Mov), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V4I16RegsRegClass.contains(DestReg) &&
           NVPTX::V4I16RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V4i16Mov), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V2I16RegsRegClass.contains(DestReg) &&
           NVPTX::V2I16RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V2i16Mov), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V2I64RegsRegClass.contains(DestReg) &&
           NVPTX::V2I64RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V2i64Mov), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else if (NVPTX::V2F64RegsRegClass.contains(DestReg) &&
           NVPTX::V2F64RegsRegClass.contains(SrcReg))
    BuildMI(MBB, I, DL, get(NVPTX::V2f64Mov), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  else {
    // No known class pairing -- a cross-class copy is a selection bug.
    assert(0 && "Don't know how to copy a register");
  }
}
|
||||
|
||||
bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI,
|
||||
unsigned &SrcReg,
|
||||
unsigned &DestReg) const {
|
||||
// Look for the appropriate part of TSFlags
|
||||
bool isMove = false;
|
||||
|
||||
unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >>
|
||||
NVPTX::SimpleMoveShift;
|
||||
isMove = (TSFlags == 1);
|
||||
|
||||
if (isMove) {
|
||||
MachineOperand dest = MI.getOperand(0);
|
||||
MachineOperand src = MI.getOperand(1);
|
||||
assert(dest.isReg() && "dest of a movrr is not a reg");
|
||||
assert(src.isReg() && "src of a movrr is not a reg");
|
||||
|
||||
SrcReg = src.getReg();
|
||||
DestReg = dest.getReg();
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Return true if MI reads a PTX special register (thread/block geometry or
// warp size).  These are the intrinsic pseudo-instructions for
// %tid, %ntid, %ctaid, %nctaid and WARP_SZ.
bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const
{
  switch (MI.getOpcode()) {
  default: return false;
  case NVPTX::INT_PTX_SREG_NTID_X:
  case NVPTX::INT_PTX_SREG_NTID_Y:
  case NVPTX::INT_PTX_SREG_NTID_Z:
  case NVPTX::INT_PTX_SREG_TID_X:
  case NVPTX::INT_PTX_SREG_TID_Y:
  case NVPTX::INT_PTX_SREG_TID_Z:
  case NVPTX::INT_PTX_SREG_CTAID_X:
  case NVPTX::INT_PTX_SREG_CTAID_Y:
  case NVPTX::INT_PTX_SREG_CTAID_Z:
  case NVPTX::INT_PTX_SREG_NCTAID_X:
  case NVPTX::INT_PTX_SREG_NCTAID_Y:
  case NVPTX::INT_PTX_SREG_NCTAID_Z:
  case NVPTX::INT_PTX_SREG_WARPSIZE:
    return true;
  }
}
|
||||
|
||||
|
||||
bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI,
|
||||
unsigned &AddrSpace) const {
|
||||
bool isLoad = false;
|
||||
unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isLoadMask) >>
|
||||
NVPTX::isLoadShift;
|
||||
isLoad = (TSFlags == 1);
|
||||
if (isLoad)
|
||||
AddrSpace = getLdStCodeAddrSpace(MI);
|
||||
return isLoad;
|
||||
}
|
||||
|
||||
bool NVPTXInstrInfo::isStoreInstr(const MachineInstr &MI,
|
||||
unsigned &AddrSpace) const {
|
||||
bool isStore = false;
|
||||
unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isStoreMask) >>
|
||||
NVPTX::isStoreShift;
|
||||
isStore = (TSFlags == 1);
|
||||
if (isStore)
|
||||
AddrSpace = getLdStCodeAddrSpace(MI);
|
||||
return isStore;
|
||||
}
|
||||
|
||||
|
||||
bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
|
||||
unsigned addrspace = 0;
|
||||
if (MI->getOpcode() == NVPTX::INT_CUDA_SYNCTHREADS)
|
||||
return false;
|
||||
if (isLoadInstr(*MI, addrspace))
|
||||
if (addrspace == NVPTX::PTXLdStInstCode::SHARED)
|
||||
return false;
|
||||
if (isStoreInstr(*MI, addrspace))
|
||||
if (addrspace == NVPTX::PTXLdStInstCode::SHARED)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
/// implemented for a target).  Upon success, this returns false and returns
/// with the following information in various cases:
///
/// 1. If this block ends with no branches (it just falls through to its succ)
///    just return false, leaving TBB/FBB null.
/// 2. If this block ends with only an unconditional branch, it sets TBB to be
///    the destination block.
/// 3. If this block ends with an conditional branch and it falls through to
///    an successor block, it sets TBB to be the branch destination block and a
///    list of operands that evaluate the condition. These
///    operands can be passed to other TargetInstrInfo methods to create new
///    branches.
/// 4. If this block ends with an conditional branch and an unconditional
///    block, it returns the 'true' destination in TBB, the 'false' destination
///    in FBB, and a list of operands that evaluate the condition. These
///    operands can be passed to other TargetInstrInfo methods to create new
///    branches.
///
/// Note that RemoveBranch and InsertBranch must be implemented to support
/// cases where this method returns success.
///
bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                   MachineBasicBlock *&TBB,
                                   MachineBasicBlock *&FBB,
                                   SmallVectorImpl<MachineOperand> &Cond,
                                   bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
    return false;

  // Get the last instruction in the block.
  // (I now points at the last unpredicated terminator.)
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (LastInst->getOpcode() == NVPTX::GOTO) {
      // Unconditional branch: operand 0 is the destination MBB.
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    } else if (LastInst->getOpcode() == NVPTX::CBranch) {
      // Block ends with fall-through condbranch.
      // CBranch operands: 0 = predicate register, 1 = destination MBB.
      TBB = LastInst->getOperand(1).getMBB();
      Cond.push_back(LastInst->getOperand(0));
      return false;
    }
    // Otherwise, don't know what this is.
    return true;
  }

  // Get the instruction before it if it's a terminator.
  MachineInstr *SecondLastInst = I;

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() &&
      isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with NVPTX::GOTO and NVPTX::CBranch, handle it.
  if (SecondLastInst->getOpcode() == NVPTX::CBranch &&
      LastInst->getOpcode() == NVPTX::GOTO) {
    TBB = SecondLastInst->getOperand(1).getMBB();
    Cond.push_back(SecondLastInst->getOperand(0));
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two NVPTX::GOTOs, handle it.  The second one is not
  // executed, so remove it.
  if (SecondLastInst->getOpcode() == NVPTX::GOTO &&
      LastInst->getOpcode() == NVPTX::GOTO) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
|
||||
|
||||
// Erase up to two branch terminators (an optional trailing GOTO and an
// optional preceding CBranch) from the end of MBB and return the number of
// instructions removed.
unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  // Look at the final instruction; bail out if it isn't one of our branches.
  MachineBasicBlock::iterator Term = MBB.end();
  if (Term == MBB.begin())
    return 0;
  --Term;
  unsigned Opc = Term->getOpcode();
  if (Opc != NVPTX::GOTO && Opc != NVPTX::CBranch)
    return 0;

  // Remove the branch.
  Term->eraseFromParent();

  // A conditional branch may still precede the one just erased.
  Term = MBB.end();
  if (Term == MBB.begin())
    return 1;
  --Term;
  if (Term->getOpcode() != NVPTX::CBranch)
    return 1;

  // Remove the branch.
  Term->eraseFromParent();
  return 2;
}
|
||||
|
||||
unsigned
|
||||
NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
|
||||
MachineBasicBlock *FBB,
|
||||
const SmallVectorImpl<MachineOperand> &Cond,
|
||||
DebugLoc DL) const {
|
||||
// Shouldn't be a fall through.
|
||||
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
|
||||
assert((Cond.size() == 1 || Cond.size() == 0) &&
|
||||
"NVPTX branch conditions have two components!");
|
||||
|
||||
// One-way branch.
|
||||
if (FBB == 0) {
|
||||
if (Cond.empty()) // Unconditional branch
|
||||
BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB);
|
||||
else // Conditional branch
|
||||
BuildMI(&MBB, DL, get(NVPTX::CBranch))
|
||||
.addReg(Cond[0].getReg()).addMBB(TBB);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Two-way Conditional Branch.
|
||||
BuildMI(&MBB, DL, get(NVPTX::CBranch))
|
||||
.addReg(Cond[0].getReg()).addMBB(TBB);
|
||||
BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB);
|
||||
return 2;
|
||||
}
|
83
lib/Target/NVPTX/NVPTXInstrInfo.h
Normal file
83
lib/Target/NVPTX/NVPTXInstrInfo.h
Normal file
@ -0,0 +1,83 @@
|
||||
//===- NVPTXInstrInfo.h - NVPTX Instruction Information----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the NVPTX implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#ifndef NVPTXINSTRUCTIONINFO_H
#define NVPTXINSTRUCTIONINFO_H

#include "NVPTX.h"
#include "NVPTXRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"

#define GET_INSTRINFO_HEADER
#include "NVPTXGenInstrInfo.inc"

namespace llvm {

class NVPTXInstrInfo : public NVPTXGenInstrInfo
{
  NVPTXTargetMachine &TM;
  const NVPTXRegisterInfo RegInfo;
public:
  explicit NVPTXInstrInfo(NVPTXTargetMachine &TM);

  virtual const NVPTXRegisterInfo &getRegisterInfo() const { return RegInfo; }

  /* The following virtual functions are used in register allocation.
   * They are not implemented because the existing interface and the logic
   * at the caller side do not work for the elementized vector load and store.
   *
   * virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
   *                                      int &FrameIndex) const;
   * virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
   *                                     int &FrameIndex) const;
   * virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
   *                                  MachineBasicBlock::iterator MBBI,
   *                                 unsigned SrcReg, bool isKill, int FrameIndex,
   *                                  const TargetRegisterClass *RC) const;
   * virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
   *                                   MachineBasicBlock::iterator MBBI,
   *                                   unsigned DestReg, int FrameIndex,
   *                                   const TargetRegisterClass *RC) const;
   */

  virtual void copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator I, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const ;
  // Classify instructions by the TSFlags bits set in NVPTXInstrFormats.td.
  virtual bool isMoveInstr(const MachineInstr &MI,
                           unsigned &SrcReg,
                           unsigned &DestReg) const;
  bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
  bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
  bool isReadSpecialReg(MachineInstr &MI) const;

  virtual bool CanTailMerge(const MachineInstr *MI) const ;
  // Branch analysis.
  virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const;
  virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
  virtual unsigned InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
                                MachineBasicBlock *FBB,
                                const SmallVectorImpl<MachineOperand> &Cond,
                                DebugLoc DL) const;
  // Extract the address-space code stored as immediate operand 2 of
  // ld/st instructions.
  unsigned getLdStCodeAddrSpace(const MachineInstr &MI) const {
    return MI.getOperand(2).getImm();
  }

};

} // namespace llvm

#endif
|
2837
lib/Target/NVPTX/NVPTXInstrInfo.td
Normal file
2837
lib/Target/NVPTX/NVPTXInstrInfo.td
Normal file
File diff suppressed because it is too large
Load Diff
1675
lib/Target/NVPTX/NVPTXIntrinsics.td
Normal file
1675
lib/Target/NVPTX/NVPTXIntrinsics.td
Normal file
File diff suppressed because it is too large
Load Diff
208
lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
Normal file
208
lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
Normal file
@ -0,0 +1,208 @@
|
||||
//===- NVPTXLowerAggrCopies.cpp - ------------------------------*- C++ -*--===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Lower aggregate copies, memset, memcpy, memmov intrinsics into loops when
|
||||
// the size is large or is not a compile-time constant.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Function.h"
|
||||
#include "llvm/Constants.h"
|
||||
#include "llvm/Module.h"
|
||||
#include "llvm/Instructions.h"
|
||||
#include "llvm/Intrinsics.h"
|
||||
#include "llvm/IntrinsicInst.h"
|
||||
#include "llvm/Support/InstIterator.h"
|
||||
#include "llvm/Support/IRBuilder.h"
|
||||
#include "NVPTXLowerAggrCopies.h"
|
||||
#include "llvm/Target/TargetData.h"
|
||||
#include "llvm/LLVMContext.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Factory declared here so the target machine can create the pass without
// pulling in this file's internals.
namespace llvm {
FunctionPass *createLowerAggrCopies();
}

// Pass identification, replacement for typeid.
char NVPTXLowerAggrCopies::ID = 0;
|
||||
|
||||
// Lower MemTransferInst or load-store pair to loop
|
||||
static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr,
|
||||
Value *dstAddr, Value *len,
|
||||
//unsigned numLoads,
|
||||
bool srcVolatile, bool dstVolatile,
|
||||
LLVMContext &Context, Function &F) {
|
||||
Type *indType = len->getType();
|
||||
|
||||
BasicBlock *origBB = splitAt->getParent();
|
||||
BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split");
|
||||
BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB);
|
||||
|
||||
origBB->getTerminator()->setSuccessor(0, loopBB);
|
||||
IRBuilder<> builder(origBB, origBB->getTerminator());
|
||||
|
||||
// srcAddr and dstAddr are expected to be pointer types,
|
||||
// so no check is made here.
|
||||
unsigned srcAS =
|
||||
dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace();
|
||||
unsigned dstAS =
|
||||
dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
|
||||
|
||||
// Cast pointers to (char *)
|
||||
srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS));
|
||||
dstAddr = builder.CreateBitCast(dstAddr, Type::getInt8PtrTy(Context, dstAS));
|
||||
|
||||
IRBuilder<> loop(loopBB);
|
||||
// The loop index (ind) is a phi node.
|
||||
PHINode *ind = loop.CreatePHI(indType, 0);
|
||||
// Incoming value for ind is 0
|
||||
ind->addIncoming(ConstantInt::get(indType, 0), origBB);
|
||||
|
||||
// load from srcAddr+ind
|
||||
Value *val = loop.CreateLoad(loop.CreateGEP(srcAddr, ind), srcVolatile);
|
||||
// store at dstAddr+ind
|
||||
loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), dstVolatile);
|
||||
|
||||
// The value for ind coming from backedge is (ind + 1)
|
||||
Value *newind = loop.CreateAdd(ind, ConstantInt::get(indType, 1));
|
||||
ind->addIncoming(newind, loopBB);
|
||||
|
||||
loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB);
|
||||
}
|
||||
|
||||
// Lower MemSetInst to loop
|
||||
static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
|
||||
Value *len, Value *val, LLVMContext &Context,
|
||||
Function &F) {
|
||||
BasicBlock *origBB = splitAt->getParent();
|
||||
BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split");
|
||||
BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB);
|
||||
|
||||
origBB->getTerminator()->setSuccessor(0, loopBB);
|
||||
IRBuilder<> builder(origBB, origBB->getTerminator());
|
||||
|
||||
unsigned dstAS =
|
||||
dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
|
||||
|
||||
// Cast pointer to the type of value getting stored
|
||||
dstAddr = builder.CreateBitCast(dstAddr,
|
||||
PointerType::get(val->getType(), dstAS));
|
||||
|
||||
IRBuilder<> loop(loopBB);
|
||||
PHINode *ind = loop.CreatePHI(len->getType(), 0);
|
||||
ind->addIncoming(ConstantInt::get(len->getType(), 0), origBB);
|
||||
|
||||
loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), false);
|
||||
|
||||
Value *newind = loop.CreateAdd(ind, ConstantInt::get(len->getType(), 1));
|
||||
ind->addIncoming(newind, loopBB);
|
||||
|
||||
loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB);
|
||||
}
|
||||
|
||||
// Scan the function for "aggregate copy" patterns and rewrite each one as an
// explicit element-copy loop:
//   1. load/store pairs whose stored type is at least MaxAggrCopySize bytes,
//   2. memcpy/memmove intrinsics with a large (or variable) length,
//   3. memset intrinsics with a large (or variable) length.
// Candidates are collected first and rewritten afterwards so that erasing
// instructions does not invalidate the basic-block iteration.
// Returns true iff the function was modified.
bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
  SmallVector<LoadInst *, 4> aggrLoads;
  SmallVector<MemTransferInst *, 4> aggrMemcpys;
  SmallVector<MemSetInst *, 4> aggrMemsets;

  TargetData *TD = &getAnalysis<TargetData>();
  LLVMContext &Context = F.getParent()->getContext();

  //
  // Collect all the aggrLoads, aggrMemcpys and aggrMemsets.
  //
  //const BasicBlock *firstBB = &F.front(); // first BB in F
  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
    //BasicBlock *bb = BI;
    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
         ++II) {
      if (LoadInst * load = dyn_cast<LoadInst>(II)) {

        // Only a load whose single use is a store forms a copy pattern.
        if (load->hasOneUse() == false) continue;

        // Small copies are left alone.
        if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize) continue;

        User *use = *(load->use_begin());
        if (StoreInst * store = dyn_cast<StoreInst>(use)) {
          // The load must be the stored *value*, not the store address.
          if (store->getOperand(0) != load) //getValueOperand
            continue;
          aggrLoads.push_back(load);
        }
      } else if (MemTransferInst * intr = dyn_cast<MemTransferInst>(II)) {
        Value *len = intr->getLength();
        // If the number of elements being copied is greater
        // than MaxAggrCopySize, lower it to a loop
        if (ConstantInt * len_int = dyn_cast < ConstantInt > (len)) {
          if (len_int->getZExtValue() >= MaxAggrCopySize) {
            aggrMemcpys.push_back(intr);
          }
        } else {
          // turn variable length memcpy/memmov into loop
          aggrMemcpys.push_back(intr);
        }
      } else if (MemSetInst * memsetintr = dyn_cast<MemSetInst>(II)) {
        Value *len = memsetintr->getLength();
        if (ConstantInt * len_int = dyn_cast<ConstantInt>(len)) {
          if (len_int->getZExtValue() >= MaxAggrCopySize) {
            aggrMemsets.push_back(memsetintr);
          }
        } else {
          // turn variable length memset into loop
          aggrMemsets.push_back(memsetintr);
        }
      }
    }
  }
  // Nothing to rewrite.
  if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0)
      && (aggrMemsets.size() == 0)) return false;

  //
  // Do the transformation of an aggr load/copy/set to a loop
  //
  for (unsigned i = 0, e = aggrLoads.size(); i != e; ++i) {
    LoadInst *load = aggrLoads[i];
    StoreInst *store = dyn_cast<StoreInst>(*load->use_begin());
    Value *srcAddr = load->getOperand(0);
    Value *dstAddr = store->getOperand(1);
    unsigned numLoads = TD->getTypeStoreSize(load->getType());
    Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads);

    convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(),
                          store->isVolatile(), Context, F);

    // Erase the store first; until then it is still the load's single use.
    store->eraseFromParent();
    load->eraseFromParent();
  }

  for (unsigned i = 0, e = aggrMemcpys.size(); i != e; ++i) {
    MemTransferInst *cpy = aggrMemcpys[i];
    Value *len = cpy->getLength();
    // llvm 2.7 version of memcpy does not have volatile
    // operand yet. So always making it non-volatile
    // optimistically, so that we don't see unnecessary
    // st.volatile in ptx
    convertTransferToLoop(cpy, cpy->getSource(), cpy->getDest(), len, false,
                          false, Context, F);
    cpy->eraseFromParent();
  }

  for (unsigned i = 0, e = aggrMemsets.size(); i != e; ++i) {
    MemSetInst *memsetinst = aggrMemsets[i];
    Value *len = memsetinst->getLength();
    Value *val = memsetinst->getValue();
    convertMemSetToLoop(memsetinst, memsetinst->getDest(), len, val, Context,
                        F);
    memsetinst->eraseFromParent();
  }

  return true;
}
|
||||
|
||||
// Factory entry point: hand the pass manager a fresh instance of the
// aggregate-copy lowering pass (the caller takes ownership).
FunctionPass *llvm::createLowerAggrCopies() {
  NVPTXLowerAggrCopies *Pass = new NVPTXLowerAggrCopies();
  return Pass;
}
|
47
lib/Target/NVPTX/NVPTXLowerAggrCopies.h
Normal file
47
lib/Target/NVPTX/NVPTXLowerAggrCopies.h
Normal file
@ -0,0 +1,47 @@
|
||||
//===-- llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h ------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the declaration of the NVIDIA specific lowering of
|
||||
// aggregate copies
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef NVPTX_LOWER_AGGR_COPIES_H
|
||||
#define NVPTX_LOWER_AGGR_COPIES_H
|
||||
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
|
||||
#include "llvm/Target/TargetData.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
// NVPTXLowerAggrCopies - FunctionPass that replaces large aggregate
// load/store pairs and memcpy/memmove/memset intrinsics with explicit
// element-wise loops (implementation in NVPTXLowerAggrCopies.cpp).
struct NVPTXLowerAggrCopies : public FunctionPass {
  static char ID;  // Pass identification, replacement for typeid.

  NVPTXLowerAggrCopies() : FunctionPass(ID) {}

  // Needs TargetData for type-size queries; machine-level analysis is
  // left untouched.
  void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<TargetData>();
    AU.addPreserved<MachineFunctionAnalysis>();
  }

  virtual bool runOnFunction(Function &F);

  // Copies/sets of at least this many bytes get lowered to loops.
  static const unsigned MaxAggrCopySize = 128;

  virtual const char *getPassName() const {
    return "Lower aggregate copies/intrinsics into loops";
  }
};
|
||||
|
||||
extern FunctionPass *createLowerAggrCopies();
|
||||
}
|
||||
|
||||
#endif
|
20
lib/Target/NVPTX/NVPTXNumRegisters.h
Normal file
20
lib/Target/NVPTX/NVPTXNumRegisters.h
Normal file
@ -0,0 +1,20 @@
|
||||
|
||||
//===-- NVPTXNumRegisters.h - PTX Register Info ---------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef NVPTX_NUM_REGISTERS_H
|
||||
#define NVPTX_NUM_REGISTERS_H
|
||||
|
||||
namespace llvm {
|
||||
|
||||
// Number of NVPTX registers — presumably matches the register pool declared
// in NVPTXRegisterInfo.td; TODO confirm against the .td file.
const unsigned NVPTXNumRegisters = 396;
|
||||
|
||||
}
|
||||
|
||||
#endif
|
332
lib/Target/NVPTX/NVPTXRegisterInfo.cpp
Normal file
332
lib/Target/NVPTX/NVPTXRegisterInfo.cpp
Normal file
@ -0,0 +1,332 @@
|
||||
//===- NVPTXRegisterInfo.cpp - NVPTX Register Information -----------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the NVPTX implementation of the TargetRegisterInfo class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "nvptx-reg-info"
|
||||
|
||||
#include "NVPTX.h"
|
||||
#include "NVPTXRegisterInfo.h"
|
||||
#include "NVPTXSubtarget.h"
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/MC/MachineLocation.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace llvm
|
||||
{
|
||||
// Map a register class to the PTX type suffix (".f32", ".s64", ".pred", ...)
// used when declaring virtual registers of that class. Unknown classes map
// to "INTERNAL".
std::string getNVPTXRegClassName (TargetRegisterClass const *RC) {
  if (RC == &NVPTX::Float32RegsRegClass)  return ".f32";
  if (RC == &NVPTX::Float64RegsRegClass)  return ".f64";
  if (RC == &NVPTX::Int64RegsRegClass)    return ".s64";
  if (RC == &NVPTX::Int32RegsRegClass)    return ".s32";
  if (RC == &NVPTX::Int16RegsRegClass)    return ".s16";
  // Int8Regs become 16-bit registers in PTX
  if (RC == &NVPTX::Int8RegsRegClass)     return ".s16";
  if (RC == &NVPTX::Int1RegsRegClass)     return ".pred";
  if (RC == &NVPTX::SpecialRegsRegClass)  return "!Special!";
  if (RC == &NVPTX::V2F32RegsRegClass)    return ".v2.f32";
  if (RC == &NVPTX::V4F32RegsRegClass)    return ".v4.f32";
  if (RC == &NVPTX::V2I32RegsRegClass)    return ".v2.s32";
  if (RC == &NVPTX::V4I32RegsRegClass)    return ".v4.s32";
  if (RC == &NVPTX::V2F64RegsRegClass)    return ".v2.f64";
  if (RC == &NVPTX::V2I64RegsRegClass)    return ".v2.s64";
  if (RC == &NVPTX::V2I16RegsRegClass)    return ".v2.s16";
  if (RC == &NVPTX::V4I16RegsRegClass)    return ".v4.s16";
  // i8 vector classes are also emitted with 16-bit elements.
  if (RC == &NVPTX::V2I8RegsRegClass)     return ".v2.s16";
  if (RC == &NVPTX::V4I8RegsRegClass)     return ".v4.s16";
  return "INTERNAL";
}
|
||||
|
||||
// Map a register class to the virtual-register name prefix ("%f", "%rd",
// "%p", ...) used when printing registers of that class. Unknown classes
// map to "INTERNAL".
std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) {
  if (RC == &NVPTX::Float32RegsRegClass)  return "%f";
  if (RC == &NVPTX::Float64RegsRegClass)  return "%fd";
  if (RC == &NVPTX::Int64RegsRegClass)    return "%rd";
  if (RC == &NVPTX::Int32RegsRegClass)    return "%r";
  if (RC == &NVPTX::Int16RegsRegClass)    return "%rs";
  if (RC == &NVPTX::Int8RegsRegClass)     return "%rc";
  if (RC == &NVPTX::Int1RegsRegClass)     return "%p";
  if (RC == &NVPTX::SpecialRegsRegClass)  return "!Special!";
  if (RC == &NVPTX::V2F32RegsRegClass)    return "%v2f";
  if (RC == &NVPTX::V4F32RegsRegClass)    return "%v4f";
  if (RC == &NVPTX::V2I32RegsRegClass)    return "%v2r";
  if (RC == &NVPTX::V4I32RegsRegClass)    return "%v4r";
  if (RC == &NVPTX::V2F64RegsRegClass)    return "%v2fd";
  if (RC == &NVPTX::V2I64RegsRegClass)    return "%v2rd";
  if (RC == &NVPTX::V2I16RegsRegClass)    return "%v2s";
  if (RC == &NVPTX::V4I16RegsRegClass)    return "%v4rs";
  if (RC == &NVPTX::V2I8RegsRegClass)     return "%v2rc";
  if (RC == &NVPTX::V4I8RegsRegClass)     return "%v4rc";
  return "INTERNAL";
}
|
||||
|
||||
// True iff RC is one of the V2*/V4* vector register classes.
bool isNVPTXVectorRegClass(TargetRegisterClass const *RC) {
  switch (RC->getID()) {
  case NVPTX::V2F32RegsRegClassID:
  case NVPTX::V2F64RegsRegClassID:
  case NVPTX::V2I8RegsRegClassID:
  case NVPTX::V2I16RegsRegClassID:
  case NVPTX::V2I32RegsRegClassID:
  case NVPTX::V2I64RegsRegClassID:
  case NVPTX::V4F32RegsRegClassID:
  case NVPTX::V4I8RegsRegClassID:
  case NVPTX::V4I16RegsRegClassID:
  case NVPTX::V4I32RegsRegClassID:
    return true;
  default:
    return false;
  }
}
|
||||
|
||||
// PTX type string of the scalar element class of vector class RC.
// Asserts on non-vector classes ("Unsupported" in release builds).
std::string getNVPTXElemClassName(TargetRegisterClass const *RC) {
  switch (RC->getID()) {
  case NVPTX::V2F32RegsRegClassID:
  case NVPTX::V4F32RegsRegClassID:
    return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass);
  case NVPTX::V2F64RegsRegClassID:
    return getNVPTXRegClassName(&NVPTX::Float64RegsRegClass);
  case NVPTX::V2I8RegsRegClassID:
  case NVPTX::V4I8RegsRegClassID:
    return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass);
  case NVPTX::V2I16RegsRegClassID:
  case NVPTX::V4I16RegsRegClassID:
    return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass);
  case NVPTX::V2I32RegsRegClassID:
  case NVPTX::V4I32RegsRegClassID:
    return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass);
  case NVPTX::V2I64RegsRegClassID:
    return getNVPTXRegClassName(&NVPTX::Int64RegsRegClass);
  default:
    assert(0 && "Not a vector register class");
    return "Unsupported";
  }
}
|
||||
|
||||
// Scalar element register class of vector class RC.
// Asserts on non-vector classes (null in release builds).
const TargetRegisterClass *getNVPTXElemClass(TargetRegisterClass const *RC) {
  switch (RC->getID()) {
  case NVPTX::V2F32RegsRegClassID:
  case NVPTX::V4F32RegsRegClassID:
    return (&NVPTX::Float32RegsRegClass);
  case NVPTX::V2F64RegsRegClassID:
    return (&NVPTX::Float64RegsRegClass);
  case NVPTX::V2I8RegsRegClassID:
  case NVPTX::V4I8RegsRegClassID:
    return (&NVPTX::Int8RegsRegClass);
  case NVPTX::V2I16RegsRegClassID:
  case NVPTX::V4I16RegsRegClassID:
    return (&NVPTX::Int16RegsRegClass);
  case NVPTX::V2I32RegsRegClassID:
  case NVPTX::V4I32RegsRegClassID:
    return (&NVPTX::Int32RegsRegClass);
  case NVPTX::V2I64RegsRegClassID:
    return (&NVPTX::Int64RegsRegClass);
  default:
    assert(0 && "Not a vector register class");
    return 0;
  }
}
|
||||
|
||||
// Number of scalar elements (2 or 4) held by vector register class RC.
// Asserts on non-vector classes (-1 in release builds).
int getNVPTXVectorSize(TargetRegisterClass const *RC) {
  switch (RC->getID()) {
  case NVPTX::V2F32RegsRegClassID:
  case NVPTX::V2F64RegsRegClassID:
  case NVPTX::V2I8RegsRegClassID:
  case NVPTX::V2I16RegsRegClassID:
  case NVPTX::V2I32RegsRegClassID:
  case NVPTX::V2I64RegsRegClassID:
    return 2;
  case NVPTX::V4F32RegsRegClassID:
  case NVPTX::V4I8RegsRegClassID:
  case NVPTX::V4I16RegsRegClassID:
  case NVPTX::V4I32RegsRegClassID:
    return 4;
  default:
    assert(0 && "Not a vector register class");
    return -1;
  }
}
|
||||
}
|
||||
|
||||
// Construct register info for the given subtarget; pointer width is cached
// from the subtarget for later queries.
NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii,
                                     const NVPTXSubtarget &st)
  : NVPTXGenRegisterInfo(0),  // 0: no return-address reg (see getRARegister)
    TII(tii),
    ST(st) {
  Is64Bit = st.is64Bit();
}
|
||||
|
||||
|
||||
#define GET_REGINFO_TARGET_DESC
|
||||
#include "NVPTXGenRegisterInfo.inc"
|
||||
|
||||
/// NVPTX Callee Saved Registers
/// No callee-saved registers are reported; the list contains only the
/// terminating zero.
const uint16_t* NVPTXRegisterInfo::
getCalleeSavedRegs(const MachineFunction *MF) const {
  static const uint16_t CalleeSavedRegs[] = { 0 };
  return CalleeSavedRegs;
}
|
||||
|
||||
// NVPTX Callee Saved Reg Classes
// Mirrors getCalleeSavedRegs: an empty, null-terminated list.
const TargetRegisterClass* const*
NVPTXRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
  static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 };
  return CalleeSavedRegClasses;
}
|
||||
|
||||
// No registers are reserved on NVPTX: return an all-clear bit vector sized
// to the register count.
BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  return Reserved;
}
|
||||
|
||||
// Rewrite a frame-index operand into a [VRFrame + offset] register/immediate
// pair. Assumes the frame-index operand is immediately followed by an
// immediate offset operand.
void NVPTXRegisterInfo::
eliminateFrameIndex(MachineBasicBlock::iterator II,
                    int SPAdj,
                    RegScavenger *RS) const {
  assert(SPAdj == 0 && "Unexpected");

  // Locate the first frame-index operand of this instruction.
  unsigned i = 0;
  MachineInstr &MI = *II;
  while (!MI.getOperand(i).isFI()) {
    ++i;
    assert(i < MI.getNumOperands() &&
           "Instr doesn't have FrameIndex operand!");
  }

  int FrameIndex = MI.getOperand(i).getIndex();

  // Fold the object's frame offset into the immediate that follows the
  // frame-index operand.
  MachineFunction &MF = *MI.getParent()->getParent();
  int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
               MI.getOperand(i+1).getImm();

  // Using I0 as the frame pointer
  MI.getOperand(i).ChangeToRegister(NVPTX::VRFrame, false);
  MI.getOperand(i+1).ChangeToImmediate(Offset);
}
|
||||
|
||||
|
||||
// DWARF register numbering: everything maps to 0 (PTX emits no DWARF
// register info here).
int NVPTXRegisterInfo::
getDwarfRegNum(unsigned RegNum, bool isEH) const {
  return 0;
}
|
||||
|
||||
// VRFrame serves as the frame pointer (see eliminateFrameIndex).
unsigned NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  return NVPTX::VRFrame;
}
|
||||
|
||||
// No return-address register is exposed; report 0.
unsigned NVPTXRegisterInfo::getRARegister() const {
  return 0;
}
|
||||
|
||||
// This function eliminates ADJCALLSTACKDOWN,
// ADJCALLSTACKUP pseudo instructions.
// No call-frame setup code is emitted for them; the pseudos are dropped.
void NVPTXRegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  // Simply discard ADJCALLSTACKDOWN,
  // ADJCALLSTACKUP instructions.
  MBB.erase(I);
}
|
94
lib/Target/NVPTX/NVPTXRegisterInfo.h
Normal file
94
lib/Target/NVPTX/NVPTXRegisterInfo.h
Normal file
@ -0,0 +1,94 @@
|
||||
//===- NVPTXRegisterInfo.h - NVPTX Register Information Impl ----*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the NVPTX implementation of the TargetRegisterInfo class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef NVPTXREGISTERINFO_H
|
||||
#define NVPTXREGISTERINFO_H
|
||||
|
||||
#include "ManagedStringPool.h"
|
||||
#include "llvm/Target/TargetRegisterInfo.h"
|
||||
|
||||
|
||||
#define GET_REGINFO_HEADER
|
||||
#include "NVPTXGenRegisterInfo.inc"
|
||||
#include "llvm/Target/TargetRegisterInfo.h"
|
||||
#include <sstream>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
// Forward Declarations.
|
||||
class TargetInstrInfo;
|
||||
class NVPTXSubtarget;
|
||||
|
||||
// NVPTXRegisterInfo - NVPTX implementation of TargetRegisterInfo.
// See NVPTXRegisterInfo.cpp for method semantics.
class NVPTXRegisterInfo : public NVPTXGenRegisterInfo {
private:
  const TargetInstrInfo &TII;  // Instruction info for this target.
  const NVPTXSubtarget &ST;    // Owning subtarget.
  bool Is64Bit;                // Cached pointer width from the subtarget.
  // Hold Strings that can be free'd all together with NVPTXRegisterInfo
  ManagedStringPool ManagedStrPool;

public:
  NVPTXRegisterInfo(const TargetInstrInfo &tii,
                    const NVPTXSubtarget &st);


  //------------------------------------------------------
  // Pure virtual functions from TargetRegisterInfo
  //------------------------------------------------------

  // NVPTX callee saved registers
  virtual const uint16_t*
  getCalleeSavedRegs(const MachineFunction *MF = 0) const;

  // NVPTX callee saved register classes
  virtual const TargetRegisterClass* const *
  getCalleeSavedRegClasses(const MachineFunction *MF) const;

  virtual BitVector getReservedRegs(const MachineFunction &MF) const;

  // Replaces a frame-index operand with VRFrame + offset.
  virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                   int SPAdj,
                                   RegScavenger *RS=NULL) const;

  // Discards ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudos.
  void eliminateCallFramePseudoInstr(MachineFunction &MF,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator I) const;

  virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const;
  virtual unsigned getFrameRegister(const MachineFunction &MF) const;
  virtual unsigned getRARegister() const;

  // Pool of lazily-created register-name strings; freed together with
  // this object.
  ManagedStringPool *getStrPool() const {
    return const_cast<ManagedStringPool *>(&ManagedStrPool);
  }

  // Synthesizes a name ("regN") for RegNo; the returned pointer stays
  // valid for the lifetime of the string pool.
  const char *getName(unsigned RegNo) const {
    std::stringstream O;
    O << "reg" << RegNo;
    return getStrPool()->getManagedString(O.str().c_str())->c_str();
  }

};
|
||||
|
||||
|
||||
std::string getNVPTXRegClassName (const TargetRegisterClass *RC);
|
||||
std::string getNVPTXRegClassStr (const TargetRegisterClass *RC);
|
||||
bool isNVPTXVectorRegClass (const TargetRegisterClass *RC);
|
||||
std::string getNVPTXElemClassName (const TargetRegisterClass *RC);
|
||||
int getNVPTXVectorSize (const TargetRegisterClass *RC);
|
||||
const TargetRegisterClass *getNVPTXElemClass(const TargetRegisterClass *RC);
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
|
||||
#endif
|
7235
lib/Target/NVPTX/NVPTXRegisterInfo.td
Normal file
7235
lib/Target/NVPTX/NVPTXRegisterInfo.td
Normal file
File diff suppressed because it is too large
Load Diff
45
lib/Target/NVPTX/NVPTXSection.h
Normal file
45
lib/Target/NVPTX/NVPTXSection.h
Normal file
@ -0,0 +1,45 @@
|
||||
//===- NVPTXSection.h - NVPTX-specific section representation -*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file declares the NVPTXSection class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_NVPTXSECTION_H
|
||||
#define LLVM_NVPTXSECTION_H
|
||||
|
||||
#include "llvm/MC/MCSection.h"
|
||||
#include "llvm/GlobalVariable.h"
|
||||
#include <vector>
|
||||
|
||||
namespace llvm {
|
||||
/// NVPTXSection - Represents a section in PTX
|
||||
/// PTX does not have sections. We create this class in order to use
|
||||
/// the ASMPrint interface.
|
||||
///
|
||||
class NVPTXSection : public MCSection {
|
||||
|
||||
public:
|
||||
NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K) {}
|
||||
~NVPTXSection() {};
|
||||
|
||||
/// Override this as NVPTX has its own way of printing switching
|
||||
/// to a section.
|
||||
virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
|
||||
raw_ostream &OS) const {}
|
||||
|
||||
/// Base address of PTX sections is zero.
|
||||
virtual bool isBaseAddressKnownZero() const { return true; }
|
||||
virtual bool UseCodeAlign() const { return false; }
|
||||
virtual bool isVirtualSection() const { return false; }
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif
|
77
lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
Normal file
77
lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
Normal file
@ -0,0 +1,77 @@
|
||||
//===- NVPTXSplitBBatBar.cpp - Split BB at Barrier --*- C++ -*--===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Split basic blocks so that a basic block that contains a barrier instruction
|
||||
// only contains the barrier instruction.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Function.h"
|
||||
#include "llvm/Instructions.h"
|
||||
#include "llvm/Intrinsics.h"
|
||||
#include "llvm/IntrinsicInst.h"
|
||||
#include "llvm/Support/InstIterator.h"
|
||||
#include "NVPTXUtilities.h"
|
||||
#include "NVPTXSplitBBatBar.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace llvm {
|
||||
FunctionPass *createSplitBBatBarPass();
|
||||
}
|
||||
|
||||
char NVPTXSplitBBatBar::ID = 0;
|
||||
|
||||
// Split basic blocks so that every barrier intrinsic sits alone: a split
// point is recorded before a barrier (unless it starts the block) and after
// it (unless the next instruction is the terminator). Splitting happens
// after collection so the iterators stay valid. Returns true iff the
// function was modified.
bool NVPTXSplitBBatBar::runOnFunction(Function &F) {

  SmallVector<Instruction *, 4> SplitPoints;
  bool changed = false;

  // Collect all the split points in SplitPoints
  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
    BasicBlock::iterator IB = BI->begin();
    BasicBlock::iterator II = IB;
    BasicBlock::iterator IE = BI->end();

    // Skip the first instruction. No splitting is needed at this
    // point even if this is a bar.
    while (II != IE) {
      if (IntrinsicInst *inst = dyn_cast<IntrinsicInst>(II)) {
        Intrinsic::ID id = inst->getIntrinsicID();
        // If this is a barrier, split at this instruction
        // and the next instruction.
        if (llvm::isBarrierIntrinsic(id)) {
          if (II != IB)
            SplitPoints.push_back(II);
          II++;
          if ((II != IE) && (!II->isTerminator())) {
            SplitPoints.push_back(II);
            II++;
          }
          continue;
        }
      }
      II++;
    }
  }

  // Perform the splits now that iteration above is complete.
  for (unsigned i = 0; i != SplitPoints.size(); i++) {
    changed = true;
    Instruction *inst = SplitPoints[i];
    inst->getParent()->splitBasicBlock(inst, "bar_split");
  }

  return changed;
}
|
||||
|
||||
// This interface will most likely not be necessary, because this pass will
// not be invoked by the driver, but will be used as a prerequisite to
// another pass.
// Factory function: returns a fresh pass instance owned by the caller /
// pass manager.
FunctionPass *llvm::createSplitBBatBarPass() {
  return new NVPTXSplitBBatBar();
}
|
41
lib/Target/NVPTX/NVPTXSplitBBatBar.h
Normal file
41
lib/Target/NVPTX/NVPTXSplitBBatBar.h
Normal file
@ -0,0 +1,41 @@
|
||||
//===-- llvm/lib/Target/NVPTX/NVPTXSplitBBatBar.h ---------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the declaration of the NVIDIA specific declarations
|
||||
// for splitting basic blocks at barrier instructions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef NVPTX_SPLIT_BB_AT_BAR_H
|
||||
#define NVPTX_SPLIT_BB_AT_BAR_H
|
||||
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
// NVPTXSplitBBatBar - FunctionPass that splits basic blocks around barrier
// intrinsics so each barrier ends up in a block of its own
// (implementation in NVPTXSplitBBatBar.cpp).
struct NVPTXSplitBBatBar : public FunctionPass {
  static char ID;  // Pass identification.

  NVPTXSplitBBatBar() : FunctionPass(ID) {}
  // IR-level transform; machine-level analysis is left untouched.
  void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addPreserved<MachineFunctionAnalysis>();
  }
  virtual bool runOnFunction(Function &F);

  virtual const char *getPassName() const {
    return "Split basic blocks at barrier";
  }
};
|
||||
|
||||
extern FunctionPass *createSplitBBatBarPass();
|
||||
}
|
||||
|
||||
#endif //NVPTX_SPLIT_BB_AT_BAR_H
|
57
lib/Target/NVPTX/NVPTXSubtarget.cpp
Normal file
57
lib/Target/NVPTX/NVPTXSubtarget.cpp
Normal file
@ -0,0 +1,57 @@
|
||||
//===- NVPTXSubtarget.cpp - NVPTX Subtarget Information -------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements the NVPTX specific subclass of TargetSubtarget.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "NVPTXSubtarget.h"
|
||||
#define GET_SUBTARGETINFO_ENUM
|
||||
#define GET_SUBTARGETINFO_TARGET_DESC
|
||||
#define GET_SUBTARGETINFO_CTOR
|
||||
#include "NVPTXGenSubtargetInfo.inc"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Select Driver Interface
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
namespace {
|
||||
cl::opt<NVPTX::DrvInterface>
|
||||
DriverInterface(cl::desc("Choose driver interface:"),
|
||||
cl::values(
|
||||
clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"),
|
||||
clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"),
|
||||
clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"),
|
||||
clEnumValEnd),
|
||||
cl::init(NVPTX::NVCL));
|
||||
}
|
||||
|
||||
// Build the subtarget: record the driver interface from the command-line
// option, pick the CPU name ("sm_10" by default), and derive SmVersion
// from it.
NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
                               const std::string &FS, bool is64Bit)
:NVPTXGenSubtargetInfo(TT, "", FS), // Don't pass CPU to subtarget,
                                    // because we don't register all
                                    // nvptx targets.
Is64Bit(is64Bit) {

  drvInterface = DriverInterface;

  // Provide the default CPU if none
  std::string defCPU = "sm_10";

  // Get the TargetName from the FS if available
  if (FS.empty() && CPU.empty())
    TargetName = defCPU;
  else if (!CPU.empty())
    TargetName = CPU;
  else
    llvm_unreachable("we are not using FeatureStr");

  // Set up the SmVersion
  // NOTE(review): assumes TargetName has the form "sm_NN"; atoi on a
  // malformed name silently yields 0 — confirm upstream validation.
  SmVersion = atoi(TargetName.c_str()+3);
}
|
92
lib/Target/NVPTX/NVPTXSubtarget.h
Normal file
92
lib/Target/NVPTX/NVPTXSubtarget.h
Normal file
@ -0,0 +1,92 @@
|
||||
//=====-- NVPTXSubtarget.h - Define Subtarget for the NVPTX ---*- C++ -*--====//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file declares the NVPTX specific subclass of TargetSubtarget.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef NVPTXSUBTARGET_H
|
||||
#define NVPTXSUBTARGET_H
|
||||
|
||||
#include "llvm/Target/TargetSubtargetInfo.h"
|
||||
#include "NVPTX.h"
|
||||
|
||||
#define GET_SUBTARGETINFO_HEADER
|
||||
#include "NVPTXGenSubtargetInfo.inc"
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
// NVPTXSubtarget - NVPTX-specific subtarget state: SM version, CPU name,
// driver interface, and pointer width.
class NVPTXSubtarget : public NVPTXGenSubtargetInfo {

  unsigned int SmVersion;            // SM version derived from the CPU name.
  std::string TargetName;            // CPU name string (e.g. "sm_10").
  NVPTX::DrvInterface drvInterface;  // Selected driver interface.
  bool dummy; // For the 'dummy' feature, see NVPTX.td
  bool Is64Bit;                      // Generating 64-bit PTX?

public:
  /// This constructor initializes the data members to match that
  /// of the specified module.
  ///
  NVPTXSubtarget(const std::string &TT, const std::string &CPU,
                 const std::string &FS, bool is64Bit);

  // Capability predicates: each feature is gated on the minimum
  // SM version that provides it.
  bool hasBrkPt() const { return SmVersion >= 11; }
  bool hasAtomRedG32() const { return SmVersion >= 11; }
  bool hasAtomRedS32() const { return SmVersion >= 12; }
  bool hasAtomRedG64() const { return SmVersion >= 12; }
  bool hasAtomRedS64() const { return SmVersion >= 20; }
  bool hasAtomRedGen32() const { return SmVersion >= 20; }
  bool hasAtomRedGen64() const { return SmVersion >= 20; }
  bool hasAtomAddF32() const { return SmVersion >= 20; }
  bool hasVote() const { return SmVersion >= 12; }
  bool hasDouble() const { return SmVersion >= 13; }
  bool reqPTX20() const { return SmVersion >= 20; }
  bool hasF32FTZ() const { return SmVersion >= 20; }
  bool hasFMAF32() const { return SmVersion >= 20; }
  bool hasFMAF64() const { return SmVersion >= 13; }
  bool hasLDU() const { return SmVersion >= 20; }
  bool hasGenericLdSt() const { return SmVersion >= 20; }
  // Rotate support: never in hardware; 32-bit software rotate is always
  // reported available.
  inline bool hasHWROT32() const { return false; }
  inline bool hasSWROT32() const {
    return true;
  }
  inline bool hasROT32() const { return hasHWROT32() || hasSWROT32() ; }
  inline bool hasROT64() const { return SmVersion >= 20; }


  bool is64Bit() const { return Is64Bit; }

  unsigned int getSmVersion() const { return SmVersion; }
  NVPTX::DrvInterface getDrvInterface() const { return drvInterface; }
  std::string getTargetName() const { return TargetName; }

  // Generated by tblgen from the .td files.
  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);

  // Target data-layout string; only the pointer size differs between the
  // 32-bit and 64-bit variants.
  std::string getDataLayout() const {
    const char *p;
    if (is64Bit())
      p = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-"
          "f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-"
          "n16:32:64";
    else
      p = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-"
          "f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-"
          "n16:32:64";

    return std::string(p);
  }

};
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
#endif // NVPTXSUBTARGET_H
|
133
lib/Target/NVPTX/NVPTXTargetMachine.cpp
Normal file
133
lib/Target/NVPTX/NVPTXTargetMachine.cpp
Normal file
@ -0,0 +1,133 @@
|
||||
//===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Top-level implementation for the NVPTX target.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "NVPTXTargetMachine.h"
|
||||
#include "NVPTX.h"
|
||||
#include "NVPTXSplitBBatBar.h"
|
||||
#include "NVPTXLowerAggrCopies.h"
|
||||
#include "MCTargetDesc/NVPTXMCAsmInfo.h"
|
||||
#include "NVPTXAllocaHoisting.h"
|
||||
#include "llvm/PassManager.h"
|
||||
#include "llvm/Analysis/Passes.h"
|
||||
#include "llvm/Analysis/Verifier.h"
|
||||
#include "llvm/Assembly/PrintModulePass.h"
|
||||
#include "llvm/ADT/OwningPtr.h"
|
||||
#include "llvm/CodeGen/AsmPrinter.h"
|
||||
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
|
||||
#include "llvm/CodeGen/MachineModuleInfo.h"
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
#include "llvm/MC/MCAsmInfo.h"
|
||||
#include "llvm/MC/MCInstrInfo.h"
|
||||
#include "llvm/MC/MCStreamer.h"
|
||||
#include "llvm/MC/MCSubtargetInfo.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Target/TargetData.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
#include "llvm/Target/TargetLowering.h"
|
||||
#include "llvm/Target/TargetLoweringObjectFile.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
#include "llvm/Target/TargetRegisterInfo.h"
|
||||
#include "llvm/Target/TargetSubtargetInfo.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/FormattedStream.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
|
||||
extern "C" void LLVMInitializeNVPTXTarget() {
|
||||
// Register the target.
|
||||
RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32);
|
||||
RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64);
|
||||
|
||||
RegisterMCAsmInfo<NVPTXMCAsmInfo> A(TheNVPTXTarget32);
|
||||
RegisterMCAsmInfo<NVPTXMCAsmInfo> B(TheNVPTXTarget64);
|
||||
|
||||
}
|
||||
|
||||
NVPTXTargetMachine::NVPTXTargetMachine(const Target &T,
|
||||
StringRef TT,
|
||||
StringRef CPU,
|
||||
StringRef FS,
|
||||
const TargetOptions& Options,
|
||||
Reloc::Model RM,
|
||||
CodeModel::Model CM,
|
||||
CodeGenOpt::Level OL,
|
||||
bool is64bit)
|
||||
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
|
||||
Subtarget(TT, CPU, FS, is64bit),
|
||||
DataLayout(Subtarget.getDataLayout()),
|
||||
InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit)
|
||||
/*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {
|
||||
}
|
||||
|
||||
|
||||
|
||||
void NVPTXTargetMachine32::anchor() {}
|
||||
|
||||
NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, StringRef TT,
|
||||
StringRef CPU, StringRef FS,
|
||||
const TargetOptions &Options,
|
||||
Reloc::Model RM, CodeModel::Model CM,
|
||||
CodeGenOpt::Level OL)
|
||||
: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
|
||||
}
|
||||
|
||||
void NVPTXTargetMachine64::anchor() {}
|
||||
|
||||
NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, StringRef TT,
|
||||
StringRef CPU, StringRef FS,
|
||||
const TargetOptions &Options,
|
||||
Reloc::Model RM, CodeModel::Model CM,
|
||||
CodeGenOpt::Level OL)
|
||||
: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
|
||||
}
|
||||
|
||||
|
||||
namespace llvm {
|
||||
class NVPTXPassConfig : public TargetPassConfig {
|
||||
public:
|
||||
NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM)
|
||||
: TargetPassConfig(TM, PM) {}
|
||||
|
||||
NVPTXTargetMachine &getNVPTXTargetMachine() const {
|
||||
return getTM<NVPTXTargetMachine>();
|
||||
}
|
||||
|
||||
virtual bool addInstSelector();
|
||||
virtual bool addPreRegAlloc();
|
||||
};
|
||||
}
|
||||
|
||||
TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
|
||||
NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM);
|
||||
return PassConfig;
|
||||
}
|
||||
|
||||
bool NVPTXPassConfig::addInstSelector() {
|
||||
PM->add(createLowerAggrCopies());
|
||||
PM->add(createSplitBBatBarPass());
|
||||
PM->add(createAllocaHoisting());
|
||||
PM->add(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
|
||||
PM->add(createVectorElementizePass(getNVPTXTargetMachine()));
|
||||
return false;
|
||||
}
|
||||
|
||||
bool NVPTXPassConfig::addPreRegAlloc() {
|
||||
return false;
|
||||
}
|
131
lib/Target/NVPTX/NVPTXTargetMachine.h
Normal file
131
lib/Target/NVPTX/NVPTXTargetMachine.h
Normal file
@ -0,0 +1,131 @@
|
||||
//===-- NVPTXTargetMachine.h - Define TargetMachine for NVPTX ---*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file declares the NVPTX specific subclass of TargetMachine.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
#ifndef NVPTX_TARGETMACHINE_H
|
||||
#define NVPTX_TARGETMACHINE_H
|
||||
|
||||
#include "NVPTXInstrInfo.h"
|
||||
#include "NVPTXISelLowering.h"
|
||||
#include "NVPTXRegisterInfo.h"
|
||||
#include "NVPTXSubtarget.h"
|
||||
#include "NVPTXFrameLowering.h"
|
||||
#include "ManagedStringPool.h"
|
||||
#include "llvm/Target/TargetData.h"
|
||||
#include "llvm/Target/TargetFrameLowering.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
#include "llvm/Target/TargetSelectionDAGInfo.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
/// NVPTXTargetMachine
|
||||
///
|
||||
class NVPTXTargetMachine : public LLVMTargetMachine {
|
||||
NVPTXSubtarget Subtarget;
|
||||
const TargetData DataLayout; // Calculates type size & alignment
|
||||
NVPTXInstrInfo InstrInfo;
|
||||
NVPTXTargetLowering TLInfo;
|
||||
TargetSelectionDAGInfo TSInfo;
|
||||
|
||||
// NVPTX does not have any call stack frame, but need a NVPTX specific
|
||||
// FrameLowering class because TargetFrameLowering is abstract.
|
||||
NVPTXFrameLowering FrameLowering;
|
||||
|
||||
// Hold Strings that can be free'd all together with NVPTXTargetMachine
|
||||
ManagedStringPool ManagedStrPool;
|
||||
|
||||
//bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
|
||||
// bool DisableVerify, MCContext *&OutCtx);
|
||||
|
||||
public:
|
||||
//virtual bool addPassesToEmitFile(PassManagerBase &PM,
|
||||
// formatted_raw_ostream &Out,
|
||||
// CodeGenFileType FileType,
|
||||
// CodeGenOpt::Level OptLevel,
|
||||
// bool DisableVerify = true) ;
|
||||
|
||||
NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU,
|
||||
StringRef FS, const TargetOptions &Options,
|
||||
Reloc::Model RM, CodeModel::Model CM,
|
||||
CodeGenOpt::Level OP,
|
||||
bool is64bit);
|
||||
|
||||
virtual const TargetFrameLowering *getFrameLowering() const {
|
||||
return &FrameLowering;
|
||||
}
|
||||
virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
|
||||
virtual const TargetData *getTargetData() const { return &DataLayout;}
|
||||
virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget;}
|
||||
|
||||
virtual const NVPTXRegisterInfo *getRegisterInfo() const {
|
||||
return &(InstrInfo.getRegisterInfo());
|
||||
}
|
||||
|
||||
virtual NVPTXTargetLowering *getTargetLowering() const {
|
||||
return const_cast<NVPTXTargetLowering*>(&TLInfo);
|
||||
}
|
||||
|
||||
virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const {
|
||||
return &TSInfo;
|
||||
}
|
||||
|
||||
//virtual bool addInstSelector(PassManagerBase &PM,
|
||||
// CodeGenOpt::Level OptLevel);
|
||||
|
||||
//virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level);
|
||||
|
||||
ManagedStringPool *getManagedStrPool() const {
|
||||
return const_cast<ManagedStringPool*>(&ManagedStrPool);
|
||||
}
|
||||
|
||||
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
|
||||
|
||||
// Emission of machine code through JITCodeEmitter is not supported.
|
||||
virtual bool addPassesToEmitMachineCode(PassManagerBase &,
|
||||
JITCodeEmitter &,
|
||||
bool = true) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Emission of machine code through MCJIT is not supported.
|
||||
virtual bool addPassesToEmitMC(PassManagerBase &,
|
||||
MCContext *&,
|
||||
raw_ostream &,
|
||||
bool = true) {
|
||||
return true;
|
||||
}
|
||||
|
||||
}; // NVPTXTargetMachine.
|
||||
|
||||
class NVPTXTargetMachine32 : public NVPTXTargetMachine {
|
||||
virtual void anchor();
|
||||
public:
|
||||
NVPTXTargetMachine32(const Target &T, StringRef TT, StringRef CPU,
|
||||
StringRef FS, const TargetOptions &Options,
|
||||
Reloc::Model RM, CodeModel::Model CM,
|
||||
CodeGenOpt::Level OL);
|
||||
};
|
||||
|
||||
class NVPTXTargetMachine64 : public NVPTXTargetMachine {
|
||||
virtual void anchor();
|
||||
public:
|
||||
NVPTXTargetMachine64(const Target &T, StringRef TT, StringRef CPU,
|
||||
StringRef FS, const TargetOptions &Options,
|
||||
Reloc::Model RM, CodeModel::Model CM,
|
||||
CodeGenOpt::Level OL);
|
||||
};
|
||||
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif
|
105
lib/Target/NVPTX/NVPTXTargetObjectFile.h
Normal file
105
lib/Target/NVPTX/NVPTXTargetObjectFile.h
Normal file
@ -0,0 +1,105 @@
|
||||
//===-- NVPTXTargetObjectFile.h - NVPTX Object Info -------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TARGET_NVPTX_TARGETOBJECTFILE_H
|
||||
#define LLVM_TARGET_NVPTX_TARGETOBJECTFILE_H
|
||||
|
||||
#include "NVPTXSection.h"
|
||||
#include "llvm/Target/TargetLoweringObjectFile.h"
|
||||
#include <string>
|
||||
|
||||
namespace llvm {
|
||||
class GlobalVariable;
|
||||
class Module;
|
||||
|
||||
class NVPTXTargetObjectFile : public TargetLoweringObjectFile {
|
||||
|
||||
public:
|
||||
NVPTXTargetObjectFile() {};
|
||||
~NVPTXTargetObjectFile() {
|
||||
delete TextSection;
|
||||
delete DataSection;
|
||||
delete BSSSection;
|
||||
delete ReadOnlySection;
|
||||
|
||||
delete StaticCtorSection;
|
||||
delete StaticDtorSection;
|
||||
delete LSDASection;
|
||||
delete EHFrameSection;
|
||||
delete DwarfAbbrevSection;
|
||||
delete DwarfInfoSection;
|
||||
delete DwarfLineSection;
|
||||
delete DwarfFrameSection;
|
||||
delete DwarfPubTypesSection;
|
||||
delete DwarfDebugInlineSection;
|
||||
delete DwarfStrSection;
|
||||
delete DwarfLocSection;
|
||||
delete DwarfARangesSection;
|
||||
delete DwarfRangesSection;
|
||||
delete DwarfMacroInfoSection;
|
||||
};
|
||||
|
||||
virtual void Initialize(MCContext &ctx, const TargetMachine &TM) {
|
||||
TextSection = new NVPTXSection(MCSection::SV_ELF,
|
||||
SectionKind::getText());
|
||||
DataSection = new NVPTXSection(MCSection::SV_ELF,
|
||||
SectionKind::getDataRel());
|
||||
BSSSection = new NVPTXSection(MCSection::SV_ELF,
|
||||
SectionKind::getBSS());
|
||||
ReadOnlySection = new NVPTXSection(MCSection::SV_ELF,
|
||||
SectionKind::getReadOnly());
|
||||
|
||||
StaticCtorSection = new NVPTXSection(MCSection::SV_ELF,
|
||||
SectionKind::getMetadata());
|
||||
StaticDtorSection = new NVPTXSection(MCSection::SV_ELF,
|
||||
SectionKind::getMetadata());
|
||||
LSDASection = new NVPTXSection(MCSection::SV_ELF,
|
||||
SectionKind::getMetadata());
|
||||
EHFrameSection = new NVPTXSection(MCSection::SV_ELF,
|
||||
SectionKind::getMetadata());
|
||||
DwarfAbbrevSection = new NVPTXSection(MCSection::SV_ELF,
|
||||
SectionKind::getMetadata());
|
||||
DwarfInfoSection = new NVPTXSection(MCSection::SV_ELF,
|
||||
SectionKind::getMetadata());
|
||||
DwarfLineSection = new NVPTXSection(MCSection::SV_ELF,
|
||||
SectionKind::getMetadata());
|
||||
DwarfFrameSection = new NVPTXSection(MCSection::SV_ELF,
|
||||
SectionKind::getMetadata());
|
||||
DwarfPubTypesSection = new NVPTXSection(MCSection::SV_ELF,
|
||||
SectionKind::getMetadata());
|
||||
DwarfDebugInlineSection = new NVPTXSection(MCSection::SV_ELF,
|
||||
SectionKind::getMetadata());
|
||||
DwarfStrSection = new NVPTXSection(MCSection::SV_ELF,
|
||||
SectionKind::getMetadata());
|
||||
DwarfLocSection = new NVPTXSection(MCSection::SV_ELF,
|
||||
SectionKind::getMetadata());
|
||||
DwarfARangesSection = new NVPTXSection(MCSection::SV_ELF,
|
||||
SectionKind::getMetadata());
|
||||
DwarfRangesSection = new NVPTXSection(MCSection::SV_ELF,
|
||||
SectionKind::getMetadata());
|
||||
DwarfMacroInfoSection = new NVPTXSection(MCSection::SV_ELF,
|
||||
SectionKind::getMetadata());
|
||||
};
|
||||
|
||||
virtual const MCSection *getSectionForConstant(SectionKind Kind) const {
|
||||
return ReadOnlySection;
|
||||
};
|
||||
|
||||
virtual const MCSection *
|
||||
getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
|
||||
Mangler *Mang,
|
||||
const TargetMachine &TM) const {
|
||||
return DataSection;
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif
|
514
lib/Target/NVPTX/NVPTXUtilities.cpp
Normal file
514
lib/Target/NVPTX/NVPTXUtilities.cpp
Normal file
@ -0,0 +1,514 @@
|
||||
//===- NVPTXUtilities.cpp - Utility Functions -----------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains miscellaneous utility functions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "NVPTXUtilities.h"
|
||||
#include "NVPTX.h"
|
||||
#include "llvm/GlobalVariable.h"
|
||||
#include "llvm/Function.h"
|
||||
#include "llvm/Module.h"
|
||||
#include "llvm/Constants.h"
|
||||
#include "llvm/Operator.h"
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
//#include <iostream>
|
||||
#include "llvm/Support/ManagedStatic.h"
|
||||
#include "llvm/Support/InstIterator.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
typedef std::map<std::string, std::vector<unsigned> > key_val_pair_t;
|
||||
typedef std::map<const GlobalValue *, key_val_pair_t> global_val_annot_t;
|
||||
typedef std::map<const Module *, global_val_annot_t> per_module_annot_t;
|
||||
|
||||
ManagedStatic<per_module_annot_t> annotationCache;
|
||||
|
||||
|
||||
static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
|
||||
assert(md && "Invalid mdnode for annotation");
|
||||
assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands");
|
||||
// start index = 1, to skip the global variable key
|
||||
// increment = 2, to skip the value for each property-value pairs
|
||||
for (unsigned i = 1, e = md->getNumOperands(); i != e; i += 2) {
|
||||
// property
|
||||
const MDString *prop = dyn_cast<MDString>(md->getOperand(i));
|
||||
assert(prop && "Annotation property not a string");
|
||||
|
||||
// value
|
||||
ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i+1));
|
||||
assert(Val && "Value operand not a constant int");
|
||||
|
||||
std::string keyname = prop->getString().str();
|
||||
if (retval.find(keyname) != retval.end())
|
||||
retval[keyname].push_back(Val->getZExtValue());
|
||||
else {
|
||||
std::vector<unsigned> tmp;
|
||||
tmp.push_back(Val->getZExtValue());
|
||||
retval[keyname] = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
|
||||
NamedMDNode *NMD = m->getNamedMetadata(llvm::NamedMDForAnnotations);
|
||||
if (!NMD)
|
||||
return;
|
||||
key_val_pair_t tmp;
|
||||
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
|
||||
const MDNode *elem = NMD->getOperand(i);
|
||||
|
||||
Value *entity = elem->getOperand(0);
|
||||
// entity may be null due to DCE
|
||||
if (!entity)
|
||||
continue;
|
||||
if (entity != gv)
|
||||
continue;
|
||||
|
||||
// accumulate annotations for entity in tmp
|
||||
cacheAnnotationFromMD(elem, tmp);
|
||||
}
|
||||
|
||||
if (tmp.empty()) // no annotations for this gv
|
||||
return;
|
||||
|
||||
if ((*annotationCache).find(m) != (*annotationCache).end())
|
||||
(*annotationCache)[m][gv] = tmp;
|
||||
else {
|
||||
global_val_annot_t tmp1;
|
||||
tmp1[gv] = tmp;
|
||||
(*annotationCache)[m] = tmp1;
|
||||
}
|
||||
}
|
||||
|
||||
bool llvm::findOneNVVMAnnotation(const GlobalValue *gv, std::string prop,
|
||||
unsigned &retval) {
|
||||
const Module *m = gv->getParent();
|
||||
if ((*annotationCache).find(m) == (*annotationCache).end())
|
||||
cacheAnnotationFromMD(m, gv);
|
||||
else if ((*annotationCache)[m].find(gv) == (*annotationCache)[m].end())
|
||||
cacheAnnotationFromMD(m, gv);
|
||||
if ((*annotationCache)[m][gv].find(prop) == (*annotationCache)[m][gv].end())
|
||||
return false;
|
||||
retval = (*annotationCache)[m][gv][prop][0];
|
||||
return true;
|
||||
}
|
||||
|
||||
bool llvm::findAllNVVMAnnotation(const GlobalValue *gv, std::string prop,
|
||||
std::vector<unsigned> &retval) {
|
||||
const Module *m = gv->getParent();
|
||||
if ((*annotationCache).find(m) == (*annotationCache).end())
|
||||
cacheAnnotationFromMD(m, gv);
|
||||
else if ((*annotationCache)[m].find(gv) == (*annotationCache)[m].end())
|
||||
cacheAnnotationFromMD(m, gv);
|
||||
if ((*annotationCache)[m][gv].find(prop) == (*annotationCache)[m][gv].end())
|
||||
return false;
|
||||
retval = (*annotationCache)[m][gv][prop];
|
||||
return true;
|
||||
}
|
||||
|
||||
bool llvm::isTexture(const llvm::Value &val) {
|
||||
if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
|
||||
unsigned annot;
|
||||
if (llvm::findOneNVVMAnnotation(gv,
|
||||
llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE],
|
||||
annot)) {
|
||||
assert((annot == 1) && "Unexpected annotation on a texture symbol");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool llvm::isSurface(const llvm::Value &val) {
|
||||
if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
|
||||
unsigned annot;
|
||||
if (llvm::findOneNVVMAnnotation(gv,
|
||||
llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE],
|
||||
annot)) {
|
||||
assert((annot == 1) && "Unexpected annotation on a surface symbol");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool llvm::isSampler(const llvm::Value &val) {
|
||||
if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
|
||||
unsigned annot;
|
||||
if (llvm::findOneNVVMAnnotation(gv,
|
||||
llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
|
||||
annot)) {
|
||||
assert((annot == 1) && "Unexpected annotation on a sampler symbol");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (const Argument *arg = dyn_cast<Argument>(&val)) {
|
||||
const Function *func = arg->getParent();
|
||||
std::vector<unsigned> annot;
|
||||
if (llvm::findAllNVVMAnnotation(func,
|
||||
llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
|
||||
annot)) {
|
||||
if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool llvm::isImageReadOnly(const llvm::Value &val) {
|
||||
if (const Argument *arg = dyn_cast<Argument>(&val)) {
|
||||
const Function *func = arg->getParent();
|
||||
std::vector<unsigned> annot;
|
||||
if (llvm::findAllNVVMAnnotation(func,
|
||||
llvm::PropertyAnnotationNames[llvm::PROPERTY_ISREADONLY_IMAGE_PARAM],
|
||||
annot)) {
|
||||
if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool llvm::isImageWriteOnly(const llvm::Value &val) {
|
||||
if (const Argument *arg = dyn_cast<Argument>(&val)) {
|
||||
const Function *func = arg->getParent();
|
||||
std::vector<unsigned> annot;
|
||||
if (llvm::findAllNVVMAnnotation(func,
|
||||
llvm::PropertyAnnotationNames[llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM],
|
||||
annot)) {
|
||||
if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool llvm::isImage(const llvm::Value &val) {
|
||||
return llvm::isImageReadOnly(val) || llvm::isImageWriteOnly(val);
|
||||
}
|
||||
|
||||
std::string llvm::getTextureName(const llvm::Value &val) {
|
||||
assert(val.hasName() && "Found texture variable with no name");
|
||||
return val.getName();
|
||||
}
|
||||
|
||||
std::string llvm::getSurfaceName(const llvm::Value &val) {
|
||||
assert(val.hasName() && "Found surface variable with no name");
|
||||
return val.getName();
|
||||
}
|
||||
|
||||
std::string llvm::getSamplerName(const llvm::Value &val) {
|
||||
assert(val.hasName() && "Found sampler variable with no name");
|
||||
return val.getName();
|
||||
}
|
||||
|
||||
bool llvm::getMaxNTIDx(const Function &F, unsigned &x) {
|
||||
return (llvm::findOneNVVMAnnotation(&F,
|
||||
llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X],
|
||||
x));
|
||||
}
|
||||
|
||||
bool llvm::getMaxNTIDy(const Function &F, unsigned &y) {
|
||||
return (llvm::findOneNVVMAnnotation(&F,
|
||||
llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y],
|
||||
y));
|
||||
}
|
||||
|
||||
bool llvm::getMaxNTIDz(const Function &F, unsigned &z) {
|
||||
return (llvm::findOneNVVMAnnotation(&F,
|
||||
llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z],
|
||||
z));
|
||||
}
|
||||
|
||||
bool llvm::getReqNTIDx(const Function &F, unsigned &x) {
|
||||
return (llvm::findOneNVVMAnnotation(&F,
|
||||
llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X],
|
||||
x));
|
||||
}
|
||||
|
||||
bool llvm::getReqNTIDy(const Function &F, unsigned &y) {
|
||||
return (llvm::findOneNVVMAnnotation(&F,
|
||||
llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y],
|
||||
y));
|
||||
}
|
||||
|
||||
bool llvm::getReqNTIDz(const Function &F, unsigned &z) {
|
||||
return (llvm::findOneNVVMAnnotation(&F,
|
||||
llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z],
|
||||
z));
|
||||
}
|
||||
|
||||
bool llvm::getMinCTASm(const Function &F, unsigned &x) {
|
||||
return (llvm::findOneNVVMAnnotation(&F,
|
||||
llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM],
|
||||
x));
|
||||
}
|
||||
|
||||
bool llvm::isKernelFunction(const Function &F) {
|
||||
unsigned x = 0;
|
||||
bool retval = llvm::findOneNVVMAnnotation(&F,
|
||||
llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION],
|
||||
x);
|
||||
if (retval == false) {
|
||||
// There is no NVVM metadata, check the calling convention
|
||||
if (F.getCallingConv() == llvm::CallingConv::PTX_Kernel)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
return (x==1);
|
||||
}
|
||||
|
||||
bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
|
||||
std::vector<unsigned> Vs;
|
||||
bool retval = llvm::findAllNVVMAnnotation(&F,
|
||||
llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN],
|
||||
Vs);
|
||||
if (retval == false)
|
||||
return false;
|
||||
for (int i=0, e=Vs.size(); i<e; i++) {
|
||||
unsigned v = Vs[i];
|
||||
if ( (v >> 16) == index ) {
|
||||
align = v & 0xFFFF;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) {
|
||||
if (MDNode *alignNode = I.getMetadata("callalign")) {
|
||||
for (int i=0, n = alignNode->getNumOperands();
|
||||
i<n; i++) {
|
||||
if (const ConstantInt *CI =
|
||||
dyn_cast<ConstantInt>(alignNode->getOperand(i))) {
|
||||
unsigned v = CI->getZExtValue();
|
||||
if ( (v>>16) == index ) {
|
||||
align = v & 0xFFFF;
|
||||
return true;
|
||||
}
|
||||
if ( (v>>16) > index ) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool llvm::isBarrierIntrinsic(Intrinsic::ID id) {
|
||||
if ((id == Intrinsic::nvvm_barrier0) ||
|
||||
(id == Intrinsic::nvvm_barrier0_popc) ||
|
||||
(id == Intrinsic::nvvm_barrier0_and) ||
|
||||
(id == Intrinsic::nvvm_barrier0_or) ||
|
||||
(id == Intrinsic::cuda_syncthreads))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Interface for checking all memory space transfer related intrinsics
|
||||
bool llvm::isMemorySpaceTransferIntrinsic(Intrinsic::ID id) {
|
||||
if (id == Intrinsic::nvvm_ptr_local_to_gen ||
|
||||
id == Intrinsic::nvvm_ptr_shared_to_gen ||
|
||||
id == Intrinsic::nvvm_ptr_global_to_gen ||
|
||||
id == Intrinsic::nvvm_ptr_constant_to_gen ||
|
||||
id == Intrinsic::nvvm_ptr_gen_to_global ||
|
||||
id == Intrinsic::nvvm_ptr_gen_to_shared ||
|
||||
id == Intrinsic::nvvm_ptr_gen_to_local ||
|
||||
id == Intrinsic::nvvm_ptr_gen_to_constant ||
|
||||
id == Intrinsic::nvvm_ptr_gen_to_param) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// consider several special intrinsics in striping pointer casts, and
|
||||
// provide an option to ignore GEP indicies for find out the base address only
|
||||
// which could be used in simple alias disambigurate.
|
||||
const Value *llvm::skipPointerTransfer(const Value *V,
|
||||
bool ignore_GEP_indices) {
|
||||
V = V->stripPointerCasts();
|
||||
while (true) {
|
||||
if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
|
||||
if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) {
|
||||
V = IS->getArgOperand(0)->stripPointerCasts();
|
||||
continue;
|
||||
}
|
||||
} else if (ignore_GEP_indices)
|
||||
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
|
||||
V = GEP->getPointerOperand()->stripPointerCasts();
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return V;
|
||||
}
|
||||
|
||||
// consider several special intrinsics in striping pointer casts, and
|
||||
// - ignore GEP indicies for find out the base address only, and
|
||||
// - tracking PHINode
|
||||
// which could be used in simple alias disambigurate.
|
||||
const Value *llvm::skipPointerTransfer(const Value *V,
|
||||
std::set<const Value *> &processed) {
|
||||
if (processed.find(V) != processed.end())
|
||||
return NULL;
|
||||
processed.insert(V);
|
||||
|
||||
const Value *V2 = V->stripPointerCasts();
|
||||
if (V2 != V && processed.find(V2) != processed.end())
|
||||
return NULL;
|
||||
processed.insert(V2);
|
||||
|
||||
V = V2;
|
||||
|
||||
while (true) {
|
||||
if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
|
||||
if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) {
|
||||
V = IS->getArgOperand(0)->stripPointerCasts();
|
||||
continue;
|
||||
}
|
||||
} else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
|
||||
V = GEP->getPointerOperand()->stripPointerCasts();
|
||||
continue;
|
||||
} else if (const PHINode *PN = dyn_cast<PHINode>(V)) {
|
||||
if (V != V2 && processed.find(V) != processed.end())
|
||||
return NULL;
|
||||
processed.insert(PN);
|
||||
const Value *common = 0;
|
||||
for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
|
||||
const Value *pv = PN->getIncomingValue(i);
|
||||
const Value *base = skipPointerTransfer(pv, processed);
|
||||
if (base) {
|
||||
if (common == 0)
|
||||
common = base;
|
||||
else if (common != base)
|
||||
return PN;
|
||||
}
|
||||
}
|
||||
if (common == 0)
|
||||
return PN;
|
||||
V = common;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return V;
|
||||
}
|
||||
|
||||
|
||||
// The following are some useful utilities for debuggung
|
||||
|
||||
BasicBlock *llvm::getParentBlock(Value *v) {
|
||||
if (BasicBlock *B = dyn_cast<BasicBlock>(v))
|
||||
return B;
|
||||
|
||||
if (Instruction *I = dyn_cast<Instruction>(v))
|
||||
return I->getParent();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
Function *llvm::getParentFunction(Value *v) {
|
||||
if (Function *F = dyn_cast<Function>(v))
|
||||
return F;
|
||||
|
||||
if (Instruction *I = dyn_cast<Instruction>(v))
|
||||
return I->getParent()->getParent();
|
||||
|
||||
if (BasicBlock *B = dyn_cast<BasicBlock>(v))
|
||||
return B->getParent();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Dump a block by name
|
||||
void llvm::dumpBlock(Value *v, char *blockName) {
|
||||
Function *F = getParentFunction(v);
|
||||
if (F == 0)
|
||||
return;
|
||||
|
||||
for (Function::iterator it = F->begin(), ie = F->end(); it != ie; ++it) {
|
||||
BasicBlock *B = it;
|
||||
if (strcmp(B->getName().data(), blockName) == 0) {
|
||||
B->dump();
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Find an instruction by name
|
||||
Instruction *llvm::getInst(Value *base, char *instName) {
|
||||
Function *F = getParentFunction(base);
|
||||
if (F == 0)
|
||||
return 0;
|
||||
|
||||
for (inst_iterator it = inst_begin(F), ie = inst_end(F); it != ie; ++it) {
|
||||
Instruction *I = &*it;
|
||||
if (strcmp(I->getName().data(), instName) == 0) {
|
||||
return I;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Dump an instruction by nane
|
||||
void llvm::dumpInst(Value *base, char *instName) {
|
||||
Instruction *I = getInst(base, instName);
|
||||
if (I)
|
||||
I->dump();
|
||||
}
|
||||
|
||||
// Dump an instruction and all dependent instructions
|
||||
void llvm::dumpInstRec(Value *v, std::set<Instruction *> *visited) {
|
||||
if (Instruction *I = dyn_cast<Instruction>(v)) {
|
||||
|
||||
if (visited->find(I) != visited->end())
|
||||
return;
|
||||
|
||||
visited->insert(I);
|
||||
|
||||
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
|
||||
dumpInstRec(I->getOperand(i), visited);
|
||||
|
||||
I->dump();
|
||||
}
|
||||
}
|
||||
|
||||
// Dump an instruction and all dependent instructions
|
||||
void llvm::dumpInstRec(Value *v) {
|
||||
std::set<Instruction *> visited;
|
||||
|
||||
//BasicBlock *B = getParentBlock(v);
|
||||
|
||||
dumpInstRec(v, &visited);
|
||||
}
|
||||
|
||||
// Dump the parent for Instruction, block or function
|
||||
void llvm::dumpParent(Value *v) {
|
||||
if (Instruction *I = dyn_cast<Instruction>(v)) {
|
||||
I->getParent()->dump();
|
||||
return;
|
||||
}
|
||||
|
||||
if (BasicBlock *B = dyn_cast<BasicBlock>(v)) {
|
||||
B->getParent()->dump();
|
||||
return;
|
||||
}
|
||||
|
||||
if (Function *F = dyn_cast<Function>(v)) {
|
||||
F->getParent()->dump();
|
||||
return;
|
||||
}
|
||||
}
|
94
lib/Target/NVPTX/NVPTXUtilities.h
Normal file
94
lib/Target/NVPTX/NVPTXUtilities.h
Normal file
@ -0,0 +1,94 @@
|
||||
//===-- NVPTXUtilities - Utilities -----------------------------*- C++ -*-====//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the declaration of the NVVM specific utility functions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef NVPTXUTILITIES_H
|
||||
#define NVPTXUTILITIES_H
|
||||
|
||||
#include "llvm/Value.h"
|
||||
#include "llvm/GlobalVariable.h"
|
||||
#include "llvm/Function.h"
|
||||
#include "llvm/IntrinsicInst.h"
|
||||
#include <cstdarg>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace llvm
|
||||
{
|
||||
|
||||
#define NVCL_IMAGE2D_READONLY_FUNCNAME "__is_image2D_readonly"
|
||||
#define NVCL_IMAGE3D_READONLY_FUNCNAME "__is_image3D_readonly"
|
||||
|
||||
bool findOneNVVMAnnotation(const llvm::GlobalValue *, std::string, unsigned &);
|
||||
bool findAllNVVMAnnotation(const llvm::GlobalValue *, std::string,
|
||||
std::vector<unsigned> &);
|
||||
|
||||
bool isTexture(const llvm::Value &);
|
||||
bool isSurface(const llvm::Value &);
|
||||
bool isSampler(const llvm::Value &);
|
||||
bool isImage(const llvm::Value &);
|
||||
bool isImageReadOnly(const llvm::Value &);
|
||||
bool isImageWriteOnly(const llvm::Value &);
|
||||
|
||||
std::string getTextureName(const llvm::Value &);
|
||||
std::string getSurfaceName(const llvm::Value &);
|
||||
std::string getSamplerName(const llvm::Value &);
|
||||
|
||||
bool getMaxNTIDx(const llvm::Function &, unsigned &);
|
||||
bool getMaxNTIDy(const llvm::Function &, unsigned &);
|
||||
bool getMaxNTIDz(const llvm::Function &, unsigned &);
|
||||
|
||||
bool getReqNTIDx(const llvm::Function &, unsigned &);
|
||||
bool getReqNTIDy(const llvm::Function &, unsigned &);
|
||||
bool getReqNTIDz(const llvm::Function &, unsigned &);
|
||||
|
||||
bool getMinCTASm(const llvm::Function &, unsigned &);
|
||||
bool isKernelFunction(const llvm::Function &);
|
||||
|
||||
bool getAlign(const llvm::Function &, unsigned index, unsigned &);
|
||||
bool getAlign(const llvm::CallInst &, unsigned index, unsigned &);
|
||||
|
||||
bool isBarrierIntrinsic(llvm::Intrinsic::ID);
|
||||
|
||||
/// make_vector - Helper function which is useful for building temporary vectors
|
||||
/// to pass into type construction of CallInst ctors. This turns a null
|
||||
/// terminated list of pointers (or other value types) into a real live vector.
|
||||
///
|
||||
template<typename T>
|
||||
inline std::vector<T> make_vector(T A, ...) {
|
||||
va_list Args;
|
||||
va_start(Args, A);
|
||||
std::vector<T> Result;
|
||||
Result.push_back(A);
|
||||
while (T Val = va_arg(Args, T))
|
||||
Result.push_back(Val);
|
||||
va_end(Args);
|
||||
return Result;
|
||||
}
|
||||
|
||||
bool isMemorySpaceTransferIntrinsic(Intrinsic::ID id);
|
||||
const Value *skipPointerTransfer(const Value *V, bool ignore_GEP_indices);
|
||||
const Value *skipPointerTransfer(const Value *V,
|
||||
std::set<const Value *> &processed);
|
||||
BasicBlock *getParentBlock(Value *v);
|
||||
Function *getParentFunction(Value *v);
|
||||
void dumpBlock(Value *v, char *blockName);
|
||||
Instruction *getInst(Value *base, char *instName);
|
||||
void dumpInst(Value *base, char *instName);
|
||||
void dumpInstRec(Value *v, std::set<Instruction *> *visited);
|
||||
void dumpInstRec(Value *v);
|
||||
void dumpParent(Value *v);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
1481
lib/Target/NVPTX/NVPTXVector.td
Normal file
1481
lib/Target/NVPTX/NVPTXVector.td
Normal file
File diff suppressed because it is too large
Load Diff
91
lib/Target/NVPTX/NVPTXutil.cpp
Normal file
91
lib/Target/NVPTX/NVPTXutil.cpp
Normal file
@ -0,0 +1,91 @@
|
||||
//===-- NVPTXutil.cpp - Functions exported to CodeGen --*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the functions that can be used in CodeGen.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "NVPTXutil.h"
|
||||
#include "NVPTX.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace llvm {
|
||||
|
||||
bool isParamLoad(const MachineInstr *MI)
|
||||
{
|
||||
if ((MI->getOpcode() != NVPTX::LD_i32_avar) &&
|
||||
(MI->getOpcode() != NVPTX::LD_i64_avar))
|
||||
return false;
|
||||
if (MI->getOperand(2).isImm() == false)
|
||||
return false;
|
||||
if (MI->getOperand(2).getImm() != NVPTX::PTXLdStInstCode::PARAM)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
#define DATA_MASK 0x7f
|
||||
#define DIGIT_WIDTH 7
|
||||
#define MORE_BYTES 0x80
|
||||
|
||||
static int encode_leb128(uint64_t val, int *nbytes,
|
||||
char *space, int splen)
|
||||
{
|
||||
char *a;
|
||||
char *end = space + splen;
|
||||
|
||||
a = space;
|
||||
do {
|
||||
unsigned char uc;
|
||||
|
||||
if (a >= end)
|
||||
return 1;
|
||||
uc = val & DATA_MASK;
|
||||
val >>= DIGIT_WIDTH;
|
||||
if (val != 0)
|
||||
uc |= MORE_BYTES;
|
||||
*a = uc;
|
||||
a++;
|
||||
} while (val);
|
||||
*nbytes = a - space;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#undef DATA_MASK
|
||||
#undef DIGIT_WIDTH
|
||||
#undef MORE_BYTES
|
||||
|
||||
uint64_t encode_leb128(const char *str)
|
||||
{
|
||||
union { uint64_t x; char a[8]; } temp64;
|
||||
|
||||
temp64.x = 0;
|
||||
|
||||
for (unsigned i=0,e=strlen(str); i!=e; ++i)
|
||||
temp64.a[i] = str[e-1-i];
|
||||
|
||||
char encoded[16];
|
||||
int nbytes;
|
||||
|
||||
int retval = encode_leb128(temp64.x, &nbytes, encoded, 16);
|
||||
|
||||
assert(retval == 0 &&
|
||||
"Encoding to leb128 failed");
|
||||
|
||||
assert(nbytes <= 8 &&
|
||||
"Cannot support register names with leb128 encoding > 8 bytes");
|
||||
|
||||
temp64.x = 0;
|
||||
for (int i=0; i<nbytes; ++i)
|
||||
temp64.a[i] = encoded[i];
|
||||
|
||||
return temp64.x;
|
||||
}
|
||||
|
||||
} // end namespace llvm
|
25
lib/Target/NVPTX/NVPTXutil.h
Normal file
25
lib/Target/NVPTX/NVPTXutil.h
Normal file
@ -0,0 +1,25 @@
|
||||
//===-- NVPTXutil.h - Functions exported to CodeGen --*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the functions that can be used in CodeGen.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TARGET_NVPTX_UTIL_H
|
||||
#define LLVM_TARGET_NVPTX_UTIL_H
|
||||
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
|
||||
namespace llvm {
|
||||
bool isParamLoad(const MachineInstr *);
|
||||
uint64_t encode_leb128(const char *str);
|
||||
}
|
||||
|
||||
#endif
|
7
lib/Target/NVPTX/TargetInfo/CMakeLists.txt
Normal file
7
lib/Target/NVPTX/TargetInfo/CMakeLists.txt
Normal file
@ -0,0 +1,7 @@
|
||||
#include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
|
||||
|
||||
add_llvm_library(LLVMNVPTXInfo
|
||||
NVPTXTargetInfo.cpp
|
||||
)
|
||||
|
||||
add_dependencies(LLVMNVPTXInfo NVPTXCommonTableGen)
|
23
lib/Target/NVPTX/TargetInfo/LLVMBuild.txt
Normal file
23
lib/Target/NVPTX/TargetInfo/LLVMBuild.txt
Normal file
@ -0,0 +1,23 @@
|
||||
;===- ./lib/Target/NVPTX/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===;
|
||||
;
|
||||
; The LLVM Compiler Infrastructure
|
||||
;
|
||||
; This file is distributed under the University of Illinois Open Source
|
||||
; License. See LICENSE.TXT for details.
|
||||
;
|
||||
;===------------------------------------------------------------------------===;
|
||||
;
|
||||
; This is an LLVMBuild description file for the components in this subdirectory.
|
||||
;
|
||||
; For more information on the LLVMBuild system, please see:
|
||||
;
|
||||
; http://llvm.org/docs/LLVMBuild.html
|
||||
;
|
||||
;===------------------------------------------------------------------------===;
|
||||
|
||||
[component_0]
|
||||
type = Library
|
||||
name = NVPTXInfo
|
||||
parent = NVPTX
|
||||
required_libraries = MC Support Target
|
||||
add_to_library_groups = NVPTX
|
15
lib/Target/NVPTX/TargetInfo/Makefile
Normal file
15
lib/Target/NVPTX/TargetInfo/Makefile
Normal file
@ -0,0 +1,15 @@
|
||||
##===- lib/Target/NVPTX/TargetInfo/Makefile ----------------*- Makefile -*-===##
|
||||
#
|
||||
# The LLVM Compiler Infrastructure
|
||||
#
|
||||
# This file is distributed under the University of Illinois Open Source
|
||||
# License. See LICENSE.TXT for details.
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
LEVEL = ../../../..
|
||||
LIBRARYNAME = LLVMNVPTXInfo
|
||||
|
||||
# Hack: we need to include 'main' target directory to grab private headers
|
||||
CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
|
||||
|
||||
include $(LEVEL)/Makefile.common
|
23
lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
Normal file
23
lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
Normal file
@ -0,0 +1,23 @@
|
||||
//===-- NVPTXTargetInfo.cpp - NVPTX Target Implementation -----------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "NVPTX.h"
|
||||
#include "llvm/Module.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
using namespace llvm;
|
||||
|
||||
Target llvm::TheNVPTXTarget32;
|
||||
Target llvm::TheNVPTXTarget64;
|
||||
|
||||
extern "C" void LLVMInitializeNVPTXTargetInfo() {
|
||||
RegisterTarget<Triple::nvptx> X(TheNVPTXTarget32, "nvptx",
|
||||
"NVIDIA PTX 32-bit");
|
||||
RegisterTarget<Triple::nvptx64> Y(TheNVPTXTarget64, "nvptx64",
|
||||
"NVIDIA PTX 64-bit");
|
||||
}
|
1250
lib/Target/NVPTX/VectorElementize.cpp
Normal file
1250
lib/Target/NVPTX/VectorElementize.cpp
Normal file
File diff suppressed because it is too large
Load Diff
125
lib/Target/NVPTX/cl_common_defines.h
Normal file
125
lib/Target/NVPTX/cl_common_defines.h
Normal file
@ -0,0 +1,125 @@
|
||||
#ifndef __CL_COMMON_DEFINES_H__
|
||||
#define __CL_COMMON_DEFINES_H__
|
||||
// This file includes defines that are common to both kernel code and
|
||||
// the NVPTX back-end.
|
||||
|
||||
//
|
||||
// Common defines for Image intrinsics
|
||||
// Channel order
|
||||
enum {
|
||||
CLK_R = 0x10B0,
|
||||
CLK_A = 0x10B1,
|
||||
CLK_RG = 0x10B2,
|
||||
CLK_RA = 0x10B3,
|
||||
CLK_RGB = 0x10B4,
|
||||
CLK_RGBA = 0x10B5,
|
||||
CLK_BGRA = 0x10B6,
|
||||
CLK_ARGB = 0x10B7,
|
||||
|
||||
#if (__NV_CL_C_VERSION == __NV_CL_C_VERSION_1_0)
|
||||
CLK_xRGB = 0x10B7,
|
||||
#endif
|
||||
|
||||
CLK_INTENSITY = 0x10B8,
|
||||
CLK_LUMINANCE = 0x10B9
|
||||
|
||||
#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
|
||||
,
|
||||
CLK_Rx = 0x10BA,
|
||||
CLK_RGx = 0x10BB,
|
||||
CLK_RGBx = 0x10BC
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
typedef enum clk_channel_type {
|
||||
// valid formats for float return types
|
||||
CLK_SNORM_INT8 = 0x10D0, // four channel RGBA unorm8
|
||||
CLK_SNORM_INT16 = 0x10D1, // four channel RGBA unorm16
|
||||
CLK_UNORM_INT8 = 0x10D2, // four channel RGBA unorm8
|
||||
CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16
|
||||
CLK_HALF_FLOAT = 0x10DD, // four channel RGBA half
|
||||
CLK_FLOAT = 0x10DE, // four channel RGBA float
|
||||
|
||||
#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
|
||||
CLK_UNORM_SHORT_565 = 0x10D4,
|
||||
CLK_UNORM_SHORT_555 = 0x10D5,
|
||||
CLK_UNORM_INT_101010 = 0x10D6,
|
||||
#endif
|
||||
|
||||
// valid only for integer return types
|
||||
CLK_SIGNED_INT8 = 0x10D7,
|
||||
CLK_SIGNED_INT16 = 0x10D8,
|
||||
CLK_SIGNED_INT32 = 0x10D9,
|
||||
CLK_UNSIGNED_INT8 = 0x10DA,
|
||||
CLK_UNSIGNED_INT16 = 0x10DB,
|
||||
CLK_UNSIGNED_INT32 = 0x10DC,
|
||||
|
||||
// CI SPI for CPU
|
||||
__CLK_UNORM_INT8888 , // four channel ARGB unorm8
|
||||
__CLK_UNORM_INT8888R, // four channel BGRA unorm8
|
||||
|
||||
__CLK_VALID_IMAGE_TYPE_COUNT,
|
||||
__CLK_INVALID_IMAGE_TYPE = __CLK_VALID_IMAGE_TYPE_COUNT,
|
||||
__CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to
|
||||
// represent any image type
|
||||
__CLK_VALID_IMAGE_TYPE_MASK = ( 1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS ) - 1
|
||||
}clk_channel_type;
|
||||
|
||||
typedef enum clk_sampler_type {
|
||||
__CLK_ADDRESS_BASE = 0,
|
||||
CLK_ADDRESS_NONE = 0 << __CLK_ADDRESS_BASE,
|
||||
CLK_ADDRESS_CLAMP = 1 << __CLK_ADDRESS_BASE,
|
||||
CLK_ADDRESS_CLAMP_TO_EDGE = 2 << __CLK_ADDRESS_BASE,
|
||||
CLK_ADDRESS_REPEAT = 3 << __CLK_ADDRESS_BASE,
|
||||
CLK_ADDRESS_MIRROR = 4 << __CLK_ADDRESS_BASE,
|
||||
|
||||
#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
|
||||
CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR,
|
||||
#endif
|
||||
__CLK_ADDRESS_MASK = CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP |
|
||||
CLK_ADDRESS_CLAMP_TO_EDGE |
|
||||
CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR,
|
||||
__CLK_ADDRESS_BITS = 3, // number of bits required to
|
||||
// represent address info
|
||||
|
||||
__CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS,
|
||||
CLK_NORMALIZED_COORDS_FALSE = 0,
|
||||
CLK_NORMALIZED_COORDS_TRUE = 1 << __CLK_NORMALIZED_BASE,
|
||||
__CLK_NORMALIZED_MASK = CLK_NORMALIZED_COORDS_FALSE |
|
||||
CLK_NORMALIZED_COORDS_TRUE,
|
||||
__CLK_NORMALIZED_BITS = 1, // number of bits required to
|
||||
// represent normalization
|
||||
|
||||
__CLK_FILTER_BASE = __CLK_NORMALIZED_BASE +
|
||||
__CLK_NORMALIZED_BITS,
|
||||
CLK_FILTER_NEAREST = 0 << __CLK_FILTER_BASE,
|
||||
CLK_FILTER_LINEAR = 1 << __CLK_FILTER_BASE,
|
||||
CLK_FILTER_ANISOTROPIC = 2 << __CLK_FILTER_BASE,
|
||||
__CLK_FILTER_MASK = CLK_FILTER_NEAREST | CLK_FILTER_LINEAR |
|
||||
CLK_FILTER_ANISOTROPIC,
|
||||
__CLK_FILTER_BITS = 2, // number of bits required to
|
||||
// represent address info
|
||||
|
||||
__CLK_MIP_BASE = __CLK_FILTER_BASE + __CLK_FILTER_BITS,
|
||||
CLK_MIP_NEAREST = 0 << __CLK_MIP_BASE,
|
||||
CLK_MIP_LINEAR = 1 << __CLK_MIP_BASE,
|
||||
CLK_MIP_ANISOTROPIC = 2 << __CLK_MIP_BASE,
|
||||
__CLK_MIP_MASK = CLK_MIP_NEAREST | CLK_MIP_LINEAR |
|
||||
CLK_MIP_ANISOTROPIC,
|
||||
__CLK_MIP_BITS = 2,
|
||||
|
||||
__CLK_SAMPLER_BITS = __CLK_MIP_BASE + __CLK_MIP_BITS,
|
||||
__CLK_SAMPLER_MASK = __CLK_MIP_MASK | __CLK_FILTER_MASK |
|
||||
__CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK,
|
||||
|
||||
__CLK_ANISOTROPIC_RATIO_BITS = 5,
|
||||
__CLK_ANISOTROPIC_RATIO_MASK = (int) 0x80000000 >>
|
||||
(__CLK_ANISOTROPIC_RATIO_BITS-1)
|
||||
} clk_sampler_type;
|
||||
|
||||
// Memory synchronization
|
||||
#define CLK_LOCAL_MEM_FENCE (1 << 0)
|
||||
#define CLK_GLOBAL_MEM_FENCE (1 << 1)
|
||||
|
||||
#endif // __CL_COMMON_DEFINES_H__
|
202
lib/Target/NVPTX/gen-register-defs.py
Normal file
202
lib/Target/NVPTX/gen-register-defs.py
Normal file
@ -0,0 +1,202 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
num_regs = 396
|
||||
|
||||
outFile = open('NVPTXRegisterInfo.td', 'w')
|
||||
|
||||
outFile.write('''
|
||||
//===-- NVPTXRegisterInfo.td - NVPTX Register defs ---------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Declarations that describe the PTX register file
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class NVPTXReg<string n> : Register<n> {
|
||||
let Namespace = "NVPTX";
|
||||
}
|
||||
|
||||
class NVPTXRegClass<list<ValueType> regTypes, int alignment, dag regList>
|
||||
: RegisterClass <"NVPTX", regTypes, alignment, regList>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Registers
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Special Registers used as stack pointer
|
||||
def VRFrame : NVPTXReg<"%SP">;
|
||||
def VRFrameLocal : NVPTXReg<"%SPL">;
|
||||
|
||||
// Special Registers used as the stack
|
||||
def VRDepot : NVPTXReg<"%Depot">;
|
||||
''')
|
||||
|
||||
# Predicates
|
||||
outFile.write('''
|
||||
//===--- Predicate --------------------------------------------------------===//
|
||||
''')
|
||||
for i in range(0, num_regs):
|
||||
outFile.write('def P%d : NVPTXReg<"%%p%d">;\n' % (i, i))
|
||||
|
||||
# Int8
|
||||
outFile.write('''
|
||||
//===--- 8-bit ------------------------------------------------------------===//
|
||||
''')
|
||||
for i in range(0, num_regs):
|
||||
outFile.write('def RC%d : NVPTXReg<"%%rc%d">;\n' % (i, i))
|
||||
|
||||
# Int16
|
||||
outFile.write('''
|
||||
//===--- 16-bit -----------------------------------------------------------===//
|
||||
''')
|
||||
for i in range(0, num_regs):
|
||||
outFile.write('def RS%d : NVPTXReg<"%%rs%d">;\n' % (i, i))
|
||||
|
||||
# Int32
|
||||
outFile.write('''
|
||||
//===--- 32-bit -----------------------------------------------------------===//
|
||||
''')
|
||||
for i in range(0, num_regs):
|
||||
outFile.write('def R%d : NVPTXReg<"%%r%d">;\n' % (i, i))
|
||||
|
||||
# Int64
|
||||
outFile.write('''
|
||||
//===--- 64-bit -----------------------------------------------------------===//
|
||||
''')
|
||||
for i in range(0, num_regs):
|
||||
outFile.write('def RL%d : NVPTXReg<"%%rl%d">;\n' % (i, i))
|
||||
|
||||
# F32
|
||||
outFile.write('''
|
||||
//===--- 32-bit float -----------------------------------------------------===//
|
||||
''')
|
||||
for i in range(0, num_regs):
|
||||
outFile.write('def F%d : NVPTXReg<"%%f%d">;\n' % (i, i))
|
||||
|
||||
# F64
|
||||
outFile.write('''
|
||||
//===--- 64-bit float -----------------------------------------------------===//
|
||||
''')
|
||||
for i in range(0, num_regs):
|
||||
outFile.write('def FL%d : NVPTXReg<"%%fl%d">;\n' % (i, i))
|
||||
|
||||
# Vector registers
|
||||
outFile.write('''
|
||||
//===--- Vector -----------------------------------------------------------===//
|
||||
''')
|
||||
for i in range(0, num_regs):
|
||||
outFile.write('def v2b8_%d : NVPTXReg<"%%v2b8_%d">;\n' % (i, i))
|
||||
for i in range(0, num_regs):
|
||||
outFile.write('def v2b16_%d : NVPTXReg<"%%v2b16_%d">;\n' % (i, i))
|
||||
for i in range(0, num_regs):
|
||||
outFile.write('def v2b32_%d : NVPTXReg<"%%v2b32_%d">;\n' % (i, i))
|
||||
for i in range(0, num_regs):
|
||||
outFile.write('def v2b64_%d : NVPTXReg<"%%v2b64_%d">;\n' % (i, i))
|
||||
|
||||
for i in range(0, num_regs):
|
||||
outFile.write('def v4b8_%d : NVPTXReg<"%%v4b8_%d">;\n' % (i, i))
|
||||
for i in range(0, num_regs):
|
||||
outFile.write('def v4b16_%d : NVPTXReg<"%%v4b16_%d">;\n' % (i, i))
|
||||
for i in range(0, num_regs):
|
||||
outFile.write('def v4b32_%d : NVPTXReg<"%%v4b32_%d">;\n' % (i, i))
|
||||
|
||||
# Argument registers
|
||||
outFile.write('''
|
||||
//===--- Arguments --------------------------------------------------------===//
|
||||
''')
|
||||
for i in range(0, num_regs):
|
||||
outFile.write('def ia%d : NVPTXReg<"%%ia%d">;\n' % (i, i))
|
||||
for i in range(0, num_regs):
|
||||
outFile.write('def la%d : NVPTXReg<"%%la%d">;\n' % (i, i))
|
||||
for i in range(0, num_regs):
|
||||
outFile.write('def fa%d : NVPTXReg<"%%fa%d">;\n' % (i, i))
|
||||
for i in range(0, num_regs):
|
||||
outFile.write('def da%d : NVPTXReg<"%%da%d">;\n' % (i, i))
|
||||
|
||||
outFile.write('''
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Register classes
|
||||
//===----------------------------------------------------------------------===//
|
||||
''')
|
||||
|
||||
outFile.write('def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%%u", 0, %d))>;\n' % (num_regs-1))
|
||||
outFile.write('def Int8Regs : NVPTXRegClass<[i8], 8, (add (sequence "RC%%u", 0, %d))>;\n' % (num_regs-1))
|
||||
outFile.write('def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%%u", 0, %d))>;\n' % (num_regs-1))
|
||||
outFile.write('def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%%u", 0, %d))>;\n' % (num_regs-1))
|
||||
outFile.write('def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%%u", 0, %d))>;\n' % (num_regs-1))
|
||||
|
||||
outFile.write('def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%%u", 0, %d))>;\n' % (num_regs-1))
|
||||
outFile.write('def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%%u", 0, %d))>;\n' % (num_regs-1))
|
||||
|
||||
outFile.write('def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%%u", 0, %d))>;\n' % (num_regs-1))
|
||||
outFile.write('def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%%u", 0, %d))>;\n' % (num_regs-1))
|
||||
outFile.write('def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%%u", 0, %d))>;\n' % (num_regs-1))
|
||||
outFile.write('def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%%u", 0, %d))>;\n' % (num_regs-1))
|
||||
|
||||
outFile.write('''
|
||||
// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
|
||||
def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
|
||||
''')
|
||||
|
||||
outFile.write('''
|
||||
class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
|
||||
NVPTXRegClass sClass,
|
||||
int e,
|
||||
string n>
|
||||
: NVPTXRegClass<regTypes, alignment, regList>
|
||||
{
|
||||
NVPTXRegClass scalarClass=sClass;
|
||||
int elems=e;
|
||||
string name=n;
|
||||
}
|
||||
''')
|
||||
|
||||
|
||||
outFile.write('def V2F32Regs\n : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n Float32Regs, 2, ".v2.f32">;\n' % (num_regs-1))
|
||||
outFile.write('def V4F32Regs\n : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n Float32Regs, 4, ".v4.f32">;\n' % (num_regs-1))
|
||||
|
||||
outFile.write('def V2I32Regs\n : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n Int32Regs, 2, ".v2.u32">;\n' % (num_regs-1))
|
||||
outFile.write('def V4I32Regs\n : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n Int32Regs, 4, ".v4.u32">;\n' % (num_regs-1))
|
||||
|
||||
outFile.write('def V2F64Regs\n : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n Float64Regs, 2, ".v2.f64">;\n' % (num_regs-1))
|
||||
outFile.write('def V2I64Regs\n : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n Int64Regs, 2, ".v2.u64">;\n' % (num_regs-1))
|
||||
|
||||
outFile.write('def V2I16Regs\n : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%%u", 0, %d)),\n Int16Regs, 2, ".v2.u16">;\n' % (num_regs-1))
|
||||
outFile.write('def V4I16Regs\n : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%%u", 0, %d)),\n Int16Regs, 4, ".v4.u16">;\n' % (num_regs-1))
|
||||
|
||||
outFile.write('def V2I8Regs\n : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%%u", 0, %d)),\n Int8Regs, 2, ".v2.u8">;\n' % (num_regs-1))
|
||||
outFile.write('def V4I8Regs\n : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%%u", 0, %d)),\n Int8Regs, 4, ".v4.u8">;\n' % (num_regs-1))
|
||||
|
||||
outFile.close()
|
||||
|
||||
|
||||
outFile = open('NVPTXNumRegisters.h', 'w')
|
||||
outFile.write('''
|
||||
//===-- NVPTXNumRegisters.h - PTX Register Info ---------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef NVPTX_NUM_REGISTERS_H
|
||||
#define NVPTX_NUM_REGISTERS_H
|
||||
|
||||
namespace llvm {
|
||||
|
||||
const unsigned NVPTXNumRegisters = %d;
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
''' % num_regs)
|
||||
|
||||
outFile.close()
|
@ -310,6 +310,7 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
|
||||
hexagon-*) llvm_cv_target_arch="Hexagon" ;;
|
||||
mblaze-*) llvm_cv_target_arch="MBlaze" ;;
|
||||
ptx-*) llvm_cv_target_arch="PTX" ;;
|
||||
nvptx-*) llvm_cv_target_arch="NVPTX" ;;
|
||||
*) llvm_cv_target_arch="Unknown" ;;
|
||||
esac])
|
||||
|
||||
@ -457,6 +458,7 @@ else
|
||||
Hexagon) AC_SUBST(TARGET_HAS_JIT,0) ;;
|
||||
MBlaze) AC_SUBST(TARGET_HAS_JIT,0) ;;
|
||||
PTX) AC_SUBST(TARGET_HAS_JIT,0) ;;
|
||||
NVPTX) AC_SUBST(TARGET_HAS_JIT,0) ;;
|
||||
*) AC_SUBST(TARGET_HAS_JIT,0) ;;
|
||||
esac
|
||||
fi
|
||||
@ -567,13 +569,13 @@ TARGETS_TO_BUILD=""
|
||||
AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
|
||||
[Build specific host targets: all or target1,target2,... Valid targets are:
|
||||
host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon,
|
||||
xcore, msp430, ptx, cbe, and cpp (default=all)]),,
|
||||
xcore, msp430, ptx, nvptx, cbe, and cpp (default=all)]),,
|
||||
enableval=all)
|
||||
if test "$enableval" = host-only ; then
|
||||
enableval=host
|
||||
fi
|
||||
case "$enableval" in
|
||||
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze PTX" ;;
|
||||
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze PTX NVPTX" ;;
|
||||
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
|
||||
case "$a_target" in
|
||||
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
|
||||
@ -589,6 +591,7 @@ case "$enableval" in
|
||||
cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
|
||||
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
|
||||
ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
|
||||
nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
|
||||
host) case "$llvm_cv_target_arch" in
|
||||
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
|
||||
x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
|
||||
@ -602,6 +605,7 @@ case "$enableval" in
|
||||
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
|
||||
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
|
||||
PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
|
||||
NVPTX) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
|
||||
*) AC_MSG_ERROR([Can not set target to build]) ;;
|
||||
esac ;;
|
||||
*) AC_MSG_ERROR([Unrecognized target $a_target]) ;;
|
||||
|
12
projects/sample/configure
vendored
12
projects/sample/configure
vendored
@ -1402,7 +1402,8 @@ Optional Features:
|
||||
--enable-targets Build specific host targets: all or
|
||||
target1,target2,... Valid targets are: host, x86,
|
||||
x86_64, sparc, powerpc, arm, mips, spu, hexagon,
|
||||
xcore, msp430, ptx, cbe, and cpp (default=all)
|
||||
xcore, msp430, ptx, nvptx, cbe, and cpp
|
||||
(default=all)
|
||||
--enable-bindings Build specific language bindings:
|
||||
all,auto,none,{binding-name} (default=auto)
|
||||
--enable-libffi Check for the presence of libffi (default is NO)
|
||||
@ -3846,6 +3847,7 @@ else
|
||||
hexagon-*) llvm_cv_target_arch="Hexagon" ;;
|
||||
mblaze-*) llvm_cv_target_arch="MBlaze" ;;
|
||||
ptx-*) llvm_cv_target_arch="PTX" ;;
|
||||
nvptx-*) llvm_cv_target_arch="NVPTX" ;;
|
||||
*) llvm_cv_target_arch="Unknown" ;;
|
||||
esac
|
||||
fi
|
||||
@ -5069,6 +5071,8 @@ else
|
||||
MBlaze) TARGET_HAS_JIT=0
|
||||
;;
|
||||
PTX) TARGET_HAS_JIT=0
|
||||
;;
|
||||
NVPTX) TARGET_HAS_JIT=0
|
||||
;;
|
||||
*) TARGET_HAS_JIT=0
|
||||
;;
|
||||
@ -5254,7 +5258,7 @@ if test "$enableval" = host-only ; then
|
||||
enableval=host
|
||||
fi
|
||||
case "$enableval" in
|
||||
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze PTX" ;;
|
||||
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze PTX NVPTX" ;;
|
||||
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
|
||||
case "$a_target" in
|
||||
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
|
||||
@ -5270,6 +5274,7 @@ case "$enableval" in
|
||||
cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
|
||||
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
|
||||
ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
|
||||
nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
|
||||
host) case "$llvm_cv_target_arch" in
|
||||
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
|
||||
x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
|
||||
@ -5283,6 +5288,7 @@ case "$enableval" in
|
||||
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
|
||||
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
|
||||
PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
|
||||
NVPTX) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
|
||||
*) { { echo "$as_me:$LINENO: error: Can not set target to build" >&5
|
||||
echo "$as_me: error: Can not set target to build" >&2;}
|
||||
{ (exit 1); exit 1; }; } ;;
|
||||
@ -10307,7 +10313,7 @@ else
|
||||
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
|
||||
lt_status=$lt_dlunknown
|
||||
cat > conftest.$ac_ext <<EOF
|
||||
#line 10303 "configure"
|
||||
#line 10316 "configure"
|
||||
#include "confdefs.h"
|
||||
|
||||
#if HAVE_DLFCN_H
|
||||
|
55
test/CodeGen/NVPTX/annotations.ll
Normal file
55
test/CodeGen/NVPTX/annotations.ll
Normal file
@ -0,0 +1,55 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
|
||||
|
||||
@texture = internal addrspace(1) global i64 0, align 8
|
||||
; CHECK: .global .texref texture
|
||||
@surface = internal addrspace(1) global i64 0, align 8
|
||||
; CHECK: .global .surfref surface
|
||||
|
||||
|
||||
; CHECK: .entry kernel_func_maxntid
|
||||
define void @kernel_func_maxntid(float* %a) {
|
||||
; CHECK: .maxntid 10, 20, 30
|
||||
; CHECK: ret
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: .entry kernel_func_reqntid
|
||||
define void @kernel_func_reqntid(float* %a) {
|
||||
; CHECK: .reqntid 11, 22, 33
|
||||
; CHECK: ret
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: .entry kernel_func_minctasm
|
||||
define void @kernel_func_minctasm(float* %a) {
|
||||
; CHECK: .minnctapersm 42
|
||||
; CHECK: ret
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
|
||||
!nvvm.annotations = !{!1, !2, !3, !4, !5, !6, !7, !8}
|
||||
|
||||
!1 = metadata !{void (float*)* @kernel_func_maxntid, metadata !"kernel", i32 1}
|
||||
!2 = metadata !{void (float*)* @kernel_func_maxntid,
|
||||
metadata !"maxntidx", i32 10,
|
||||
metadata !"maxntidy", i32 20,
|
||||
metadata !"maxntidz", i32 30}
|
||||
|
||||
!3 = metadata !{void (float*)* @kernel_func_reqntid, metadata !"kernel", i32 1}
|
||||
!4 = metadata !{void (float*)* @kernel_func_reqntid,
|
||||
metadata !"reqntidx", i32 11,
|
||||
metadata !"reqntidy", i32 22,
|
||||
metadata !"reqntidz", i32 33}
|
||||
|
||||
!5 = metadata !{void (float*)* @kernel_func_minctasm, metadata !"kernel", i32 1}
|
||||
!6 = metadata !{void (float*)* @kernel_func_minctasm,
|
||||
metadata !"minctasm", i32 42}
|
||||
|
||||
!7 = metadata !{i64 addrspace(1)* @texture, metadata !"texture", i32 1}
|
||||
!8 = metadata !{i64 addrspace(1)* @surface, metadata !"surface", i32 1}
|
72
test/CodeGen/NVPTX/arithmetic-fp-sm10.ll
Normal file
72
test/CodeGen/NVPTX/arithmetic-fp-sm10.ll
Normal file
@ -0,0 +1,72 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
|
||||
|
||||
;; These tests should run for all targets
|
||||
|
||||
;;===-- Basic instruction selection tests ---------------------------------===;;
|
||||
|
||||
|
||||
;;; f64
|
||||
|
||||
define double @fadd_f64(double %a, double %b) {
|
||||
; CHECK: add.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = fadd double %a, %b
|
||||
ret double %ret
|
||||
}
|
||||
|
||||
define double @fsub_f64(double %a, double %b) {
|
||||
; CHECK: sub.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = fsub double %a, %b
|
||||
ret double %ret
|
||||
}
|
||||
|
||||
define double @fmul_f64(double %a, double %b) {
|
||||
; CHECK: mul.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = fmul double %a, %b
|
||||
ret double %ret
|
||||
}
|
||||
|
||||
define double @fdiv_f64(double %a, double %b) {
|
||||
; CHECK: div.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = fdiv double %a, %b
|
||||
ret double %ret
|
||||
}
|
||||
|
||||
;; PTX does not have a floating-point rem instruction
|
||||
|
||||
|
||||
;;; f32
|
||||
|
||||
define float @fadd_f32(float %a, float %b) {
|
||||
; CHECK: add.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = fadd float %a, %b
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @fsub_f32(float %a, float %b) {
|
||||
; CHECK: sub.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = fsub float %a, %b
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @fmul_f32(float %a, float %b) {
|
||||
; CHECK: mul.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = fmul float %a, %b
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @fdiv_f32(float %a, float %b) {
|
||||
; CHECK: div.full.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = fdiv float %a, %b
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
;; PTX does not have a floating-point rem instruction
|
72
test/CodeGen/NVPTX/arithmetic-fp-sm20.ll
Normal file
72
test/CodeGen/NVPTX/arithmetic-fp-sm20.ll
Normal file
@ -0,0 +1,72 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
|
||||
;; These tests should run for all targets
|
||||
|
||||
;;===-- Basic instruction selection tests ---------------------------------===;;
|
||||
|
||||
|
||||
;;; f64
|
||||
|
||||
define double @fadd_f64(double %a, double %b) {
|
||||
; CHECK: add.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = fadd double %a, %b
|
||||
ret double %ret
|
||||
}
|
||||
|
||||
define double @fsub_f64(double %a, double %b) {
|
||||
; CHECK: sub.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = fsub double %a, %b
|
||||
ret double %ret
|
||||
}
|
||||
|
||||
define double @fmul_f64(double %a, double %b) {
|
||||
; CHECK: mul.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = fmul double %a, %b
|
||||
ret double %ret
|
||||
}
|
||||
|
||||
define double @fdiv_f64(double %a, double %b) {
|
||||
; CHECK: div.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = fdiv double %a, %b
|
||||
ret double %ret
|
||||
}
|
||||
|
||||
;; PTX does not have a floating-point rem instruction
|
||||
|
||||
|
||||
;;; f32
|
||||
|
||||
define float @fadd_f32(float %a, float %b) {
|
||||
; CHECK: add.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = fadd float %a, %b
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @fsub_f32(float %a, float %b) {
|
||||
; CHECK: sub.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = fsub float %a, %b
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @fmul_f32(float %a, float %b) {
|
||||
; CHECK: mul.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = fmul float %a, %b
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @fdiv_f32(float %a, float %b) {
|
||||
; CHECK: div.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = fdiv float %a, %b
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
;; PTX does not have a floating-point rem instruction
|
295
test/CodeGen/NVPTX/arithmetic-int.ll
Normal file
295
test/CodeGen/NVPTX/arithmetic-int.ll
Normal file
@ -0,0 +1,295 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
|
||||
;; These tests should run for all targets
|
||||
|
||||
;;===-- Basic instruction selection tests ---------------------------------===;;
|
||||
|
||||
|
||||
;;; i64
|
||||
|
||||
define i64 @add_i64(i64 %a, i64 %b) {
|
||||
; CHECK: add.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = add i64 %a, %b
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @sub_i64(i64 %a, i64 %b) {
|
||||
; CHECK: sub.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = sub i64 %a, %b
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @mul_i64(i64 %a, i64 %b) {
|
||||
; CHECK: mul.lo.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = mul i64 %a, %b
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @sdiv_i64(i64 %a, i64 %b) {
|
||||
; CHECK: div.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = sdiv i64 %a, %b
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @udiv_i64(i64 %a, i64 %b) {
|
||||
; CHECK: div.u64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = udiv i64 %a, %b
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @srem_i64(i64 %a, i64 %b) {
|
||||
; CHECK: rem.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = srem i64 %a, %b
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @urem_i64(i64 %a, i64 %b) {
|
||||
; CHECK: rem.u64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = urem i64 %a, %b
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @and_i64(i64 %a, i64 %b) {
|
||||
; CHECK: and.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = and i64 %a, %b
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @or_i64(i64 %a, i64 %b) {
|
||||
; CHECK: or.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = or i64 %a, %b
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @xor_i64(i64 %a, i64 %b) {
|
||||
; CHECK: xor.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = xor i64 %a, %b
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @shl_i64(i64 %a, i64 %b) {
|
||||
; PTX requires 32-bit shift amount
|
||||
; CHECK: shl.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = shl i64 %a, %b
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @ashr_i64(i64 %a, i64 %b) {
|
||||
; PTX requires 32-bit shift amount
|
||||
; CHECK: shr.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = ashr i64 %a, %b
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @lshr_i64(i64 %a, i64 %b) {
|
||||
; PTX requires 32-bit shift amount
|
||||
; CHECK: shr.u64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = lshr i64 %a, %b
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
|
||||
;;; i32
|
||||
|
||||
define i32 @add_i32(i32 %a, i32 %b) {
|
||||
; CHECK: add.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = add i32 %a, %b
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @sub_i32(i32 %a, i32 %b) {
|
||||
; CHECK: sub.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = sub i32 %a, %b
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @mul_i32(i32 %a, i32 %b) {
|
||||
; CHECK: mul.lo.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = mul i32 %a, %b
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @sdiv_i32(i32 %a, i32 %b) {
|
||||
; CHECK: div.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = sdiv i32 %a, %b
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @udiv_i32(i32 %a, i32 %b) {
|
||||
; CHECK: div.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = udiv i32 %a, %b
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @srem_i32(i32 %a, i32 %b) {
|
||||
; CHECK: rem.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = srem i32 %a, %b
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @urem_i32(i32 %a, i32 %b) {
|
||||
; CHECK: rem.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = urem i32 %a, %b
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @and_i32(i32 %a, i32 %b) {
|
||||
; CHECK: and.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = and i32 %a, %b
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @or_i32(i32 %a, i32 %b) {
|
||||
; CHECK: or.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = or i32 %a, %b
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @xor_i32(i32 %a, i32 %b) {
|
||||
; CHECK: xor.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = xor i32 %a, %b
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @shl_i32(i32 %a, i32 %b) {
|
||||
; CHECK: shl.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = shl i32 %a, %b
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @ashr_i32(i32 %a, i32 %b) {
|
||||
; CHECK: shr.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = ashr i32 %a, %b
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @lshr_i32(i32 %a, i32 %b) {
|
||||
; CHECK: shr.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = lshr i32 %a, %b
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
;;; i16
|
||||
|
||||
define i16 @add_i16(i16 %a, i16 %b) {
|
||||
; CHECK: add.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = add i16 %a, %b
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @sub_i16(i16 %a, i16 %b) {
|
||||
; CHECK: sub.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = sub i16 %a, %b
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @mul_i16(i16 %a, i16 %b) {
|
||||
; CHECK: mul.lo.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = mul i16 %a, %b
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @sdiv_i16(i16 %a, i16 %b) {
|
||||
; CHECK: div.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = sdiv i16 %a, %b
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @udiv_i16(i16 %a, i16 %b) {
|
||||
; CHECK: div.u16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = udiv i16 %a, %b
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @srem_i16(i16 %a, i16 %b) {
|
||||
; CHECK: rem.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = srem i16 %a, %b
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @urem_i16(i16 %a, i16 %b) {
|
||||
; CHECK: rem.u16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = urem i16 %a, %b
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @and_i16(i16 %a, i16 %b) {
|
||||
; CHECK: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = and i16 %a, %b
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @or_i16(i16 %a, i16 %b) {
|
||||
; CHECK: or.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = or i16 %a, %b
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @xor_i16(i16 %a, i16 %b) {
|
||||
; CHECK: xor.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = xor i16 %a, %b
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @shl_i16(i16 %a, i16 %b) {
|
||||
; PTX requires 32-bit shift amount
|
||||
; CHECK: shl.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = shl i16 %a, %b
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @ashr_i16(i16 %a, i16 %b) {
|
||||
; PTX requires 32-bit shift amount
|
||||
; CHECK: shr.s16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = ashr i16 %a, %b
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @lshr_i16(i16 %a, i16 %b) {
|
||||
; PTX requires 32-bit shift amount
|
||||
; CHECK: shr.u16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%ret = lshr i16 %a, %b
|
||||
ret i16 %ret
|
||||
}
|
32
test/CodeGen/NVPTX/calling-conv.ll
Normal file
32
test/CodeGen/NVPTX/calling-conv.ll
Normal file
@ -0,0 +1,32 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
|
||||
|
||||
;; Kernel function using ptx_kernel calling conv
|
||||
|
||||
; CHECK: .entry kernel_func
|
||||
define ptx_kernel void @kernel_func(float* %a) {
|
||||
; CHECK: ret
|
||||
ret void
|
||||
}
|
||||
|
||||
;; Device function
|
||||
; CHECK: .func device_func
|
||||
define void @device_func(float* %a) {
|
||||
; CHECK: ret
|
||||
ret void
|
||||
}
|
||||
|
||||
;; Kernel function using NVVM metadata
|
||||
; CHECK: .entry metadata_kernel
|
||||
define void @metadata_kernel(float* %a) {
|
||||
; CHECK: ret
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
!nvvm.annotations = !{!1}
|
||||
|
||||
!1 = metadata !{void (float*)* @metadata_kernel, metadata !"kernel", i32 1}
|
389
test/CodeGen/NVPTX/compare-int.ll
Normal file
389
test/CodeGen/NVPTX/compare-int.ll
Normal file
@ -0,0 +1,389 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
|
||||
;; These tests should run for all targets
|
||||
|
||||
;;===-- Basic instruction selection tests ---------------------------------===;;
|
||||
|
||||
|
||||
;;; i64
|
||||
|
||||
define i64 @icmp_eq_i64(i64 %a, i64 %b) {
|
||||
; CHECK: setp.eq.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
|
||||
; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp eq i64 %a, %b
|
||||
%ret = zext i1 %cmp to i64
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @icmp_ne_i64(i64 %a, i64 %b) {
|
||||
; CHECK: setp.ne.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
|
||||
; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp ne i64 %a, %b
|
||||
%ret = zext i1 %cmp to i64
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @icmp_ugt_i64(i64 %a, i64 %b) {
|
||||
; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
|
||||
; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp ugt i64 %a, %b
|
||||
%ret = zext i1 %cmp to i64
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @icmp_uge_i64(i64 %a, i64 %b) {
|
||||
; CHECK: setp.ge.u64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
|
||||
; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp uge i64 %a, %b
|
||||
%ret = zext i1 %cmp to i64
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @icmp_ult_i64(i64 %a, i64 %b) {
|
||||
; CHECK: setp.lt.u64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
|
||||
; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp ult i64 %a, %b
|
||||
%ret = zext i1 %cmp to i64
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @icmp_ule_i64(i64 %a, i64 %b) {
|
||||
; CHECK: setp.le.u64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
|
||||
; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp ule i64 %a, %b
|
||||
%ret = zext i1 %cmp to i64
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @icmp_sgt_i64(i64 %a, i64 %b) {
|
||||
; CHECK: setp.gt.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
|
||||
; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp sgt i64 %a, %b
|
||||
%ret = zext i1 %cmp to i64
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @icmp_sge_i64(i64 %a, i64 %b) {
|
||||
; CHECK: setp.ge.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
|
||||
; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp sge i64 %a, %b
|
||||
%ret = zext i1 %cmp to i64
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @icmp_slt_i64(i64 %a, i64 %b) {
|
||||
; CHECK: setp.lt.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
|
||||
; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp slt i64 %a, %b
|
||||
%ret = zext i1 %cmp to i64
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @icmp_sle_i64(i64 %a, i64 %b) {
|
||||
; CHECK: setp.le.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
|
||||
; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp sle i64 %a, %b
|
||||
%ret = zext i1 %cmp to i64
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
;;; i32
|
||||
|
||||
define i32 @icmp_eq_i32(i32 %a, i32 %b) {
|
||||
; CHECK: setp.eq.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp eq i32 %a, %b
|
||||
%ret = zext i1 %cmp to i32
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @icmp_ne_i32(i32 %a, i32 %b) {
|
||||
; CHECK: setp.ne.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp ne i32 %a, %b
|
||||
%ret = zext i1 %cmp to i32
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @icmp_ugt_i32(i32 %a, i32 %b) {
|
||||
; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp ugt i32 %a, %b
|
||||
%ret = zext i1 %cmp to i32
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @icmp_uge_i32(i32 %a, i32 %b) {
|
||||
; CHECK: setp.ge.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp uge i32 %a, %b
|
||||
%ret = zext i1 %cmp to i32
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @icmp_ult_i32(i32 %a, i32 %b) {
|
||||
; CHECK: setp.lt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp ult i32 %a, %b
|
||||
%ret = zext i1 %cmp to i32
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @icmp_ule_i32(i32 %a, i32 %b) {
|
||||
; CHECK: setp.le.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp ule i32 %a, %b
|
||||
%ret = zext i1 %cmp to i32
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @icmp_sgt_i32(i32 %a, i32 %b) {
|
||||
; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp sgt i32 %a, %b
|
||||
%ret = zext i1 %cmp to i32
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @icmp_sge_i32(i32 %a, i32 %b) {
|
||||
; CHECK: setp.ge.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp sge i32 %a, %b
|
||||
%ret = zext i1 %cmp to i32
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @icmp_slt_i32(i32 %a, i32 %b) {
|
||||
; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp slt i32 %a, %b
|
||||
%ret = zext i1 %cmp to i32
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @icmp_sle_i32(i32 %a, i32 %b) {
|
||||
; CHECK: setp.le.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp sle i32 %a, %b
|
||||
%ret = zext i1 %cmp to i32
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
|
||||
;;; i16
|
||||
|
||||
define i16 @icmp_eq_i16(i16 %a, i16 %b) {
|
||||
; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp eq i16 %a, %b
|
||||
%ret = zext i1 %cmp to i16
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @icmp_ne_i16(i16 %a, i16 %b) {
|
||||
; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp ne i16 %a, %b
|
||||
%ret = zext i1 %cmp to i16
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @icmp_ugt_i16(i16 %a, i16 %b) {
|
||||
; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp ugt i16 %a, %b
|
||||
%ret = zext i1 %cmp to i16
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @icmp_uge_i16(i16 %a, i16 %b) {
|
||||
; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp uge i16 %a, %b
|
||||
%ret = zext i1 %cmp to i16
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @icmp_ult_i16(i16 %a, i16 %b) {
|
||||
; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp ult i16 %a, %b
|
||||
%ret = zext i1 %cmp to i16
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @icmp_ule_i16(i16 %a, i16 %b) {
|
||||
; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp ule i16 %a, %b
|
||||
%ret = zext i1 %cmp to i16
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @icmp_sgt_i16(i16 %a, i16 %b) {
|
||||
; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp sgt i16 %a, %b
|
||||
%ret = zext i1 %cmp to i16
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @icmp_sge_i16(i16 %a, i16 %b) {
|
||||
; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp sge i16 %a, %b
|
||||
%ret = zext i1 %cmp to i16
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @icmp_slt_i16(i16 %a, i16 %b) {
|
||||
; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp slt i16 %a, %b
|
||||
%ret = zext i1 %cmp to i16
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @icmp_sle_i16(i16 %a, i16 %b) {
|
||||
; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp sle i16 %a, %b
|
||||
%ret = zext i1 %cmp to i16
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
|
||||
;;; i8
|
||||
|
||||
define i8 @icmp_eq_i8(i8 %a, i8 %b) {
|
||||
; Comparison happens in 16-bit
|
||||
; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
|
||||
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp eq i8 %a, %b
|
||||
%ret = zext i1 %cmp to i8
|
||||
ret i8 %ret
|
||||
}
|
||||
|
||||
define i8 @icmp_ne_i8(i8 %a, i8 %b) {
|
||||
; Comparison happens in 16-bit
|
||||
; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
|
||||
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp ne i8 %a, %b
|
||||
%ret = zext i1 %cmp to i8
|
||||
ret i8 %ret
|
||||
}
|
||||
|
||||
define i8 @icmp_ugt_i8(i8 %a, i8 %b) {
|
||||
; Comparison happens in 16-bit
|
||||
; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
|
||||
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp ugt i8 %a, %b
|
||||
%ret = zext i1 %cmp to i8
|
||||
ret i8 %ret
|
||||
}
|
||||
|
||||
define i8 @icmp_uge_i8(i8 %a, i8 %b) {
|
||||
; Comparison happens in 16-bit
|
||||
; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
|
||||
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp uge i8 %a, %b
|
||||
%ret = zext i1 %cmp to i8
|
||||
ret i8 %ret
|
||||
}
|
||||
|
||||
define i8 @icmp_ult_i8(i8 %a, i8 %b) {
|
||||
; Comparison happens in 16-bit
|
||||
; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
|
||||
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp ult i8 %a, %b
|
||||
%ret = zext i1 %cmp to i8
|
||||
ret i8 %ret
|
||||
}
|
||||
|
||||
define i8 @icmp_ule_i8(i8 %a, i8 %b) {
|
||||
; Comparison happens in 16-bit
|
||||
; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
|
||||
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp ule i8 %a, %b
|
||||
%ret = zext i1 %cmp to i8
|
||||
ret i8 %ret
|
||||
}
|
||||
|
||||
define i8 @icmp_sgt_i8(i8 %a, i8 %b) {
|
||||
; Comparison happens in 16-bit
|
||||
; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
|
||||
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp sgt i8 %a, %b
|
||||
%ret = zext i1 %cmp to i8
|
||||
ret i8 %ret
|
||||
}
|
||||
|
||||
define i8 @icmp_sge_i8(i8 %a, i8 %b) {
|
||||
; Comparison happens in 16-bit
|
||||
; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
|
||||
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp sge i8 %a, %b
|
||||
%ret = zext i1 %cmp to i8
|
||||
ret i8 %ret
|
||||
}
|
||||
|
||||
define i8 @icmp_slt_i8(i8 %a, i8 %b) {
|
||||
; Comparison happens in 16-bit
|
||||
; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
|
||||
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp slt i8 %a, %b
|
||||
%ret = zext i1 %cmp to i8
|
||||
ret i8 %ret
|
||||
}
|
||||
|
||||
define i8 @icmp_sle_i8(i8 %a, i8 %b) {
|
||||
; Comparison happens in 16-bit
|
||||
; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
|
||||
; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
%cmp = icmp sle i8 %a, %b
|
||||
%ret = zext i1 %cmp to i8
|
||||
ret i8 %ret
|
||||
}
|
146
test/CodeGen/NVPTX/convert-fp.ll
Normal file
146
test/CodeGen/NVPTX/convert-fp.ll
Normal file
@ -0,0 +1,146 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
|
||||
|
||||
define i16 @cvt_i16_f32(float %x) {
|
||||
; CHECK: cvt.rzi.u16.f32 %rs{{[0-9]+}}, %f{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fptoui float %x to i16
|
||||
ret i16 %a
|
||||
}
|
||||
|
||||
define i16 @cvt_i16_f64(double %x) {
|
||||
; CHECK: cvt.rzi.u16.f64 %rs{{[0-9]+}}, %fl{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fptoui double %x to i16
|
||||
ret i16 %a
|
||||
}
|
||||
|
||||
define i32 @cvt_i32_f32(float %x) {
|
||||
; CHECK: cvt.rzi.u32.f32 %r{{[0-9]+}}, %f{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fptoui float %x to i32
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
define i32 @cvt_i32_f64(double %x) {
|
||||
; CHECK: cvt.rzi.u32.f64 %r{{[0-9]+}}, %fl{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fptoui double %x to i32
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
|
||||
define i64 @cvt_i64_f32(float %x) {
|
||||
; CHECK: cvt.rzi.u64.f32 %rl{{[0-9]+}}, %f{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fptoui float %x to i64
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
define i64 @cvt_i64_f64(double %x) {
|
||||
; CHECK: cvt.rzi.u64.f64 %rl{{[0-9]+}}, %fl{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fptoui double %x to i64
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
define float @cvt_f32_i16(i16 %x) {
|
||||
; CHECK: cvt.rn.f32.u16 %f{{[0-9]+}}, %rs{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = uitofp i16 %x to float
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define float @cvt_f32_i32(i32 %x) {
|
||||
; CHECK: cvt.rn.f32.u32 %f{{[0-9]+}}, %r{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = uitofp i32 %x to float
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define float @cvt_f32_i64(i64 %x) {
|
||||
; CHECK: cvt.rn.f32.u64 %f{{[0-9]+}}, %rl{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = uitofp i64 %x to float
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define float @cvt_f32_f64(double %x) {
|
||||
; CHECK: cvt.rn.f32.f64 %f{{[0-9]+}}, %fl{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fptrunc double %x to float
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define float @cvt_f32_s16(i16 %x) {
|
||||
; CHECK: cvt.rn.f32.s16 %f{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%a = sitofp i16 %x to float
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define float @cvt_f32_s32(i32 %x) {
|
||||
; CHECK: cvt.rn.f32.s32 %f{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%a = sitofp i32 %x to float
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define float @cvt_f32_s64(i64 %x) {
|
||||
; CHECK: cvt.rn.f32.s64 %f{{[0-9]+}}, %rl{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%a = sitofp i64 %x to float
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define double @cvt_f64_i16(i16 %x) {
|
||||
; CHECK: cvt.rn.f64.u16 %fl{{[0-9]+}}, %rs{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = uitofp i16 %x to double
|
||||
ret double %a
|
||||
}
|
||||
|
||||
define double @cvt_f64_i32(i32 %x) {
|
||||
; CHECK: cvt.rn.f64.u32 %fl{{[0-9]+}}, %r{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = uitofp i32 %x to double
|
||||
ret double %a
|
||||
}
|
||||
|
||||
define double @cvt_f64_i64(i64 %x) {
|
||||
; CHECK: cvt.rn.f64.u64 %fl{{[0-9]+}}, %rl{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = uitofp i64 %x to double
|
||||
ret double %a
|
||||
}
|
||||
|
||||
define double @cvt_f64_f32(float %x) {
|
||||
; CHECK: cvt.f64.f32 %fl{{[0-9]+}}, %f{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fpext float %x to double
|
||||
ret double %a
|
||||
}
|
||||
|
||||
define double @cvt_f64_s16(i16 %x) {
|
||||
; CHECK: cvt.rn.f64.s16 %fl{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%a = sitofp i16 %x to double
|
||||
ret double %a
|
||||
}
|
||||
|
||||
define double @cvt_f64_s32(i32 %x) {
|
||||
; CHECK: cvt.rn.f64.s32 %fl{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%a = sitofp i32 %x to double
|
||||
ret double %a
|
||||
}
|
||||
|
||||
define double @cvt_f64_s64(i64 %x) {
|
||||
; CHECK: cvt.rn.f64.s64 %fl{{[0-9]+}}, %rl{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%a = sitofp i64 %x to double
|
||||
ret double %a
|
||||
}
|
55
test/CodeGen/NVPTX/convert-int-sm10.ll
Normal file
55
test/CodeGen/NVPTX/convert-int-sm10.ll
Normal file
@ -0,0 +1,55 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
|
||||
|
||||
|
||||
; i16
|
||||
|
||||
define i16 @cvt_i16_i32(i32 %x) {
|
||||
; CHECK: cvt.u16.u32 %rs{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%a = trunc i32 %x to i16
|
||||
ret i16 %a
|
||||
}
|
||||
|
||||
define i16 @cvt_i16_i64(i64 %x) {
|
||||
; CHECK: cvt.u16.u64 %rs{{[0-9]+}}, %rl{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%a = trunc i64 %x to i16
|
||||
ret i16 %a
|
||||
}
|
||||
|
||||
|
||||
|
||||
; i32
|
||||
|
||||
define i32 @cvt_i32_i16(i16 %x) {
|
||||
; CHECK: cvt.u32.u16 %r{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%a = zext i16 %x to i32
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
define i32 @cvt_i32_i64(i64 %x) {
|
||||
; CHECK: cvt.u32.u64 %r{{[0-9]+}}, %rl{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%a = trunc i64 %x to i32
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
|
||||
|
||||
; i64
|
||||
|
||||
define i64 @cvt_i64_i16(i16 %x) {
|
||||
; CHECK: cvt.u64.u16 %rl{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%a = zext i16 %x to i64
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
define i64 @cvt_i64_i32(i32 %x) {
|
||||
; CHECK: cvt.u64.u32 %rl{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%a = zext i32 %x to i64
|
||||
ret i64 %a
|
||||
}
|
64
test/CodeGen/NVPTX/convert-int-sm20.ll
Normal file
64
test/CodeGen/NVPTX/convert-int-sm20.ll
Normal file
@ -0,0 +1,64 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
|
||||
|
||||
;; Integer conversions happen inplicitly by loading/storing the proper types
|
||||
|
||||
|
||||
; i16
|
||||
|
||||
define i16 @cvt_i16_i32(i32 %x) {
|
||||
; CHECK: ld.param.u16 %rs[[R0:[0-9]+]], [cvt_i16_i32_param_{{[0-9]+}}]
|
||||
; CHECK: st.param.b16 [func_retval{{[0-9]+}}+0], %rs[[R0]]
|
||||
; CHECK: ret
|
||||
%a = trunc i32 %x to i16
|
||||
ret i16 %a
|
||||
}
|
||||
|
||||
define i16 @cvt_i16_i64(i64 %x) {
|
||||
; CHECK: ld.param.u16 %rs[[R0:[0-9]+]], [cvt_i16_i64_param_{{[0-9]+}}]
|
||||
; CHECK: st.param.b16 [func_retval{{[0-9]+}}+0], %rs[[R0]]
|
||||
; CHECK: ret
|
||||
%a = trunc i64 %x to i16
|
||||
ret i16 %a
|
||||
}
|
||||
|
||||
|
||||
|
||||
; i32
|
||||
|
||||
define i32 @cvt_i32_i16(i16 %x) {
|
||||
; CHECK: ld.param.u16 %r[[R0:[0-9]+]], [cvt_i32_i16_param_{{[0-9]+}}]
|
||||
; CHECK: st.param.b32 [func_retval{{[0-9]+}}+0], %r[[R0]]
|
||||
; CHECK: ret
|
||||
%a = zext i16 %x to i32
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
define i32 @cvt_i32_i64(i64 %x) {
|
||||
; CHECK: ld.param.u32 %r[[R0:[0-9]+]], [cvt_i32_i64_param_{{[0-9]+}}]
|
||||
; CHECK: st.param.b32 [func_retval{{[0-9]+}}+0], %r[[R0]]
|
||||
; CHECK: ret
|
||||
%a = trunc i64 %x to i32
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
|
||||
|
||||
; i64
|
||||
|
||||
define i64 @cvt_i64_i16(i16 %x) {
|
||||
; CHECK: ld.param.u16 %rl[[R0:[0-9]+]], [cvt_i64_i16_param_{{[0-9]+}}]
|
||||
; CHECK: st.param.b64 [func_retval{{[0-9]+}}+0], %rl[[R0]]
|
||||
; CHECK: ret
|
||||
%a = zext i16 %x to i64
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
define i64 @cvt_i64_i32(i32 %x) {
|
||||
; CHECK: ld.param.u32 %rl[[R0:[0-9]+]], [cvt_i64_i32_param_{{[0-9]+}}]
|
||||
; CHECK: st.param.b64 [func_retval{{[0-9]+}}+0], %rl[[R0]]
|
||||
; CHECK: ret
|
||||
%a = zext i32 %x to i64
|
||||
ret i64 %a
|
||||
}
|
24
test/CodeGen/NVPTX/fma-disable.ll
Normal file
24
test/CodeGen/NVPTX/fma-disable.ll
Normal file
@ -0,0 +1,24 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-fma-level=1 | FileCheck %s -check-prefix=FMA
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-fma-level=0 | FileCheck %s -check-prefix=MUL
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-fma-level=1 | FileCheck %s -check-prefix=FMA
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-fma-level=0 | FileCheck %s -check-prefix=MUL
|
||||
|
||||
define ptx_device float @test_mul_add_f(float %x, float %y, float %z) {
|
||||
entry:
|
||||
; FMA: fma.rn.f32
|
||||
; MUL: mul.rn.f32
|
||||
; MUL: add.rn.f32
|
||||
%a = fmul float %x, %y
|
||||
%b = fadd float %a, %z
|
||||
ret float %b
|
||||
}
|
||||
|
||||
define ptx_device double @test_mul_add_d(double %x, double %y, double %z) {
|
||||
entry:
|
||||
; FMA: fma.rn.f64
|
||||
; MUL: mul.rn.f64
|
||||
; MUL: add.rn.f64
|
||||
%a = fmul double %x, %y
|
||||
%b = fadd double %a, %z
|
||||
ret double %b
|
||||
}
|
17
test/CodeGen/NVPTX/fma.ll
Normal file
17
test/CodeGen/NVPTX/fma.ll
Normal file
@ -0,0 +1,17 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
|
||||
define ptx_device float @t1_f32(float %x, float %y, float %z) {
|
||||
; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fmul float %x, %y
|
||||
%b = fadd float %a, %z
|
||||
ret float %b
|
||||
}
|
||||
|
||||
define ptx_device double @t1_f64(double %x, double %y, double %z) {
|
||||
; CHECK: fma.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fmul double %x, %y
|
||||
%b = fadd double %a, %z
|
||||
ret double %b
|
||||
}
|
284
test/CodeGen/NVPTX/intrinsic-old.ll
Normal file
284
test/CodeGen/NVPTX/intrinsic-old.ll
Normal file
@ -0,0 +1,284 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
|
||||
define ptx_device i32 @test_tid_x() {
|
||||
; CHECK: mov.u32 %r0, %tid.x;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.tid.x()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_tid_y() {
|
||||
; CHECK: mov.u32 %r0, %tid.y;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.tid.y()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_tid_z() {
|
||||
; CHECK: mov.u32 %r0, %tid.z;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.tid.z()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_tid_w() {
|
||||
; CHECK: mov.u32 %r0, %tid.w;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.tid.w()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ntid_x() {
|
||||
; CHECK: mov.u32 %r0, %ntid.x;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ntid.x()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ntid_y() {
|
||||
; CHECK: mov.u32 %r0, %ntid.y;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ntid.y()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ntid_z() {
|
||||
; CHECK: mov.u32 %r0, %ntid.z;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ntid.z()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ntid_w() {
|
||||
; CHECK: mov.u32 %r0, %ntid.w;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ntid.w()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_laneid() {
|
||||
; CHECK: mov.u32 %r0, %laneid;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.laneid()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_warpid() {
|
||||
; CHECK: mov.u32 %r0, %warpid;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.warpid()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_nwarpid() {
|
||||
; CHECK: mov.u32 %r0, %nwarpid;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.nwarpid()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ctaid_x() {
|
||||
; CHECK: mov.u32 %r0, %ctaid.x;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ctaid.x()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ctaid_y() {
|
||||
; CHECK: mov.u32 %r0, %ctaid.y;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ctaid.y()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ctaid_z() {
|
||||
; CHECK: mov.u32 %r0, %ctaid.z;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ctaid.z()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ctaid_w() {
|
||||
; CHECK: mov.u32 %r0, %ctaid.w;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ctaid.w()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_nctaid_x() {
|
||||
; CHECK: mov.u32 %r0, %nctaid.x;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.nctaid.x()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_nctaid_y() {
|
||||
; CHECK: mov.u32 %r0, %nctaid.y;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.nctaid.y()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_nctaid_z() {
|
||||
; CHECK: mov.u32 %r0, %nctaid.z;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.nctaid.z()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_nctaid_w() {
|
||||
; CHECK: mov.u32 %r0, %nctaid.w;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.nctaid.w()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_smid() {
|
||||
; CHECK: mov.u32 %r0, %smid;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.smid()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_nsmid() {
|
||||
; CHECK: mov.u32 %r0, %nsmid;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.nsmid()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_gridid() {
|
||||
; CHECK: mov.u32 %r0, %gridid;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.gridid()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_lanemask_eq() {
|
||||
; CHECK: mov.u32 %r0, %lanemask_eq;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.lanemask.eq()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_lanemask_le() {
|
||||
; CHECK: mov.u32 %r0, %lanemask_le;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.lanemask.le()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_lanemask_lt() {
|
||||
; CHECK: mov.u32 %r0, %lanemask_lt;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.lanemask.lt()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_lanemask_ge() {
|
||||
; CHECK: mov.u32 %r0, %lanemask_ge;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.lanemask.ge()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_lanemask_gt() {
|
||||
; CHECK: mov.u32 %r0, %lanemask_gt;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.lanemask.gt()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_clock() {
|
||||
; CHECK: mov.u32 %r0, %clock;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.clock()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i64 @test_clock64() {
|
||||
; CHECK: mov.u64 %rl0, %clock64;
|
||||
; CHECK: ret;
|
||||
%x = call i64 @llvm.ptx.read.clock64()
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_pm0() {
|
||||
; CHECK: mov.u32 %r0, %pm0;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.pm0()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_pm1() {
|
||||
; CHECK: mov.u32 %r0, %pm1;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.pm1()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_pm2() {
|
||||
; CHECK: mov.u32 %r0, %pm2;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.pm2()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_pm3() {
|
||||
; CHECK: mov.u32 %r0, %pm3;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.pm3()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device void @test_bar_sync() {
|
||||
; CHECK: bar.sync 0
|
||||
; CHECK: ret;
|
||||
call void @llvm.ptx.bar.sync(i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.ptx.read.tid.x()
|
||||
declare i32 @llvm.ptx.read.tid.y()
|
||||
declare i32 @llvm.ptx.read.tid.z()
|
||||
declare i32 @llvm.ptx.read.tid.w()
|
||||
declare i32 @llvm.ptx.read.ntid.x()
|
||||
declare i32 @llvm.ptx.read.ntid.y()
|
||||
declare i32 @llvm.ptx.read.ntid.z()
|
||||
declare i32 @llvm.ptx.read.ntid.w()
|
||||
|
||||
declare i32 @llvm.ptx.read.laneid()
|
||||
declare i32 @llvm.ptx.read.warpid()
|
||||
declare i32 @llvm.ptx.read.nwarpid()
|
||||
|
||||
declare i32 @llvm.ptx.read.ctaid.x()
|
||||
declare i32 @llvm.ptx.read.ctaid.y()
|
||||
declare i32 @llvm.ptx.read.ctaid.z()
|
||||
declare i32 @llvm.ptx.read.ctaid.w()
|
||||
declare i32 @llvm.ptx.read.nctaid.x()
|
||||
declare i32 @llvm.ptx.read.nctaid.y()
|
||||
declare i32 @llvm.ptx.read.nctaid.z()
|
||||
declare i32 @llvm.ptx.read.nctaid.w()
|
||||
|
||||
declare i32 @llvm.ptx.read.smid()
|
||||
declare i32 @llvm.ptx.read.nsmid()
|
||||
declare i32 @llvm.ptx.read.gridid()
|
||||
|
||||
declare i32 @llvm.ptx.read.lanemask.eq()
|
||||
declare i32 @llvm.ptx.read.lanemask.le()
|
||||
declare i32 @llvm.ptx.read.lanemask.lt()
|
||||
declare i32 @llvm.ptx.read.lanemask.ge()
|
||||
declare i32 @llvm.ptx.read.lanemask.gt()
|
||||
|
||||
declare i32 @llvm.ptx.read.clock()
|
||||
declare i64 @llvm.ptx.read.clock64()
|
||||
|
||||
declare i32 @llvm.ptx.read.pm0()
|
||||
declare i32 @llvm.ptx.read.pm1()
|
||||
declare i32 @llvm.ptx.read.pm2()
|
||||
declare i32 @llvm.ptx.read.pm3()
|
||||
|
||||
declare void @llvm.ptx.bar.sync(i32 %i)
|
173
test/CodeGen/NVPTX/ld-addrspace.ll
Normal file
173
test/CodeGen/NVPTX/ld-addrspace.ll
Normal file
@ -0,0 +1,173 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s --check-prefix=PTX32
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s --check-prefix=PTX64
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
|
||||
|
||||
|
||||
;; i8
|
||||
define i8 @ld_global_i8(i8 addrspace(1)* %ptr) {
|
||||
; PTX32: ld.global.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||
; PTX32: ret
|
||||
; PTX64: ld.global.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||
; PTX64: ret
|
||||
%a = load i8 addrspace(1)* %ptr
|
||||
ret i8 %a
|
||||
}
|
||||
|
||||
define i8 @ld_shared_i8(i8 addrspace(3)* %ptr) {
|
||||
; PTX32: ld.shared.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||
; PTX32: ret
|
||||
; PTX64: ld.shared.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||
; PTX64: ret
|
||||
%a = load i8 addrspace(3)* %ptr
|
||||
ret i8 %a
|
||||
}
|
||||
|
||||
define i8 @ld_local_i8(i8 addrspace(5)* %ptr) {
|
||||
; PTX32: ld.local.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||
; PTX32: ret
|
||||
; PTX64: ld.local.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||
; PTX64: ret
|
||||
%a = load i8 addrspace(5)* %ptr
|
||||
ret i8 %a
|
||||
}
|
||||
|
||||
;; i16
|
||||
define i16 @ld_global_i16(i16 addrspace(1)* %ptr) {
|
||||
; PTX32: ld.global.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||
; PTX32: ret
|
||||
; PTX64: ld.global.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||
; PTX64: ret
|
||||
%a = load i16 addrspace(1)* %ptr
|
||||
ret i16 %a
|
||||
}
|
||||
|
||||
define i16 @ld_shared_i16(i16 addrspace(3)* %ptr) {
|
||||
; PTX32: ld.shared.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||
; PTX32: ret
|
||||
; PTX64: ld.shared.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||
; PTX64: ret
|
||||
%a = load i16 addrspace(3)* %ptr
|
||||
ret i16 %a
|
||||
}
|
||||
|
||||
define i16 @ld_local_i16(i16 addrspace(5)* %ptr) {
|
||||
; PTX32: ld.local.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||
; PTX32: ret
|
||||
; PTX64: ld.local.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||
; PTX64: ret
|
||||
%a = load i16 addrspace(5)* %ptr
|
||||
ret i16 %a
|
||||
}
|
||||
|
||||
;; i32
|
||||
define i32 @ld_global_i32(i32 addrspace(1)* %ptr) {
|
||||
; PTX32: ld.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||
; PTX32: ret
|
||||
; PTX64: ld.global.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||
; PTX64: ret
|
||||
%a = load i32 addrspace(1)* %ptr
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
define i32 @ld_shared_i32(i32 addrspace(3)* %ptr) {
|
||||
; PTX32: ld.shared.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||
; PTX32: ret
|
||||
; PTX64: ld.shared.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||
; PTX64: ret
|
||||
%a = load i32 addrspace(3)* %ptr
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
define i32 @ld_local_i32(i32 addrspace(5)* %ptr) {
|
||||
; PTX32: ld.local.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||
; PTX32: ret
|
||||
; PTX64: ld.local.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||
; PTX64: ret
|
||||
%a = load i32 addrspace(5)* %ptr
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
;; i64
|
||||
define i64 @ld_global_i64(i64 addrspace(1)* %ptr) {
|
||||
; PTX32: ld.global.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||
; PTX32: ret
|
||||
; PTX64: ld.global.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||
; PTX64: ret
|
||||
%a = load i64 addrspace(1)* %ptr
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
define i64 @ld_shared_i64(i64 addrspace(3)* %ptr) {
|
||||
; PTX32: ld.shared.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||
; PTX32: ret
|
||||
; PTX64: ld.shared.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||
; PTX64: ret
|
||||
%a = load i64 addrspace(3)* %ptr
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
define i64 @ld_local_i64(i64 addrspace(5)* %ptr) {
|
||||
; PTX32: ld.local.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||
; PTX32: ret
|
||||
; PTX64: ld.local.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||
; PTX64: ret
|
||||
%a = load i64 addrspace(5)* %ptr
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
;; f32
|
||||
define float @ld_global_f32(float addrspace(1)* %ptr) {
|
||||
; PTX32: ld.global.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||
; PTX32: ret
|
||||
; PTX64: ld.global.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||
; PTX64: ret
|
||||
%a = load float addrspace(1)* %ptr
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define float @ld_shared_f32(float addrspace(3)* %ptr) {
|
||||
; PTX32: ld.shared.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||
; PTX32: ret
|
||||
; PTX64: ld.shared.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||
; PTX64: ret
|
||||
%a = load float addrspace(3)* %ptr
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define float @ld_local_f32(float addrspace(5)* %ptr) {
|
||||
; PTX32: ld.local.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||
; PTX32: ret
|
||||
; PTX64: ld.local.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||
; PTX64: ret
|
||||
%a = load float addrspace(5)* %ptr
|
||||
ret float %a
|
||||
}
|
||||
|
||||
;; f64
|
||||
define double @ld_global_f64(double addrspace(1)* %ptr) {
|
||||
; PTX32: ld.global.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||
; PTX32: ret
|
||||
; PTX64: ld.global.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||
; PTX64: ret
|
||||
%a = load double addrspace(1)* %ptr
|
||||
ret double %a
|
||||
}
|
||||
|
||||
define double @ld_shared_f64(double addrspace(3)* %ptr) {
|
||||
; PTX32: ld.shared.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||
; PTX32: ret
|
||||
; PTX64: ld.shared.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||
; PTX64: ret
|
||||
%a = load double addrspace(3)* %ptr
|
||||
ret double %a
|
||||
}
|
||||
|
||||
define double @ld_local_f64(double addrspace(5)* %ptr) {
|
||||
; PTX32: ld.local.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||
; PTX32: ret
|
||||
; PTX64: ld.local.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||
; PTX64: ret
|
||||
%a = load double addrspace(5)* %ptr
|
||||
ret double %a
|
||||
}
|
63
test/CodeGen/NVPTX/ld-generic.ll
Normal file
63
test/CodeGen/NVPTX/ld-generic.ll
Normal file
@ -0,0 +1,63 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
|
||||
|
||||
|
||||
;; i8
|
||||
define i8 @ld_global_i8(i8 addrspace(0)* %ptr) {
|
||||
; PTX32: ld.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||
; PTX32: ret
|
||||
; PTX64: ld.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||
; PTX64: ret
|
||||
%a = load i8 addrspace(0)* %ptr
|
||||
ret i8 %a
|
||||
}
|
||||
|
||||
;; i16
|
||||
define i16 @ld_global_i16(i16 addrspace(0)* %ptr) {
|
||||
; PTX32: ld.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||
; PTX32: ret
|
||||
; PTX64: ld.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||
; PTX64: ret
|
||||
%a = load i16 addrspace(0)* %ptr
|
||||
ret i16 %a
|
||||
}
|
||||
|
||||
;; i32
|
||||
define i32 @ld_global_i32(i32 addrspace(0)* %ptr) {
|
||||
; PTX32: ld.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||
; PTX32: ret
|
||||
; PTX64: ld.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||
; PTX64: ret
|
||||
%a = load i32 addrspace(0)* %ptr
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
;; i64
|
||||
define i64 @ld_global_i64(i64 addrspace(0)* %ptr) {
|
||||
; PTX32: ld.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||
; PTX32: ret
|
||||
; PTX64: ld.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||
; PTX64: ret
|
||||
%a = load i64 addrspace(0)* %ptr
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
;; f32
|
||||
define float @ld_global_f32(float addrspace(0)* %ptr) {
|
||||
; PTX32: ld.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||
; PTX32: ret
|
||||
; PTX64: ld.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||
; PTX64: ret
|
||||
%a = load float addrspace(0)* %ptr
|
||||
ret float %a
|
||||
}
|
||||
|
||||
;; f64
|
||||
define double @ld_global_f64(double addrspace(0)* %ptr) {
|
||||
; PTX32: ld.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
|
||||
; PTX32: ret
|
||||
; PTX64: ld.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
|
||||
; PTX64: ret
|
||||
%a = load double addrspace(0)* %ptr
|
||||
ret double %a
|
||||
}
|
5
test/CodeGen/NVPTX/lit.local.cfg
Normal file
5
test/CodeGen/NVPTX/lit.local.cfg
Normal file
@ -0,0 +1,5 @@
|
||||
config.suffixes = ['.ll', '.c', '.cpp']
|
||||
|
||||
targets = set(config.root.targets_to_build.split())
|
||||
if not 'NVPTX' in targets:
|
||||
config.unsupported = True
|
179
test/CodeGen/NVPTX/st-addrspace.ll
Normal file
179
test/CodeGen/NVPTX/st-addrspace.ll
Normal file
@ -0,0 +1,179 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s --check-prefix=PTX32
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s --check-prefix=PTX64
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
|
||||
|
||||
|
||||
;; i8
|
||||
|
||||
define void @st_global_i8(i8 addrspace(1)* %ptr, i8 %a) {
|
||||
; PTX32: st.global.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
|
||||
; PTX32: ret
|
||||
; PTX64: st.global.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
|
||||
; PTX64: ret
|
||||
store i8 %a, i8 addrspace(1)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st_shared_i8(i8 addrspace(3)* %ptr, i8 %a) {
|
||||
; PTX32: st.shared.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
|
||||
; PTX32: ret
|
||||
; PTX64: st.shared.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
|
||||
; PTX64: ret
|
||||
store i8 %a, i8 addrspace(3)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st_local_i8(i8 addrspace(5)* %ptr, i8 %a) {
|
||||
; PTX32: st.local.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
|
||||
; PTX32: ret
|
||||
; PTX64: st.local.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
|
||||
; PTX64: ret
|
||||
store i8 %a, i8 addrspace(5)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
;; i16
|
||||
|
||||
define void @st_global_i16(i16 addrspace(1)* %ptr, i16 %a) {
|
||||
; PTX32: st.global.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
|
||||
; PTX32: ret
|
||||
; PTX64: st.global.u16 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
|
||||
; PTX64: ret
|
||||
store i16 %a, i16 addrspace(1)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st_shared_i16(i16 addrspace(3)* %ptr, i16 %a) {
|
||||
; PTX32: st.shared.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
|
||||
; PTX32: ret
|
||||
; PTX64: st.shared.u16 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
|
||||
; PTX64: ret
|
||||
store i16 %a, i16 addrspace(3)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st_local_i16(i16 addrspace(5)* %ptr, i16 %a) {
|
||||
; PTX32: st.local.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
|
||||
; PTX32: ret
|
||||
; PTX64: st.local.u16 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
|
||||
; PTX64: ret
|
||||
store i16 %a, i16 addrspace(5)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
;; i32
|
||||
|
||||
define void @st_global_i32(i32 addrspace(1)* %ptr, i32 %a) {
|
||||
; PTX32: st.global.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
|
||||
; PTX32: ret
|
||||
; PTX64: st.global.u32 [%rl{{[0-9]+}}], %r{{[0-9]+}}
|
||||
; PTX64: ret
|
||||
store i32 %a, i32 addrspace(1)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st_shared_i32(i32 addrspace(3)* %ptr, i32 %a) {
|
||||
; PTX32: st.shared.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
|
||||
; PTX32: ret
|
||||
; PTX64: st.shared.u32 [%rl{{[0-9]+}}], %r{{[0-9]+}}
|
||||
; PTX64: ret
|
||||
store i32 %a, i32 addrspace(3)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st_local_i32(i32 addrspace(5)* %ptr, i32 %a) {
|
||||
; PTX32: st.local.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
|
||||
; PTX32: ret
|
||||
; PTX64: st.local.u32 [%rl{{[0-9]+}}], %r{{[0-9]+}}
|
||||
; PTX64: ret
|
||||
store i32 %a, i32 addrspace(5)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
;; i64
|
||||
|
||||
define void @st_global_i64(i64 addrspace(1)* %ptr, i64 %a) {
|
||||
; PTX32: st.global.u64 [%r{{[0-9]+}}], %rl{{[0-9]+}}
|
||||
; PTX32: ret
|
||||
; PTX64: st.global.u64 [%rl{{[0-9]+}}], %rl{{[0-9]+}}
|
||||
; PTX64: ret
|
||||
store i64 %a, i64 addrspace(1)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st_shared_i64(i64 addrspace(3)* %ptr, i64 %a) {
|
||||
; PTX32: st.shared.u64 [%r{{[0-9]+}}], %rl{{[0-9]+}}
|
||||
; PTX32: ret
|
||||
; PTX64: st.shared.u64 [%rl{{[0-9]+}}], %rl{{[0-9]+}}
|
||||
; PTX64: ret
|
||||
store i64 %a, i64 addrspace(3)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st_local_i64(i64 addrspace(5)* %ptr, i64 %a) {
|
||||
; PTX32: st.local.u64 [%r{{[0-9]+}}], %rl{{[0-9]+}}
|
||||
; PTX32: ret
|
||||
; PTX64: st.local.u64 [%rl{{[0-9]+}}], %rl{{[0-9]+}}
|
||||
; PTX64: ret
|
||||
store i64 %a, i64 addrspace(5)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
;; f32
|
||||
|
||||
define void @st_global_f32(float addrspace(1)* %ptr, float %a) {
|
||||
; PTX32: st.global.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
|
||||
; PTX32: ret
|
||||
; PTX64: st.global.f32 [%rl{{[0-9]+}}], %f{{[0-9]+}}
|
||||
; PTX64: ret
|
||||
store float %a, float addrspace(1)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st_shared_f32(float addrspace(3)* %ptr, float %a) {
|
||||
; PTX32: st.shared.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
|
||||
; PTX32: ret
|
||||
; PTX64: st.shared.f32 [%rl{{[0-9]+}}], %f{{[0-9]+}}
|
||||
; PTX64: ret
|
||||
store float %a, float addrspace(3)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st_local_f32(float addrspace(5)* %ptr, float %a) {
|
||||
; PTX32: st.local.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
|
||||
; PTX32: ret
|
||||
; PTX64: st.local.f32 [%rl{{[0-9]+}}], %f{{[0-9]+}}
|
||||
; PTX64: ret
|
||||
store float %a, float addrspace(5)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
;; f64
|
||||
|
||||
define void @st_global_f64(double addrspace(1)* %ptr, double %a) {
|
||||
; PTX32: st.global.f64 [%r{{[0-9]+}}], %fl{{[0-9]+}}
|
||||
; PTX32: ret
|
||||
; PTX64: st.global.f64 [%rl{{[0-9]+}}], %fl{{[0-9]+}}
|
||||
; PTX64: ret
|
||||
store double %a, double addrspace(1)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st_shared_f64(double addrspace(3)* %ptr, double %a) {
|
||||
; PTX32: st.shared.f64 [%r{{[0-9]+}}], %fl{{[0-9]+}}
|
||||
; PTX32: ret
|
||||
; PTX64: st.shared.f64 [%rl{{[0-9]+}}], %fl{{[0-9]+}}
|
||||
; PTX64: ret
|
||||
store double %a, double addrspace(3)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @st_local_f64(double addrspace(5)* %ptr, double %a) {
|
||||
; PTX32: st.local.f64 [%r{{[0-9]+}}], %fl{{[0-9]+}}
|
||||
; PTX32: ret
|
||||
; PTX64: st.local.f64 [%rl{{[0-9]+}}], %fl{{[0-9]+}}
|
||||
; PTX64: ret
|
||||
store double %a, double addrspace(5)* %ptr
|
||||
ret void
|
||||
}
|
69
test/CodeGen/NVPTX/st-generic.ll
Normal file
69
test/CodeGen/NVPTX/st-generic.ll
Normal file
@ -0,0 +1,69 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
|
||||
|
||||
|
||||
;; i8
|
||||
|
||||
define void @st_global_i8(i8 addrspace(0)* %ptr, i8 %a) {
|
||||
; PTX32: st.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
|
||||
; PTX32: ret
|
||||
; PTX64: st.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
|
||||
; PTX64: ret
|
||||
store i8 %a, i8 addrspace(0)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
;; i16
|
||||
|
||||
define void @st_global_i16(i16 addrspace(0)* %ptr, i16 %a) {
|
||||
; PTX32: st.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
|
||||
; PTX32: ret
|
||||
; PTX64: st.u16 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
|
||||
; PTX64: ret
|
||||
store i16 %a, i16 addrspace(0)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
;; i32
|
||||
|
||||
define void @st_global_i32(i32 addrspace(0)* %ptr, i32 %a) {
|
||||
; PTX32: st.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
|
||||
; PTX32: ret
|
||||
; PTX64: st.u32 [%rl{{[0-9]+}}], %r{{[0-9]+}}
|
||||
; PTX64: ret
|
||||
store i32 %a, i32 addrspace(0)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
;; i64
|
||||
|
||||
define void @st_global_i64(i64 addrspace(0)* %ptr, i64 %a) {
|
||||
; PTX32: st.u64 [%r{{[0-9]+}}], %rl{{[0-9]+}}
|
||||
; PTX32: ret
|
||||
; PTX64: st.u64 [%rl{{[0-9]+}}], %rl{{[0-9]+}}
|
||||
; PTX64: ret
|
||||
store i64 %a, i64 addrspace(0)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
;; f32
|
||||
|
||||
define void @st_global_f32(float addrspace(0)* %ptr, float %a) {
|
||||
; PTX32: st.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
|
||||
; PTX32: ret
|
||||
; PTX64: st.f32 [%rl{{[0-9]+}}], %f{{[0-9]+}}
|
||||
; PTX64: ret
|
||||
store float %a, float addrspace(0)* %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
;; f64
|
||||
|
||||
define void @st_global_f64(double addrspace(0)* %ptr, double %a) {
|
||||
; PTX32: st.f64 [%r{{[0-9]+}}], %fl{{[0-9]+}}
|
||||
; PTX32: ret
|
||||
; PTX64: st.f64 [%rl{{[0-9]+}}], %fl{{[0-9]+}}
|
||||
; PTX64: ret
|
||||
store double %a, double addrspace(0)* %ptr
|
||||
ret void
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user