1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

[NVPTX] Add preliminary intrinsics and codegen support for textures/surfaces

This commit adds intrinsics and codegen support for the surface read/write and texture read instructions that take an explicit sampler parameter. Codegen operates on image handles at the PTX level, but falls back to direct replacement of handles with kernel arguments if image handles are not enabled. Note that image handles are explicitly disabled for all target architectures in this change (to be enabled later).

llvm-svn: 205907
This commit is contained in:
Justin Holewinski 2014-04-09 15:39:15 +00:00
parent 80f133a62c
commit b035f9f3e4
18 changed files with 4831 additions and 20 deletions

View File

@ -875,6 +875,14 @@ def int_nvvm_move_ptr : Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty],
[IntrNoMem, NoCapture<0>], "llvm.nvvm.move.ptr">;
// Intrinsics that yield the i64 handle for a texture or surface variable.
// llvm.nvvm.texsurf.handle takes a metadata operand plus an overloaded pointer
// operand (llvm_anyi64ptr_ty); the ".internal" form takes a single overloaded
// pointer operand (llvm_anyptr_ty). Both are declared IntrNoMem.
def int_nvvm_texsurf_handle
  : Intrinsic<[llvm_i64_ty],
              [llvm_metadata_ty, llvm_anyi64ptr_ty],
              [IntrNoMem],
              "llvm.nvvm.texsurf.handle">;
def int_nvvm_texsurf_handle_internal
  : Intrinsic<[llvm_i64_ty],
              [llvm_anyptr_ty],
              [IntrNoMem],
              "llvm.nvvm.texsurf.handle.internal">;
/// Error / Warn
def int_nvvm_compiler_error :
Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.error">;
@ -882,6 +890,918 @@ def int_nvvm_compiler_warn :
Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.warn">;
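// Texture Fetch
// Each intrinsic returns four components (float for v4f32, i32 for v4i32).
// Operands are two i64 values (presumably the texture and sampler handles)
// followed by the coordinates, whose type is given by the last name suffix
// (i32 or f32). Array variants take an extra i32 before the coordinates;
// level and grad variants append additional float operands.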
def int_nvvm_tex_1d_v4f32_i32
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], [],
"llvm.nvvm.tex.1d.v4f32.i32">;
def int_nvvm_tex_1d_v4f32_f32
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_float_ty], [],
"llvm.nvvm.tex.1d.v4f32.f32">;
def int_nvvm_tex_1d_level_v4f32_f32
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
"llvm.nvvm.tex.1d.level.v4f32.f32">;
def int_nvvm_tex_1d_grad_v4f32_f32
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
llvm_float_ty], [],
"llvm.nvvm.tex.1d.grad.v4f32.f32">;
def int_nvvm_tex_1d_v4i32_i32
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], [],
"llvm.nvvm.tex.1d.v4i32.i32">;
def int_nvvm_tex_1d_v4i32_f32
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_float_ty], [],
"llvm.nvvm.tex.1d.v4i32.f32">;
// 1D "level" texture fetch returning four i32 components. Operands are two
// i64 values followed by two floats (presumably coordinate and level of
// detail -- confirm against the PTX tex instruction).
// The name string must mirror the record name like every sibling record; it
// must not carry an extra ".level" suffix.
def int_nvvm_tex_1d_level_v4i32_f32
  : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
              [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
              "llvm.nvvm.tex.1d.level.v4i32.f32">;
def int_nvvm_tex_1d_grad_v4i32_f32
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
llvm_float_ty], [],
"llvm.nvvm.tex.1d.grad.v4i32.f32">;
def int_nvvm_tex_1d_array_v4f32_i32
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.tex.1d.array.v4f32.i32">;
def int_nvvm_tex_1d_array_v4f32_f32
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [],
"llvm.nvvm.tex.1d.array.v4f32.f32">;
def int_nvvm_tex_1d_array_level_v4f32_f32
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
llvm_float_ty], [],
"llvm.nvvm.tex.1d.array.level.v4f32.f32">;
def int_nvvm_tex_1d_array_grad_v4f32_f32
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
llvm_float_ty, llvm_float_ty], [],
"llvm.nvvm.tex.1d.array.grad.v4f32.f32">;
def int_nvvm_tex_1d_array_v4i32_i32
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.tex.1d.array.v4i32.i32">;
def int_nvvm_tex_1d_array_v4i32_f32
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [],
"llvm.nvvm.tex.1d.array.v4i32.f32">;
def int_nvvm_tex_1d_array_level_v4i32_f32
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
llvm_float_ty], [],
"llvm.nvvm.tex.1d.array.level.v4i32.f32">;
def int_nvvm_tex_1d_array_grad_v4i32_f32
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
llvm_float_ty, llvm_float_ty], [],
"llvm.nvvm.tex.1d.array.grad.v4i32.f32">;
def int_nvvm_tex_2d_v4f32_i32
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.tex.2d.v4f32.i32">;
def int_nvvm_tex_2d_v4f32_f32
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
"llvm.nvvm.tex.2d.v4f32.f32">;
def int_nvvm_tex_2d_level_v4f32_f32
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
llvm_float_ty], [],
"llvm.nvvm.tex.2d.level.v4f32.f32">;
def int_nvvm_tex_2d_grad_v4f32_f32
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
"llvm.nvvm.tex.2d.grad.v4f32.f32">;
def int_nvvm_tex_2d_v4i32_i32
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.tex.2d.v4i32.i32">;
def int_nvvm_tex_2d_v4i32_f32
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
"llvm.nvvm.tex.2d.v4i32.f32">;
def int_nvvm_tex_2d_level_v4i32_f32
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
llvm_float_ty], [],
"llvm.nvvm.tex.2d.level.v4i32.f32">;
def int_nvvm_tex_2d_grad_v4i32_f32
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
"llvm.nvvm.tex.2d.grad.v4i32.f32">;
def int_nvvm_tex_2d_array_v4f32_i32
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty], [],
"llvm.nvvm.tex.2d.array.v4f32.i32">;
def int_nvvm_tex_2d_array_v4f32_f32
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
llvm_float_ty], [],
"llvm.nvvm.tex.2d.array.v4f32.f32">;
def int_nvvm_tex_2d_array_level_v4f32_f32
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
llvm_float_ty, llvm_float_ty], [],
"llvm.nvvm.tex.2d.array.level.v4f32.f32">;
def int_nvvm_tex_2d_array_grad_v4f32_f32
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
llvm_float_ty], [],
"llvm.nvvm.tex.2d.array.grad.v4f32.f32">;
def int_nvvm_tex_2d_array_v4i32_i32
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty], [],
"llvm.nvvm.tex.2d.array.v4i32.i32">;
def int_nvvm_tex_2d_array_v4i32_f32
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
llvm_float_ty], [],
"llvm.nvvm.tex.2d.array.v4i32.f32">;
def int_nvvm_tex_2d_array_level_v4i32_f32
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
llvm_float_ty, llvm_float_ty], [],
"llvm.nvvm.tex.2d.array.level.v4i32.f32">;
def int_nvvm_tex_2d_array_grad_v4i32_f32
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
llvm_float_ty], [],
"llvm.nvvm.tex.2d.array.grad.v4i32.f32">;
def int_nvvm_tex_3d_v4f32_i32
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[], "llvm.nvvm.tex.3d.v4f32.i32">;
def int_nvvm_tex_3d_v4f32_f32
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
llvm_float_ty], [],
"llvm.nvvm.tex.3d.v4f32.f32">;
def int_nvvm_tex_3d_level_v4f32_f32
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
llvm_float_ty, llvm_float_ty], [],
"llvm.nvvm.tex.3d.level.v4f32.f32">;
def int_nvvm_tex_3d_grad_v4f32_f32
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
"llvm.nvvm.tex.3d.grad.v4f32.f32">;
def int_nvvm_tex_3d_v4i32_i32
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[], "llvm.nvvm.tex.3d.v4i32.i32">;
def int_nvvm_tex_3d_v4i32_f32
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
llvm_float_ty], [],
"llvm.nvvm.tex.3d.v4i32.f32">;
def int_nvvm_tex_3d_level_v4i32_f32
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
llvm_float_ty, llvm_float_ty], [],
"llvm.nvvm.tex.3d.level.v4i32.f32">;
def int_nvvm_tex_3d_grad_v4i32_f32
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
"llvm.nvvm.tex.3d.grad.v4i32.f32">;
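// Surface Load
// Each intrinsic takes an i64 value (presumably the surface handle) followed
// by i32 coordinates, and returns 1, 2, or 4 elements. The i8 and i16
// variants both return i16 values; the i32 variants return i32 values.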
def int_nvvm_suld_1d_i8_trap
: Intrinsic<[llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.1d.i8.trap">;
def int_nvvm_suld_1d_i16_trap
: Intrinsic<[llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.1d.i16.trap">;
def int_nvvm_suld_1d_i32_trap
: Intrinsic<[llvm_i32_ty],
[llvm_i64_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.1d.i32.trap">;
def int_nvvm_suld_1d_v2i8_trap
: Intrinsic<[llvm_i16_ty, llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.1d.v2i8.trap">;
def int_nvvm_suld_1d_v2i16_trap
: Intrinsic<[llvm_i16_ty, llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.1d.v2i16.trap">;
def int_nvvm_suld_1d_v2i32_trap
: Intrinsic<[llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.1d.v2i32.trap">;
def int_nvvm_suld_1d_v4i8_trap
: Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.1d.v4i8.trap">;
def int_nvvm_suld_1d_v4i16_trap
: Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.1d.v4i16.trap">;
def int_nvvm_suld_1d_v4i32_trap
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.1d.v4i32.trap">;
def int_nvvm_suld_1d_array_i8_trap
: Intrinsic<[llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.1d.array.i8.trap">;
def int_nvvm_suld_1d_array_i16_trap
: Intrinsic<[llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.1d.array.i16.trap">;
def int_nvvm_suld_1d_array_i32_trap
: Intrinsic<[llvm_i32_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.1d.array.i32.trap">;
def int_nvvm_suld_1d_array_v2i8_trap
: Intrinsic<[llvm_i16_ty, llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.1d.array.v2i8.trap">;
def int_nvvm_suld_1d_array_v2i16_trap
: Intrinsic<[llvm_i16_ty, llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.1d.array.v2i16.trap">;
def int_nvvm_suld_1d_array_v2i32_trap
: Intrinsic<[llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.1d.array.v2i32.trap">;
def int_nvvm_suld_1d_array_v4i8_trap
: Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.1d.array.v4i8.trap">;
def int_nvvm_suld_1d_array_v4i16_trap
: Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.1d.array.v4i16.trap">;
def int_nvvm_suld_1d_array_v4i32_trap
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.1d.array.v4i32.trap">;
def int_nvvm_suld_2d_i8_trap
: Intrinsic<[llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.2d.i8.trap">;
def int_nvvm_suld_2d_i16_trap
: Intrinsic<[llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.2d.i16.trap">;
def int_nvvm_suld_2d_i32_trap
: Intrinsic<[llvm_i32_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.2d.i32.trap">;
def int_nvvm_suld_2d_v2i8_trap
: Intrinsic<[llvm_i16_ty, llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.2d.v2i8.trap">;
def int_nvvm_suld_2d_v2i16_trap
: Intrinsic<[llvm_i16_ty, llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.2d.v2i16.trap">;
def int_nvvm_suld_2d_v2i32_trap
: Intrinsic<[llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.2d.v2i32.trap">;
def int_nvvm_suld_2d_v4i8_trap
: Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.2d.v4i8.trap">;
def int_nvvm_suld_2d_v4i16_trap
: Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.2d.v4i16.trap">;
def int_nvvm_suld_2d_v4i32_trap
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.2d.v4i32.trap">;
def int_nvvm_suld_2d_array_i8_trap
: Intrinsic<[llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.2d.array.i8.trap">;
def int_nvvm_suld_2d_array_i16_trap
: Intrinsic<[llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.2d.array.i16.trap">;
def int_nvvm_suld_2d_array_i32_trap
: Intrinsic<[llvm_i32_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.2d.array.i32.trap">;
def int_nvvm_suld_2d_array_v2i8_trap
: Intrinsic<[llvm_i16_ty, llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.2d.array.v2i8.trap">;
def int_nvvm_suld_2d_array_v2i16_trap
: Intrinsic<[llvm_i16_ty, llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.2d.array.v2i16.trap">;
def int_nvvm_suld_2d_array_v2i32_trap
: Intrinsic<[llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.2d.array.v2i32.trap">;
def int_nvvm_suld_2d_array_v4i8_trap
: Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.2d.array.v4i8.trap">;
def int_nvvm_suld_2d_array_v4i16_trap
: Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.2d.array.v4i16.trap">;
def int_nvvm_suld_2d_array_v4i32_trap
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.2d.array.v4i32.trap">;
def int_nvvm_suld_3d_i8_trap
: Intrinsic<[llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.3d.i8.trap">;
def int_nvvm_suld_3d_i16_trap
: Intrinsic<[llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.3d.i16.trap">;
def int_nvvm_suld_3d_i32_trap
: Intrinsic<[llvm_i32_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.3d.i32.trap">;
def int_nvvm_suld_3d_v2i8_trap
: Intrinsic<[llvm_i16_ty, llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.3d.v2i8.trap">;
def int_nvvm_suld_3d_v2i16_trap
: Intrinsic<[llvm_i16_ty, llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.3d.v2i16.trap">;
def int_nvvm_suld_3d_v2i32_trap
: Intrinsic<[llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.3d.v2i32.trap">;
def int_nvvm_suld_3d_v4i8_trap
: Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.3d.v4i8.trap">;
def int_nvvm_suld_3d_v4i16_trap
: Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.3d.v4i16.trap">;
def int_nvvm_suld_3d_v4i32_trap
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.suld.3d.v4i32.trap">;
//===- Texture Query ------------------------------------------------------===//
// Every texture query below has the same shape: one i64 operand in, one i32
// result out, declared IntrNoMem, and paired with a __nvvm_txq_* GCC builtin
// whose name mirrors the intrinsic name.
def int_nvvm_txq_channel_order
  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
              "llvm.nvvm.txq.channel.order">,
    GCCBuiltin<"__nvvm_txq_channel_order">;

def int_nvvm_txq_channel_data_type
  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
              "llvm.nvvm.txq.channel.data.type">,
    GCCBuiltin<"__nvvm_txq_channel_data_type">;

def int_nvvm_txq_width
  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
              "llvm.nvvm.txq.width">,
    GCCBuiltin<"__nvvm_txq_width">;

def int_nvvm_txq_height
  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
              "llvm.nvvm.txq.height">,
    GCCBuiltin<"__nvvm_txq_height">;

def int_nvvm_txq_depth
  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
              "llvm.nvvm.txq.depth">,
    GCCBuiltin<"__nvvm_txq_depth">;

def int_nvvm_txq_array_size
  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
              "llvm.nvvm.txq.array.size">,
    GCCBuiltin<"__nvvm_txq_array_size">;

def int_nvvm_txq_num_samples
  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
              "llvm.nvvm.txq.num.samples">,
    GCCBuiltin<"__nvvm_txq_num_samples">;

def int_nvvm_txq_num_mipmap_levels
  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
              "llvm.nvvm.txq.num.mipmap.levels">,
    GCCBuiltin<"__nvvm_txq_num_mipmap_levels">;
//===- Surface Query ------------------------------------------------------===//
// Every surface query below has the same shape: one i64 operand in, one i32
// result out, declared IntrNoMem, and paired with a __nvvm_suq_* GCC builtin
// whose name mirrors the intrinsic name.
def int_nvvm_suq_channel_order
  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
              "llvm.nvvm.suq.channel.order">,
    GCCBuiltin<"__nvvm_suq_channel_order">;

def int_nvvm_suq_channel_data_type
  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
              "llvm.nvvm.suq.channel.data.type">,
    GCCBuiltin<"__nvvm_suq_channel_data_type">;

def int_nvvm_suq_width
  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
              "llvm.nvvm.suq.width">,
    GCCBuiltin<"__nvvm_suq_width">;

def int_nvvm_suq_height
  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
              "llvm.nvvm.suq.height">,
    GCCBuiltin<"__nvvm_suq_height">;

def int_nvvm_suq_depth
  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
              "llvm.nvvm.suq.depth">,
    GCCBuiltin<"__nvvm_suq_depth">;

def int_nvvm_suq_array_size
  : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
              "llvm.nvvm.suq.array.size">,
    GCCBuiltin<"__nvvm_suq_array_size">;
//===- Handle Query -------------------------------------------------------===//
// Type predicates on a handle: each takes one i64 operand and yields an i1,
// is declared IntrNoMem, and is paired with a __nvvm_istypep_* GCC builtin.
def int_nvvm_istypep_sampler
  : Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem],
              "llvm.nvvm.istypep.sampler">,
    GCCBuiltin<"__nvvm_istypep_sampler">;

def int_nvvm_istypep_surface
  : Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem],
              "llvm.nvvm.istypep.surface">,
    GCCBuiltin<"__nvvm_istypep_surface">;

def int_nvvm_istypep_texture
  : Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem],
              "llvm.nvvm.istypep.texture">,
    GCCBuiltin<"__nvvm_istypep_texture">;
//===- Surface Stores -----------------------------------------------------===//
// Unformatted stores (the sust.b.* intrinsics)
def int_nvvm_sust_b_1d_i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.i8.trap">,
GCCBuiltin<"__nvvm_sust_b_1d_i8_trap">;
def int_nvvm_sust_b_1d_i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.i16.trap">,
GCCBuiltin<"__nvvm_sust_b_1d_i16_trap">;
def int_nvvm_sust_b_1d_i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.1d.i32.trap">,
GCCBuiltin<"__nvvm_sust_b_1d_i32_trap">;
def int_nvvm_sust_b_1d_v2i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.v2i8.trap">,
GCCBuiltin<"__nvvm_sust_b_1d_v2i8_trap">;
def int_nvvm_sust_b_1d_v2i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.v2i16.trap">,
GCCBuiltin<"__nvvm_sust_b_1d_v2i16_trap">;
def int_nvvm_sust_b_1d_v2i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.1d.v2i32.trap">,
GCCBuiltin<"__nvvm_sust_b_1d_v2i32_trap">;
def int_nvvm_sust_b_1d_v4i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.v4i8.trap">,
GCCBuiltin<"__nvvm_sust_b_1d_v4i8_trap">;
def int_nvvm_sust_b_1d_v4i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.v4i16.trap">,
GCCBuiltin<"__nvvm_sust_b_1d_v4i16_trap">;
def int_nvvm_sust_b_1d_v4i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.1d.v4i32.trap">,
GCCBuiltin<"__nvvm_sust_b_1d_v4i32_trap">;
def int_nvvm_sust_b_1d_array_i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.array.i8.trap">,
GCCBuiltin<"__nvvm_sust_b_1d_array_i8_trap">;
def int_nvvm_sust_b_1d_array_i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.array.i16.trap">,
GCCBuiltin<"__nvvm_sust_b_1d_array_i16_trap">;
def int_nvvm_sust_b_1d_array_i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.1d.array.i32.trap">,
GCCBuiltin<"__nvvm_sust_b_1d_array_i32_trap">;
def int_nvvm_sust_b_1d_array_v2i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.array.v2i8.trap">,
GCCBuiltin<"__nvvm_sust_b_1d_array_v2i8_trap">;
def int_nvvm_sust_b_1d_array_v2i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.array.v2i16.trap">,
GCCBuiltin<"__nvvm_sust_b_1d_array_v2i16_trap">;
def int_nvvm_sust_b_1d_array_v2i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.1d.array.v2i32.trap">,
GCCBuiltin<"__nvvm_sust_b_1d_array_v2i32_trap">;
def int_nvvm_sust_b_1d_array_v4i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.array.v4i8.trap">,
GCCBuiltin<"__nvvm_sust_b_1d_array_v4i8_trap">;
def int_nvvm_sust_b_1d_array_v4i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.array.v4i16.trap">,
GCCBuiltin<"__nvvm_sust_b_1d_array_v4i16_trap">;
def int_nvvm_sust_b_1d_array_v4i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.1d.array.v4i32.trap">,
GCCBuiltin<"__nvvm_sust_b_1d_array_v4i32_trap">;
def int_nvvm_sust_b_2d_i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.i8.trap">,
GCCBuiltin<"__nvvm_sust_b_2d_i8_trap">;
def int_nvvm_sust_b_2d_i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.i16.trap">,
GCCBuiltin<"__nvvm_sust_b_2d_i16_trap">;
def int_nvvm_sust_b_2d_i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.2d.i32.trap">,
GCCBuiltin<"__nvvm_sust_b_2d_i32_trap">;
def int_nvvm_sust_b_2d_v2i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.v2i8.trap">,
GCCBuiltin<"__nvvm_sust_b_2d_v2i8_trap">;
def int_nvvm_sust_b_2d_v2i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.v2i16.trap">,
GCCBuiltin<"__nvvm_sust_b_2d_v2i16_trap">;
def int_nvvm_sust_b_2d_v2i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.2d.v2i32.trap">,
GCCBuiltin<"__nvvm_sust_b_2d_v2i32_trap">;
def int_nvvm_sust_b_2d_v4i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.v4i8.trap">,
GCCBuiltin<"__nvvm_sust_b_2d_v4i8_trap">;
def int_nvvm_sust_b_2d_v4i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.v4i16.trap">,
GCCBuiltin<"__nvvm_sust_b_2d_v4i16_trap">;
def int_nvvm_sust_b_2d_v4i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.2d.v4i32.trap">,
GCCBuiltin<"__nvvm_sust_b_2d_v4i32_trap">;
def int_nvvm_sust_b_2d_array_i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.array.i8.trap">,
GCCBuiltin<"__nvvm_sust_b_2d_array_i8_trap">;
def int_nvvm_sust_b_2d_array_i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.array.i16.trap">,
GCCBuiltin<"__nvvm_sust_b_2d_array_i16_trap">;
def int_nvvm_sust_b_2d_array_i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.2d.array.i32.trap">,
GCCBuiltin<"__nvvm_sust_b_2d_array_i32_trap">;
def int_nvvm_sust_b_2d_array_v2i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.array.v2i8.trap">,
GCCBuiltin<"__nvvm_sust_b_2d_array_v2i8_trap">;
def int_nvvm_sust_b_2d_array_v2i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.array.v2i16.trap">,
GCCBuiltin<"__nvvm_sust_b_2d_array_v2i16_trap">;
def int_nvvm_sust_b_2d_array_v2i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.2d.array.v2i32.trap">,
GCCBuiltin<"__nvvm_sust_b_2d_array_v2i32_trap">;
def int_nvvm_sust_b_2d_array_v4i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.array.v4i8.trap">,
GCCBuiltin<"__nvvm_sust_b_2d_array_v4i8_trap">;
def int_nvvm_sust_b_2d_array_v4i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.array.v4i16.trap">,
GCCBuiltin<"__nvvm_sust_b_2d_array_v4i16_trap">;
def int_nvvm_sust_b_2d_array_v4i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.2d.array.v4i32.trap">,
GCCBuiltin<"__nvvm_sust_b_2d_array_v4i32_trap">;
def int_nvvm_sust_b_3d_i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.3d.i8.trap">,
GCCBuiltin<"__nvvm_sust_b_3d_i8_trap">;
def int_nvvm_sust_b_3d_i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.3d.i16.trap">,
GCCBuiltin<"__nvvm_sust_b_3d_i16_trap">;
def int_nvvm_sust_b_3d_i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.3d.i32.trap">,
GCCBuiltin<"__nvvm_sust_b_3d_i32_trap">;
def int_nvvm_sust_b_3d_v2i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.3d.v2i8.trap">,
GCCBuiltin<"__nvvm_sust_b_3d_v2i8_trap">;
def int_nvvm_sust_b_3d_v2i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.3d.v2i16.trap">,
GCCBuiltin<"__nvvm_sust_b_3d_v2i16_trap">;
def int_nvvm_sust_b_3d_v2i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.3d.v2i32.trap">,
GCCBuiltin<"__nvvm_sust_b_3d_v2i32_trap">;
def int_nvvm_sust_b_3d_v4i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.3d.v4i8.trap">,
GCCBuiltin<"__nvvm_sust_b_3d_v4i8_trap">;
def int_nvvm_sust_b_3d_v4i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.3d.v4i16.trap">,
GCCBuiltin<"__nvvm_sust_b_3d_v4i16_trap">;
def int_nvvm_sust_b_3d_v4i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.3d.v4i32.trap">,
GCCBuiltin<"__nvvm_sust_b_3d_v4i32_trap">;
// Formatted stores (the sust.p.* intrinsics)
def int_nvvm_sust_p_1d_i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.1d.i8.trap">,
GCCBuiltin<"__nvvm_sust_p_1d_i8_trap">;
def int_nvvm_sust_p_1d_i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.1d.i16.trap">,
GCCBuiltin<"__nvvm_sust_p_1d_i16_trap">;
def int_nvvm_sust_p_1d_i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.1d.i32.trap">,
GCCBuiltin<"__nvvm_sust_p_1d_i32_trap">;
def int_nvvm_sust_p_1d_v2i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.1d.v2i8.trap">,
GCCBuiltin<"__nvvm_sust_p_1d_v2i8_trap">;
def int_nvvm_sust_p_1d_v2i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.1d.v2i16.trap">,
GCCBuiltin<"__nvvm_sust_p_1d_v2i16_trap">;
def int_nvvm_sust_p_1d_v2i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.1d.v2i32.trap">,
GCCBuiltin<"__nvvm_sust_p_1d_v2i32_trap">;
def int_nvvm_sust_p_1d_v4i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.1d.v4i8.trap">,
GCCBuiltin<"__nvvm_sust_p_1d_v4i8_trap">;
def int_nvvm_sust_p_1d_v4i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.1d.v4i16.trap">,
GCCBuiltin<"__nvvm_sust_p_1d_v4i16_trap">;
def int_nvvm_sust_p_1d_v4i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.1d.v4i32.trap">,
GCCBuiltin<"__nvvm_sust_p_1d_v4i32_trap">;
// Formatted surface-store intrinsics (llvm.nvvm.sust.p.1d.array.*.trap).
// Every record returns nothing and takes an i64 first operand.  In the
// i8/i16 variants two i32 operands precede the i16 values; the number of
// trailing value operands matches the vN suffix (1, 2 or 4).  Both the i8 and
// i16 variants pass values as llvm_i16_ty.
// NOTE(review): presumably the i64 is the surface handle and the two i32
// operands are the array index and x coordinate -- confirm the order against
// the NVPTX lowering code.
def int_nvvm_sust_p_1d_array_i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.1d.array.i8.trap">,
GCCBuiltin<"__nvvm_sust_p_1d_array_i8_trap">;
def int_nvvm_sust_p_1d_array_i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.1d.array.i16.trap">,
GCCBuiltin<"__nvvm_sust_p_1d_array_i16_trap">;
def int_nvvm_sust_p_1d_array_i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.1d.array.i32.trap">,
GCCBuiltin<"__nvvm_sust_p_1d_array_i32_trap">;
def int_nvvm_sust_p_1d_array_v2i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.1d.array.v2i8.trap">,
GCCBuiltin<"__nvvm_sust_p_1d_array_v2i8_trap">;
def int_nvvm_sust_p_1d_array_v2i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.1d.array.v2i16.trap">,
GCCBuiltin<"__nvvm_sust_p_1d_array_v2i16_trap">;
def int_nvvm_sust_p_1d_array_v2i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.1d.array.v2i32.trap">,
GCCBuiltin<"__nvvm_sust_p_1d_array_v2i32_trap">;
def int_nvvm_sust_p_1d_array_v4i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.1d.array.v4i8.trap">,
GCCBuiltin<"__nvvm_sust_p_1d_array_v4i8_trap">;
def int_nvvm_sust_p_1d_array_v4i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.1d.array.v4i16.trap">,
GCCBuiltin<"__nvvm_sust_p_1d_array_v4i16_trap">;
def int_nvvm_sust_p_1d_array_v4i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.1d.array.v4i32.trap">,
GCCBuiltin<"__nvvm_sust_p_1d_array_v4i32_trap">;
// Formatted surface-store intrinsics (llvm.nvvm.sust.p.2d.*.trap).
// Every record returns nothing and takes an i64 first operand.  In the
// i8/i16 variants two i32 operands precede the i16 values; the number of
// trailing value operands matches the vN suffix (1, 2 or 4).  Both the i8 and
// i16 variants pass values as llvm_i16_ty.
// NOTE(review): presumably the i64 is the surface handle and the two i32
// operands are the x and y coordinates -- confirm against the NVPTX lowering
// code.
def int_nvvm_sust_p_2d_i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.2d.i8.trap">,
GCCBuiltin<"__nvvm_sust_p_2d_i8_trap">;
def int_nvvm_sust_p_2d_i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.2d.i16.trap">,
GCCBuiltin<"__nvvm_sust_p_2d_i16_trap">;
def int_nvvm_sust_p_2d_i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.2d.i32.trap">,
GCCBuiltin<"__nvvm_sust_p_2d_i32_trap">;
def int_nvvm_sust_p_2d_v2i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.2d.v2i8.trap">,
GCCBuiltin<"__nvvm_sust_p_2d_v2i8_trap">;
def int_nvvm_sust_p_2d_v2i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.2d.v2i16.trap">,
GCCBuiltin<"__nvvm_sust_p_2d_v2i16_trap">;
def int_nvvm_sust_p_2d_v2i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.2d.v2i32.trap">,
GCCBuiltin<"__nvvm_sust_p_2d_v2i32_trap">;
def int_nvvm_sust_p_2d_v4i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.2d.v4i8.trap">,
GCCBuiltin<"__nvvm_sust_p_2d_v4i8_trap">;
def int_nvvm_sust_p_2d_v4i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.2d.v4i16.trap">,
GCCBuiltin<"__nvvm_sust_p_2d_v4i16_trap">;
def int_nvvm_sust_p_2d_v4i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.2d.v4i32.trap">,
GCCBuiltin<"__nvvm_sust_p_2d_v4i32_trap">;
// Formatted surface-store intrinsics (llvm.nvvm.sust.p.2d.array.*.trap).
// Every record returns nothing and takes an i64 first operand.  In the
// i8/i16 variants three i32 operands precede the i16 values; the number of
// trailing value operands matches the vN suffix (1, 2 or 4).  Both the i8 and
// i16 variants pass values as llvm_i16_ty.
// NOTE(review): presumably the i64 is the surface handle and the three i32
// operands are the array index plus x and y coordinates -- confirm the order
// against the NVPTX lowering code.
def int_nvvm_sust_p_2d_array_i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.2d.array.i8.trap">,
GCCBuiltin<"__nvvm_sust_p_2d_array_i8_trap">;
def int_nvvm_sust_p_2d_array_i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.2d.array.i16.trap">,
GCCBuiltin<"__nvvm_sust_p_2d_array_i16_trap">;
def int_nvvm_sust_p_2d_array_i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.2d.array.i32.trap">,
GCCBuiltin<"__nvvm_sust_p_2d_array_i32_trap">;
def int_nvvm_sust_p_2d_array_v2i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.2d.array.v2i8.trap">,
GCCBuiltin<"__nvvm_sust_p_2d_array_v2i8_trap">;
def int_nvvm_sust_p_2d_array_v2i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.2d.array.v2i16.trap">,
GCCBuiltin<"__nvvm_sust_p_2d_array_v2i16_trap">;
def int_nvvm_sust_p_2d_array_v2i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.2d.array.v2i32.trap">,
GCCBuiltin<"__nvvm_sust_p_2d_array_v2i32_trap">;
def int_nvvm_sust_p_2d_array_v4i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.2d.array.v4i8.trap">,
GCCBuiltin<"__nvvm_sust_p_2d_array_v4i8_trap">;
def int_nvvm_sust_p_2d_array_v4i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.2d.array.v4i16.trap">,
GCCBuiltin<"__nvvm_sust_p_2d_array_v4i16_trap">;
def int_nvvm_sust_p_2d_array_v4i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.2d.array.v4i32.trap">,
GCCBuiltin<"__nvvm_sust_p_2d_array_v4i32_trap">;
// Formatted surface-store intrinsics (llvm.nvvm.sust.p.3d.*.trap).
// Every record returns nothing and takes an i64 first operand.  In the
// i8/i16 variants three i32 operands precede the i16 values; the number of
// trailing value operands matches the vN suffix (1, 2 or 4).  Both the i8 and
// i16 variants pass values as llvm_i16_ty.
// NOTE(review): presumably the i64 is the surface handle and the three i32
// operands are the x, y and z coordinates -- confirm against the NVPTX
// lowering code.
def int_nvvm_sust_p_3d_i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.3d.i8.trap">,
GCCBuiltin<"__nvvm_sust_p_3d_i8_trap">;
def int_nvvm_sust_p_3d_i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.3d.i16.trap">,
GCCBuiltin<"__nvvm_sust_p_3d_i16_trap">;
def int_nvvm_sust_p_3d_i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.3d.i32.trap">,
GCCBuiltin<"__nvvm_sust_p_3d_i32_trap">;
def int_nvvm_sust_p_3d_v2i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.3d.v2i8.trap">,
GCCBuiltin<"__nvvm_sust_p_3d_v2i8_trap">;
def int_nvvm_sust_p_3d_v2i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.3d.v2i16.trap">,
GCCBuiltin<"__nvvm_sust_p_3d_v2i16_trap">;
def int_nvvm_sust_p_3d_v2i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.3d.v2i32.trap">,
GCCBuiltin<"__nvvm_sust_p_3d_v2i32_trap">;
def int_nvvm_sust_p_3d_v4i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.3d.v4i8.trap">,
GCCBuiltin<"__nvvm_sust_p_3d_v4i8_trap">;
def int_nvvm_sust_p_3d_v4i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.3d.v4i16.trap">,
GCCBuiltin<"__nvvm_sust_p_3d_v4i16_trap">;
def int_nvvm_sust_p_3d_v4i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.3d.v4i32.trap">,
GCCBuiltin<"__nvvm_sust_p_3d_v4i32_trap">;
// Old PTX back-end intrinsics retained here for backwards-compatibility
multiclass PTXReadSpecialRegisterIntrinsic_v4i32<string prefix> {

View File

@ -27,6 +27,8 @@ set(NVPTXCodeGen_sources
NVPTXAssignValidGlobalNames.cpp
NVPTXPrologEpilogPass.cpp
NVPTXMCExpr.cpp
NVPTXReplaceImageHandles.cpp
NVPTXImageOptimizer.cpp
)
add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})

View File

@ -67,6 +67,8 @@ FunctionPass *createNVPTXFavorNonGenericAddrSpacesPass();
ModulePass *createNVVMReflectPass();
ModulePass *createNVVMReflectPass(const StringMap<int>& Mapping);
MachineFunctionPass *createNVPTXPrologEpilogPass();
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();
bool isImageOrSamplerVal(const Value *, const Module *);

View File

@ -17,6 +17,7 @@
#include "MCTargetDesc/NVPTXMCAsmInfo.h"
#include "NVPTX.h"
#include "NVPTXInstrInfo.h"
#include "NVPTXMachineFunctionInfo.h"
#include "NVPTXMCExpr.h"
#include "NVPTXRegisterInfo.h"
#include "NVPTXTargetMachine.h"
@ -325,13 +326,279 @@ void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(OutStreamer, Inst);
}
// Handle symbol backtracking for targets that do not support image handles
bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
unsigned OpNo, MCOperand &MCOp) {
const MachineOperand &MO = MI->getOperand(OpNo);
switch (MI->getOpcode()) {
default: return false;
case NVPTX::TEX_1D_F32_I32:
case NVPTX::TEX_1D_F32_F32:
case NVPTX::TEX_1D_F32_F32_LEVEL:
case NVPTX::TEX_1D_F32_F32_GRAD:
case NVPTX::TEX_1D_I32_I32:
case NVPTX::TEX_1D_I32_F32:
case NVPTX::TEX_1D_I32_F32_LEVEL:
case NVPTX::TEX_1D_I32_F32_GRAD:
case NVPTX::TEX_1D_ARRAY_F32_I32:
case NVPTX::TEX_1D_ARRAY_F32_F32:
case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL:
case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD:
case NVPTX::TEX_1D_ARRAY_I32_I32:
case NVPTX::TEX_1D_ARRAY_I32_F32:
case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL:
case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD:
case NVPTX::TEX_2D_F32_I32:
case NVPTX::TEX_2D_F32_F32:
case NVPTX::TEX_2D_F32_F32_LEVEL:
case NVPTX::TEX_2D_F32_F32_GRAD:
case NVPTX::TEX_2D_I32_I32:
case NVPTX::TEX_2D_I32_F32:
case NVPTX::TEX_2D_I32_F32_LEVEL:
case NVPTX::TEX_2D_I32_F32_GRAD:
case NVPTX::TEX_2D_ARRAY_F32_I32:
case NVPTX::TEX_2D_ARRAY_F32_F32:
case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL:
case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD:
case NVPTX::TEX_2D_ARRAY_I32_I32:
case NVPTX::TEX_2D_ARRAY_I32_F32:
case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL:
case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD:
case NVPTX::TEX_3D_F32_I32:
case NVPTX::TEX_3D_F32_F32:
case NVPTX::TEX_3D_F32_F32_LEVEL:
case NVPTX::TEX_3D_F32_F32_GRAD:
case NVPTX::TEX_3D_I32_I32:
case NVPTX::TEX_3D_I32_F32:
case NVPTX::TEX_3D_I32_F32_LEVEL:
case NVPTX::TEX_3D_I32_F32_GRAD:
{
// This is a texture fetch, so operand 4 is a texref and operand 5 is
// a samplerref
if (OpNo == 4) {
lowerImageHandleSymbol(MO.getImm(), MCOp);
return true;
}
if (OpNo == 5) {
lowerImageHandleSymbol(MO.getImm(), MCOp);
return true;
}
return false;
}
case NVPTX::SULD_1D_I8_TRAP:
case NVPTX::SULD_1D_I16_TRAP:
case NVPTX::SULD_1D_I32_TRAP:
case NVPTX::SULD_1D_ARRAY_I8_TRAP:
case NVPTX::SULD_1D_ARRAY_I16_TRAP:
case NVPTX::SULD_1D_ARRAY_I32_TRAP:
case NVPTX::SULD_2D_I8_TRAP:
case NVPTX::SULD_2D_I16_TRAP:
case NVPTX::SULD_2D_I32_TRAP:
case NVPTX::SULD_2D_ARRAY_I8_TRAP:
case NVPTX::SULD_2D_ARRAY_I16_TRAP:
case NVPTX::SULD_2D_ARRAY_I32_TRAP:
case NVPTX::SULD_3D_I8_TRAP:
case NVPTX::SULD_3D_I16_TRAP:
case NVPTX::SULD_3D_I32_TRAP: {
// This is a V1 surface load, so operand 1 is a surfref
if (OpNo == 1) {
lowerImageHandleSymbol(MO.getImm(), MCOp);
return true;
}
return false;
}
case NVPTX::SULD_1D_V2I8_TRAP:
case NVPTX::SULD_1D_V2I16_TRAP:
case NVPTX::SULD_1D_V2I32_TRAP:
case NVPTX::SULD_1D_ARRAY_V2I8_TRAP:
case NVPTX::SULD_1D_ARRAY_V2I16_TRAP:
case NVPTX::SULD_1D_ARRAY_V2I32_TRAP:
case NVPTX::SULD_2D_V2I8_TRAP:
case NVPTX::SULD_2D_V2I16_TRAP:
case NVPTX::SULD_2D_V2I32_TRAP:
case NVPTX::SULD_2D_ARRAY_V2I8_TRAP:
case NVPTX::SULD_2D_ARRAY_V2I16_TRAP:
case NVPTX::SULD_2D_ARRAY_V2I32_TRAP:
case NVPTX::SULD_3D_V2I8_TRAP:
case NVPTX::SULD_3D_V2I16_TRAP:
case NVPTX::SULD_3D_V2I32_TRAP: {
// This is a V2 surface load, so operand 2 is a surfref
if (OpNo == 2) {
lowerImageHandleSymbol(MO.getImm(), MCOp);
return true;
}
return false;
}
case NVPTX::SULD_1D_V4I8_TRAP:
case NVPTX::SULD_1D_V4I16_TRAP:
case NVPTX::SULD_1D_V4I32_TRAP:
case NVPTX::SULD_1D_ARRAY_V4I8_TRAP:
case NVPTX::SULD_1D_ARRAY_V4I16_TRAP:
case NVPTX::SULD_1D_ARRAY_V4I32_TRAP:
case NVPTX::SULD_2D_V4I8_TRAP:
case NVPTX::SULD_2D_V4I16_TRAP:
case NVPTX::SULD_2D_V4I32_TRAP:
case NVPTX::SULD_2D_ARRAY_V4I8_TRAP:
case NVPTX::SULD_2D_ARRAY_V4I16_TRAP:
case NVPTX::SULD_2D_ARRAY_V4I32_TRAP:
case NVPTX::SULD_3D_V4I8_TRAP:
case NVPTX::SULD_3D_V4I16_TRAP:
case NVPTX::SULD_3D_V4I32_TRAP: {
// This is a V4 surface load, so operand 4 is a surfref
if (OpNo == 4) {
lowerImageHandleSymbol(MO.getImm(), MCOp);
return true;
}
return false;
}
case NVPTX::SUST_B_1D_B8_TRAP:
case NVPTX::SUST_B_1D_B16_TRAP:
case NVPTX::SUST_B_1D_B32_TRAP:
case NVPTX::SUST_B_1D_V2B8_TRAP:
case NVPTX::SUST_B_1D_V2B16_TRAP:
case NVPTX::SUST_B_1D_V2B32_TRAP:
case NVPTX::SUST_B_1D_V4B8_TRAP:
case NVPTX::SUST_B_1D_V4B16_TRAP:
case NVPTX::SUST_B_1D_V4B32_TRAP:
case NVPTX::SUST_B_1D_ARRAY_B8_TRAP:
case NVPTX::SUST_B_1D_ARRAY_B16_TRAP:
case NVPTX::SUST_B_1D_ARRAY_B32_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP:
case NVPTX::SUST_B_2D_B8_TRAP:
case NVPTX::SUST_B_2D_B16_TRAP:
case NVPTX::SUST_B_2D_B32_TRAP:
case NVPTX::SUST_B_2D_V2B8_TRAP:
case NVPTX::SUST_B_2D_V2B16_TRAP:
case NVPTX::SUST_B_2D_V2B32_TRAP:
case NVPTX::SUST_B_2D_V4B8_TRAP:
case NVPTX::SUST_B_2D_V4B16_TRAP:
case NVPTX::SUST_B_2D_V4B32_TRAP:
case NVPTX::SUST_B_2D_ARRAY_B8_TRAP:
case NVPTX::SUST_B_2D_ARRAY_B16_TRAP:
case NVPTX::SUST_B_2D_ARRAY_B32_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP:
case NVPTX::SUST_B_3D_B8_TRAP:
case NVPTX::SUST_B_3D_B16_TRAP:
case NVPTX::SUST_B_3D_B32_TRAP:
case NVPTX::SUST_B_3D_V2B8_TRAP:
case NVPTX::SUST_B_3D_V2B16_TRAP:
case NVPTX::SUST_B_3D_V2B32_TRAP:
case NVPTX::SUST_B_3D_V4B8_TRAP:
case NVPTX::SUST_B_3D_V4B16_TRAP:
case NVPTX::SUST_B_3D_V4B32_TRAP:
case NVPTX::SUST_P_1D_B8_TRAP:
case NVPTX::SUST_P_1D_B16_TRAP:
case NVPTX::SUST_P_1D_B32_TRAP:
case NVPTX::SUST_P_1D_V2B8_TRAP:
case NVPTX::SUST_P_1D_V2B16_TRAP:
case NVPTX::SUST_P_1D_V2B32_TRAP:
case NVPTX::SUST_P_1D_V4B8_TRAP:
case NVPTX::SUST_P_1D_V4B16_TRAP:
case NVPTX::SUST_P_1D_V4B32_TRAP:
case NVPTX::SUST_P_1D_ARRAY_B8_TRAP:
case NVPTX::SUST_P_1D_ARRAY_B16_TRAP:
case NVPTX::SUST_P_1D_ARRAY_B32_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP:
case NVPTX::SUST_P_2D_B8_TRAP:
case NVPTX::SUST_P_2D_B16_TRAP:
case NVPTX::SUST_P_2D_B32_TRAP:
case NVPTX::SUST_P_2D_V2B8_TRAP:
case NVPTX::SUST_P_2D_V2B16_TRAP:
case NVPTX::SUST_P_2D_V2B32_TRAP:
case NVPTX::SUST_P_2D_V4B8_TRAP:
case NVPTX::SUST_P_2D_V4B16_TRAP:
case NVPTX::SUST_P_2D_V4B32_TRAP:
case NVPTX::SUST_P_2D_ARRAY_B8_TRAP:
case NVPTX::SUST_P_2D_ARRAY_B16_TRAP:
case NVPTX::SUST_P_2D_ARRAY_B32_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP:
case NVPTX::SUST_P_3D_B8_TRAP:
case NVPTX::SUST_P_3D_B16_TRAP:
case NVPTX::SUST_P_3D_B32_TRAP:
case NVPTX::SUST_P_3D_V2B8_TRAP:
case NVPTX::SUST_P_3D_V2B16_TRAP:
case NVPTX::SUST_P_3D_V2B32_TRAP:
case NVPTX::SUST_P_3D_V4B8_TRAP:
case NVPTX::SUST_P_3D_V4B16_TRAP:
case NVPTX::SUST_P_3D_V4B32_TRAP: {
// This is a surface store, so operand 0 is a surfref
if (OpNo == 0) {
lowerImageHandleSymbol(MO.getImm(), MCOp);
return true;
}
return false;
}
case NVPTX::TXQ_CHANNEL_ORDER:
case NVPTX::TXQ_CHANNEL_DATA_TYPE:
case NVPTX::TXQ_WIDTH:
case NVPTX::TXQ_HEIGHT:
case NVPTX::TXQ_DEPTH:
case NVPTX::TXQ_ARRAY_SIZE:
case NVPTX::TXQ_NUM_SAMPLES:
case NVPTX::TXQ_NUM_MIPMAP_LEVELS:
case NVPTX::SUQ_CHANNEL_ORDER:
case NVPTX::SUQ_CHANNEL_DATA_TYPE:
case NVPTX::SUQ_WIDTH:
case NVPTX::SUQ_HEIGHT:
case NVPTX::SUQ_DEPTH:
case NVPTX::SUQ_ARRAY_SIZE: {
// This is a query, so operand 1 is a surfref/texref
if (OpNo == 1) {
lowerImageHandleSymbol(MO.getImm(), MCOp);
return true;
}
return false;
}
}
}
void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
// Ewwww
TargetMachine &TM = const_cast<TargetMachine&>(MF->getTarget());
NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
const char *Sym = MFI->getImageHandleSymbol(Index);
std::string *SymNamePtr =
nvTM.getManagedStrPool()->getManagedString(Sym);
MCOp = GetSymbolRef(OutContext.GetOrCreateSymbol(
StringRef(SymNamePtr->c_str())));
}
void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
OutMI.setOpcode(MI->getOpcode());
const NVPTXSubtarget &ST = TM.getSubtarget<NVPTXSubtarget>();
// Special: Do not mangle symbol operand of CALL_PROTOTYPE
if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
const MachineOperand &MO = MI->getOperand(0);
OutMI.addOperand(GetSymbolRef(MO,
OutMI.addOperand(GetSymbolRef(
OutContext.GetOrCreateSymbol(Twine(MO.getSymbolName()))));
return;
}
@ -340,6 +607,13 @@ void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
const MachineOperand &MO = MI->getOperand(i);
MCOperand MCOp;
if (!ST.hasImageHandles()) {
if (lowerImageHandleOperand(MI, i, MCOp)) {
OutMI.addOperand(MCOp);
continue;
}
}
if (lowerOperand(MO, MCOp))
OutMI.addOperand(MCOp);
}
@ -360,10 +634,10 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
MO.getMBB()->getSymbol(), OutContext));
break;
case MachineOperand::MO_ExternalSymbol:
MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName()));
MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
break;
case MachineOperand::MO_GlobalAddress:
MCOp = GetSymbolRef(MO, getSymbol(MO.getGlobal()));
MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
break;
case MachineOperand::MO_FPImmediate: {
const ConstantFP *Cnt = MO.getFPImm();
@ -422,8 +696,7 @@ unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
}
}
MCOperand NVPTXAsmPrinter::GetSymbolRef(const MachineOperand &MO,
const MCSymbol *Symbol) {
MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
const MCExpr *Expr;
Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
OutContext);
@ -1512,19 +1785,33 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
first = false;
// Handle image/sampler parameters
if (llvm::isSampler(*I) || llvm::isImage(*I)) {
if (llvm::isImage(*I)) {
std::string sname = I->getName();
if (llvm::isImageWriteOnly(*I))
O << "\t.param .surfref " << *getSymbol(F) << "_param_"
<< paramIndex;
else // Default image is read_only
O << "\t.param .texref " << *getSymbol(F) << "_param_"
<< paramIndex;
} else // Should be llvm::isSampler(*I)
O << "\t.param .samplerref " << *getSymbol(F) << "_param_"
<< paramIndex;
continue;
if (isKernelFunction(*F)) {
if (isSampler(*I) || isImage(*I)) {
if (isImage(*I)) {
std::string sname = I->getName();
if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
if (nvptxSubtarget.hasImageHandles())
O << "\t.param .u64 .ptr .surfref ";
else
O << "\t.param .surfref ";
O << *CurrentFnSym << "_param_" << paramIndex;
}
else { // Default image is read_only
if (nvptxSubtarget.hasImageHandles())
O << "\t.param .u64 .ptr .texref ";
else
O << "\t.param .texref ";
O << *CurrentFnSym << "_param_" << paramIndex;
}
} else {
if (nvptxSubtarget.hasImageHandles())
O << "\t.param .u64 .ptr .samplerref ";
else
O << "\t.param .samplerref ";
O << *CurrentFnSym << "_param_" << paramIndex;
}
continue;
}
}
if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) {

View File

@ -205,7 +205,7 @@ private:
void EmitInstruction(const MachineInstr *);
void lowerToMCInst(const MachineInstr *MI, MCInst &OutMI);
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
MCOperand GetSymbolRef(const MCSymbol *Symbol);
unsigned encodeVirtualRegister(unsigned Reg);
void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const {}
@ -287,6 +287,10 @@ private:
static const char *getRegisterName(unsigned RegNo);
void emitDemotedVars(const Function *, raw_ostream &);
bool lowerImageHandleOperand(const MachineInstr *MI, unsigned OpNo,
MCOperand &MCOp);
void lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp);
LineReader *reader;
LineReader *getReader(std::string);

View File

@ -162,6 +162,98 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
case NVPTXISD::StoreParamU32:
ResNode = SelectStoreParam(N);
break;
case ISD::INTRINSIC_WO_CHAIN:
ResNode = SelectIntrinsicNoChain(N);
break;
case NVPTXISD::Tex1DFloatI32:
case NVPTXISD::Tex1DFloatFloat:
case NVPTXISD::Tex1DFloatFloatLevel:
case NVPTXISD::Tex1DFloatFloatGrad:
case NVPTXISD::Tex1DI32I32:
case NVPTXISD::Tex1DI32Float:
case NVPTXISD::Tex1DI32FloatLevel:
case NVPTXISD::Tex1DI32FloatGrad:
case NVPTXISD::Tex1DArrayFloatI32:
case NVPTXISD::Tex1DArrayFloatFloat:
case NVPTXISD::Tex1DArrayFloatFloatLevel:
case NVPTXISD::Tex1DArrayFloatFloatGrad:
case NVPTXISD::Tex1DArrayI32I32:
case NVPTXISD::Tex1DArrayI32Float:
case NVPTXISD::Tex1DArrayI32FloatLevel:
case NVPTXISD::Tex1DArrayI32FloatGrad:
case NVPTXISD::Tex2DFloatI32:
case NVPTXISD::Tex2DFloatFloat:
case NVPTXISD::Tex2DFloatFloatLevel:
case NVPTXISD::Tex2DFloatFloatGrad:
case NVPTXISD::Tex2DI32I32:
case NVPTXISD::Tex2DI32Float:
case NVPTXISD::Tex2DI32FloatLevel:
case NVPTXISD::Tex2DI32FloatGrad:
case NVPTXISD::Tex2DArrayFloatI32:
case NVPTXISD::Tex2DArrayFloatFloat:
case NVPTXISD::Tex2DArrayFloatFloatLevel:
case NVPTXISD::Tex2DArrayFloatFloatGrad:
case NVPTXISD::Tex2DArrayI32I32:
case NVPTXISD::Tex2DArrayI32Float:
case NVPTXISD::Tex2DArrayI32FloatLevel:
case NVPTXISD::Tex2DArrayI32FloatGrad:
case NVPTXISD::Tex3DFloatI32:
case NVPTXISD::Tex3DFloatFloat:
case NVPTXISD::Tex3DFloatFloatLevel:
case NVPTXISD::Tex3DFloatFloatGrad:
case NVPTXISD::Tex3DI32I32:
case NVPTXISD::Tex3DI32Float:
case NVPTXISD::Tex3DI32FloatLevel:
case NVPTXISD::Tex3DI32FloatGrad:
ResNode = SelectTextureIntrinsic(N);
break;
case NVPTXISD::Suld1DI8Trap:
case NVPTXISD::Suld1DI16Trap:
case NVPTXISD::Suld1DI32Trap:
case NVPTXISD::Suld1DV2I8Trap:
case NVPTXISD::Suld1DV2I16Trap:
case NVPTXISD::Suld1DV2I32Trap:
case NVPTXISD::Suld1DV4I8Trap:
case NVPTXISD::Suld1DV4I16Trap:
case NVPTXISD::Suld1DV4I32Trap:
case NVPTXISD::Suld1DArrayI8Trap:
case NVPTXISD::Suld1DArrayI16Trap:
case NVPTXISD::Suld1DArrayI32Trap:
case NVPTXISD::Suld1DArrayV2I8Trap:
case NVPTXISD::Suld1DArrayV2I16Trap:
case NVPTXISD::Suld1DArrayV2I32Trap:
case NVPTXISD::Suld1DArrayV4I8Trap:
case NVPTXISD::Suld1DArrayV4I16Trap:
case NVPTXISD::Suld1DArrayV4I32Trap:
case NVPTXISD::Suld2DI8Trap:
case NVPTXISD::Suld2DI16Trap:
case NVPTXISD::Suld2DI32Trap:
case NVPTXISD::Suld2DV2I8Trap:
case NVPTXISD::Suld2DV2I16Trap:
case NVPTXISD::Suld2DV2I32Trap:
case NVPTXISD::Suld2DV4I8Trap:
case NVPTXISD::Suld2DV4I16Trap:
case NVPTXISD::Suld2DV4I32Trap:
case NVPTXISD::Suld2DArrayI8Trap:
case NVPTXISD::Suld2DArrayI16Trap:
case NVPTXISD::Suld2DArrayI32Trap:
case NVPTXISD::Suld2DArrayV2I8Trap:
case NVPTXISD::Suld2DArrayV2I16Trap:
case NVPTXISD::Suld2DArrayV2I32Trap:
case NVPTXISD::Suld2DArrayV4I8Trap:
case NVPTXISD::Suld2DArrayV4I16Trap:
case NVPTXISD::Suld2DArrayV4I32Trap:
case NVPTXISD::Suld3DI8Trap:
case NVPTXISD::Suld3DI16Trap:
case NVPTXISD::Suld3DI32Trap:
case NVPTXISD::Suld3DV2I8Trap:
case NVPTXISD::Suld3DV2I16Trap:
case NVPTXISD::Suld3DV2I32Trap:
case NVPTXISD::Suld3DV4I8Trap:
case NVPTXISD::Suld3DV4I16Trap:
case NVPTXISD::Suld3DV4I32Trap:
ResNode = SelectSurfaceIntrinsic(N);
break;
case ISD::ADDRSPACECAST:
ResNode = SelectAddrSpaceCast(N);
break;
@ -194,6 +286,24 @@ static unsigned int getCodeAddrSpace(MemSDNode *N,
return NVPTX::PTXLdStInstCode::GENERIC;
}
// Custom selection for chainless intrinsic nodes.  Returns the selected
// machine node, or NULL when the intrinsic has no special handling here.
SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
  // Operand 0 carries the intrinsic ID as a constant.
  const unsigned IntrinsicID =
      cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();

  // Only the internal texture/surface handle intrinsic is handled.
  if (IntrinsicID == Intrinsic::nvvm_texsurf_handle_internal)
    return SelectTexSurfHandle(N);

  return NULL;
}
// Selects nvvm.texsurf.handle.internal into an NVPTX::texsurf_handles machine
// node that yields an i64.
SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
  // Operand 0 is the intrinsic ID; operand 1 is a wrapper node whose first
  // operand is what gets passed to the machine node.
  SDValue Wrapped = N->getOperand(1).getOperand(0);
  SDLoc DL(N);
  return CurDAG->getMachineNode(NVPTX::texsurf_handles, DL, MVT::i64, Wrapped);
}
SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
SDValue Src = N->getOperand(0);
AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
@ -2371,6 +2481,488 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
return Ret;
}
// Selects an NVPTXISD texture-fetch node into the matching NVPTX::TEX_*
// machine instruction.  Returns NULL for any opcode that is not one of the
// texture nodes listed below.
SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
  // Incoming operand layout: chain, texture ref, sampler ref, then the rest.
  SDValue InChain = N->getOperand(0);
  SDValue Texture = N->getOperand(1);
  SDValue Sampler = N->getOperand(2);

  // Map the ISD opcode onto the machine opcode.
  unsigned MachineOpc = 0;
  switch (N->getOpcode()) {
  default:
    return NULL;
  case NVPTXISD::Tex1DFloatI32: MachineOpc = NVPTX::TEX_1D_F32_I32; break;
  case NVPTXISD::Tex1DFloatFloat: MachineOpc = NVPTX::TEX_1D_F32_F32; break;
  case NVPTXISD::Tex1DFloatFloatLevel: MachineOpc = NVPTX::TEX_1D_F32_F32_LEVEL; break;
  case NVPTXISD::Tex1DFloatFloatGrad: MachineOpc = NVPTX::TEX_1D_F32_F32_GRAD; break;
  case NVPTXISD::Tex1DI32I32: MachineOpc = NVPTX::TEX_1D_I32_I32; break;
  case NVPTXISD::Tex1DI32Float: MachineOpc = NVPTX::TEX_1D_I32_F32; break;
  case NVPTXISD::Tex1DI32FloatLevel: MachineOpc = NVPTX::TEX_1D_I32_F32_LEVEL; break;
  case NVPTXISD::Tex1DI32FloatGrad: MachineOpc = NVPTX::TEX_1D_I32_F32_GRAD; break;
  case NVPTXISD::Tex1DArrayFloatI32: MachineOpc = NVPTX::TEX_1D_ARRAY_F32_I32; break;
  case NVPTXISD::Tex1DArrayFloatFloat: MachineOpc = NVPTX::TEX_1D_ARRAY_F32_F32; break;
  case NVPTXISD::Tex1DArrayFloatFloatLevel: MachineOpc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL; break;
  case NVPTXISD::Tex1DArrayFloatFloatGrad: MachineOpc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD; break;
  case NVPTXISD::Tex1DArrayI32I32: MachineOpc = NVPTX::TEX_1D_ARRAY_I32_I32; break;
  case NVPTXISD::Tex1DArrayI32Float: MachineOpc = NVPTX::TEX_1D_ARRAY_I32_F32; break;
  case NVPTXISD::Tex1DArrayI32FloatLevel: MachineOpc = NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL; break;
  case NVPTXISD::Tex1DArrayI32FloatGrad: MachineOpc = NVPTX::TEX_1D_ARRAY_I32_F32_GRAD; break;
  case NVPTXISD::Tex2DFloatI32: MachineOpc = NVPTX::TEX_2D_F32_I32; break;
  case NVPTXISD::Tex2DFloatFloat: MachineOpc = NVPTX::TEX_2D_F32_F32; break;
  case NVPTXISD::Tex2DFloatFloatLevel: MachineOpc = NVPTX::TEX_2D_F32_F32_LEVEL; break;
  case NVPTXISD::Tex2DFloatFloatGrad: MachineOpc = NVPTX::TEX_2D_F32_F32_GRAD; break;
  case NVPTXISD::Tex2DI32I32: MachineOpc = NVPTX::TEX_2D_I32_I32; break;
  case NVPTXISD::Tex2DI32Float: MachineOpc = NVPTX::TEX_2D_I32_F32; break;
  case NVPTXISD::Tex2DI32FloatLevel: MachineOpc = NVPTX::TEX_2D_I32_F32_LEVEL; break;
  case NVPTXISD::Tex2DI32FloatGrad: MachineOpc = NVPTX::TEX_2D_I32_F32_GRAD; break;
  case NVPTXISD::Tex2DArrayFloatI32: MachineOpc = NVPTX::TEX_2D_ARRAY_F32_I32; break;
  case NVPTXISD::Tex2DArrayFloatFloat: MachineOpc = NVPTX::TEX_2D_ARRAY_F32_F32; break;
  case NVPTXISD::Tex2DArrayFloatFloatLevel: MachineOpc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL; break;
  case NVPTXISD::Tex2DArrayFloatFloatGrad: MachineOpc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD; break;
  case NVPTXISD::Tex2DArrayI32I32: MachineOpc = NVPTX::TEX_2D_ARRAY_I32_I32; break;
  case NVPTXISD::Tex2DArrayI32Float: MachineOpc = NVPTX::TEX_2D_ARRAY_I32_F32; break;
  case NVPTXISD::Tex2DArrayI32FloatLevel: MachineOpc = NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL; break;
  case NVPTXISD::Tex2DArrayI32FloatGrad: MachineOpc = NVPTX::TEX_2D_ARRAY_I32_F32_GRAD; break;
  case NVPTXISD::Tex3DFloatI32: MachineOpc = NVPTX::TEX_3D_F32_I32; break;
  case NVPTXISD::Tex3DFloatFloat: MachineOpc = NVPTX::TEX_3D_F32_F32; break;
  case NVPTXISD::Tex3DFloatFloatLevel: MachineOpc = NVPTX::TEX_3D_F32_F32_LEVEL; break;
  case NVPTXISD::Tex3DFloatFloatGrad: MachineOpc = NVPTX::TEX_3D_F32_F32_GRAD; break;
  case NVPTXISD::Tex3DI32I32: MachineOpc = NVPTX::TEX_3D_I32_I32; break;
  case NVPTXISD::Tex3DI32Float: MachineOpc = NVPTX::TEX_3D_I32_F32; break;
  case NVPTXISD::Tex3DI32FloatLevel: MachineOpc = NVPTX::TEX_3D_I32_F32_LEVEL; break;
  case NVPTXISD::Tex3DI32FloatGrad: MachineOpc = NVPTX::TEX_3D_I32_F32_GRAD; break;
  }

  // Machine-node operand layout: texture ref, sampler ref, the remaining
  // operands in their original order, and the chain last.
  SmallVector<SDValue, 8> MachineOps;
  MachineOps.push_back(Texture);
  MachineOps.push_back(Sampler);
  for (unsigned Idx = 3, End = N->getNumOperands(); Idx != End; ++Idx)
    MachineOps.push_back(N->getOperand(Idx));
  MachineOps.push_back(InChain);

  return CurDAG->getMachineNode(MachineOpc, SDLoc(N), N->getVTList(),
                                MachineOps);
}
/// Select a machine SULD_* instruction for an NVPTXISD::Suld*Trap node.
///
/// The node's operand 0 is the chain and operand 1 is the surface handle;
/// the operands that follow are forwarded unchanged as coordinates.  The
/// machine node receives (handle, coordinates..., chain) and keeps the value
/// type list of \p N.  Returns NULL when \p N is not one of the surface-load
/// opcodes handled here.
SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
  SDValue InChain = N->getOperand(0);
  SDValue SurfHandle = N->getOperand(1);

  // The switch only decides which machine opcode to use and how many
  // coordinate operands (starting at operand 2) have to be copied over.
  unsigned MachineOpc = 0;
  unsigned NumCoords = 0;
  switch (N->getOpcode()) {
  default:
    return NULL;

  // 1D surfaces: one coordinate.
  case NVPTXISD::Suld1DI8Trap:
    MachineOpc = NVPTX::SULD_1D_I8_TRAP;
    NumCoords = 1;
    break;
  case NVPTXISD::Suld1DI16Trap:
    MachineOpc = NVPTX::SULD_1D_I16_TRAP;
    NumCoords = 1;
    break;
  case NVPTXISD::Suld1DI32Trap:
    MachineOpc = NVPTX::SULD_1D_I32_TRAP;
    NumCoords = 1;
    break;
  case NVPTXISD::Suld1DV2I8Trap:
    MachineOpc = NVPTX::SULD_1D_V2I8_TRAP;
    NumCoords = 1;
    break;
  case NVPTXISD::Suld1DV2I16Trap:
    MachineOpc = NVPTX::SULD_1D_V2I16_TRAP;
    NumCoords = 1;
    break;
  case NVPTXISD::Suld1DV2I32Trap:
    MachineOpc = NVPTX::SULD_1D_V2I32_TRAP;
    NumCoords = 1;
    break;
  case NVPTXISD::Suld1DV4I8Trap:
    MachineOpc = NVPTX::SULD_1D_V4I8_TRAP;
    NumCoords = 1;
    break;
  case NVPTXISD::Suld1DV4I16Trap:
    MachineOpc = NVPTX::SULD_1D_V4I16_TRAP;
    NumCoords = 1;
    break;
  case NVPTXISD::Suld1DV4I32Trap:
    MachineOpc = NVPTX::SULD_1D_V4I32_TRAP;
    NumCoords = 1;
    break;

  // 1D array surfaces: two coordinate operands.
  case NVPTXISD::Suld1DArrayI8Trap:
    MachineOpc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
    NumCoords = 2;
    break;
  case NVPTXISD::Suld1DArrayI16Trap:
    MachineOpc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
    NumCoords = 2;
    break;
  case NVPTXISD::Suld1DArrayI32Trap:
    MachineOpc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
    NumCoords = 2;
    break;
  case NVPTXISD::Suld1DArrayV2I8Trap:
    MachineOpc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
    NumCoords = 2;
    break;
  case NVPTXISD::Suld1DArrayV2I16Trap:
    MachineOpc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
    NumCoords = 2;
    break;
  case NVPTXISD::Suld1DArrayV2I32Trap:
    MachineOpc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
    NumCoords = 2;
    break;
  case NVPTXISD::Suld1DArrayV4I8Trap:
    MachineOpc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
    NumCoords = 2;
    break;
  case NVPTXISD::Suld1DArrayV4I16Trap:
    MachineOpc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
    NumCoords = 2;
    break;
  case NVPTXISD::Suld1DArrayV4I32Trap:
    MachineOpc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
    NumCoords = 2;
    break;

  // 2D surfaces: two coordinate operands.
  case NVPTXISD::Suld2DI8Trap:
    MachineOpc = NVPTX::SULD_2D_I8_TRAP;
    NumCoords = 2;
    break;
  case NVPTXISD::Suld2DI16Trap:
    MachineOpc = NVPTX::SULD_2D_I16_TRAP;
    NumCoords = 2;
    break;
  case NVPTXISD::Suld2DI32Trap:
    MachineOpc = NVPTX::SULD_2D_I32_TRAP;
    NumCoords = 2;
    break;
  case NVPTXISD::Suld2DV2I8Trap:
    MachineOpc = NVPTX::SULD_2D_V2I8_TRAP;
    NumCoords = 2;
    break;
  case NVPTXISD::Suld2DV2I16Trap:
    MachineOpc = NVPTX::SULD_2D_V2I16_TRAP;
    NumCoords = 2;
    break;
  case NVPTXISD::Suld2DV2I32Trap:
    MachineOpc = NVPTX::SULD_2D_V2I32_TRAP;
    NumCoords = 2;
    break;
  case NVPTXISD::Suld2DV4I8Trap:
    MachineOpc = NVPTX::SULD_2D_V4I8_TRAP;
    NumCoords = 2;
    break;
  case NVPTXISD::Suld2DV4I16Trap:
    MachineOpc = NVPTX::SULD_2D_V4I16_TRAP;
    NumCoords = 2;
    break;
  case NVPTXISD::Suld2DV4I32Trap:
    MachineOpc = NVPTX::SULD_2D_V4I32_TRAP;
    NumCoords = 2;
    break;

  // 2D array surfaces: three coordinate operands.
  case NVPTXISD::Suld2DArrayI8Trap:
    MachineOpc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
    NumCoords = 3;
    break;
  case NVPTXISD::Suld2DArrayI16Trap:
    MachineOpc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
    NumCoords = 3;
    break;
  case NVPTXISD::Suld2DArrayI32Trap:
    MachineOpc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
    NumCoords = 3;
    break;
  case NVPTXISD::Suld2DArrayV2I8Trap:
    MachineOpc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
    NumCoords = 3;
    break;
  case NVPTXISD::Suld2DArrayV2I16Trap:
    MachineOpc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
    NumCoords = 3;
    break;
  case NVPTXISD::Suld2DArrayV2I32Trap:
    MachineOpc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
    NumCoords = 3;
    break;
  case NVPTXISD::Suld2DArrayV4I8Trap:
    MachineOpc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
    NumCoords = 3;
    break;
  case NVPTXISD::Suld2DArrayV4I16Trap:
    MachineOpc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
    NumCoords = 3;
    break;
  case NVPTXISD::Suld2DArrayV4I32Trap:
    MachineOpc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
    NumCoords = 3;
    break;

  // 3D surfaces: three coordinate operands.
  case NVPTXISD::Suld3DI8Trap:
    MachineOpc = NVPTX::SULD_3D_I8_TRAP;
    NumCoords = 3;
    break;
  case NVPTXISD::Suld3DI16Trap:
    MachineOpc = NVPTX::SULD_3D_I16_TRAP;
    NumCoords = 3;
    break;
  case NVPTXISD::Suld3DI32Trap:
    MachineOpc = NVPTX::SULD_3D_I32_TRAP;
    NumCoords = 3;
    break;
  case NVPTXISD::Suld3DV2I8Trap:
    MachineOpc = NVPTX::SULD_3D_V2I8_TRAP;
    NumCoords = 3;
    break;
  case NVPTXISD::Suld3DV2I16Trap:
    MachineOpc = NVPTX::SULD_3D_V2I16_TRAP;
    NumCoords = 3;
    break;
  case NVPTXISD::Suld3DV2I32Trap:
    MachineOpc = NVPTX::SULD_3D_V2I32_TRAP;
    NumCoords = 3;
    break;
  case NVPTXISD::Suld3DV4I8Trap:
    MachineOpc = NVPTX::SULD_3D_V4I8_TRAP;
    NumCoords = 3;
    break;
  case NVPTXISD::Suld3DV4I16Trap:
    MachineOpc = NVPTX::SULD_3D_V4I16_TRAP;
    NumCoords = 3;
    break;
  case NVPTXISD::Suld3DV4I32Trap:
    MachineOpc = NVPTX::SULD_3D_V4I32_TRAP;
    NumCoords = 3;
    break;
  }

  // Machine operand order: surface handle, coordinates, then the chain.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(SurfHandle);
  for (unsigned Idx = 0; Idx != NumCoords; ++Idx)
    Ops.push_back(N->getOperand(2 + Idx));
  Ops.push_back(InChain);

  return CurDAG->getMachineNode(MachineOpc, SDLoc(N), N->getVTList(), Ops);
}
// SelectDirectAddr - Match a direct address for DAG.
// A direct address could be a globaladdress or externalsymbol.
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {

View File

@ -59,6 +59,8 @@ private:
#include "NVPTXGenDAGISel.inc"
SDNode *Select(SDNode *N);
SDNode *SelectIntrinsicNoChain(SDNode *N);
SDNode *SelectTexSurfHandle(SDNode *N);
SDNode *SelectLoad(SDNode *N);
SDNode *SelectLoadVector(SDNode *N);
SDNode *SelectLDGLDUVector(SDNode *N);
@ -68,6 +70,8 @@ private:
SDNode *SelectStoreRetval(SDNode *N);
SDNode *SelectStoreParam(SDNode *N);
SDNode *SelectAddrSpaceCast(SDNode *N);
SDNode *SelectTextureIntrinsic(SDNode *N);
SDNode *SelectSurfaceIntrinsic(SDNode *N);
inline SDValue getI32Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);

View File

@ -328,6 +328,116 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "NVPTXISD::StoreV2";
case NVPTXISD::StoreV4:
return "NVPTXISD::StoreV4";
case NVPTXISD::Tex1DFloatI32: return "NVPTXISD::Tex1DFloatI32";
case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat";
case NVPTXISD::Tex1DFloatFloatLevel:
return "NVPTXISD::Tex1DFloatFloatLevel";
case NVPTXISD::Tex1DFloatFloatGrad:
return "NVPTXISD::Tex1DFloatFloatGrad";
case NVPTXISD::Tex1DI32I32: return "NVPTXISD::Tex1DI32I32";
case NVPTXISD::Tex1DI32Float: return "NVPTXISD::Tex1DI32Float";
case NVPTXISD::Tex1DI32FloatLevel:
return "NVPTXISD::Tex1DI32FloatLevel";
case NVPTXISD::Tex1DI32FloatGrad:
return "NVPTXISD::Tex1DI32FloatGrad";
// 1D-array texture nodes.  Each string must spell the node's own name: these
// cases previously returned the "Tex2DArray*" names (copy-paste slip), which
// made 1D-array and 2D-array texture nodes indistinguishable in DAG dumps.
case NVPTXISD::Tex1DArrayFloatI32:
  return "NVPTXISD::Tex1DArrayFloatI32";
case NVPTXISD::Tex1DArrayFloatFloat:
  return "NVPTXISD::Tex1DArrayFloatFloat";
case NVPTXISD::Tex1DArrayFloatFloatLevel:
  return "NVPTXISD::Tex1DArrayFloatFloatLevel";
case NVPTXISD::Tex1DArrayFloatFloatGrad:
  return "NVPTXISD::Tex1DArrayFloatFloatGrad";
case NVPTXISD::Tex1DArrayI32I32:
  return "NVPTXISD::Tex1DArrayI32I32";
case NVPTXISD::Tex1DArrayI32Float:
  return "NVPTXISD::Tex1DArrayI32Float";
case NVPTXISD::Tex1DArrayI32FloatLevel:
  return "NVPTXISD::Tex1DArrayI32FloatLevel";
case NVPTXISD::Tex1DArrayI32FloatGrad:
  return "NVPTXISD::Tex1DArrayI32FloatGrad";
case NVPTXISD::Tex2DFloatI32: return "NVPTXISD::Tex2DFloatI32";
case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat";
case NVPTXISD::Tex2DFloatFloatLevel:
return "NVPTXISD::Tex2DFloatFloatLevel";
case NVPTXISD::Tex2DFloatFloatGrad:
return "NVPTXISD::Tex2DFloatFloatGrad";
case NVPTXISD::Tex2DI32I32: return "NVPTXISD::Tex2DI32I32";
case NVPTXISD::Tex2DI32Float: return "NVPTXISD::Tex2DI32Float";
case NVPTXISD::Tex2DI32FloatLevel:
return "NVPTXISD::Tex2DI32FloatLevel";
case NVPTXISD::Tex2DI32FloatGrad:
return "NVPTXISD::Tex2DI32FloatGrad";
case NVPTXISD::Tex2DArrayFloatI32: return "NVPTXISD::Tex2DArrayFloatI32";
case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat";
case NVPTXISD::Tex2DArrayFloatFloatLevel:
return "NVPTXISD::Tex2DArrayFloatFloatLevel";
case NVPTXISD::Tex2DArrayFloatFloatGrad:
return "NVPTXISD::Tex2DArrayFloatFloatGrad";
case NVPTXISD::Tex2DArrayI32I32: return "NVPTXISD::Tex2DArrayI32I32";
case NVPTXISD::Tex2DArrayI32Float: return "NVPTXISD::Tex2DArrayI32Float";
case NVPTXISD::Tex2DArrayI32FloatLevel:
return "NVPTXISD::Tex2DArrayI32FloatLevel";
case NVPTXISD::Tex2DArrayI32FloatGrad:
return "NVPTXISD::Tex2DArrayI32FloatGrad";
case NVPTXISD::Tex3DFloatI32: return "NVPTXISD::Tex3DFloatI32";
case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat";
case NVPTXISD::Tex3DFloatFloatLevel:
return "NVPTXISD::Tex3DFloatFloatLevel";
case NVPTXISD::Tex3DFloatFloatGrad:
return "NVPTXISD::Tex3DFloatFloatGrad";
case NVPTXISD::Tex3DI32I32: return "NVPTXISD::Tex3DI32I32";
case NVPTXISD::Tex3DI32Float: return "NVPTXISD::Tex3DI32Float";
case NVPTXISD::Tex3DI32FloatLevel:
return "NVPTXISD::Tex3DI32FloatLevel";
case NVPTXISD::Tex3DI32FloatGrad:
return "NVPTXISD::Tex3DI32FloatGrad";
case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap";
case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap";
case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap";
case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap";
case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap";
case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap";
case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap";
case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap";
case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap";
case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap";
case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap";
case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap";
case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap";
case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap";
case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap";
case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap";
case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap";
case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap";
case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap";
case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap";
case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap";
case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap";
case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap";
case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap";
case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap";
case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap";
case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap";
case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap";
case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap";
case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap";
case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap";
case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap";
case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap";
case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap";
case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap";
case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap";
case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap";
case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap";
case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap";
case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap";
case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap";
case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap";
case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap";
case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap";
case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap";
}
}
@ -1891,6 +2001,195 @@ bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
return false;
}
static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
switch (Intrinsic) {
default:
return 0;
case Intrinsic::nvvm_tex_1d_v4f32_i32:
return NVPTXISD::Tex1DFloatI32;
case Intrinsic::nvvm_tex_1d_v4f32_f32:
return NVPTXISD::Tex1DFloatFloat;
case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
return NVPTXISD::Tex1DFloatFloatLevel;
case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
return NVPTXISD::Tex1DFloatFloatGrad;
case Intrinsic::nvvm_tex_1d_v4i32_i32:
return NVPTXISD::Tex1DI32I32;
case Intrinsic::nvvm_tex_1d_v4i32_f32:
return NVPTXISD::Tex1DI32Float;
case Intrinsic::nvvm_tex_1d_level_v4i32_f32:
return NVPTXISD::Tex1DI32FloatLevel;
case Intrinsic::nvvm_tex_1d_grad_v4i32_f32:
return NVPTXISD::Tex1DI32FloatGrad;
case Intrinsic::nvvm_tex_1d_array_v4f32_i32:
return NVPTXISD::Tex1DArrayFloatI32;
case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
return NVPTXISD::Tex1DArrayFloatFloat;
case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
return NVPTXISD::Tex1DArrayFloatFloatLevel;
case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
return NVPTXISD::Tex1DArrayFloatFloatGrad;
case Intrinsic::nvvm_tex_1d_array_v4i32_i32:
return NVPTXISD::Tex1DArrayI32I32;
case Intrinsic::nvvm_tex_1d_array_v4i32_f32:
return NVPTXISD::Tex1DArrayI32Float;
case Intrinsic::nvvm_tex_1d_array_level_v4i32_f32:
return NVPTXISD::Tex1DArrayI32FloatLevel;
case Intrinsic::nvvm_tex_1d_array_grad_v4i32_f32:
return NVPTXISD::Tex1DArrayI32FloatGrad;
case Intrinsic::nvvm_tex_2d_v4f32_i32:
return NVPTXISD::Tex2DFloatI32;
case Intrinsic::nvvm_tex_2d_v4f32_f32:
return NVPTXISD::Tex2DFloatFloat;
case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
return NVPTXISD::Tex2DFloatFloatLevel;
case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
return NVPTXISD::Tex2DFloatFloatGrad;
case Intrinsic::nvvm_tex_2d_v4i32_i32:
return NVPTXISD::Tex2DI32I32;
case Intrinsic::nvvm_tex_2d_v4i32_f32:
return NVPTXISD::Tex2DI32Float;
case Intrinsic::nvvm_tex_2d_level_v4i32_f32:
return NVPTXISD::Tex2DI32FloatLevel;
case Intrinsic::nvvm_tex_2d_grad_v4i32_f32:
return NVPTXISD::Tex2DI32FloatGrad;
case Intrinsic::nvvm_tex_2d_array_v4f32_i32:
return NVPTXISD::Tex2DArrayFloatI32;
case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
return NVPTXISD::Tex2DArrayFloatFloat;
case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
return NVPTXISD::Tex2DArrayFloatFloatLevel;
case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
return NVPTXISD::Tex2DArrayFloatFloatGrad;
case Intrinsic::nvvm_tex_2d_array_v4i32_i32:
return NVPTXISD::Tex2DArrayI32I32;
case Intrinsic::nvvm_tex_2d_array_v4i32_f32:
return NVPTXISD::Tex2DArrayI32Float;
case Intrinsic::nvvm_tex_2d_array_level_v4i32_f32:
return NVPTXISD::Tex2DArrayI32FloatLevel;
case Intrinsic::nvvm_tex_2d_array_grad_v4i32_f32:
return NVPTXISD::Tex2DArrayI32FloatGrad;
case Intrinsic::nvvm_tex_3d_v4f32_i32:
return NVPTXISD::Tex3DFloatI32;
case Intrinsic::nvvm_tex_3d_v4f32_f32:
return NVPTXISD::Tex3DFloatFloat;
case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
return NVPTXISD::Tex3DFloatFloatLevel;
case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
return NVPTXISD::Tex3DFloatFloatGrad;
case Intrinsic::nvvm_tex_3d_v4i32_i32:
return NVPTXISD::Tex3DI32I32;
case Intrinsic::nvvm_tex_3d_v4i32_f32:
return NVPTXISD::Tex3DI32Float;
case Intrinsic::nvvm_tex_3d_level_v4i32_f32:
return NVPTXISD::Tex3DI32FloatLevel;
case Intrinsic::nvvm_tex_3d_grad_v4i32_f32:
return NVPTXISD::Tex3DI32FloatGrad;
}
}
/// Translate an llvm.nvvm.suld.*.trap intrinsic ID into the matching NVPTXISD
/// surface-load node opcode.  Any intrinsic that is not listed yields 0.
static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) {
  unsigned NodeOpc = 0;

  switch (Intrinsic) {
  default:
    break;

  // 1D
  case Intrinsic::nvvm_suld_1d_i8_trap:
    NodeOpc = NVPTXISD::Suld1DI8Trap;
    break;
  case Intrinsic::nvvm_suld_1d_i16_trap:
    NodeOpc = NVPTXISD::Suld1DI16Trap;
    break;
  case Intrinsic::nvvm_suld_1d_i32_trap:
    NodeOpc = NVPTXISD::Suld1DI32Trap;
    break;
  case Intrinsic::nvvm_suld_1d_v2i8_trap:
    NodeOpc = NVPTXISD::Suld1DV2I8Trap;
    break;
  case Intrinsic::nvvm_suld_1d_v2i16_trap:
    NodeOpc = NVPTXISD::Suld1DV2I16Trap;
    break;
  case Intrinsic::nvvm_suld_1d_v2i32_trap:
    NodeOpc = NVPTXISD::Suld1DV2I32Trap;
    break;
  case Intrinsic::nvvm_suld_1d_v4i8_trap:
    NodeOpc = NVPTXISD::Suld1DV4I8Trap;
    break;
  case Intrinsic::nvvm_suld_1d_v4i16_trap:
    NodeOpc = NVPTXISD::Suld1DV4I16Trap;
    break;
  case Intrinsic::nvvm_suld_1d_v4i32_trap:
    NodeOpc = NVPTXISD::Suld1DV4I32Trap;
    break;

  // 1D array
  case Intrinsic::nvvm_suld_1d_array_i8_trap:
    NodeOpc = NVPTXISD::Suld1DArrayI8Trap;
    break;
  case Intrinsic::nvvm_suld_1d_array_i16_trap:
    NodeOpc = NVPTXISD::Suld1DArrayI16Trap;
    break;
  case Intrinsic::nvvm_suld_1d_array_i32_trap:
    NodeOpc = NVPTXISD::Suld1DArrayI32Trap;
    break;
  case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
    NodeOpc = NVPTXISD::Suld1DArrayV2I8Trap;
    break;
  case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
    NodeOpc = NVPTXISD::Suld1DArrayV2I16Trap;
    break;
  case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
    NodeOpc = NVPTXISD::Suld1DArrayV2I32Trap;
    break;
  case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
    NodeOpc = NVPTXISD::Suld1DArrayV4I8Trap;
    break;
  case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
    NodeOpc = NVPTXISD::Suld1DArrayV4I16Trap;
    break;
  case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
    NodeOpc = NVPTXISD::Suld1DArrayV4I32Trap;
    break;

  // 2D
  case Intrinsic::nvvm_suld_2d_i8_trap:
    NodeOpc = NVPTXISD::Suld2DI8Trap;
    break;
  case Intrinsic::nvvm_suld_2d_i16_trap:
    NodeOpc = NVPTXISD::Suld2DI16Trap;
    break;
  case Intrinsic::nvvm_suld_2d_i32_trap:
    NodeOpc = NVPTXISD::Suld2DI32Trap;
    break;
  case Intrinsic::nvvm_suld_2d_v2i8_trap:
    NodeOpc = NVPTXISD::Suld2DV2I8Trap;
    break;
  case Intrinsic::nvvm_suld_2d_v2i16_trap:
    NodeOpc = NVPTXISD::Suld2DV2I16Trap;
    break;
  case Intrinsic::nvvm_suld_2d_v2i32_trap:
    NodeOpc = NVPTXISD::Suld2DV2I32Trap;
    break;
  case Intrinsic::nvvm_suld_2d_v4i8_trap:
    NodeOpc = NVPTXISD::Suld2DV4I8Trap;
    break;
  case Intrinsic::nvvm_suld_2d_v4i16_trap:
    NodeOpc = NVPTXISD::Suld2DV4I16Trap;
    break;
  case Intrinsic::nvvm_suld_2d_v4i32_trap:
    NodeOpc = NVPTXISD::Suld2DV4I32Trap;
    break;

  // 2D array
  case Intrinsic::nvvm_suld_2d_array_i8_trap:
    NodeOpc = NVPTXISD::Suld2DArrayI8Trap;
    break;
  case Intrinsic::nvvm_suld_2d_array_i16_trap:
    NodeOpc = NVPTXISD::Suld2DArrayI16Trap;
    break;
  case Intrinsic::nvvm_suld_2d_array_i32_trap:
    NodeOpc = NVPTXISD::Suld2DArrayI32Trap;
    break;
  case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
    NodeOpc = NVPTXISD::Suld2DArrayV2I8Trap;
    break;
  case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
    NodeOpc = NVPTXISD::Suld2DArrayV2I16Trap;
    break;
  case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
    NodeOpc = NVPTXISD::Suld2DArrayV2I32Trap;
    break;
  case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
    NodeOpc = NVPTXISD::Suld2DArrayV4I8Trap;
    break;
  case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
    NodeOpc = NVPTXISD::Suld2DArrayV4I16Trap;
    break;
  case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
    NodeOpc = NVPTXISD::Suld2DArrayV4I32Trap;
    break;

  // 3D
  case Intrinsic::nvvm_suld_3d_i8_trap:
    NodeOpc = NVPTXISD::Suld3DI8Trap;
    break;
  case Intrinsic::nvvm_suld_3d_i16_trap:
    NodeOpc = NVPTXISD::Suld3DI16Trap;
    break;
  case Intrinsic::nvvm_suld_3d_i32_trap:
    NodeOpc = NVPTXISD::Suld3DI32Trap;
    break;
  case Intrinsic::nvvm_suld_3d_v2i8_trap:
    NodeOpc = NVPTXISD::Suld3DV2I8Trap;
    break;
  case Intrinsic::nvvm_suld_3d_v2i16_trap:
    NodeOpc = NVPTXISD::Suld3DV2I16Trap;
    break;
  case Intrinsic::nvvm_suld_3d_v2i32_trap:
    NodeOpc = NVPTXISD::Suld3DV2I32Trap;
    break;
  case Intrinsic::nvvm_suld_3d_v4i8_trap:
    NodeOpc = NVPTXISD::Suld3DV4I8Trap;
    break;
  case Intrinsic::nvvm_suld_3d_v4i16_trap:
    NodeOpc = NVPTXISD::Suld3DV4I16Trap;
    break;
  case Intrinsic::nvvm_suld_3d_v4i32_trap:
    NodeOpc = NVPTXISD::Suld3DV4I32Trap;
    break;
  }

  return NodeOpc;
}
// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
// TgtMemIntrinsic
// because we need the information that is only available in the "Value" type
@ -1944,6 +2243,142 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.align = 0;
return true;
case Intrinsic::nvvm_tex_1d_v4f32_i32:
case Intrinsic::nvvm_tex_1d_v4f32_f32:
case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
case Intrinsic::nvvm_tex_1d_array_v4f32_i32:
case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
case Intrinsic::nvvm_tex_2d_v4f32_i32:
case Intrinsic::nvvm_tex_2d_v4f32_f32:
case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
case Intrinsic::nvvm_tex_2d_array_v4f32_i32:
case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
case Intrinsic::nvvm_tex_3d_v4f32_i32:
case Intrinsic::nvvm_tex_3d_v4f32_f32:
case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: {
Info.opc = getOpcForTextureInstr(Intrinsic);
Info.memVT = MVT::f32;
Info.ptrVal = NULL;
Info.offset = 0;
Info.vol = 0;
Info.readMem = true;
Info.writeMem = false;
Info.align = 16;
return true;
}
case Intrinsic::nvvm_tex_1d_v4i32_i32:
case Intrinsic::nvvm_tex_1d_v4i32_f32:
case Intrinsic::nvvm_tex_1d_level_v4i32_f32:
case Intrinsic::nvvm_tex_1d_grad_v4i32_f32:
case Intrinsic::nvvm_tex_1d_array_v4i32_i32:
case Intrinsic::nvvm_tex_1d_array_v4i32_f32:
case Intrinsic::nvvm_tex_1d_array_level_v4i32_f32:
case Intrinsic::nvvm_tex_1d_array_grad_v4i32_f32:
case Intrinsic::nvvm_tex_2d_v4i32_i32:
case Intrinsic::nvvm_tex_2d_v4i32_f32:
case Intrinsic::nvvm_tex_2d_level_v4i32_f32:
case Intrinsic::nvvm_tex_2d_grad_v4i32_f32:
case Intrinsic::nvvm_tex_2d_array_v4i32_i32:
case Intrinsic::nvvm_tex_2d_array_v4i32_f32:
case Intrinsic::nvvm_tex_2d_array_level_v4i32_f32:
case Intrinsic::nvvm_tex_2d_array_grad_v4i32_f32:
case Intrinsic::nvvm_tex_3d_v4i32_i32:
case Intrinsic::nvvm_tex_3d_v4i32_f32:
case Intrinsic::nvvm_tex_3d_level_v4i32_f32:
case Intrinsic::nvvm_tex_3d_grad_v4i32_f32: {
Info.opc = getOpcForTextureInstr(Intrinsic);
Info.memVT = MVT::i32;
Info.ptrVal = NULL;
Info.offset = 0;
Info.vol = 0;
Info.readMem = true;
Info.writeMem = false;
Info.align = 16;
return true;
}
case Intrinsic::nvvm_suld_1d_i8_trap:
case Intrinsic::nvvm_suld_1d_v2i8_trap:
case Intrinsic::nvvm_suld_1d_v4i8_trap:
case Intrinsic::nvvm_suld_1d_array_i8_trap:
case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
case Intrinsic::nvvm_suld_2d_i8_trap:
case Intrinsic::nvvm_suld_2d_v2i8_trap:
case Intrinsic::nvvm_suld_2d_v4i8_trap:
case Intrinsic::nvvm_suld_2d_array_i8_trap:
case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
case Intrinsic::nvvm_suld_3d_i8_trap:
case Intrinsic::nvvm_suld_3d_v2i8_trap:
case Intrinsic::nvvm_suld_3d_v4i8_trap: {
Info.opc = getOpcForSurfaceInstr(Intrinsic);
Info.memVT = MVT::i8;
Info.ptrVal = NULL;
Info.offset = 0;
Info.vol = 0;
Info.readMem = true;
Info.writeMem = false;
Info.align = 16;
return true;
}
case Intrinsic::nvvm_suld_1d_i16_trap:
case Intrinsic::nvvm_suld_1d_v2i16_trap:
case Intrinsic::nvvm_suld_1d_v4i16_trap:
case Intrinsic::nvvm_suld_1d_array_i16_trap:
case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
case Intrinsic::nvvm_suld_2d_i16_trap:
case Intrinsic::nvvm_suld_2d_v2i16_trap:
case Intrinsic::nvvm_suld_2d_v4i16_trap:
case Intrinsic::nvvm_suld_2d_array_i16_trap:
case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
case Intrinsic::nvvm_suld_3d_i16_trap:
case Intrinsic::nvvm_suld_3d_v2i16_trap:
case Intrinsic::nvvm_suld_3d_v4i16_trap: {
Info.opc = getOpcForSurfaceInstr(Intrinsic);
Info.memVT = MVT::i16;
Info.ptrVal = NULL;
Info.offset = 0;
Info.vol = 0;
Info.readMem = true;
Info.writeMem = false;
Info.align = 16;
return true;
}
case Intrinsic::nvvm_suld_1d_i32_trap:
case Intrinsic::nvvm_suld_1d_v2i32_trap:
case Intrinsic::nvvm_suld_1d_v4i32_trap:
case Intrinsic::nvvm_suld_1d_array_i32_trap:
case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
case Intrinsic::nvvm_suld_2d_i32_trap:
case Intrinsic::nvvm_suld_2d_v2i32_trap:
case Intrinsic::nvvm_suld_2d_v4i32_trap:
case Intrinsic::nvvm_suld_2d_array_i32_trap:
case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
case Intrinsic::nvvm_suld_3d_i32_trap:
case Intrinsic::nvvm_suld_3d_v2i32_trap:
case Intrinsic::nvvm_suld_3d_v4i32_trap: {
Info.opc = getOpcForSurfaceInstr(Intrinsic);
Info.memVT = MVT::i32;
Info.ptrVal = NULL;
Info.offset = 0;
Info.vol = 0;
Info.readMem = true;
Info.writeMem = false;
Info.align = 16;
return true;
}
}
return false;
}

View File

@ -70,7 +70,100 @@ enum NodeType {
StoreParamU32, // to zext and store a <32bit value, not used currently
StoreRetval,
StoreRetvalV2,
StoreRetvalV4
StoreRetvalV4,
// Texture intrinsics
Tex1DFloatI32,
Tex1DFloatFloat,
Tex1DFloatFloatLevel,
Tex1DFloatFloatGrad,
Tex1DI32I32,
Tex1DI32Float,
Tex1DI32FloatLevel,
Tex1DI32FloatGrad,
Tex1DArrayFloatI32,
Tex1DArrayFloatFloat,
Tex1DArrayFloatFloatLevel,
Tex1DArrayFloatFloatGrad,
Tex1DArrayI32I32,
Tex1DArrayI32Float,
Tex1DArrayI32FloatLevel,
Tex1DArrayI32FloatGrad,
Tex2DFloatI32,
Tex2DFloatFloat,
Tex2DFloatFloatLevel,
Tex2DFloatFloatGrad,
Tex2DI32I32,
Tex2DI32Float,
Tex2DI32FloatLevel,
Tex2DI32FloatGrad,
Tex2DArrayFloatI32,
Tex2DArrayFloatFloat,
Tex2DArrayFloatFloatLevel,
Tex2DArrayFloatFloatGrad,
Tex2DArrayI32I32,
Tex2DArrayI32Float,
Tex2DArrayI32FloatLevel,
Tex2DArrayI32FloatGrad,
Tex3DFloatI32,
Tex3DFloatFloat,
Tex3DFloatFloatLevel,
Tex3DFloatFloatGrad,
Tex3DI32I32,
Tex3DI32Float,
Tex3DI32FloatLevel,
Tex3DI32FloatGrad,
// Surface intrinsics
Suld1DI8Trap,
Suld1DI16Trap,
Suld1DI32Trap,
Suld1DV2I8Trap,
Suld1DV2I16Trap,
Suld1DV2I32Trap,
Suld1DV4I8Trap,
Suld1DV4I16Trap,
Suld1DV4I32Trap,
Suld1DArrayI8Trap,
Suld1DArrayI16Trap,
Suld1DArrayI32Trap,
Suld1DArrayV2I8Trap,
Suld1DArrayV2I16Trap,
Suld1DArrayV2I32Trap,
Suld1DArrayV4I8Trap,
Suld1DArrayV4I16Trap,
Suld1DArrayV4I32Trap,
Suld2DI8Trap,
Suld2DI16Trap,
Suld2DI32Trap,
Suld2DV2I8Trap,
Suld2DV2I16Trap,
Suld2DV2I32Trap,
Suld2DV4I8Trap,
Suld2DV4I16Trap,
Suld2DV4I32Trap,
Suld2DArrayI8Trap,
Suld2DArrayI16Trap,
Suld2DArrayI32Trap,
Suld2DArrayV2I8Trap,
Suld2DArrayV2I16Trap,
Suld2DArrayV2I32Trap,
Suld2DArrayV4I8Trap,
Suld2DArrayV4I16Trap,
Suld2DArrayV4I32Trap,
Suld3DI8Trap,
Suld3DI16Trap,
Suld3DI32Trap,
Suld3DV2I8Trap,
Suld3DV2I16Trap,
Suld3DV2I32Trap,
Suld3DV4I8Trap,
Suld3DV4I16Trap,
Suld3DV4I32Trap
};
}

View File

@ -0,0 +1,178 @@
//===-- NVPTXImageOptimizer.cpp - Image optimization pass -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass implements IR-level optimizations of image access code,
// including:
//
// 1. Eliminate istypep intrinsics when image access qualifier is known
//
//===----------------------------------------------------------------------===//
#include "NVPTX.h"
#include "NVPTXUtilities.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ConstantFolding.h"
using namespace llvm;
namespace {
// Function pass that folds calls to the nvvm.istypep.{sampler,surface,texture}
// intrinsics into constant true/false when the kind of the queried handle can
// be determined.
class NVPTXImageOptimizer : public FunctionPass {
private:
// Pass identifier; passed to the FunctionPass constructor.
static char ID;
// Instructions queued for deletion; runOnFunction clears this list on entry
// and erases every queued instruction after the scan.
SmallVector<Instruction*, 4> InstrToDelete;
public:
NVPTXImageOptimizer();
// Scans every instruction of F for istypep intrinsic calls and tries to
// replace them.  Returns true if any replacement was made.
bool runOnFunction(Function &F);
private:
// Per-intrinsic folders.  The sampler and surface variants return true when
// the call was replaced by a constant and false when the handle kind is
// unknown; the texture variant follows the same pattern.
bool replaceIsTypePSampler(Instruction &I);
bool replaceIsTypePSurface(Instruction &I);
bool replaceIsTypePTexture(Instruction &I);
// Applied to the intrinsic's handle operand before it is classified.
// NOTE(review): presumably strips wrappers around the underlying handle
// value; definition not visible here -- confirm.
Value *cleanupValue(Value *V);
// Called by the folders to substitute the constant To for the call From.
// NOTE(review): presumably also queues From in InstrToDelete -- confirm
// against the definition.
void replaceWith(Instruction *From, ConstantInt *To);
};
}
// Definition of the static pass identifier declared in the class; it is
// handed to the FunctionPass base-class constructor below.
char NVPTXImageOptimizer::ID = 0;
// Construct the pass, forwarding the pass identifier to FunctionPass.
NVPTXImageOptimizer::NVPTXImageOptimizer()
: FunctionPass(ID) {}
/// Walk every instruction of \p F and try to fold calls to the
/// nvvm.istypep.{sampler,surface,texture} intrinsics.  Instructions queued in
/// InstrToDelete during the walk are erased once the walk is finished, so no
/// instruction is removed while the blocks are still being iterated.
///
/// \returns true if at least one intrinsic call was replaced.
bool NVPTXImageOptimizer::runOnFunction(Function &F) {
  bool Modified = false;
  InstrToDelete.clear();

  for (Function::iterator BB = F.begin(), BBEnd = F.end(); BB != BBEnd; ++BB) {
    for (BasicBlock::iterator It = BB->begin(), ItEnd = BB->end();
         It != ItEnd; ++It) {
      // Only direct calls to intrinsic functions are of interest.
      CallInst *Call = dyn_cast<CallInst>(It);
      if (!Call)
        continue;
      Function *Callee = Call->getCalledFunction();
      if (!Callee || !Callee->isIntrinsic())
        continue;

      // Dispatch on the istypep flavour; every other intrinsic is ignored.
      Instruction &Inst = *It;
      switch (Callee->getIntrinsicID()) {
      case Intrinsic::nvvm_istypep_sampler:
        if (replaceIsTypePSampler(Inst))
          Modified = true;
        break;
      case Intrinsic::nvvm_istypep_surface:
        if (replaceIsTypePSurface(Inst))
          Modified = true;
        break;
      case Intrinsic::nvvm_istypep_texture:
        if (replaceIsTypePTexture(Inst))
          Modified = true;
        break;
      default:
        break;
      }
    }
  }

  // Now that iteration is over, erase everything that was queued.
  for (SmallVector<Instruction*, 4>::iterator DI = InstrToDelete.begin(),
                                              DE = InstrToDelete.end();
       DI != DE; ++DI)
    (*DI)->eraseFromParent();

  return Modified;
}
/// Folds an istypep.sampler query \p I when the kind of its handle operand
/// is known.
///
/// \returns true if \p I was replaced by a constant, false if the handle kind
/// could not be determined and the call was left untouched.
bool NVPTXImageOptimizer::replaceIsTypePSampler(Instruction &I) {
  Value *Handle = cleanupValue(I.getOperand(0));

  // A handle recognised as a sampler is necessarily a samplerref.
  if (isSampler(*Handle)) {
    replaceWith(&I, ConstantInt::getTrue(I.getContext()));
    return true;
  }

  // Any kind of image (write-only, read-write or read-only) cannot be a
  // samplerref.
  const bool IsImage = isImageWriteOnly(*Handle) ||
                       isImageReadWrite(*Handle) ||
                       isImageReadOnly(*Handle);
  if (!IsImage)
    return false; // Unknown handle kind: keep the intrinsic.

  replaceWith(&I, ConstantInt::getFalse(I.getContext()));
  return true;
}
/// Folds an istypep.surface query \p I when the kind of its handle operand
/// is known.
///
/// \returns true if \p I was replaced by a constant, false if the handle kind
/// could not be determined and the call was left untouched.
bool NVPTXImageOptimizer::replaceIsTypePSurface(Instruction &I) {
  Value *Handle = cleanupValue(I.getOperand(0));

  // Read-write and write-only images are accessed as surfaces, so the handle
  // must be a surfref.
  const bool IsWritableImage = isImageReadWrite(*Handle) ||
                               isImageWriteOnly(*Handle);
  if (IsWritableImage) {
    replaceWith(&I, ConstantInt::getTrue(I.getContext()));
    return true;
  }

  // A read-only image or a sampler can never be a surfref.
  const bool IsNotSurface = isImageReadOnly(*Handle) ||
                            isSampler(*Handle);
  if (!IsNotSurface)
    return false; // Unknown handle kind: keep the intrinsic.

  replaceWith(&I, ConstantInt::getFalse(I.getContext()));
  return true;
}
/// Folds an istypep.texture query \p I when the kind of its handle operand
/// is known.
///
/// \returns true if \p I was replaced by a constant, false if the handle kind
/// could not be determined and the call was left untouched.
bool NVPTXImageOptimizer::replaceIsTypePTexture(Instruction &I) {
  Value *Handle = cleanupValue(I.getOperand(0));

  // A read-only image is accessed as a texture, so the handle must be a
  // texref.
  if (isImageReadOnly(*Handle)) {
    replaceWith(&I, ConstantInt::getTrue(I.getContext()));
    return true;
  }

  // Write-only images, read-write images and samplers cannot be texrefs.
  const bool IsNotTexture = isImageWriteOnly(*Handle) ||
                            isImageReadWrite(*Handle) ||
                            isSampler(*Handle);
  if (!IsNotTexture)
    return false; // Unknown handle kind: keep the intrinsic.

  replaceWith(&I, ConstantInt::getFalse(I.getContext()));
  return true;
}
/// Replaces every use of \p From with the constant \p To and queues \p From
/// for deletion.
///
/// As a "poor man's DCE", any conditional branch whose condition is \p From
/// is folded to an unconditional branch to the successor selected by \p To,
/// so that code which is no longer live becomes unreachable and can be
/// trivially removed by the unreachable block elimination pass.
///
/// \param From the istypep call being folded.
/// \param To   the boolean constant the call evaluates to.
void NVPTXImageOptimizer::replaceWith(Instruction *From, ConstantInt *To) {
  // Collect the conditional branches controlled by From. This walks the
  // *users* of From: dereferencing a use iterator yields a Use, which casts
  // through the used value (From itself) and therefore never matches a
  // BranchInst. Collecting first also keeps From's user list untouched while
  // it is being iterated, since the CFG update below may drop PHI operands
  // that reference From.
  SmallVector<BranchInst *, 4> CondBranches;
  for (Value::user_iterator UI = From->user_begin(), UE = From->user_end();
       UI != UE; ++UI) {
    BranchInst *BI = dyn_cast<BranchInst>(*UI);
    if (BI && BI->isConditional())
      CondBranches.push_back(BI);
  }

  for (unsigned i = 0, e = CondBranches.size(); i != e; ++i) {
    BranchInst *BI = CondBranches[i];
    // Successor 0 is the true block, successor 1 the false block.
    const unsigned TakenIdx = To->isZero() ? 1 : 0;
    BasicBlock *Taken = BI->getSuccessor(TakenIdx);
    BasicBlock *Dropped = BI->getSuccessor(1 - TakenIdx);

    // One edge out of this block disappears; let the successor that loses it
    // fix up its PHI nodes so the IR stays well-formed.
    Dropped->removePredecessor(BI->getParent());

    // Insert the unconditional branch before the old one and defer erasing
    // the old branch to the end of runOnFunction.
    BranchInst::Create(Taken, BI);
    InstrToDelete.push_back(BI);
  }

  From->replaceAllUsesWith(To);
  InstrToDelete.push_back(From);
}
/// Strips any chain of extractvalue instructions from \p V and returns the
/// innermost aggregate operand. Values that are not extractvalue
/// instructions are returned unchanged.
Value *NVPTXImageOptimizer::cleanupValue(Value *V) {
  Value *Cur = V;
  // Walk up through nested extractvalue instructions until something else
  // is reached.
  while (ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(Cur))
    Cur = Extract->getAggregateOperand();
  return Cur;
}
// Factory for the image optimizer pass: returns a newly heap-allocated
// NVPTXImageOptimizer as a FunctionPass pointer.
FunctionPass *llvm::createNVPTXImageOptimizerPass() {
return new NVPTXImageOptimizer();
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,46 @@
//===-- NVPTXMachineFunctionInfo.h - NVPTX-specific Function Info --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This class is attached to a MachineFunction instance and tracks target-
// dependent information
//
//===----------------------------------------------------------------------===//
#ifndef NVPTXMACHINEFUNCTIONINFO_H
#define NVPTXMACHINEFUNCTIONINFO_H

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include <cassert>
#include <string>

namespace llvm {
/// Per-function NVPTX state: an ordered list of image-handle symbol names in
/// which each distinct name is stored once and addressed by its index.
class NVPTXMachineFunctionInfo : public MachineFunctionInfo {
private:
  /// Stores a mapping from index to symbol name for removing image handles
  /// on Fermi.
  SmallVector<std::string, 8> ImageHandleList;

public:
  NVPTXMachineFunctionInfo(MachineFunction &MF) {}

  /// Returns the index for the symbol \p Symbol. If the symbol was previously
  /// added, the same index is returned. Otherwise, the symbol is added and the
  /// new index is returned.
  ///
  /// \param Symbol null-terminated symbol name; must not be null.
  unsigned getImageHandleSymbolIndex(const char *Symbol) {
    assert(Symbol && "Null image handle symbol");
    // Is the symbol already present? std::string compares directly against
    // the C string, so no temporary is built per element.
    for (unsigned i = 0, e = ImageHandleList.size(); i != e; ++i)
      if (ImageHandleList[i] == Symbol)
        return i;
    // Nope, insert it
    ImageHandleList.push_back(Symbol);
    return ImageHandleList.size()-1;
  }

  /// Returns the symbol name at the given index.
  ///
  /// \param Idx an index previously returned by getImageHandleSymbolIndex.
  const char *getImageHandleSymbol(unsigned Idx) const {
    assert(ImageHandleList.size() > Idx && "Bad index");
    return ImageHandleList[Idx].c_str();
  }
};
}

#endif

View File

@ -0,0 +1,357 @@
//===-- NVPTXReplaceImageHandles.cpp - Replace image handles for Fermi ----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// On Fermi, image handles are not supported. To work around this, we traverse
// the machine code and replace image handles with concrete symbols. For this
// to work reliably, all function calls must be inlined.
//
//===----------------------------------------------------------------------===//
#include "NVPTX.h"
#include "NVPTXMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseSet.h"
using namespace llvm;
namespace {
/// Machine-function pass that rewrites the image-handle operands of texture
/// and surface instructions into indices of concrete symbol names, and then
/// removes the instructions that produced those handles.
class NVPTXReplaceImageHandles : public MachineFunctionPass {
public:
  NVPTXReplaceImageHandles();

  /// Processes every instruction of \p MF. Returns true if any instruction
  /// was rewritten.
  bool runOnMachineFunction(MachineFunction &MF);

private:
  /// Rewrites the handle operands of \p MI if it is a texture/surface
  /// instruction. Returns true if \p MI was such an instruction.
  bool processInstr(MachineInstr &MI);

  /// Replaces the register operand \p Op with an immediate symbol index and
  /// queues the instruction defining the register for removal.
  void replaceImageHandle(MachineOperand &Op, MachineFunction &MF);

  /// Pass identifier handed to the MachineFunctionPass base class.
  static char ID;

  /// Handle-defining instructions to erase at the end of the current run.
  DenseSet<MachineInstr *> InstrsToRemove;
};
}
// Out-of-class definition of the static pass identifier, initialised to 0.
char NVPTXReplaceImageHandles::ID = 0;
// Constructs the pass, forwarding ID to the MachineFunctionPass base class.
NVPTXReplaceImageHandles::NVPTXReplaceImageHandles()
: MachineFunctionPass(ID) {}
/// Runs processInstr over every machine instruction in \p MF, then erases
/// the handle-defining instructions that were queued along the way.
///
/// \returns true if any instruction had a handle operand rewritten.
bool NVPTXReplaceImageHandles::runOnMachineFunction(MachineFunction &MF) {
  // The pass object is reused across functions, so drop any stale entries.
  InstrsToRemove.clear();
  bool Changed = false;

  for (MachineFunction::iterator MBB = MF.begin(), MBBEnd = MF.end();
       MBB != MBBEnd; ++MBB) {
    for (MachineBasicBlock::iterator MII = MBB->begin(), MIIEnd = MBB->end();
         MII != MIIEnd; ++MII) {
      if (processInstr(*MII))
        Changed = true;
    }
  }

  // Remove the handle-access instructions explicitly. In debug mode the
  // usual code cleanup passes do not run, yet these instructions are not
  // valid when image handles are disabled and therefore must not survive.
  DenseSet<MachineInstr *>::iterator Dead = InstrsToRemove.begin();
  DenseSet<MachineInstr *>::iterator DeadEnd = InstrsToRemove.end();
  while (Dead != DeadEnd) {
    (*Dead)->eraseFromParent();
    ++Dead;
  }

  return Changed;
}
// Rewrites the image-handle operands of a single machine instruction.
//
// Dispatches on MI's opcode: for texture fetches, surface loads, surface
// stores and texture/surface queries, every operand that holds a handle is
// passed to replaceImageHandle(). The operand position of the handle differs
// between the opcode groups and is noted at each group.
//
// Returns true if MI belongs to one of the handled opcode groups (its handle
// operands have then been rewritten) and false for any other opcode.
bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
// The enclosing function is needed by replaceImageHandle().
MachineFunction &MF = *MI.getParent()->getParent();
// Check if we have a surface/texture instruction
switch (MI.getOpcode()) {
// Not a texture/surface instruction: nothing to rewrite.
default: return false;
case NVPTX::TEX_1D_F32_I32:
case NVPTX::TEX_1D_F32_F32:
case NVPTX::TEX_1D_F32_F32_LEVEL:
case NVPTX::TEX_1D_F32_F32_GRAD:
case NVPTX::TEX_1D_I32_I32:
case NVPTX::TEX_1D_I32_F32:
case NVPTX::TEX_1D_I32_F32_LEVEL:
case NVPTX::TEX_1D_I32_F32_GRAD:
case NVPTX::TEX_1D_ARRAY_F32_I32:
case NVPTX::TEX_1D_ARRAY_F32_F32:
case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL:
case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD:
case NVPTX::TEX_1D_ARRAY_I32_I32:
case NVPTX::TEX_1D_ARRAY_I32_F32:
case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL:
case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD:
case NVPTX::TEX_2D_F32_I32:
case NVPTX::TEX_2D_F32_F32:
case NVPTX::TEX_2D_F32_F32_LEVEL:
case NVPTX::TEX_2D_F32_F32_GRAD:
case NVPTX::TEX_2D_I32_I32:
case NVPTX::TEX_2D_I32_F32:
case NVPTX::TEX_2D_I32_F32_LEVEL:
case NVPTX::TEX_2D_I32_F32_GRAD:
case NVPTX::TEX_2D_ARRAY_F32_I32:
case NVPTX::TEX_2D_ARRAY_F32_F32:
case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL:
case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD:
case NVPTX::TEX_2D_ARRAY_I32_I32:
case NVPTX::TEX_2D_ARRAY_I32_F32:
case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL:
case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD:
case NVPTX::TEX_3D_F32_I32:
case NVPTX::TEX_3D_F32_F32:
case NVPTX::TEX_3D_F32_F32_LEVEL:
case NVPTX::TEX_3D_F32_F32_GRAD:
case NVPTX::TEX_3D_I32_I32:
case NVPTX::TEX_3D_I32_F32:
case NVPTX::TEX_3D_I32_F32_LEVEL:
case NVPTX::TEX_3D_I32_F32_GRAD: {
// This is a texture fetch, so operand 4 is a texref and operand 5 is
// a samplerref
MachineOperand &TexHandle = MI.getOperand(4);
MachineOperand &SampHandle = MI.getOperand(5);
replaceImageHandle(TexHandle, MF);
replaceImageHandle(SampHandle, MF);
return true;
}
case NVPTX::SULD_1D_I8_TRAP:
case NVPTX::SULD_1D_I16_TRAP:
case NVPTX::SULD_1D_I32_TRAP:
case NVPTX::SULD_1D_ARRAY_I8_TRAP:
case NVPTX::SULD_1D_ARRAY_I16_TRAP:
case NVPTX::SULD_1D_ARRAY_I32_TRAP:
case NVPTX::SULD_2D_I8_TRAP:
case NVPTX::SULD_2D_I16_TRAP:
case NVPTX::SULD_2D_I32_TRAP:
case NVPTX::SULD_2D_ARRAY_I8_TRAP:
case NVPTX::SULD_2D_ARRAY_I16_TRAP:
case NVPTX::SULD_2D_ARRAY_I32_TRAP:
case NVPTX::SULD_3D_I8_TRAP:
case NVPTX::SULD_3D_I16_TRAP:
case NVPTX::SULD_3D_I32_TRAP: {
// This is a V1 surface load, so operand 1 is a surfref
MachineOperand &SurfHandle = MI.getOperand(1);
replaceImageHandle(SurfHandle, MF);
return true;
}
case NVPTX::SULD_1D_V2I8_TRAP:
case NVPTX::SULD_1D_V2I16_TRAP:
case NVPTX::SULD_1D_V2I32_TRAP:
case NVPTX::SULD_1D_ARRAY_V2I8_TRAP:
case NVPTX::SULD_1D_ARRAY_V2I16_TRAP:
case NVPTX::SULD_1D_ARRAY_V2I32_TRAP:
case NVPTX::SULD_2D_V2I8_TRAP:
case NVPTX::SULD_2D_V2I16_TRAP:
case NVPTX::SULD_2D_V2I32_TRAP:
case NVPTX::SULD_2D_ARRAY_V2I8_TRAP:
case NVPTX::SULD_2D_ARRAY_V2I16_TRAP:
case NVPTX::SULD_2D_ARRAY_V2I32_TRAP:
case NVPTX::SULD_3D_V2I8_TRAP:
case NVPTX::SULD_3D_V2I16_TRAP:
case NVPTX::SULD_3D_V2I32_TRAP: {
// This is a V2 surface load, so operand 2 is a surfref
MachineOperand &SurfHandle = MI.getOperand(2);
replaceImageHandle(SurfHandle, MF);
return true;
}
case NVPTX::SULD_1D_V4I8_TRAP:
case NVPTX::SULD_1D_V4I16_TRAP:
case NVPTX::SULD_1D_V4I32_TRAP:
case NVPTX::SULD_1D_ARRAY_V4I8_TRAP:
case NVPTX::SULD_1D_ARRAY_V4I16_TRAP:
case NVPTX::SULD_1D_ARRAY_V4I32_TRAP:
case NVPTX::SULD_2D_V4I8_TRAP:
case NVPTX::SULD_2D_V4I16_TRAP:
case NVPTX::SULD_2D_V4I32_TRAP:
case NVPTX::SULD_2D_ARRAY_V4I8_TRAP:
case NVPTX::SULD_2D_ARRAY_V4I16_TRAP:
case NVPTX::SULD_2D_ARRAY_V4I32_TRAP:
case NVPTX::SULD_3D_V4I8_TRAP:
case NVPTX::SULD_3D_V4I16_TRAP:
case NVPTX::SULD_3D_V4I32_TRAP: {
// This is a V4 surface load, so operand 4 is a surfref
MachineOperand &SurfHandle = MI.getOperand(4);
replaceImageHandle(SurfHandle, MF);
return true;
}
case NVPTX::SUST_B_1D_B8_TRAP:
case NVPTX::SUST_B_1D_B16_TRAP:
case NVPTX::SUST_B_1D_B32_TRAP:
case NVPTX::SUST_B_1D_V2B8_TRAP:
case NVPTX::SUST_B_1D_V2B16_TRAP:
case NVPTX::SUST_B_1D_V2B32_TRAP:
case NVPTX::SUST_B_1D_V4B8_TRAP:
case NVPTX::SUST_B_1D_V4B16_TRAP:
case NVPTX::SUST_B_1D_V4B32_TRAP:
case NVPTX::SUST_B_1D_ARRAY_B8_TRAP:
case NVPTX::SUST_B_1D_ARRAY_B16_TRAP:
case NVPTX::SUST_B_1D_ARRAY_B32_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP:
case NVPTX::SUST_B_2D_B8_TRAP:
case NVPTX::SUST_B_2D_B16_TRAP:
case NVPTX::SUST_B_2D_B32_TRAP:
case NVPTX::SUST_B_2D_V2B8_TRAP:
case NVPTX::SUST_B_2D_V2B16_TRAP:
case NVPTX::SUST_B_2D_V2B32_TRAP:
case NVPTX::SUST_B_2D_V4B8_TRAP:
case NVPTX::SUST_B_2D_V4B16_TRAP:
case NVPTX::SUST_B_2D_V4B32_TRAP:
case NVPTX::SUST_B_2D_ARRAY_B8_TRAP:
case NVPTX::SUST_B_2D_ARRAY_B16_TRAP:
case NVPTX::SUST_B_2D_ARRAY_B32_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP:
case NVPTX::SUST_B_3D_B8_TRAP:
case NVPTX::SUST_B_3D_B16_TRAP:
case NVPTX::SUST_B_3D_B32_TRAP:
case NVPTX::SUST_B_3D_V2B8_TRAP:
case NVPTX::SUST_B_3D_V2B16_TRAP:
case NVPTX::SUST_B_3D_V2B32_TRAP:
case NVPTX::SUST_B_3D_V4B8_TRAP:
case NVPTX::SUST_B_3D_V4B16_TRAP:
case NVPTX::SUST_B_3D_V4B32_TRAP:
case NVPTX::SUST_P_1D_B8_TRAP:
case NVPTX::SUST_P_1D_B16_TRAP:
case NVPTX::SUST_P_1D_B32_TRAP:
case NVPTX::SUST_P_1D_V2B8_TRAP:
case NVPTX::SUST_P_1D_V2B16_TRAP:
case NVPTX::SUST_P_1D_V2B32_TRAP:
case NVPTX::SUST_P_1D_V4B8_TRAP:
case NVPTX::SUST_P_1D_V4B16_TRAP:
case NVPTX::SUST_P_1D_V4B32_TRAP:
case NVPTX::SUST_P_1D_ARRAY_B8_TRAP:
case NVPTX::SUST_P_1D_ARRAY_B16_TRAP:
case NVPTX::SUST_P_1D_ARRAY_B32_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP:
case NVPTX::SUST_P_2D_B8_TRAP:
case NVPTX::SUST_P_2D_B16_TRAP:
case NVPTX::SUST_P_2D_B32_TRAP:
case NVPTX::SUST_P_2D_V2B8_TRAP:
case NVPTX::SUST_P_2D_V2B16_TRAP:
case NVPTX::SUST_P_2D_V2B32_TRAP:
case NVPTX::SUST_P_2D_V4B8_TRAP:
case NVPTX::SUST_P_2D_V4B16_TRAP:
case NVPTX::SUST_P_2D_V4B32_TRAP:
case NVPTX::SUST_P_2D_ARRAY_B8_TRAP:
case NVPTX::SUST_P_2D_ARRAY_B16_TRAP:
case NVPTX::SUST_P_2D_ARRAY_B32_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP:
case NVPTX::SUST_P_3D_B8_TRAP:
case NVPTX::SUST_P_3D_B16_TRAP:
case NVPTX::SUST_P_3D_B32_TRAP:
case NVPTX::SUST_P_3D_V2B8_TRAP:
case NVPTX::SUST_P_3D_V2B16_TRAP:
case NVPTX::SUST_P_3D_V2B32_TRAP:
case NVPTX::SUST_P_3D_V4B8_TRAP:
case NVPTX::SUST_P_3D_V4B16_TRAP:
case NVPTX::SUST_P_3D_V4B32_TRAP: {
// This is a surface store, so operand 0 is a surfref
MachineOperand &SurfHandle = MI.getOperand(0);
replaceImageHandle(SurfHandle, MF);
return true;
}
case NVPTX::TXQ_CHANNEL_ORDER:
case NVPTX::TXQ_CHANNEL_DATA_TYPE:
case NVPTX::TXQ_WIDTH:
case NVPTX::TXQ_HEIGHT:
case NVPTX::TXQ_DEPTH:
case NVPTX::TXQ_ARRAY_SIZE:
case NVPTX::TXQ_NUM_SAMPLES:
case NVPTX::TXQ_NUM_MIPMAP_LEVELS:
case NVPTX::SUQ_CHANNEL_ORDER:
case NVPTX::SUQ_CHANNEL_DATA_TYPE:
case NVPTX::SUQ_WIDTH:
case NVPTX::SUQ_HEIGHT:
case NVPTX::SUQ_DEPTH:
case NVPTX::SUQ_ARRAY_SIZE: {
// This is a query, so operand 1 is a surfref/texref
MachineOperand &Handle = MI.getOperand(1);
replaceImageHandle(Handle, MF);
return true;
}
}
// Every path through the switch above returns, so control never reaches
// this point.
}
/// Replaces the virtual-register handle operand \p Op with an immediate: the
/// index, in the function's NVPTXMachineFunctionInfo, of the symbol name the
/// handle stands for. The instruction that defined the register is queued in
/// InstrsToRemove.
///
/// Two kinds of defining instruction are understood: a load of a kernel
/// parameter (LD_i64_avar) and a texsurf_handles instruction referring to a
/// global variable. Anything else is a fatal error.
void NVPTXReplaceImageHandles::
replaceImageHandle(MachineOperand &Op, MachineFunction &MF) {
  NVPTXMachineFunctionInfo *FuncInfo = MF.getInfo<NVPTXMachineFunctionInfo>();
  const MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Find the instruction that produced the handle.
  MachineInstr *HandleDef = RegInfo.getVRegDef(Op.getReg());
  assert(HandleDef && "No def for image handle vreg?");

  const unsigned DefOpcode = HandleDef->getOpcode();

  if (DefOpcode == NVPTX::LD_i64_avar) {
    // The handle is loaded from a parameter; substitute the parameter symbol.
    const MachineOperand &SymOperand = HandleDef->getOperand(6);
    assert(SymOperand.isSymbol() && "Load is not a symbol!");
    StringRef Sym = SymOperand.getSymbolName();

    // The loaded symbol is expected to be "<function>_param_<N>"; recover N.
    std::string Prefix = MF.getName();
    Prefix += "_param_";
    assert(Sym.startswith(Prefix) && "Invalid symbol reference");
    unsigned ParamIndex = atoi(Sym.data() + Prefix.size());

    // Rebuild the parameter symbol name from the IR function's name.
    std::string ParamSym;
    raw_string_ostream ParamSymOS(ParamSym);
    ParamSymOS << MF.getFunction()->getName() << "_param_" << ParamIndex;

    Op.ChangeToImmediate(
        FuncInfo->getImageHandleSymbolIndex(ParamSymOS.str().c_str()));
    InstrsToRemove.insert(HandleDef);
    return;
  }

  if (DefOpcode == NVPTX::texsurf_handles) {
    // The handle refers to a global variable; substitute the global's name.
    const MachineOperand &GlobalOperand = HandleDef->getOperand(1);
    assert(GlobalOperand.isGlobal() && "Load is not a global!");
    const GlobalValue *Global = GlobalOperand.getGlobal();
    assert(Global->hasName() && "Global sampler must be named!");

    Op.ChangeToImmediate(
        FuncInfo->getImageHandleSymbolIndex(Global->getName().data()));
    InstrsToRemove.insert(HandleDef);
    return;
  }

  llvm_unreachable("Unknown instruction operating on handle");
}
// Factory for the image-handle replacement pass: returns a newly
// heap-allocated NVPTXReplaceImageHandles as a MachineFunctionPass pointer.
MachineFunctionPass *llvm::createNVPTXReplaceImageHandlesPass() {
return new NVPTXReplaceImageHandles();
}

View File

@ -65,6 +65,10 @@ public:
// True when either hasHWROT32() or hasSWROT32() reports true.
inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); }
// True when SmVersion is at least 20.
inline bool hasROT64() const { return SmVersion >= 20; }
// Reports whether image handles are enabled for this subtarget. The answer
// is hard-wired to false for every subtarget for now; NVPTXPassConfig's
// addInstSelector() adds the image-handle replacement pass whenever this
// returns false.
bool hasImageHandles() const {
// Currently disabled
return false;
}
// Returns the stored Is64Bit flag.
bool is64Bit() const { return Is64Bit; }
// Returns the stored SmVersion value.
unsigned int getSmVersion() const { return SmVersion; }

View File

@ -143,6 +143,7 @@ void NVPTXPassConfig::addIRPasses() {
disablePass(&BranchFolderPassID);
disablePass(&TailDuplicateID);
addPass(createNVPTXImageOptimizerPass());
TargetPassConfig::addIRPasses();
addPass(createNVPTXAssignValidGlobalNamesPass());
addPass(createGenericToNVVMPass());
@ -155,9 +156,16 @@ void NVPTXPassConfig::addIRPasses() {
}
bool NVPTXPassConfig::addInstSelector() {
const NVPTXSubtarget &ST =
getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>();
addPass(createLowerAggrCopies());
addPass(createAllocaHoisting());
addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
if (!ST.hasImageHandles())
addPass(createNVPTXReplaceImageHandlesPass());
return false;
}

View File

@ -0,0 +1,20 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
target triple = "nvptx-unknown-nvcl"
declare i32 @llvm.nvvm.suld.1d.i32.trap(i64, i32)
; CHECK: .entry foo
; Kernel under test: reads one i32 element at %idx from the 1D surface handle
; %img with the trapping suld intrinsic, converts it to float and stores it
; through %red. The FileCheck patterns expect the surface operand of the
; emitted suld to be the parameter symbol foo_param_0.
define void @foo(i64 %img, float* %red, i32 %idx) {
; CHECK: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [foo_param_0, {%r{{[0-9]+}}}]
%val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %img, i32 %idx)
; CHECK: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
%ret = sitofp i32 %val to float
; CHECK: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
store float %ret, float* %red
ret void
}
!nvvm.annotations = !{!1, !2}
!1 = metadata !{void (i64, float*, i32)* @foo, metadata !"kernel", i32 1}
!2 = metadata !{void (i64, float*, i32)* @foo, metadata !"rdwrimage", i32 0}

View File

@ -0,0 +1,16 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
target triple = "nvptx-unknown-nvcl"
declare void @llvm.nvvm.sust.b.1d.i32.trap(i64, i32, i32)
; CHECK: .entry foo
; Kernel under test: writes the i32 %val at %idx into the 1D surface handle
; %img with the trapping sust.b intrinsic. The FileCheck pattern expects the
; surface operand of the emitted sust to be the parameter symbol foo_param_0.
define void @foo(i64 %img, i32 %val, i32 %idx) {
; CHECK: sust.b.1d.b32.trap [foo_param_0, {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
tail call void @llvm.nvvm.sust.b.1d.i32.trap(i64 %img, i32 %idx, i32 %val)
ret void
}
!nvvm.annotations = !{!1, !2}
!1 = metadata !{void (i64, i32, i32)* @foo, metadata !"kernel", i32 1}
!2 = metadata !{void (i64, i32, i32)* @foo, metadata !"wroimage", i32 0}

View File

@ -0,0 +1,20 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
target triple = "nvptx-unknown-nvcl"
declare { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.i32(i64, i64, i32)
; CHECK: .entry foo
; Kernel under test: fetches four floats at %idx from the 1D texture handle
; %img using the sampler handle %sampler, then stores the first component
; through %red. The FileCheck patterns expect the texture and sampler
; operands of the emitted tex instruction to be the parameter symbols
; foo_param_0 and foo_param_1.
define void @foo(i64 %img, i64 %sampler, float* %red, i32 %idx) {
; CHECK: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [foo_param_0, foo_param_1, {%r{{[0-9]+}}}]
%val = tail call { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.i32(i64 %img, i64 %sampler, i32 %idx)
%ret = extractvalue { float, float, float, float } %val, 0
; CHECK: st.f32 [%r{{[0-9]+}}], %f[[RED]]
store float %ret, float* %red
ret void
}
!nvvm.annotations = !{!1, !2, !3}
!1 = metadata !{void (i64, i64, float*, i32)* @foo, metadata !"kernel", i32 1}
!2 = metadata !{void (i64, i64, float*, i32)* @foo, metadata !"rdoimage", i32 0}
!3 = metadata !{void (i64, i64, float*, i32)* @foo, metadata !"sampler", i32 1}