mirror of https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-20 03:23:01 +02:00
commit 1c4f697044
Summary: With this change (plus some changes to prevent !invariant from being clobbered within llvm), clang will be able to model the __ldg CUDA builtin as an invariant load, rather than as a target-specific llvm intrinsic. This will let the optimizer play with these loads -- specifically, we should be able to vectorize them in the load-store vectorizer.
Reviewers: tra
Subscribers: jholewinski, hfinkel, llvm-commits, chandlerc
Differential Revision: https://reviews.llvm.org/D23477
llvm-svn: 281152
28 lines, 696 B, LLVM
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s

; Check that invariant loads from the global addrspace are lowered to
; ld.global.nc.
; A load from the global address space (addrspace(1)) carrying
; !invariant.load metadata should be emitted as ld.global.nc (the
; non-coherent load path), per the CHECK below.
; CHECK-LABEL: @ld_global
define i32 @ld_global(i32 addrspace(1)* %ptr) {
; CHECK: ld.global.nc.{{[a-z]}}32
  %a = load i32, i32 addrspace(1)* %ptr, !invariant.load !0
  ret i32 %a
}
; Same global-addrspace load but WITHOUT !invariant.load metadata: it
; must stay a plain ld.global (no .nc suffix).
; CHECK-LABEL: @ld_not_invariant
define i32 @ld_not_invariant(i32 addrspace(1)* %ptr) {
; CHECK: ld.global.{{[a-z]}}32
  %a = load i32, i32 addrspace(1)* %ptr
  ret i32 %a
}
; A load from addrspace(0) (not the global addrspace) must not use the
; global non-coherent form: expect a generic ld, per the CHECK below.
; CHECK-LABEL: @ld_not_global_addrspace
define i32 @ld_not_global_addrspace(i32 addrspace(0)* %ptr) {
; CHECK: ld.{{[a-z]}}32
  %a = load i32, i32 addrspace(0)* %ptr
  ret i32 %a
}
; Empty metadata node referenced as the !invariant.load marker.
!0 = !{}