1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 03:33:20 +01:00
llvm-mirror/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
Eli Bendersky ee591c4154 Expose "noduplicate" attribute as a property for intrinsics.
The "noduplicate" function attribute exists to prevent certain optimizations
from duplicating calls to the function. This is important on platforms where
certain function call duplications are unsafe (for example execution barriers
for CUDA and OpenCL).

This patch makes it possible to specify intrinsics as "noduplicate" and
translates that to the appropriate function attribute.

llvm-svn: 204200
2014-03-18 23:51:07 +00:00

75 lines
2.6 KiB
LLVM

; RUN: opt < %s -O3 -S | FileCheck %s
; Make sure the call to syncthreads is not duplicate here by the LLVM
; optimizations, because it has the noduplicate attribute set.
; CHECK: call void @llvm.cuda.syncthreads
; CHECK-NOT: call void @llvm.cuda.syncthreads
; Function Attrs: nounwind
define void @foo(float* %output) #1 {
entry:
%output.addr = alloca float*, align 8
store float* %output, float** %output.addr, align 8
%0 = load float** %output.addr, align 8
%arrayidx = getelementptr inbounds float* %0, i64 0
%1 = load float* %arrayidx, align 4
%conv = fpext float %1 to double
%cmp = fcmp olt double %conv, 1.000000e+01
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
%2 = load float** %output.addr, align 8
%3 = load float* %2, align 4
%conv1 = fpext float %3 to double
%add = fadd double %conv1, 1.000000e+00
%conv2 = fptrunc double %add to float
store float %conv2, float* %2, align 4
br label %if.end
if.else: ; preds = %entry
%4 = load float** %output.addr, align 8
%5 = load float* %4, align 4
%conv3 = fpext float %5 to double
%add4 = fadd double %conv3, 2.000000e+00
%conv5 = fptrunc double %add4 to float
store float %conv5, float* %4, align 4
br label %if.end
if.end: ; preds = %if.else, %if.then
call void @llvm.cuda.syncthreads()
%6 = load float** %output.addr, align 8
%arrayidx6 = getelementptr inbounds float* %6, i64 0
%7 = load float* %arrayidx6, align 4
%conv7 = fpext float %7 to double
%cmp8 = fcmp olt double %conv7, 1.000000e+01
br i1 %cmp8, label %if.then9, label %if.else13
if.then9: ; preds = %if.end
%8 = load float** %output.addr, align 8
%9 = load float* %8, align 4
%conv10 = fpext float %9 to double
%add11 = fadd double %conv10, 3.000000e+00
%conv12 = fptrunc double %add11 to float
store float %conv12, float* %8, align 4
br label %if.end17
if.else13: ; preds = %if.end
%10 = load float** %output.addr, align 8
%11 = load float* %10, align 4
%conv14 = fpext float %11 to double
%add15 = fadd double %conv14, 4.000000e+00
%conv16 = fptrunc double %add15 to float
store float %conv16, float* %10, align 4
br label %if.end17
if.end17: ; preds = %if.else13, %if.then9
ret void
}
; Function Attrs: noduplicate nounwind
declare void @llvm.cuda.syncthreads() #2
!0 = metadata !{void (float*)* @foo, metadata !"kernel", i32 1}
!1 = metadata !{null, metadata !"align", i32 8}