1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-24 13:33:37 +02:00
llvm-mirror/test/Transforms/LICM/promote-tls.ll
Philip Reames 492b57c038 [LICM] Store promotion when memory is thread local
This patch teaches LICM's implementation of store promotion to exploit the fact that the memory location being accessed might be provably thread local. The fact that it's thread local weakens the requirements for where we can insert stores, since no other thread can observe the write. This allows us to perform store promotion even in cases where the store is not guaranteed to execute in the loop.

Two key assumptions worth drawing out are that this assumes a) no-capture is strong enough to imply no-escape, and b) standard allocation functions like malloc, calloc, and operator new return values which can be assumed not to have previously escaped.

In future work, it would be nice to generalize this so that it works without directly seeing the allocation site. I believe that the nocapture return attribute should be suitable for this purpose, but haven't investigated carefully. It's also likely that we could support unescaped allocas with similar reasoning, but since SROA and Mem2Reg should destroy those, they're less interesting than they first might seem.

Differential Revision: http://reviews.llvm.org/D16783

llvm-svn: 263072
2016-03-09 22:59:30 +00:00

134 lines
3.9 KiB
LLVM

; RUN: opt -tbaa -basicaa -licm -S < %s | FileCheck %s
; If we can prove a local is thread local, we can insert stores during
; promotion which wouldn't be legal otherwise.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-linux-generic"
; External global pointer. Every test function below reads it with a
; `load volatile` and compares the result against null to decide whether to
; leave the loop early.
@p = external global i8*
; Allocation routine called by @test and @test_neg2 to obtain the memory that
; the loop stores to.
declare i8* @malloc(i64)
; Exercise the TLS case
; Positive case. The memory written in the loop comes directly from a
; `noalias` call to @malloc and, within this function, is only bitcast,
; loaded from and stored to; it is never passed to a call, stored elsewhere,
; or returned. The store in for.body is conditional: it is reached only when
; the volatile guard in for.header compares equal to null. The FileCheck
; directives below nevertheless expect a store of the promoted value in each
; of the two exit blocks.
define i32* @test(i32 %n) {
entry:
;; ignore the required null check for simplicity
%mem = call dereferenceable(16) noalias i8* @malloc(i64 16)
%addr = bitcast i8* %mem to i32*
br label %for.body.lr.ph
for.body.lr.ph: ; preds = %entry
br label %for.header
for.header:
%i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
; This load executes on every iteration, before the guard is evaluated.
%old = load i32, i32* %addr, align 4
; deliberately impossible-to-analyze branch: the condition comes from a
; volatile load of an external global
%guard = load volatile i8*, i8** @p
%exitcmp = icmp eq i8* %guard, null
br i1 %exitcmp, label %for.body, label %early-exit
early-exit:
; Exit taken when the guard is non-null, i.e. before this iteration's store
; in for.body would have run.
; NOTE(review): the expected store is `align 1` while the in-loop store is
; `align 4`; presumably this mirrors what the pass emitted when the test was
; written -- confirm before changing either side.
; CHECK-LABEL: early-exit:
; CHECK: store i32 %new1.lcssa, i32* %addr, align 1
ret i32* null
for.body:
%new = add i32 %old, 1
store i32 %new, i32* %addr, align 4
%inc = add nsw i32 %i.02, 1
%cmp = icmp slt i32 %inc, %n
br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge
for.cond.for.end_crit_edge: ; preds = %for.body
; Normal exit, taken once %inc is no longer less than %n.
; CHECK-LABEL: for.cond.for.end_crit_edge:
; CHECK: store i32 %new.lcssa, i32* %addr, align 1
; %split has no users in this function.
%split = phi i32* [ %addr, %for.body ]
ret i32* null
}
; Stand-in allocator for the negative test below. It has the same signature as
; @malloc but a different name.
; NOTE(review): presumably the point is that the optimizer does not recognize
; this name as a standard allocation function -- confirm against the pass.
declare i8* @not_malloc(i64)
; Negative test - not TLS
; The body is identical to @test except that the memory is obtained from
; @not_malloc instead of @malloc. The directives below expect that no store
; appears in either exit block and that the original store stays in for.body.
define i32* @test_neg(i32 %n) {
entry:
;; ignore the required null check for simplicity
%mem = call dereferenceable(16) noalias i8* @not_malloc(i64 16)
%addr = bitcast i8* %mem to i32*
br label %for.body.lr.ph
for.body.lr.ph: ; preds = %entry
br label %for.header
for.header:
%i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
%old = load i32, i32* %addr, align 4
; deliberately impossible-to-analyze branch: the condition comes from a
; volatile load of an external global
%guard = load volatile i8*, i8** @p
%exitcmp = icmp eq i8* %guard, null
br i1 %exitcmp, label %for.body, label %early-exit
early-exit:
; No store may be inserted on the early-exit path.
; CHECK-LABEL: early-exit:
; CHECK-NOT: store
ret i32* null
for.body:
; The store must remain inside the loop, unchanged.
; CHECK-LABEL: for.body:
; CHECK: store i32 %new, i32* %addr, align 4
%new = add i32 %old, 1
store i32 %new, i32* %addr, align 4
%inc = add nsw i32 %i.02, 1
%cmp = icmp slt i32 %inc, %n
br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge
for.cond.for.end_crit_edge: ; preds = %for.body
; No store may be inserted on the normal exit path either.
; CHECK-LABEL: for.cond.for.end_crit_edge:
; CHECK-NOT: store
; %split has no users in this function.
%split = phi i32* [ %addr, %for.body ]
ret i32* null
}
; Negative test - can't speculate load since branch
; may control alignment
; Same allocation as @test (a `noalias` call to @malloc), but here the load of
; %addr sits in for.body, after the guard branch, instead of in for.header.
; Both the load and the store are therefore reached only when the guard
; compares equal to null. The directives below expect that no store appears in
; either exit block and that the original store stays in for.body.
define i32* @test_neg2(i32 %n) {
entry:
;; ignore the required null check for simplicity
%mem = call dereferenceable(16) noalias i8* @malloc(i64 16)
%addr = bitcast i8* %mem to i32*
br label %for.body.lr.ph
for.body.lr.ph: ; preds = %entry
br label %for.header
for.header:
%i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
; deliberately impossible-to-analyze branch: the condition comes from a
; volatile load of an external global
%guard = load volatile i8*, i8** @p
%exitcmp = icmp eq i8* %guard, null
br i1 %exitcmp, label %for.body, label %early-exit
early-exit:
; No store may be inserted on the early-exit path.
; CHECK-LABEL: early-exit:
; CHECK-NOT: store
ret i32* null
for.body:
; The store must remain inside the loop, unchanged.
; CHECK-LABEL: for.body:
; CHECK: store i32 %new, i32* %addr, align 4
; Unlike @test, the load is guarded by the branch in for.header.
%old = load i32, i32* %addr, align 4
%new = add i32 %old, 1
store i32 %new, i32* %addr, align 4
%inc = add nsw i32 %i.02, 1
%cmp = icmp slt i32 %inc, %n
br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge
for.cond.for.end_crit_edge: ; preds = %for.body
; No store may be inserted on the normal exit path either.
; CHECK-LABEL: for.cond.for.end_crit_edge:
; CHECK-NOT: store
; %split has no users in this function.
%split = phi i32* [ %addr, %for.body ]
ret i32* null
}