mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-24 13:33:37 +02:00
492b57c038
This patch teaches LICM's implementation of store promotion to exploit the fact that the memory location being accessed might be provably thread local. The fact it's thread local weakens the requirements for where we can insert stores since no other thread can observe the write. This allows us to perform store promotion even in cases where the store is not guaranteed to execute in the loop. Two key assumptions worth drawing out are that this assumes a) no-capture is strong enough to imply no-escape, and b) standard allocation functions like malloc, calloc, and operator new return values which can be assumed not to have previously escaped. In future work, it would be nice to generalize this so that it works without directly seeing the allocation site. I believe that the nocapture return attribute should be suitable for this purpose, but haven't investigated carefully. It's also likely that we could support unescaped allocas with similar reasoning, but since SROA and Mem2Reg should destroy those, they're less interesting than they first might seem. Differential Revision: http://reviews.llvm.org/D16783 llvm-svn: 263072
134 lines
3.9 KiB
LLVM
134 lines
3.9 KiB
LLVM
; RUN: opt -tbaa -basicaa -licm -S < %s | FileCheck %s

; If we can prove a local is thread local, we can insert stores during
; promotion which wouldn't be legal otherwise.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-linux-generic"

; External global used below as an opaque, unanalyzable branch condition
; (loaded volatile so no pass can reason about the loop exit).
@p = external global i8*

; Recognized allocation function: LICM may assume its noalias return value
; has not escaped to another thread at the allocation site.
declare i8* @malloc(i64)

; Exercise the TLS case
|
; Positive test: %mem comes from malloc and never escapes, so the promoted
; store may be inserted on the early exit even though the in-loop store is
; not guaranteed to execute.
define i32* @test(i32 %n) {
entry:
;; ignore the required null check for simplicity
  %mem = call dereferenceable(16) noalias i8* @malloc(i64 16)
  %addr = bitcast i8* %mem to i32*
  br label %for.body.lr.ph

for.body.lr.ph:                                   ; preds = %entry
  br label %for.header

for.header:
  %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
  %old = load i32, i32* %addr, align 4
  ; deliberate impossible to analyze branch
  %guard = load volatile i8*, i8** @p
  %exitcmp = icmp eq i8* %guard, null
  br i1 %exitcmp, label %for.body, label %early-exit

early-exit:
; CHECK-LABEL: early-exit:
; CHECK: store i32 %new1.lcssa, i32* %addr, align 1
  ret i32* null

for.body:
  %new = add i32 %old, 1
  store i32 %new, i32* %addr, align 4
  %inc = add nsw i32 %i.02, 1
  %cmp = icmp slt i32 %inc, %n
  br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge

for.cond.for.end_crit_edge:                       ; preds = %for.body
; CHECK-LABEL: for.cond.for.end_crit_edge:
; CHECK: store i32 %new.lcssa, i32* %addr, align 1
  %split = phi i32* [ %addr, %for.body ]
  ret i32* null
}
|
|
|
|
declare i8* @not_malloc(i64)
|
|
|
|
; Negative test - not TLS
|
|
; Negative test: identical loop shape to @test, but the allocation comes from
; @not_malloc, so the location cannot be proven thread local and no store may
; be inserted on paths where the original store was not guaranteed to run.
define i32* @test_neg(i32 %n) {
entry:
;; ignore the required null check for simplicity
  %mem = call dereferenceable(16) noalias i8* @not_malloc(i64 16)
  %addr = bitcast i8* %mem to i32*
  br label %for.body.lr.ph

for.body.lr.ph:                                   ; preds = %entry
  br label %for.header

for.header:
  %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
  %old = load i32, i32* %addr, align 4
  ; deliberate impossible to analyze branch
  %guard = load volatile i8*, i8** @p
  %exitcmp = icmp eq i8* %guard, null
  br i1 %exitcmp, label %for.body, label %early-exit

early-exit:
; CHECK-LABEL: early-exit:
; CHECK-NOT: store
  ret i32* null

for.body:
; CHECK-LABEL: for.body:
; CHECK: store i32 %new, i32* %addr, align 4
  %new = add i32 %old, 1
  store i32 %new, i32* %addr, align 4
  %inc = add nsw i32 %i.02, 1
  %cmp = icmp slt i32 %inc, %n
  br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge

for.cond.for.end_crit_edge:                       ; preds = %for.body
; CHECK-LABEL: for.cond.for.end_crit_edge:
; CHECK-NOT: store
  %split = phi i32* [ %addr, %for.body ]
  ret i32* null
}
|
|
|
|
; Negative test - can't speculate load since branch
; may control alignment
; (The load of %addr sits inside the loop body rather than the header, so it
; is conditional on the unanalyzable branch and must not be hoisted/promoted.)
define i32* @test_neg2(i32 %n) {
entry:
;; ignore the required null check for simplicity
  %mem = call dereferenceable(16) noalias i8* @malloc(i64 16)
  %addr = bitcast i8* %mem to i32*
  br label %for.body.lr.ph

for.body.lr.ph:                                   ; preds = %entry
  br label %for.header

for.header:
  %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
  ; deliberate impossible to analyze branch
  %guard = load volatile i8*, i8** @p
  %exitcmp = icmp eq i8* %guard, null
  br i1 %exitcmp, label %for.body, label %early-exit

early-exit:
; CHECK-LABEL: early-exit:
; CHECK-NOT: store
  ret i32* null

for.body:
; CHECK-LABEL: for.body:
; CHECK: store i32 %new, i32* %addr, align 4
  %old = load i32, i32* %addr, align 4
  %new = add i32 %old, 1
  store i32 %new, i32* %addr, align 4
  %inc = add nsw i32 %i.02, 1
  %cmp = icmp slt i32 %inc, %n
  br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge

for.cond.for.end_crit_edge:                       ; preds = %for.body
; CHECK-LABEL: for.cond.for.end_crit_edge:
; CHECK-NOT: store
  %split = phi i32* [ %addr, %for.body ]
  ret i32* null
}
|
|
|