1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00
llvm-mirror/test/CodeGen/SystemZ/prefetch-03.ll
Jonas Paulsson e80a23909b [LoopDataPrefetch + SystemZ] Let target decide on prefetching for each loop.
This patch adds

- New arguments to getMinPrefetchStride() to let the target decide on a
  per-loop basis if software prefetching should be done even with a stride
  within the limit of the hw prefetcher.

- New TTI hook enableWritePrefetching() to let a target do write prefetching
  by default (defaults to false).

- In LoopDataPrefetch:

  - A search through the whole loop to gather information before emitting any
    prefetches. This way the target can get information via new arguments to
    getMinPrefetchStride() and emit prefetches more selectively. Collected
    information includes: Does the loop have a call, how many memory
    accesses, how many of them are strided, how many prefetches will cover
    them. This is NFC relative to the previous behavior as long as the target
    does not change its definition of getMinPrefetchStride().

  - If a previous access to the same exact address was 'read', and the
    current one is 'write', make it a 'write' prefetch.

  - If two accesses that are covered by the same prefetch do not dominate
    each other, put the prefetch in a block that dominates both of them.

  - If a ConstantMaxTripCount is less than ItersAhead, then skip the loop.

- A SystemZ implementation of getMinPrefetchStride().

Review: Ulrich Weigand, Michael Kruse

Differential Revision: https://reviews.llvm.org/D70228
2020-04-02 14:57:46 +02:00

47 lines
1.4 KiB
LLVM

; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -prefetch-distance=50 \
; RUN: -loop-prefetch-writes -stop-after=loop-data-prefetch | FileCheck %s
;
; Check that prefetches are emitted in a position that is executed each
; iteration for each targeted memory instruction. The two stores in %true and
; %false are within one cache line in memory, so they should get a single
; prefetch in %for.body.
;
; CHECK-LABEL: for.body
; CHECK: call void @llvm.prefetch.p0i8(i8* {{.*}}, i32 0
; CHECK: call void @llvm.prefetch.p0i8(i8* {{.*}}, i32 1
; CHECK-LABEL: true
; CHECK-LABEL: false
; CHECK-LABEL: latch
; @fun - a loop with one load per iteration and one store on each side of a
; data-dependent branch.
;
; Every iteration loads an i32 from %Dst at the current index. A positive
; value is stored to %Src at that same index (block %true); any other value is
; stored to %Src eight elements, i.e. 32 bytes, further on (block %false).
; Note that, despite the parameter names, %Dst is only read and %Src is only
; written in this function.
;
; The index starts at 0 and is advanced by 1600 elements (6400 bytes) in
; %latch; the loop repeats while the advanced index is unsigned-less-than
; 11200, which gives seven iterations.
define void @fun(i32* nocapture %Src, i32* nocapture readonly %Dst) {
entry:
  br label %for.body

for.body:
  ; Loop header: the only block that runs on every iteration ahead of both
  ; stores.
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %latch ]
  %ld.ptr = getelementptr inbounds i32, i32* %Dst, i64 %idx
  %val = load i32, i32* %ld.ptr, align 4
  %is.pos = icmp sgt i32 %val, 0
  br i1 %is.pos, label %true, label %false

true:
  ; Positive value: write it to %Src[idx].
  %st.ptr.lo = getelementptr inbounds i32, i32* %Src, i64 %idx
  store i32 %val, i32* %st.ptr.lo, align 4
  br label %latch

false:
  ; Zero or negative value: write it to %Src[idx + 8].
  %idx.plus8 = add i64 %idx, 8
  %st.ptr.hi = getelementptr inbounds i32, i32* %Src, i64 %idx.plus8
  store i32 %val, i32* %st.ptr.hi, align 4
  br label %latch

latch:
  ; Advance the index and decide whether to run another iteration.
  %idx.next = add nuw nsw i64 %idx, 1600
  %keep.going = icmp ult i64 %idx.next, 11200
  br i1 %keep.going, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  ret void
}