mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
1daafa20ea
The function LoopIdiomRecognize::isLegalStore looks for stores in loops that could be transformed into memset or memcpy. However, the algorithm currently requires that we know how big the store is at runtime, i.e. that the store size will not overflow an unsigned integer. For scalable vectors we cannot guarantee this so I have changed the code to bail out for now. In addition, even if we add a way to query the maximum value of vscale in future we will still need to update the algorithm to cope with non-constant strides. The additional cost associated with calculating the memset and memcpy arguments will need to be taken into account as well. This patch also fixes up an implicit TypeSize -> uint64_t cast, thereby removing a warning. I've added tests here showing a fixed width vector loop being transformed into memcpy, and a scalable vector loop remaining unchanged: Transforms/LoopIdiom/memcpy-vectors.ll Differential Revision: https://reviews.llvm.org/D87439
54 lines
2.0 KiB
LLVM
54 lines
2.0 KiB
LLVM
; RUN: opt -loop-idiom -S <%s 2>%t | FileCheck %s
|
|
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
|
|
|
|
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
|
|
; WARN-NOT: warning
|
|
|
|
; Fixed-width case: the loop loads a <2 x i64> from %a and stores it to %b,
; advancing the element index by 2 until it reaches 1024 (512 iterations,
; 16 bytes each). The directives below expect a memcpy to show up between
; the entry label and the first mention of vector.body in the pass output.
; NOTE(review): attribute group #1 is not defined in the visible part of
; this file - confirm it is intentionally left dangling.
define void @memcpy_fixed_vec(i64* noalias %a, i64* noalias %b) local_unnamed_addr #1 {
; CHECK-LABEL: @memcpy_fixed_vec(
; CHECK: entry:
; CHECK: memcpy
; CHECK: vector.body
entry:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  ; %iv counts i64 elements already copied.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %vector.body ]

  ; Source: two consecutive i64 values starting at %a[%iv].
  %src.elt = getelementptr inbounds i64, i64* %a, i64 %iv
  %src.vec = bitcast i64* %src.elt to <2 x i64>*
  %chunk = load <2 x i64>, <2 x i64>* %src.vec, align 8

  ; Destination: the matching two i64 slots starting at %b[%iv].
  %dst.elt = getelementptr inbounds i64, i64* %b, i64 %iv
  %dst.vec = bitcast i64* %dst.elt to <2 x i64>*
  store <2 x i64> %chunk, <2 x i64>* %dst.vec, align 8

  ; Step by the vector width (2 elements) and exit once 1024 is reached.
  %iv.next = add nuw nsw i64 %iv, 2
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}
|
|
|
|
; Scalable case: the loop loads a <vscale x 2 x i64> from %a and stores it
; to %b, indexing in whole scalable vectors and running 1024 iterations.
; The directives below require that no memcpy appears between the entry
; label and the first mention of vector.body in the pass output.
; NOTE(review): attribute group #1 is not defined in the visible part of
; this file - confirm it is intentionally left dangling.
define void @memcpy_scalable_vec(i64* noalias %a, i64* noalias %b) local_unnamed_addr #1 {
; CHECK-LABEL: @memcpy_scalable_vec(
; CHECK: entry:
; CHECK-NOT: memcpy
; CHECK: vector.body
entry:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  ; %iv counts whole scalable vectors already copied.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %vector.body ]

  ; Source: the %iv-th <vscale x 2 x i64> starting from %a.
  %a.vec = bitcast i64* %a to <vscale x 2 x i64>*
  %src.ptr = getelementptr inbounds <vscale x 2 x i64>, <vscale x 2 x i64>* %a.vec, i64 %iv
  %chunk = load <vscale x 2 x i64>, <vscale x 2 x i64>* %src.ptr, align 16

  ; Destination: the %iv-th <vscale x 2 x i64> starting from %b.
  %b.vec = bitcast i64* %b to <vscale x 2 x i64>*
  %dst.ptr = getelementptr inbounds <vscale x 2 x i64>, <vscale x 2 x i64>* %b.vec, i64 %iv
  store <vscale x 2 x i64> %chunk, <vscale x 2 x i64>* %dst.ptr, align 16

  ; One scalable vector per iteration; exit after the 1024th.
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}
|