1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-24 13:33:37 +02:00
llvm-mirror/test/Transforms/LoopVectorize/X86/gather-vs-interleave.ll
Elena Demikhovsky edaa790008 [Loop Vectorizer] Cost-based decision for vectorization form of memory instruction.
Make the cost model select between the Interleave, GatherScatter or Scalar vectorization form of a memory instruction.
The right decision should be made for non-consecutive memory access instructions that may have more than one vectorization solution.

This patch includes the following changes:
- Cost Model calculates the cost of the Load/Store vector form and chooses the best option among Widening, Interleave, GatherScatter and Scalarization. Cost Model keeps the widening decision.
- Arrays of Uniform and Scalar values are moved from Legality to Cost Model.
- Cost Model collects Uniforms and Scalars per VF. The collection is based on the CM decision map of Loads/Stores vectorization form.
- Vectorization of memory instruction is performed according to the CM decision.

Differential Revision: https://reviews.llvm.org/D27919

llvm-svn: 294503
2017-02-08 19:25:23 +00:00

42 lines
1.2 KiB
LLVM

; RUN: opt -loop-vectorize -S -mcpu=skylake-avx512 < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; This test checks that the "gather" operation is chosen since its cost is
; better than that of the interleaving pattern.
;
; C-level sketch of the loop under test:
;
;unsigned long A[SIZE];
;unsigned long B[SIZE];
;
;void foo() {
; for (int i=0; i<N; i+=8) {
; B[i] = A[i] + 5;
; }
;}

; Zero-initialized, 16-byte-aligned arrays of 10240 i64 elements each.
@A = global [10240 x i64] zeroinitializer, align 16
@B = global [10240 x i64] zeroinitializer, align 16
; Loads A[i], adds 5 and stores the result to B[i], with i advancing in steps
; of 8 elements from 0 while the incremented index is below 1024.
; The load is therefore non-consecutive, and the vectorizer is expected to
; emit a masked gather for it.
;
; CHECK-LABEL: @strided_load_i64(
; CHECK: masked.gather
define void @strided_load_i64() {
  br label %1

; <label>:1:                                      ; preds = %0, %1
  ; Induction variable: element index into both arrays.
  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
  ; Load A[indvars.iv].
  %2 = getelementptr inbounds [10240 x i64], [10240 x i64]* @A, i64 0, i64 %indvars.iv
  %3 = load i64, i64* %2, align 16
  %4 = add i64 %3, 5
  ; Store the sum to B[indvars.iv].
  %5 = getelementptr inbounds [10240 x i64], [10240 x i64]* @B, i64 0, i64 %indvars.iv
  store i64 %4, i64* %5, align 16
  ; Advance by 8 elements; keep looping while the next index is < 1024 (signed).
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 8
  %6 = icmp slt i64 %indvars.iv.next, 1024
  br i1 %6, label %1, label %7

; <label>:7:                                      ; preds = %1
  ret void
}