1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[SLP] Update test checks, NFC.

llvm-svn: 324387
This commit is contained in:
Alexey Bataev 2018-02-06 20:00:05 +00:00
parent f197f2d555
commit 4a3c78822d
3 changed files with 1127 additions and 64 deletions

View File

@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=i386-apple-macosx10.9.0 -mcpu=corei7-avx | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@ -6,6 +7,19 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; Function Attrs: nounwind ssp uwtable
define i32 @fn1() {
; CHECK-LABEL: @fn1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** @a, align 8
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64*> undef, i64* [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64*> [[TMP1]], i64* [[TMP0]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x i64*> [[TMP2]], <2 x i64> <i64 11, i64 56>
; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint <2 x i64*> [[TMP3]] to <2 x i64>
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 12
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64*> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
; CHECK-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* [[TMP6]], align 8
; CHECK-NEXT: ret i32 undef
;
entry:
%0 = load i64*, i64** @a, align 8
%add.ptr = getelementptr inbounds i64, i64* %0, i64 11
@ -16,14 +30,34 @@ entry:
%arrayidx2 = getelementptr inbounds i64, i64* %0, i64 12
store i64 %2, i64* %arrayidx2, align 8
ret i32 undef
; CHECK-LABEL: @fn1(
; CHECK: extractelement <2 x i64*>
; CHECK: ret
}
declare float @llvm.powi.f32(float, i32)
define void @fn2(i32* %a, i32* %b, float* %c) {
; CHECK-LABEL: @fn2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 1
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 1
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 2
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 2
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 3
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 3
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B]] to <4 x i32>*
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = sitofp <4 x i32> [[TMP4]] to <4 x float>
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = call <4 x float> @llvm.powi.v4f32(<4 x float> [[TMP5]], i32 [[TMP6]])
; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i32 1
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[C]], i32 2
; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[C]], i32 3
; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[C]] to <4 x float>*
; CHECK-NEXT: store <4 x float> [[TMP7]], <4 x float>* [[TMP8]], align 4
; CHECK-NEXT: ret void
;
entry:
%i0 = load i32, i32* %a, align 4
%i1 = load i32, i32* %b, align 4
@ -64,7 +98,4 @@ entry:
store float %call4, float* %arrayidx10, align 4
ret void
; CHECK-LABEL: @fn2(
; CHECK: extractelement <4 x i32>
; CHECK: ret
}

File diff suppressed because it is too large Load Diff

View File

@ -16,28 +16,58 @@
; SSE2: Adding cost 4 for reduction that starts with %7 = load i32, i32* %arrayidx.7, align 4 (It is a splitting reduction)
define i32 @test(i32* nocapture readonly %p) {
; CHECK-LABEL: @test(
; CHECK: [[BC:%.*]] = bitcast i32* %p to <8 x i32>*
; CHECK-NEXT: [[LD:%.*]] = load <8 x i32>, <8 x i32>* [[BC]], align 4
; CHECK: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[LD]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[LD]], [[RDX_SHUF]]
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3
; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4
; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5
; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6
; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[MUL_18:%.*]] = add i32 undef, undef
; CHECK-NEXT: [[MUL_29:%.*]] = add i32 undef, [[MUL_18]]
; CHECK-NEXT: [[MUL_310:%.*]] = add i32 undef, [[MUL_29]]
; CHECK-NEXT: [[MUL_411:%.*]] = add i32 undef, [[MUL_310]]
; CHECK-NEXT: [[MUL_512:%.*]] = add i32 undef, [[MUL_411]]
; CHECK-NEXT: [[MUL_613:%.*]] = add i32 undef, [[MUL_512]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP1]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
; CHECK: ret i32 [[TMP2]]
; CHECK-NEXT: [[MUL_714:%.*]] = add i32 undef, [[MUL_613]]
; CHECK-NEXT: ret i32 [[TMP2]]
;
; SSE2-LABEL: @test(
; SSE2: [[BC:%.*]] = bitcast i32* %p to <8 x i32>*
; SSE2-NEXT: [[LD:%.*]] = load <8 x i32>, <8 x i32>* [[BC]], align 4
; SSE2: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[LD]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; SSE2-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[LD]], [[RDX_SHUF]]
; SSE2-NEXT: entry:
; SSE2-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
; SSE2-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2
; SSE2-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3
; SSE2-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4
; SSE2-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5
; SSE2-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6
; SSE2-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
; SSE2-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
; SSE2-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
; SSE2-NEXT: [[MUL_18:%.*]] = add i32 undef, undef
; SSE2-NEXT: [[MUL_29:%.*]] = add i32 undef, [[MUL_18]]
; SSE2-NEXT: [[MUL_310:%.*]] = add i32 undef, [[MUL_29]]
; SSE2-NEXT: [[MUL_411:%.*]] = add i32 undef, [[MUL_310]]
; SSE2-NEXT: [[MUL_512:%.*]] = add i32 undef, [[MUL_411]]
; SSE2-NEXT: [[MUL_613:%.*]] = add i32 undef, [[MUL_512]]
; SSE2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; SSE2-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP1]], [[RDX_SHUF]]
; SSE2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; SSE2-NEXT: [[BIN_RDX2:%.*]] = add <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
; SSE2-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; SSE2-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
; SSE2-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
; SSE2: ret i32 [[TMP2]]
; SSE2-NEXT: [[MUL_714:%.*]] = add i32 undef, [[MUL_613]]
; SSE2-NEXT: ret i32 [[TMP2]]
;
entry:
%0 = load i32, i32* %p, align 4