[SLP] Update test checks, NFC.

llvm-svn: 324387
2024-10-19 11:02:59 +02:00 · 2018-02-06 20:00:05 +00:00 · 2018-02-06 20:00:05 +00:00 · 4a3c78822d
commit 4a3c78822d
parent f197f2d555
3 changed files with 1127 additions and 64 deletions
--- a/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
+++ b/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=i386-apple-macosx10.9.0 -mcpu=corei7-avx | FileCheck %s

 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@ -6,6 +7,19 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

 ; Function Attrs: nounwind ssp uwtable
 define i32 @fn1() {
+; CHECK-LABEL: @fn1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64*, i64** @a, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64*> undef, i64* [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64*> [[TMP1]], i64* [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i64, <2 x i64*> [[TMP2]], <2 x i64> <i64 11, i64 56>
+; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint <2 x i64*> [[TMP3]] to <2 x i64>
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 12
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64*> [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
+; CHECK-NEXT:    store <2 x i64> [[TMP4]], <2 x i64>* [[TMP6]], align 8
+; CHECK-NEXT:    ret i32 undef
+;
 entry:
  %0 = load i64*, i64** @a, align 8
  %add.ptr = getelementptr inbounds i64, i64* %0, i64 11
@ -16,14 +30,34 @@ entry:
  %arrayidx2 = getelementptr inbounds i64, i64* %0, i64 12
  store i64 %2, i64* %arrayidx2, align 8
  ret i32 undef
-; CHECK-LABEL: @fn1(
-; CHECK: extractelement <2 x i64*>
-; CHECK: ret
 }


 declare float @llvm.powi.f32(float, i32)
 define void @fn2(i32* %a, i32* %b, float* %c) {
+; CHECK-LABEL: @fn2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 1
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 1
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 2
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 2
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 3
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 3
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[B]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = sitofp <4 x i32> [[TMP4]] to <4 x float>
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x float> @llvm.powi.v4f32(<4 x float> [[TMP5]], i32 [[TMP6]])
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i32 1
+; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[C]], i32 2
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[C]], i32 3
+; CHECK-NEXT:    [[TMP8:%.*]] = bitcast float* [[C]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[TMP7]], <4 x float>* [[TMP8]], align 4
+; CHECK-NEXT:    ret void
+;
 entry:
  %i0 = load i32, i32* %a, align 4
  %i1 = load i32, i32* %b, align 4
@ -64,7 +98,4 @@ entry:
  store float %call4, float* %arrayidx10, align 4
  ret void

-; CHECK-LABEL: @fn2(
-; CHECK: extractelement <4 x i32>
-; CHECK: ret
 }
--- a/test/Transforms/SLPVectorizer/X86/horizontal.ll
+++ b/test/Transforms/SLPVectorizer/X86/horizontal.ll
--- a/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
+++ b/test/Transforms/SLPVectorizer/X86/reduction_unrolled.ll
@ -16,28 +16,58 @@
 ; SSE2:  Adding cost 4 for reduction that starts with   %7 = load i32, i32* %arrayidx.7, align 4 (It is a splitting reduction)
 define i32 @test(i32* nocapture readonly %p) {
 ; CHECK-LABEL: @test(
-; CHECK:         [[BC:%.*]] = bitcast i32* %p to <8 x i32>*
-; CHECK-NEXT:    [[LD:%.*]] = load <8 x i32>, <8 x i32>* [[BC]], align 4
-; CHECK:         [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[LD]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <8 x i32> [[LD]], [[RDX_SHUF]]
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3
+; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5
+; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6
+; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[MUL_18:%.*]] = add i32 undef, undef
+; CHECK-NEXT:    [[MUL_29:%.*]] = add i32 undef, [[MUL_18]]
+; CHECK-NEXT:    [[MUL_310:%.*]] = add i32 undef, [[MUL_29]]
+; CHECK-NEXT:    [[MUL_411:%.*]] = add i32 undef, [[MUL_310]]
+; CHECK-NEXT:    [[MUL_512:%.*]] = add i32 undef, [[MUL_411]]
+; CHECK-NEXT:    [[MUL_613:%.*]] = add i32 undef, [[MUL_512]]
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <8 x i32> [[TMP1]], [[RDX_SHUF]]
 ; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = add <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
 ; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
-; CHECK:         ret i32 [[TMP2]]
+; CHECK-NEXT:    [[MUL_714:%.*]] = add i32 undef, [[MUL_613]]
+; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
 ; SSE2-LABEL: @test(
-; SSE2:         [[BC:%.*]] = bitcast i32* %p to <8 x i32>*
-; SSE2-NEXT:    [[LD:%.*]] = load <8 x i32>, <8 x i32>* [[BC]], align 4
-; SSE2:         [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[LD]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
-; SSE2-NEXT:    [[BIN_RDX:%.*]] = add <8 x i32> [[LD]], [[RDX_SHUF]]
+; SSE2-NEXT:  entry:
+; SSE2-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; SSE2-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2
+; SSE2-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3
+; SSE2-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4
+; SSE2-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5
+; SSE2-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6
+; SSE2-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
+; SSE2-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
+; SSE2-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
+; SSE2-NEXT:    [[MUL_18:%.*]] = add i32 undef, undef
+; SSE2-NEXT:    [[MUL_29:%.*]] = add i32 undef, [[MUL_18]]
+; SSE2-NEXT:    [[MUL_310:%.*]] = add i32 undef, [[MUL_29]]
+; SSE2-NEXT:    [[MUL_411:%.*]] = add i32 undef, [[MUL_310]]
+; SSE2-NEXT:    [[MUL_512:%.*]] = add i32 undef, [[MUL_411]]
+; SSE2-NEXT:    [[MUL_613:%.*]] = add i32 undef, [[MUL_512]]
+; SSE2-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+; SSE2-NEXT:    [[BIN_RDX:%.*]] = add <8 x i32> [[TMP1]], [[RDX_SHUF]]
 ; SSE2-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; SSE2-NEXT:    [[BIN_RDX2:%.*]] = add <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
 ; SSE2-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; SSE2-NEXT:    [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
 ; SSE2-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
-; SSE2:         ret i32 [[TMP2]]
+; SSE2-NEXT:    [[MUL_714:%.*]] = add i32 undef, [[MUL_613]]
+; SSE2-NEXT:    ret i32 [[TMP2]]
 ;
 entry:
  %0 = load i32, i32* %p, align 4