From 0ea8f6b013af5c127f3b33cc0826c39abf339559 Mon Sep 17 00:00:00 2001
From: Roman Lebedev
Date: Fri, 3 Jul 2020 01:53:13 +0300
Subject: [PATCH] [NFC][Scalarizer] Add some insertelement/extractelement tests

See D82961/D82970/D83101/D83102.
---
 test/Transforms/Scalarizer/basic.ll           | 20 ----------
 .../Scalarizer/constant-extractelement.ll     | 29 ++++++++++++++
 .../Scalarizer/constant-insertelement.ll      | 27 +++++++++++++
 .../Scalarizer/variable-extractelement.ll     | 38 ++++++++++++++++++
 .../Scalarizer/variable-insertelement.ll      | 40 +++++++++++++++++++
 5 files changed, 134 insertions(+), 20 deletions(-)
 create mode 100644 test/Transforms/Scalarizer/constant-extractelement.ll
 create mode 100644 test/Transforms/Scalarizer/constant-insertelement.ll
 create mode 100644 test/Transforms/Scalarizer/variable-extractelement.ll
 create mode 100644 test/Transforms/Scalarizer/variable-insertelement.ll

diff --git a/test/Transforms/Scalarizer/basic.ll b/test/Transforms/Scalarizer/basic.ll
index ba6aa933aa8..2c82fd9cc3a 100644
--- a/test/Transforms/Scalarizer/basic.ll
+++ b/test/Transforms/Scalarizer/basic.ll
@@ -363,26 +363,6 @@ define void @f11(<32 x i1> *%dest, <32 x i1> *%src0) {
   ret void
 }
 
-; Test that variable inserts aren't scalarized.
-define void @f12(<4 x i32> *%dest, <4 x i32> *%src, i32 %index) {
-; CHECK: @f12(
-; CHECK: %val1 = insertelement <4 x i32> %val0, i32 1, i32 %index
-; CHECK-DAG: %val1.i0 = extractelement <4 x i32> %val1, i32 0
-; CHECK-DAG: %val1.i1 = extractelement <4 x i32> %val1, i32 1
-; CHECK-DAG: %val1.i2 = extractelement <4 x i32> %val1, i32 2
-; CHECK-DAG: %val1.i3 = extractelement <4 x i32> %val1, i32 3
-; CHECK-DAG: %val2.i0 = shl i32 1, %val1.i0
-; CHECK-DAG: %val2.i1 = shl i32 2, %val1.i1
-; CHECK-DAG: %val2.i2 = shl i32 3, %val1.i2
-; CHECK-DAG: %val2.i3 = shl i32 4, %val1.i3
-; CHECK: ret void
-  %val0 = load <4 x i32> , <4 x i32> *%src
-  %val1 = insertelement <4 x i32> %val0, i32 1, i32 %index
-  %val2 = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %val1
-  store <4 x i32> %val2, <4 x i32> *%dest
-  ret void
-}
-
 ; Test vector GEPs with more than one index.
 define void @f13(<4 x float *> *%dest, <4 x [4 x float] *> %ptr, <4 x i32> %i,
                  float *%other) {
diff --git a/test/Transforms/Scalarizer/constant-extractelement.ll b/test/Transforms/Scalarizer/constant-extractelement.ll
new file mode 100644
index 00000000000..031b729d43b
--- /dev/null
+++ b/test/Transforms/Scalarizer/constant-extractelement.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -scalarizer -dce -S | FileCheck --check-prefixes=ALL %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Test that constant extracts are nicely scalarized
+define i32 @f1(<4 x i32> *%src, i32 %index) {
+; ALL-LABEL: @f1(
+; ALL-NEXT:    [[VAL0:%.*]] = load <4 x i32>, <4 x i32>* [[SRC:%.*]], align 16
+; ALL-NEXT:    [[VAL0_I0:%.*]] = extractelement <4 x i32> [[VAL0]], i32 0
+; ALL-NEXT:    [[VAL1_I0:%.*]] = shl i32 1, [[VAL0_I0]]
+; ALL-NEXT:    [[VAL0_I1:%.*]] = extractelement <4 x i32> [[VAL0]], i32 1
+; ALL-NEXT:    [[VAL1_I1:%.*]] = shl i32 2, [[VAL0_I1]]
+; ALL-NEXT:    [[VAL0_I2:%.*]] = extractelement <4 x i32> [[VAL0]], i32 2
+; ALL-NEXT:    [[VAL1_I2:%.*]] = shl i32 3, [[VAL0_I2]]
+; ALL-NEXT:    [[VAL0_I3:%.*]] = extractelement <4 x i32> [[VAL0]], i32 3
+; ALL-NEXT:    [[VAL1_I3:%.*]] = shl i32 4, [[VAL0_I3]]
+; ALL-NEXT:    [[VAL1_UPTO0:%.*]] = insertelement <4 x i32> undef, i32 [[VAL1_I0]], i32 0
+; ALL-NEXT:    [[VAL1_UPTO1:%.*]] = insertelement <4 x i32> [[VAL1_UPTO0]], i32 [[VAL1_I1]], i32 1
+; ALL-NEXT:    [[VAL1_UPTO2:%.*]] = insertelement <4 x i32> [[VAL1_UPTO1]], i32 [[VAL1_I2]], i32 2
+; ALL-NEXT:    [[VAL1:%.*]] = insertelement <4 x i32> [[VAL1_UPTO2]], i32 [[VAL1_I3]], i32 3
+; ALL-NEXT:    [[VAL2:%.*]] = extractelement <4 x i32> [[VAL1]], i32 3
+; ALL-NEXT:    ret i32 [[VAL2]]
+;
+  %val0 = load <4 x i32> , <4 x i32> *%src
+  %val1 = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %val0
+  %val2 = extractelement <4 x i32> %val1, i32 3
+  ret i32 %val2
+}
diff --git a/test/Transforms/Scalarizer/constant-insertelement.ll b/test/Transforms/Scalarizer/constant-insertelement.ll
new file mode 100644
index 00000000000..3877feb8a0e
--- /dev/null
+++ b/test/Transforms/Scalarizer/constant-insertelement.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -scalarizer -dce -S | FileCheck --check-prefixes=ALL %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Test that constant inserts are nicely scalarized
+define <4 x i32> @f1(<4 x i32> *%src, i32 %repl, i32 %index) {
+; ALL-LABEL: @f1(
+; ALL-NEXT:    [[VAL0:%.*]] = load <4 x i32>, <4 x i32>* [[SRC:%.*]], align 16
+; ALL-NEXT:    [[VAL0_I0:%.*]] = extractelement <4 x i32> [[VAL0]], i32 0
+; ALL-NEXT:    [[VAL2_I0:%.*]] = shl i32 1, [[VAL0_I0]]
+; ALL-NEXT:    [[VAL0_I1:%.*]] = extractelement <4 x i32> [[VAL0]], i32 1
+; ALL-NEXT:    [[VAL2_I1:%.*]] = shl i32 2, [[VAL0_I1]]
+; ALL-NEXT:    [[VAL0_I2:%.*]] = extractelement <4 x i32> [[VAL0]], i32 2
+; ALL-NEXT:    [[VAL2_I2:%.*]] = shl i32 3, [[VAL0_I2]]
+; ALL-NEXT:    [[VAL2_I3:%.*]] = shl i32 4, [[REPL:%.*]]
+; ALL-NEXT:    [[VAL2_UPTO0:%.*]] = insertelement <4 x i32> undef, i32 [[VAL2_I0]], i32 0
+; ALL-NEXT:    [[VAL2_UPTO1:%.*]] = insertelement <4 x i32> [[VAL2_UPTO0]], i32 [[VAL2_I1]], i32 1
+; ALL-NEXT:    [[VAL2_UPTO2:%.*]] = insertelement <4 x i32> [[VAL2_UPTO1]], i32 [[VAL2_I2]], i32 2
+; ALL-NEXT:    [[VAL2:%.*]] = insertelement <4 x i32> [[VAL2_UPTO2]], i32 [[VAL2_I3]], i32 3
+; ALL-NEXT:    ret <4 x i32> [[VAL2]]
+;
+  %val0 = load <4 x i32> , <4 x i32> *%src
+  %val1 = insertelement <4 x i32> %val0, i32 %repl, i32 3
+  %val2 = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %val1
+  ret <4 x i32> %val2
+}
diff --git a/test/Transforms/Scalarizer/variable-extractelement.ll b/test/Transforms/Scalarizer/variable-extractelement.ll
new file mode 100644
index 00000000000..805c3ff5d64
--- /dev/null
+++ b/test/Transforms/Scalarizer/variable-extractelement.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -scalarizer -dce -S | FileCheck --check-prefixes=ALL %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Test that variable extracts scalarized.
+define i32 @f1(<4 x i32> %src, i32 %index) {
+; ALL-LABEL: @f1(
+; ALL-NEXT:    [[RES:%.*]] = extractelement <4 x i32> [[SRC:%.*]], i32 [[INDEX:%.*]]
+; ALL-NEXT:    ret i32 [[RES]]
+;
+  %res = extractelement <4 x i32> %src, i32 %index
+  ret i32 %res
+}
+
+define i32 @f2(<4 x i32> *%src, i32 %index) {
+; ALL-LABEL: @f2(
+; ALL-NEXT:    [[VAL0:%.*]] = load <4 x i32>, <4 x i32>* [[SRC:%.*]], align 16
+; ALL-NEXT:    [[VAL0_I0:%.*]] = extractelement <4 x i32> [[VAL0]], i32 0
+; ALL-NEXT:    [[VAL1_I0:%.*]] = shl i32 1, [[VAL0_I0]]
+; ALL-NEXT:    [[VAL0_I1:%.*]] = extractelement <4 x i32> [[VAL0]], i32 1
+; ALL-NEXT:    [[VAL1_I1:%.*]] = shl i32 2, [[VAL0_I1]]
+; ALL-NEXT:    [[VAL0_I2:%.*]] = extractelement <4 x i32> [[VAL0]], i32 2
+; ALL-NEXT:    [[VAL1_I2:%.*]] = shl i32 3, [[VAL0_I2]]
+; ALL-NEXT:    [[VAL0_I3:%.*]] = extractelement <4 x i32> [[VAL0]], i32 3
+; ALL-NEXT:    [[VAL1_I3:%.*]] = shl i32 4, [[VAL0_I3]]
+; ALL-NEXT:    [[VAL1_UPTO0:%.*]] = insertelement <4 x i32> undef, i32 [[VAL1_I0]], i32 0
+; ALL-NEXT:    [[VAL1_UPTO1:%.*]] = insertelement <4 x i32> [[VAL1_UPTO0]], i32 [[VAL1_I1]], i32 1
+; ALL-NEXT:    [[VAL1_UPTO2:%.*]] = insertelement <4 x i32> [[VAL1_UPTO1]], i32 [[VAL1_I2]], i32 2
+; ALL-NEXT:    [[VAL1:%.*]] = insertelement <4 x i32> [[VAL1_UPTO2]], i32 [[VAL1_I3]], i32 3
+; ALL-NEXT:    [[VAL2:%.*]] = extractelement <4 x i32> [[VAL1]], i32 [[INDEX:%.*]]
+; ALL-NEXT:    ret i32 [[VAL2]]
+;
+  %val0 = load <4 x i32> , <4 x i32> *%src
+  %val1 = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %val0
+  %val2 = extractelement <4 x i32> %val1, i32 %index
+  ret i32 %val2
+}
diff --git a/test/Transforms/Scalarizer/variable-insertelement.ll b/test/Transforms/Scalarizer/variable-insertelement.ll
new file mode 100644
index 00000000000..3240f7acf4c
--- /dev/null
+++ b/test/Transforms/Scalarizer/variable-insertelement.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -scalarizer -dce -S | FileCheck --check-prefixes=ALL %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Test that variable inserts are scalarized.
+define <4 x i32> @f1(<4 x i32> %src, i32 %val, i32 %index) {
+; ALL-LABEL: @f1(
+; ALL-NEXT:    [[RES:%.*]] = insertelement <4 x i32> [[SRC:%.*]], i32 [[VAL:%.*]], i32 [[INDEX:%.*]]
+; ALL-NEXT:    ret <4 x i32> [[RES]]
+;
+  %res = insertelement <4 x i32> %src, i32 %val, i32 %index
+  ret <4 x i32> %res
+}
+
+define void @f2(<4 x i32> *%dest, <4 x i32> *%src, i32 %index) {
+; ALL-LABEL: @f2(
+; ALL-NEXT:    [[VAL0:%.*]] = load <4 x i32>, <4 x i32>* [[SRC:%.*]], align 16
+; ALL-NEXT:    [[VAL1:%.*]] = insertelement <4 x i32> [[VAL0]], i32 1, i32 [[INDEX:%.*]]
+; ALL-NEXT:    [[VAL1_I0:%.*]] = extractelement <4 x i32> [[VAL1]], i32 0
+; ALL-NEXT:    [[VAL2_I0:%.*]] = shl i32 1, [[VAL1_I0]]
+; ALL-NEXT:    [[VAL1_I1:%.*]] = extractelement <4 x i32> [[VAL1]], i32 1
+; ALL-NEXT:    [[VAL2_I1:%.*]] = shl i32 2, [[VAL1_I1]]
+; ALL-NEXT:    [[VAL1_I2:%.*]] = extractelement <4 x i32> [[VAL1]], i32 2
+; ALL-NEXT:    [[VAL2_I2:%.*]] = shl i32 3, [[VAL1_I2]]
+; ALL-NEXT:    [[VAL1_I3:%.*]] = extractelement <4 x i32> [[VAL1]], i32 3
+; ALL-NEXT:    [[VAL2_I3:%.*]] = shl i32 4, [[VAL1_I3]]
+; ALL-NEXT:    [[VAL2_UPTO0:%.*]] = insertelement <4 x i32> undef, i32 [[VAL2_I0]], i32 0
+; ALL-NEXT:    [[VAL2_UPTO1:%.*]] = insertelement <4 x i32> [[VAL2_UPTO0]], i32 [[VAL2_I1]], i32 1
+; ALL-NEXT:    [[VAL2_UPTO2:%.*]] = insertelement <4 x i32> [[VAL2_UPTO1]], i32 [[VAL2_I2]], i32 2
+; ALL-NEXT:    [[VAL2:%.*]] = insertelement <4 x i32> [[VAL2_UPTO2]], i32 [[VAL2_I3]], i32 3
+; ALL-NEXT:    store <4 x i32> [[VAL2]], <4 x i32>* [[DEST:%.*]], align 16
+; ALL-NEXT:    ret void
+;
+  %val0 = load <4 x i32> , <4 x i32> *%src
+  %val1 = insertelement <4 x i32> %val0, i32 1, i32 %index
+  %val2 = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %val1
+  store <4 x i32> %val2, <4 x i32> *%dest
+  ret void
+}