[PhaseOrdering] add test for missing vector/CSE transforms (PR45015); NFC

2025-02-01 05:01:59 +01:00 · 2020-02-25 08:54:58 -05:00 · 2020-02-25 08:54:58 -05:00 · 2b34c4298e
commit 2b34c4298e
parent 82c3c898a1
2 changed files with 40 additions and 0 deletions
--- a/test/Transforms/PhaseOrdering/X86/addsub.ll
+++ b/test/Transforms/PhaseOrdering/X86/addsub.ll
@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mattr=avx | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; TODO: Ideally, this should reach the backend with 1 fsub, 1 fadd, and 1 shuffle.
+; That may require some coordination between VectorCombine, SLP, and other passes.
+; The end goal is to get a single "vaddsubps" instruction for x86 with AVX.
+ 
+define <4 x float> @PR45015(<4 x float> %arg, <4 x float> %arg1) { ; scalarized add/sub: lanes 0 and 2 compute arg - arg1, lanes 1 and 3 compute arg + arg1
+; CHECK-LABEL: @PR45015(
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub <4 x float> [[ARG:%.*]], [[ARG1:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd <4 x float> [[ARG]], [[ARG1]]
+; CHECK-NEXT:    [[T8:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP3:%.*]] = fsub <4 x float> [[ARG]], [[ARG1]]
+; CHECK-NEXT:    [[T12:%.*]] = shufflevector <4 x float> [[T8]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <4 x float> [[ARG]], [[ARG1]]
+; CHECK-NEXT:    [[T16:%.*]] = shufflevector <4 x float> [[T12]], <4 x float> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT:    ret <4 x float> [[T16]]
+;
+  %t = extractelement <4 x float> %arg, i32 0 ; lane 0 of %arg
+  %t2 = extractelement <4 x float> %arg1, i32 0 ; lane 0 of %arg1
+  %t3 = fsub float %t, %t2 ; lane 0 result: subtract
+  %t4 = insertelement <4 x float> undef, float %t3, i32 0 ; begin the result vector from undef, filling lane 0
+  %t5 = extractelement <4 x float> %arg, i32 1 ; lane 1 of %arg
+  %t6 = extractelement <4 x float> %arg1, i32 1 ; lane 1 of %arg1
+  %t7 = fadd float %t5, %t6 ; lane 1 result: add
+  %t8 = insertelement <4 x float> %t4, float %t7, i32 1 ; lanes 0-1 now filled
+  %t9 = extractelement <4 x float> %arg, i32 2 ; lane 2 of %arg
+  %t10 = extractelement <4 x float> %arg1, i32 2 ; lane 2 of %arg1
+  %t11 = fsub float %t9, %t10 ; lane 2 result: subtract
+  %t12 = insertelement <4 x float> %t8, float %t11, i32 2 ; lanes 0-2 now filled
+  %t13 = extractelement <4 x float> %arg, i32 3 ; lane 3 of %arg
+  %t14 = extractelement <4 x float> %arg1, i32 3 ; lane 3 of %arg1
+  %t15 = fadd float %t13, %t14 ; lane 3 result: add
+  %t16 = insertelement <4 x float> %t12, float %t15, i32 3 ; all four lanes filled
+  ret <4 x float> %t16
+}
--- a/test/Transforms/PhaseOrdering/X86/lit.local.cfg
+++ b/test/Transforms/PhaseOrdering/X86/lit.local.cfg
@ -0,0 +1,2 @@
+if 'X86' not in config.root.targets:  # 'X86' absent from the configured targets: mark this directory's tests unsupported
+    config.unsupported = True