From 35504cee3340f4790fe49dbf4c2a7b79742a12ac Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Sun, 10 May 2020 11:37:47 -0400
Subject: [PATCH] [InstCombine] canonicalize bitcast after insertelement into
 undef

We have a transform in the opposite direction only for the x86 MMX type,
Other types are not handled either way before this patch.

The motivating case from PR45748:
https://bugs.llvm.org/show_bug.cgi?id=45748
...is the last test diff. In that example, we are triggering an existing
bitcast transform, so we reduce the number of casts, and that should give
us the ideal x86 codegen.

Differential Revision: https://reviews.llvm.org/D79171
---
 .../InstCombine/InstCombineVectorOps.cpp      | 19 +++++++++-
 .../InstCombine/bitcast-vec-canon.ll          | 38 +++++++++++++++----
 2 files changed, 48 insertions(+), 9 deletions(-)

diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index b2dc7259e13..d7c6db2051f 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1050,9 +1050,26 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
           VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
     return replaceInstUsesWith(IE, V);
 
+  // If the scalar is bitcast and inserted into undef, do the insert in the
+  // source type followed by bitcast.
+  // TODO: Generalize for insert into any constant, not just undef?
+  Value *ScalarSrc;
+  if (match(VecOp, m_Undef()) &&
+      match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
+      (ScalarSrc->getType()->isIntegerTy() ||
+       ScalarSrc->getType()->isFloatingPointTy())) {
+    // inselt undef, (bitcast ScalarSrc), IdxOp -->
+    //   bitcast (inselt undef, ScalarSrc, IdxOp)
+    Type *ScalarTy = ScalarSrc->getType();
+    Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
+    UndefValue *NewUndef = UndefValue::get(VecTy);
+    Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
+    return new BitCastInst(NewInsElt, IE.getType());
+  }
+
   // If the vector and scalar are both bitcast from the same element type, do
   // the insert in that source type followed by bitcast.
-  Value *VecSrc, *ScalarSrc;
+  Value *VecSrc;
   if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
       match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
       (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
diff --git a/test/Transforms/InstCombine/bitcast-vec-canon.ll b/test/Transforms/InstCombine/bitcast-vec-canon.ll
index 73aa226bd58..e90f60c5f67 100644
--- a/test/Transforms/InstCombine/bitcast-vec-canon.ll
+++ b/test/Transforms/InstCombine/bitcast-vec-canon.ll
@@ -70,10 +70,12 @@ entry:
   ret double %1
 }
 
+; FP source is ok.
+
 define <3 x i64> @bitcast_inselt_undef(double %x, i32 %idx) {
 ; CHECK-LABEL: @bitcast_inselt_undef(
-; CHECK-NEXT:    [[XB:%.*]] = bitcast double [[X:%.*]] to i64
-; CHECK-NEXT:    [[I:%.*]] = insertelement <3 x i64> undef, i64 [[XB]], i32 [[IDX:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <3 x double> undef, double [[X:%.*]], i32 [[IDX:%.*]]
+; CHECK-NEXT:    [[I:%.*]] = bitcast <3 x double> [[TMP1]] to <3 x i64>
 ; CHECK-NEXT:    ret <3 x i64> [[I]]
 ;
   %xb = bitcast double %x to i64
@@ -81,10 +83,12 @@ define <3 x i64> @bitcast_inselt_undef(double %x, i32 %idx) {
   ret <3 x i64> %i
 }
 
+; Integer source is ok; index is anything.
+
 define <3 x float> @bitcast_inselt_undef_fp(i32 %x, i567 %idx) {
 ; CHECK-LABEL: @bitcast_inselt_undef_fp(
-; CHECK-NEXT:    [[XB:%.*]] = bitcast i32 [[X:%.*]] to float
-; CHECK-NEXT:    [[I:%.*]] = insertelement <3 x float> undef, float [[XB]], i567 [[IDX:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <3 x i32> undef, i32 [[X:%.*]], i567 [[IDX:%.*]]
+; CHECK-NEXT:    [[I:%.*]] = bitcast <3 x i32> [[TMP1]] to <3 x float>
 ; CHECK-NEXT:    ret <3 x float> [[I]]
 ;
   %xb = bitcast i32 %x to float
@@ -92,8 +96,21 @@ define <3 x float> @bitcast_inselt_undef_fp(i32 %x, i567 %idx) {
   ret <3 x float> %i
 }
 
+define <vscale x 3 x float> @bitcast_inselt_undef_vscale(i32 %x, i567 %idx) {
+; CHECK-LABEL: @bitcast_inselt_undef_vscale(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <vscale x 3 x i32> undef, i32 [[X:%.*]], i567 [[IDX:%.*]]
+; CHECK-NEXT:    [[I:%.*]] = bitcast <vscale x 3 x i32> [[TMP1]] to <vscale x 3 x float>
+; CHECK-NEXT:    ret <vscale x 3 x float> [[I]]
+;
+  %xb = bitcast i32 %x to float
+  %i = insertelement <vscale x 3 x float> undef, float %xb, i567 %idx
+  ret <vscale x 3 x float> %i
+}
+
 declare void @use(i64)
 
+; Negative test - extra use prevents canonicalization
+
 define <3 x i64> @bitcast_inselt_undef_extra_use(double %x, i32 %idx) {
 ; CHECK-LABEL: @bitcast_inselt_undef_extra_use(
 ; CHECK-NEXT:    [[XB:%.*]] = bitcast double [[X:%.*]] to i64
@@ -107,6 +124,8 @@ define <3 x i64> @bitcast_inselt_undef_extra_use(double %x, i32 %idx) {
   ret <3 x i64> %i
 }
 
+; Negative test - source type must be scalar
+
 define <3 x i64> @bitcast_inselt_undef_vec_src(<2 x i32> %x, i32 %idx) {
 ; CHECK-LABEL: @bitcast_inselt_undef_vec_src(
 ; CHECK-NEXT:    [[XB:%.*]] = bitcast <2 x i32> [[X:%.*]] to i64
@@ -118,6 +137,8 @@ define <3 x i64> @bitcast_inselt_undef_vec_src(<2 x i32> %x, i32 %idx) {
   ret <3 x i64> %i
 }
 
+; Negative test - source type must be scalar
+
 define <3 x i64> @bitcast_inselt_undef_from_mmx(x86_mmx %x, i32 %idx) {
 ; CHECK-LABEL: @bitcast_inselt_undef_from_mmx(
 ; CHECK-NEXT:    [[XB:%.*]] = bitcast x86_mmx [[X:%.*]] to i64
@@ -129,12 +150,13 @@ define <3 x i64> @bitcast_inselt_undef_from_mmx(x86_mmx %x, i32 %idx) {
   ret <3 x i64> %i
 }
 
+; Reduce number of casts
+
 define <2 x i64> @PR45748(double %x, double %y) {
 ; CHECK-LABEL: @PR45748(
-; CHECK-NEXT:    [[XB:%.*]] = bitcast double [[X:%.*]] to i64
-; CHECK-NEXT:    [[I0:%.*]] = insertelement <2 x i64> undef, i64 [[XB]], i32 0
-; CHECK-NEXT:    [[YB:%.*]] = bitcast double [[Y:%.*]] to i64
-; CHECK-NEXT:    [[I1:%.*]] = insertelement <2 x i64> [[I0]], i64 [[YB]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[Y:%.*]], i32 1
+; CHECK-NEXT:    [[I1:%.*]] = bitcast <2 x double> [[TMP2]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[I1]]
 ;
   %xb = bitcast double %x to i64