[InstCombine][X86] Regenerate SSE combine tests as part of setup for D17490

Regenerated with utils/update_test_checks.py

llvm-svn: 266731
2025-01-31 20:51:52 +01:00 · 2016-04-19 12:56:46 +00:00 · 2016-04-19 12:56:46 +00:00 · 501f5ba3b6
commit 501f5ba3b6
parent e1392bc92c
6 changed files with 581 additions and 468 deletions
--- a/test/Transforms/InstCombine/x86-f16c.ll
+++ b/test/Transforms/InstCombine/x86-f16c.ll
@ -1,3 +1,4 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>)
@ -9,9 +10,10 @@ declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>)
 ; Only bottom 4 elements required.
 define <4 x float> @demand_vcvtph2ps_128(<8 x i16> %A) {
-; CHECK-LABEL: @demand_vcvtph2ps_128
+; CHECK-LABEL: @demand_vcvtph2ps_128(
-; CHECK-NEXT: %1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %A)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %A)
-; CHECK-NEXT: ret <4 x float> %1
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
  %1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %1)
  ret <4 x float> %2
@ -19,10 +21,11 @@ define <4 x float> @demand_vcvtph2ps_128(<8 x i16> %A) {
 ; All 8 elements required.
 define <8 x float> @demand_vcvtph2ps_256(<8 x i16> %A) {
-; CHECK-LABEL: @demand_vcvtph2ps_256
+; CHECK-LABEL: @demand_vcvtph2ps_256(
-; CHECK-NEXT: %1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: %2 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> [[TMP1]])
-; CHECK-NEXT: ret <8 x float> %2
+; CHECK-NEXT:    ret <8 x float> [[TMP2]]
 ;
  %1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1)
  ret <8 x float> %2
@ -33,29 +36,33 @@ define <8 x float> @demand_vcvtph2ps_256(<8 x i16> %A) {
 ;
 define <4 x float> @fold_vcvtph2ps_128() {
-; CHECK-LABEL: @fold_vcvtph2ps_128
+; CHECK-LABEL: @fold_vcvtph2ps_128(
-; CHECK-NEXT: ret <4 x float> <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float -0.000000e+00>
+; CHECK-NEXT:    ret <4 x float> <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float -0.000000e+00>
 ;
  %1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> <i16 0, i16 14336, i16 15360, i16 32768, i16 16384, i16 31743, i16 48128, i16 49152>)
  ret <4 x float> %1
 }
 define <8 x float> @fold_vcvtph2ps_256() {
-; CHECK-LABEL: @fold_vcvtph2ps_256
+; CHECK-LABEL: @fold_vcvtph2ps_256(
-; CHECK-NEXT: ret <8 x float> <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float -0.000000e+00, float 2.000000e+00, float 6.550400e+04, float -1.000000e+00, float -2.000000e+00>
+; CHECK-NEXT:    ret <8 x float> <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float -0.000000e+00, float 2.000000e+00, float 6.550400e+04, float -1.000000e+00, float -2.000000e+00>
 ;
  %1 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> <i16 0, i16 14336, i16 15360, i16 32768, i16 16384, i16 31743, i16 48128, i16 49152>)
  ret <8 x float> %1
 }
 define <4 x float> @fold_vcvtph2ps_128_zero() {
-; CHECK-LABEL: @fold_vcvtph2ps_128_zero
+; CHECK-LABEL: @fold_vcvtph2ps_128_zero(
-; CHECK-NEXT: ret <4 x float> zeroinitializer
+; CHECK-NEXT:    ret <4 x float> zeroinitializer
 ;
  %1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
  ret <4 x float> %1
 }
 define <8 x float> @fold_vcvtph2ps_256_zero() {
-; CHECK-LABEL: @fold_vcvtph2ps_256_zero
+; CHECK-LABEL: @fold_vcvtph2ps_256_zero(
-; CHECK-NEXT: ret <8 x float> zeroinitializer
+; CHECK-NEXT:    ret <8 x float> zeroinitializer
 ;
  %1 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
  ret <8 x float> %1
 }
--- a/test/Transforms/InstCombine/x86-sse.ll
+++ b/test/Transforms/InstCombine/x86-sse.ll
@ -1,15 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 define float @test_rcp_ss_0(float %a) {
-; CHECK-LABEL: @test_rcp_ss_0
+; CHECK-LABEL: @test_rcp_ss_0(
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2
-; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3
-; CHECK-NEXT: %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> [[TMP4]])
-; CHECK-NEXT: %6 = extractelement <4 x float> %5, i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP5]], i32 0
-; CHECK-NEXT: ret float %6
+; CHECK-NEXT:    ret float [[TMP6]]
 ;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@ -20,14 +22,15 @@ define float @test_rcp_ss_0(float %a) {
 }
 define float @test_sqrt_ss_0(float %a) {
-; CHECK-LABEL: @test_sqrt_ss_0
+; CHECK-LABEL: @test_sqrt_ss_0(
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2
-; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3
-; CHECK-NEXT: %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> [[TMP4]])
-; CHECK-NEXT: %6 = extractelement <4 x float> %5, i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP5]], i32 0
-; CHECK-NEXT: ret float %6
+; CHECK-NEXT:    ret float [[TMP6]]
 ;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@ -38,14 +41,15 @@ define float @test_sqrt_ss_0(float %a) {
 }
 define float @test_rsqrt_ss_0(float %a) {
-; CHECK-LABEL: @test_rsqrt_ss_0
+; CHECK-LABEL: @test_rsqrt_ss_0(
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2
-; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3
-; CHECK-NEXT: %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> [[TMP4]])
-; CHECK-NEXT: %6 = extractelement <4 x float> %5, i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP5]], i32 0
-; CHECK-NEXT: ret float %6
+; CHECK-NEXT:    ret float [[TMP6]]
 ;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@ -56,18 +60,19 @@ define float @test_rsqrt_ss_0(float %a) {
 }
 define float @test_add_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_add_ss_0
+; CHECK-LABEL: @test_add_ss_0(
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2
-; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3
-; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float 4.000000e+00, i32 1
-; CHECK-NEXT: %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float 5.000000e+00, i32 2
-; CHECK-NEXT: %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float 6.000000e+00, i32 3
-; CHECK-NEXT: %9 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %8)
+; CHECK-NEXT:    [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> [[TMP4]], <4 x float> [[TMP8]])
-; CHECK-NEXT: %r = extractelement <4 x float> %9, i32 0
+; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP9]], i32 0
-; CHECK-NEXT: ret float %r
+; CHECK-NEXT:    ret float [[R]]
 ;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@ -82,9 +87,10 @@ define float @test_add_ss_0(float %a, float %b) {
 }
 define float @test_sub_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_sub_ss_0
+; CHECK-LABEL: @test_sub_ss_0(
-; CHECK-NEXT: %1 = fsub float %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub float %a, %b
-; CHECK-NEXT: ret float %1
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@ -99,9 +105,10 @@ define float @test_sub_ss_0(float %a, float %b) {
 }
 define float @test_mul_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_mul_ss_0
+; CHECK-LABEL: @test_mul_ss_0(
-; CHECK-NEXT: %1 = fmul float %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul float %a, %b
-; CHECK-NEXT: ret float %1
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@ -116,18 +123,19 @@ define float @test_mul_ss_0(float %a, float %b) {
 }
 define float @test_div_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_div_ss_0
+; CHECK-LABEL: @test_div_ss_0(
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2
-; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3
-; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float 4.000000e+00, i32 1
-; CHECK-NEXT: %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float 5.000000e+00, i32 2
-; CHECK-NEXT: %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float 6.000000e+00, i32 3
-; CHECK-NEXT: %9 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %8)
+; CHECK-NEXT:    [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> [[TMP4]], <4 x float> [[TMP8]])
-; CHECK-NEXT: %r = extractelement <4 x float> %9, i32 0
+; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP9]], i32 0
-; CHECK-NEXT: ret float %r
+; CHECK-NEXT:    ret float [[R]]
 ;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@ -142,12 +150,13 @@ define float @test_div_ss_0(float %a, float %b) {
 }
 define float @test_min_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_min_ss_0
+; CHECK-LABEL: @test_min_ss_0(
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %1, <4 x float> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT: %4 = extractelement <4 x float> %3, i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
-; CHECK-NEXT: ret float %4
+; CHECK-NEXT:    ret float [[TMP4]]
 ;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@ -162,12 +171,13 @@ define float @test_min_ss_0(float %a, float %b) {
 }
 define float @test_max_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_max_ss_0
+; CHECK-LABEL: @test_max_ss_0(
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT: %4 = extractelement <4 x float> %3, i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
-; CHECK-NEXT: ret float %4
+; CHECK-NEXT:    ret float [[TMP4]]
 ;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@ -182,18 +192,19 @@ define float @test_max_ss_0(float %a, float %b) {
 }
 define float @test_cmp_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_cmp_ss_0
+; CHECK-LABEL: @test_cmp_ss_0(
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2
-; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3
-; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float 4.000000e+00, i32 1
-; CHECK-NEXT: %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float 5.000000e+00, i32 2
-; CHECK-NEXT: %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float 6.000000e+00, i32 3
-; CHECK-NEXT: %9 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %8, i8 0)
+; CHECK-NEXT:    [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP4]], <4 x float> [[TMP8]], i8 0)
-; CHECK-NEXT: %r = extractelement <4 x float> %9, i32 0
+; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP9]], i32 0
-; CHECK-NEXT: ret float %r
+; CHECK-NEXT:    ret float [[R]]
 ;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@ -208,11 +219,12 @@ define float @test_cmp_ss_0(float %a, float %b) {
 }
 define i32 @test_comieq_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_comieq_ss_0
+; CHECK-LABEL: @test_comieq_ss_0(
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %1, <4 x float> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT: ret i32 %3
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@ -226,11 +238,12 @@ define i32 @test_comieq_ss_0(float %a, float %b) {
 }
 define i32 @test_comige_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_comige_ss_0
+; CHECK-LABEL: @test_comige_ss_0(
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> %1, <4 x float> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT: ret i32 %3
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@ -244,11 +257,12 @@ define i32 @test_comige_ss_0(float %a, float %b) {
 }
 define i32 @test_comigt_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_comigt_ss_0
+; CHECK-LABEL: @test_comigt_ss_0(
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> %1, <4 x float> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT: ret i32 %3
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@ -262,11 +276,12 @@ define i32 @test_comigt_ss_0(float %a, float %b) {
 }
 define i32 @test_comile_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_comile_ss_0
+; CHECK-LABEL: @test_comile_ss_0(
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> %1, <4 x float> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT: ret i32 %3
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@ -280,11 +295,12 @@ define i32 @test_comile_ss_0(float %a, float %b) {
 }
 define i32 @test_comilt_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_comilt_ss_0
+; CHECK-LABEL: @test_comilt_ss_0(
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> %1, <4 x float> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT: ret i32 %3
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@ -298,11 +314,12 @@ define i32 @test_comilt_ss_0(float %a, float %b) {
 }
 define i32 @test_comineq_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_comineq_ss_0
+; CHECK-LABEL: @test_comineq_ss_0(
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %1, <4 x float> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT: ret i32 %3
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@ -316,11 +333,12 @@ define i32 @test_comineq_ss_0(float %a, float %b) {
 }
 define i32 @test_ucomieq_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_ucomieq_ss_0
+; CHECK-LABEL: @test_ucomieq_ss_0(
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %1, <4 x float> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT: ret i32 %3
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@ -334,11 +352,12 @@ define i32 @test_ucomieq_ss_0(float %a, float %b) {
 }
 define i32 @test_ucomige_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_ucomige_ss_0
+; CHECK-LABEL: @test_ucomige_ss_0(
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %1, <4 x float> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT: ret i32 %3
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@ -352,11 +371,12 @@ define i32 @test_ucomige_ss_0(float %a, float %b) {
 }
 define i32 @test_ucomigt_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_ucomigt_ss_0
+; CHECK-LABEL: @test_ucomigt_ss_0(
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %1, <4 x float> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT: ret i32 %3
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@ -370,11 +390,12 @@ define i32 @test_ucomigt_ss_0(float %a, float %b) {
 }
 define i32 @test_ucomile_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_ucomile_ss_0
+; CHECK-LABEL: @test_ucomile_ss_0(
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %1, <4 x float> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT: ret i32 %3
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@ -388,11 +409,12 @@ define i32 @test_ucomile_ss_0(float %a, float %b) {
 }
 define i32 @test_ucomilt_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_ucomilt_ss_0
+; CHECK-LABEL: @test_ucomilt_ss_0(
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %1, <4 x float> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT: ret i32 %3
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@ -406,11 +428,12 @@ define i32 @test_ucomilt_ss_0(float %a, float %b) {
 }
 define i32 @test_ucomineq_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_ucomineq_ss_0
+; CHECK-LABEL: @test_ucomineq_ss_0(
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %1, <4 x float> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT: ret i32 %3
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
--- a/test/Transforms/InstCombine/x86-sse2.ll
+++ b/test/Transforms/InstCombine/x86-sse2.ll
@ -1,13 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 define double @test_sqrt_sd_0(double %a) {
-; CHECK-LABEL: @test_sqrt_sd_0
+; CHECK-LABEL: @test_sqrt_sd_0(
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> [[TMP2]])
-; CHECK-NEXT: %4 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
-; CHECK-NEXT: ret double %4
+; CHECK-NEXT:    ret double [[TMP4]]
 ;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
@ -16,108 +18,115 @@ define double @test_sqrt_sd_0(double %a) {
 }
 define double @test_add_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_add_sd_0
+; CHECK-LABEL: @test_add_sd_0(
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1
-; CHECK-NEXT: %5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]])
-; CHECK-NEXT: %6 = extractelement <2 x double> %5, i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
-; CHECK-NEXT: ret double %6
+; CHECK-NEXT:    ret double [[TMP6]]
 ;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> undef, double %b, i32 0
  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
  %5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
  %6 = extractelement <2 x double> %5, i32 0
-  ret double %6 
+  ret double %6
 }
 define double @test_sub_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_sub_sd_0
+; CHECK-LABEL: @test_sub_sd_0(
-; CHECK-NEXT: %1 = fsub double %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub double %a, %b
-; CHECK-NEXT: ret double %1
+; CHECK-NEXT:    ret double [[TMP1]]
 ;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> undef, double %b, i32 0
  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
  %5 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %2, <2 x double> %4)
  %6 = extractelement <2 x double> %5, i32 0
-  ret double %6 
+  ret double %6
 }
 define double @test_mul_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_mul_sd_0
+; CHECK-LABEL: @test_mul_sd_0(
-; CHECK-NEXT: %1 = fmul double %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul double %a, %b
-; CHECK-NEXT: ret double %1
+; CHECK-NEXT:    ret double [[TMP1]]
 ;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> undef, double %b, i32 0
  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
  %5 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %2, <2 x double> %4)
  %6 = extractelement <2 x double> %5, i32 0
-  ret double %6 
+  ret double %6
 }
 define double @test_div_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_div_sd_0
+; CHECK-LABEL: @test_div_sd_0(
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1
-; CHECK-NEXT: %5 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %2, <2 x double> %4)
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]])
-; CHECK-NEXT: %6 = extractelement <2 x double> %5, i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
-; CHECK-NEXT: ret double %6
+; CHECK-NEXT:    ret double [[TMP6]]
 ;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> undef, double %b, i32 0
  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
  %5 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %2, <2 x double> %4)
  %6 = extractelement <2 x double> %5, i32 0
-  ret double %6 
+  ret double %6
 }
 define double @test_min_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_min_sd_0
+; CHECK-LABEL: @test_min_sd_0(
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %1, <2 x double> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT: %4 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
-; CHECK-NEXT: ret double %4
+; CHECK-NEXT:    ret double [[TMP4]]
 ;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> undef, double %b, i32 0
  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
  %5 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %2, <2 x double> %4)
  %6 = extractelement <2 x double> %5, i32 0
-  ret double %6 
+  ret double %6
 }
 define double @test_max_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_max_sd_0
+; CHECK-LABEL: @test_max_sd_0(
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %1, <2 x double> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT: %4 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
-; CHECK-NEXT: ret double %4
+; CHECK-NEXT:    ret double [[TMP4]]
 ;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> undef, double %b, i32 0
  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
  %5 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %2, <2 x double> %4)
  %6 = extractelement <2 x double> %5, i32 0
-  ret double %6 
+  ret double %6
 }
 define double @test_cmp_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_cmp_sd_0
+; CHECK-LABEL: @test_cmp_sd_0(
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1
-; CHECK-NEXT: %5 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %2, <2 x double> %4, i8 0)
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]], i8 0)
-; CHECK-NEXT: %6 = extractelement <2 x double> %5, i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
-; CHECK-NEXT: ret double %6
+; CHECK-NEXT:    ret double [[TMP6]]
 ;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> undef, double %b, i32 0
@ -128,11 +137,12 @@ define double @test_cmp_sd_0(double %a, double %b) {
 }
 define i32 @test_comieq_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_comieq_sd_0
+; CHECK-LABEL: @test_comieq_sd_0(
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %1, <2 x double> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comieq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT: ret i32 %3
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> undef, double %b, i32 0
@ -142,11 +152,12 @@ define i32 @test_comieq_sd_0(double %a, double %b) {
 }
 define i32 @test_comige_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_comige_sd_0
+; CHECK-LABEL: @test_comige_sd_0(
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.comige.sd(<2 x double> %1, <2 x double> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comige.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT: ret i32 %3
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> undef, double %b, i32 0
@ -156,11 +167,12 @@ define i32 @test_comige_sd_0(double %a, double %b) {
 }
 define i32 @test_comigt_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_comigt_sd_0
+; CHECK-LABEL: @test_comigt_sd_0(
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %1, <2 x double> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comigt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT: ret i32 %3
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> undef, double %b, i32 0
@ -170,11 +182,12 @@ define i32 @test_comigt_sd_0(double %a, double %b) {
 }
 define i32 @test_comile_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_comile_sd_0
+; CHECK-LABEL: @test_comile_sd_0(
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.comile.sd(<2 x double> %1, <2 x double> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comile.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT: ret i32 %3
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> undef, double %b, i32 0
@ -184,11 +197,12 @@ define i32 @test_comile_sd_0(double %a, double %b) {
 }
 define i32 @test_comilt_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_comilt_sd_0
+; CHECK-LABEL: @test_comilt_sd_0(
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %1, <2 x double> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comilt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT: ret i32 %3
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> undef, double %b, i32 0
@ -198,11 +212,12 @@ define i32 @test_comilt_sd_0(double %a, double %b) {
 }
 define i32 @test_comineq_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_comineq_sd_0
+; CHECK-LABEL: @test_comineq_sd_0(
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %1, <2 x double> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comineq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT: ret i32 %3
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> undef, double %b, i32 0
@ -212,11 +227,12 @@ define i32 @test_comineq_sd_0(double %a, double %b) {
 }
 define i32 @test_ucomieq_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_ucomieq_sd_0
+; CHECK-LABEL: @test_ucomieq_sd_0(
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %1, <2 x double> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT: ret i32 %3
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> undef, double %b, i32 0
@ -226,11 +242,12 @@ define i32 @test_ucomieq_sd_0(double %a, double %b) {
 }
 define i32 @test_ucomige_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_ucomige_sd_0
+; CHECK-LABEL: @test_ucomige_sd_0(
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %1, <2 x double> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT: ret i32 %3
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> undef, double %b, i32 0
@ -240,11 +257,12 @@ define i32 @test_ucomige_sd_0(double %a, double %b) {
 }
 define i32 @test_ucomigt_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_ucomigt_sd_0
+; CHECK-LABEL: @test_ucomigt_sd_0(
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %1, <2 x double> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT: ret i32 %3
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> undef, double %b, i32 0
@ -254,11 +272,12 @@ define i32 @test_ucomigt_sd_0(double %a, double %b) {
 }
 define i32 @test_ucomile_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_ucomile_sd_0
+; CHECK-LABEL: @test_ucomile_sd_0(
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %1, <2 x double> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT: ret i32 %3
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> undef, double %b, i32 0
@ -268,11 +287,12 @@ define i32 @test_ucomile_sd_0(double %a, double %b) {
 }
 define i32 @test_ucomilt_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_ucomilt_sd_0
+; CHECK-LABEL: @test_ucomilt_sd_0(
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %1, <2 x double> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT: ret i32 %3
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> undef, double %b, i32 0
@ -282,11 +302,12 @@ define i32 @test_ucomilt_sd_0(double %a, double %b) {
 }
 define i32 @test_ucomineq_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_ucomineq_sd_0
+; CHECK-LABEL: @test_ucomineq_sd_0(
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %1, <2 x double> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT: ret i32 %3
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> undef, double %b, i32 0
--- a/test/Transforms/InstCombine/x86-sse41.ll
+++ b/test/Transforms/InstCombine/x86-sse41.ll
@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 define <2 x double> @test_round_sd(<2 x double> %a, <2 x double> %b) {
-; CHECK-LABEL: @test_round_sd
+; CHECK-LABEL: @test_round_sd(
-; CHECK-NEXT: %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> %a, double 1.000000e+00, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1
-; CHECK-NEXT: %3 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %1, <2 x double> %2, i32 10)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]], i32 10)
-; CHECK-NEXT: ret <2 x double> %3
+; CHECK-NEXT:    ret <2 x double> [[TMP3]]
 ;
  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 0
  %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
  %3 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %1, <2 x double> %2, i32 10)
@ -14,33 +16,35 @@ define <2 x double> @test_round_sd(<2 x double> %a, <2 x double> %b) {
 }
 define double @test_round_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_round_sd_0
+; CHECK-LABEL: @test_round_sd_0(
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1
-; CHECK-NEXT: %5 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %2, <2 x double> %4, i32 10)
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]], i32 10)
-; CHECK-NEXT: %6 = extractelement <2 x double> %5, i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
-; CHECK-NEXT: ret double %6
+; CHECK-NEXT:    ret double [[TMP6]]
 ;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = insertelement <2 x double> undef, double %b, i32 0
  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
  %5 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %2, <2 x double> %4, i32 10)
  %6 = extractelement <2 x double> %5, i32 0
-  ret double %6 
+  ret double %6
 }
 define <4 x float> @test_round_ss(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @test_round
+; CHECK-LABEL: @test_round_ss(
-; CHECK-NEXT: %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> %a, float 1.000000e+00, i32 1
-; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2
-; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3
-; CHECK-NEXT: %4 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1
-; CHECK-NEXT: %5 = insertelement <4 x float> %4, float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float 2.000000e+00, i32 2
-; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float 3.000000e+00, i32 3
-; CHECK-NEXT: %7 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %3, <4 x float> %6, i32 10)
+; CHECK-NEXT:    [[TMP7:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> [[TMP3]], <4 x float> [[TMP6]], i32 10)
-; CHECK-NEXT: ret <4 x float> %7
+; CHECK-NEXT:    ret <4 x float> [[TMP7]]
 ;
  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
@ -52,18 +56,19 @@ define <4 x float> @test_round_ss(<4 x float> %a, <4 x float> %b) {
 }
 define float @test_round_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_round_ss_0
+; CHECK-LABEL: @test_round_ss_0(
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2
-; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3
-; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float 4.000000e+00, i32 1
-; CHECK-NEXT: %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float 5.000000e+00, i32 2
-; CHECK-NEXT: %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float 6.000000e+00, i32 3
-; CHECK-NEXT: %9 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %4, <4 x float> %8, i32 10)
+; CHECK-NEXT:    [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> [[TMP4]], <4 x float> [[TMP8]], i32 10)
-; CHECK-NEXT: %r = extractelement <4 x float> %9, i32 0
+; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP9]], i32 0
-; CHECK-NEXT: ret float %r
+; CHECK-NEXT:    ret float [[R]]
 ;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@ -78,4 +83,4 @@ define float @test_round_ss_0(float %a, float %b) {
 }
 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
-declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
--- a/test/Transforms/InstCombine/x86-sse4a.ll
+++ b/test/Transforms/InstCombine/x86-sse4a.ll
@ -1,3 +1,4 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 ;
@ -5,45 +6,51 @@
 ;
 define <2 x i64> @test_extrq_call(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_call
+; CHECK-LABEL: @test_extrq_call(
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
  ret <2 x i64> %1
 }
 define <2 x i64> @test_extrq_zero_arg0(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_zero_arg0
+; CHECK-LABEL: @test_extrq_zero_arg0(
-; CHECK-NEXT: ret <2 x i64> <i64 0, i64 undef>
+; CHECK-NEXT:    ret <2 x i64> <i64 0, i64 undef>
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> zeroinitializer, <16 x i8> %y) nounwind
  ret <2 x i64> %1
 }
 define <2 x i64> @test_extrq_zero_arg1(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_zero_arg1
+; CHECK-LABEL: @test_extrq_zero_arg1(
-; CHECK-NEXT: ret <2 x i64> %x
+; CHECK-NEXT:    ret <2 x i64> %x
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> zeroinitializer) nounwind
  ret <2 x i64> %1
 }
 define <2 x i64> @test_extrq_to_extqi(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_to_extqi
+; CHECK-LABEL: @test_extrq_to_extqi(
-; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 15)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 15)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
  ret <2 x i64> %1
 }
 define <2 x i64> @test_extrq_constant(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_constant
+; CHECK-LABEL: @test_extrq_constant(
-; CHECK-NEXT: ret <2 x i64> <i64 255, i64 undef>
+; CHECK-NEXT:    ret <2 x i64> <i64 255, i64 undef>
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 55>, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
  ret <2 x i64> %1
 }
 define <2 x i64> @test_extrq_constant_undef(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_constant_undef
+; CHECK-LABEL: @test_extrq_constant_undef(
-; CHECK-NEXT: ret <2 x i64> <i64 65535, i64 undef>
+; CHECK-NEXT:    ret <2 x i64> <i64 65535, i64 undef>
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 undef>, <16 x i8> <i8 16, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
  ret <2 x i64> %1
 }
@ -53,57 +60,64 @@ define <2 x i64> @test_extrq_constant_undef(<2 x i64> %x, <16 x i8> %y) {
 ;
 define <2 x i64> @test_extrqi_call(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_call
+; CHECK-LABEL: @test_extrqi_call(
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
  ret <2 x i64> %1
 }
 define <2 x i64> @test_extrqi_shuffle_1zuu(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_shuffle_1zuu
+; CHECK-LABEL: @test_extrqi_shuffle_1zuu(
-; CHECK-NEXT: %1 = bitcast <2 x i64> %x to <16 x i8>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %x to <16 x i8>
-; CHECK-NEXT: %2 = shufflevector <16 x i8> %1, <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64>
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> %3
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 32, i8 32)
  ret <2 x i64> %1
 }
 define <2 x i64> @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_shuffle_2zzzzzzzuuuuuuuu
+; CHECK-LABEL: @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(
-; CHECK-NEXT: %1 = bitcast <2 x i64> %x to <16 x i8>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %x to <16 x i8>
-; CHECK-NEXT: %2 = shufflevector <16 x i8> %1, <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 2, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 2, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64>
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> %3
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 16)
  ret <2 x i64> %1
 }
 define <2 x i64> @test_extrqi_undef(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_undef
+; CHECK-LABEL: @test_extrqi_undef(
-; CHECK-NEXT: ret <2 x i64> undef
+; CHECK-NEXT:    ret <2 x i64> undef
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 32, i8 33)
  ret <2 x i64> %1
 }
 define <2 x i64> @test_extrqi_zero(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_zero
+; CHECK-LABEL: @test_extrqi_zero(
-; CHECK-NEXT: ret <2 x i64> <i64 0, i64 undef>
+; CHECK-NEXT:    ret <2 x i64> <i64 0, i64 undef>
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 3, i8 18)
  ret <2 x i64> %1
 }
 define <2 x i64> @test_extrqi_constant(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_constant
+; CHECK-LABEL: @test_extrqi_constant(
-; CHECK-NEXT: ret <2 x i64> <i64 7, i64 undef>
+; CHECK-NEXT:    ret <2 x i64> <i64 7, i64 undef>
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 55>, i8 3, i8 18)
  ret <2 x i64> %1
 }
 define <2 x i64> @test_extrqi_constant_undef(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_constant_undef
+; CHECK-LABEL: @test_extrqi_constant_undef(
-; CHECK-NEXT: ret <2 x i64> <i64 15, i64 undef>
+; CHECK-NEXT:    ret <2 x i64> <i64 15, i64 undef>
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 undef>, i8 4, i8 18)
  ret <2 x i64> %1
 }
@ -113,31 +127,35 @@ define <2 x i64> @test_extrqi_constant_undef(<2 x i64> %x) {
 ;
 define <2 x i64> @test_insertq_call(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertq_call
+; CHECK-LABEL: @test_insertq_call(
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) #1
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
  ret <2 x i64> %1
 }
 define <2 x i64> @test_insertq_to_insertqi(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertq_to_insertqi
+; CHECK-LABEL: @test_insertq_to_insertqi(
-; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> <i64 8, i64 undef>, i8 18, i8 2)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> <i64 8, i64 undef>, i8 18, i8 2)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> <i64 8, i64 658>) nounwind
  ret <2 x i64> %1
 }
 define <2 x i64> @test_insertq_constant(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertq_constant
+; CHECK-LABEL: @test_insertq_constant(
-; CHECK-NEXT: ret <2 x i64> <i64 32, i64 undef>
+; CHECK-NEXT:    ret <2 x i64> <i64 32, i64 undef>
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 8, i64 658>) nounwind
  ret <2 x i64> %1
 }
 define <2 x i64> @test_insertq_constant_undef(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertq_constant_undef
+; CHECK-LABEL: @test_insertq_constant_undef(
-; CHECK-NEXT: ret <2 x i64> <i64 33, i64 undef>
+; CHECK-NEXT:    ret <2 x i64> <i64 33, i64 undef>
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 1, i64 undef>, <2 x i64> <i64 8, i64 658>) nounwind
  ret <2 x i64> %1
 }
@ -147,9 +165,10 @@ define <2 x i64> @test_insertq_constant_undef(<2 x i64> %x, <2 x i64> %y) {
 ;
 define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) {
-; CHECK-LABEL: @test_insertqi_shuffle_04uu
+; CHECK-LABEL: @test_insertqi_shuffle_04uu(
-; CHECK-NEXT: %1 = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: ret <16 x i8> %1
+; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
 ;
  %1 = bitcast <16 x i8> %v to <2 x i64>
  %2 = bitcast <16 x i8> %i to <2 x i64>
  %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 32, i8 32)
@ -158,9 +177,10 @@ define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) {
 }
 define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) {
-; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu
+; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu(
-; CHECK-NEXT: %1 = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: ret <16 x i8> %1
+; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
 ;
  %1 = bitcast <16 x i8> %v to <2 x i64>
  %2 = bitcast <16 x i8> %i to <2 x i64>
  %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 16, i8 0)
@ -169,8 +189,9 @@ define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) {
 }
 define <2 x i64> @test_insertqi_constant(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @test_insertqi_constant
+; CHECK-LABEL: @test_insertqi_constant(
-; CHECK-NEXT: ret <2 x i64> <i64 -131055, i64 undef>
+; CHECK-NEXT:    ret <2 x i64> <i64 -131055, i64 undef>
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 8, i64 0>, i8 16, i8 1)
  ret <2 x i64> %1
 }
@ -179,36 +200,41 @@ define <2 x i64> @test_insertqi_constant(<2 x i64> %v, <2 x i64> %i) {
 ; the result are undefined, and we copy the bottom 64 bits from the
 ; second arg
 define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testInsert64Bits
+; CHECK-LABEL: @testInsert64Bits(
-; CHECK-NEXT: ret <2 x i64> %i
+; CHECK-NEXT:    ret <2 x i64> %i
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
  ret <2 x i64> %1
 }
 define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testZeroLength
+; CHECK-LABEL: @testZeroLength(
-; CHECK-NEXT: ret <2 x i64> %i
+; CHECK-NEXT:    ret <2 x i64> %i
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
  ret <2 x i64> %1
 }
 define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testUndefinedInsertq_1
+; CHECK-LABEL: @testUndefinedInsertq_1(
-; CHECK-NEXT: ret <2 x i64> undef
+; CHECK-NEXT:    ret <2 x i64> undef
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16)
  ret <2 x i64> %1
 }
 define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testUndefinedInsertq_2
+; CHECK-LABEL: @testUndefinedInsertq_2(
-; CHECK-NEXT: ret <2 x i64> undef
+; CHECK-NEXT:    ret <2 x i64> undef
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32)
  ret <2 x i64> %1
 }
 define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testUndefinedInsertq_3
+; CHECK-LABEL: @testUndefinedInsertq_3(
-; CHECK-NEXT: ret <2 x i64> undef
+; CHECK-NEXT:    ret <2 x i64> undef
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16)
  ret <2 x i64> %1
 }
@ -218,27 +244,30 @@ define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
 ;
 define <2 x i64> @test_extrq_arg0(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_arg0
+; CHECK-LABEL: @test_extrq_arg0(
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %y) nounwind
  ret <2 x i64> %2
 }
 define <2 x i64> @test_extrq_arg1(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_arg1
+; CHECK-LABEL: @test_extrq_arg1(
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
  %1 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %1) nounwind
  ret <2 x i64> %2
 }
 define <2 x i64> @test_extrq_args01(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_args01
+; CHECK-LABEL: @test_extrq_args01(
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %3 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %2) nounwind
@ -246,69 +275,77 @@ define <2 x i64> @test_extrq_args01(<2 x i64> %x, <16 x i8> %y) {
 }
 define <2 x i64> @test_extrq_ret(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_ret
+; CHECK-LABEL: @test_extrq_ret(
-; CHECK-NEXT: ret <2 x i64> undef
+; CHECK-NEXT:    ret <2 x i64> undef
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %2
 }
 define <2 x i64> @test_extrqi_arg0(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_arg0
+; CHECK-LABEL: @test_extrqi_arg0(
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %1, i8 3, i8 2)
  ret <2 x i64> %2
 }
 define <2 x i64> @test_extrqi_ret(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_ret
+; CHECK-LABEL: @test_extrqi_ret(
-; CHECK-NEXT: ret <2 x i64> undef
+; CHECK-NEXT:    ret <2 x i64> undef
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2) nounwind
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %2
 }
 define <2 x i64> @test_insertq_arg0(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertq_arg0
+; CHECK-LABEL: @test_insertq_arg0(
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) #1
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %1, <2 x i64> %y) nounwind
  ret <2 x i64> %2
 }
 define <2 x i64> @test_insertq_ret(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertq_ret
+; CHECK-LABEL: @test_insertq_ret(
-; CHECK-NEXT: ret <2 x i64> undef
+; CHECK-NEXT:    ret <2 x i64> undef
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %2
 }
 define <2 x i64> @test_insertqi_arg0(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertqi_arg0
+; CHECK-LABEL: @test_insertqi_arg0(
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) #1
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %y, i8 3, i8 2) nounwind
  ret <2 x i64> %2
 }
 define <2 x i64> @test_insertqi_arg1(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertqi_arg1
+; CHECK-LABEL: @test_insertqi_arg1(
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) #1
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
  %1 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %1, i8 3, i8 2) nounwind
  ret <2 x i64> %2
 }
 define <2 x i64> @test_insertqi_args01(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertqi_args01
+; CHECK-LABEL: @test_insertqi_args01(
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) #1
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 3, i8 2) nounwind
@ -316,8 +353,9 @@ define <2 x i64> @test_insertqi_args01(<2 x i64> %x, <2 x i64> %y) {
 }
 define <2 x i64> @test_insertqi_ret(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertqi_ret
+; CHECK-LABEL: @test_insertqi_ret(
-; CHECK-NEXT: ret <2 x i64> undef
+; CHECK-NEXT:    ret <2 x i64> undef
 ;
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) nounwind
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %2
--- a/test/Transforms/InstCombine/x86-xop.ll
+++ b/test/Transforms/InstCombine/x86-xop.ll
@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 define double @test_vfrcz_sd_0(double %a) {
-; CHECK-LABEL: @test_vfrcz_sd_0
+; CHECK-LABEL: @test_vfrcz_sd_0(
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> [[TMP2]])
-; CHECK-NEXT: %4 = extractelement <2 x double> %3, i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
-; CHECK-NEXT: ret double %4
+; CHECK-NEXT:    ret double [[TMP4]]
 ;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
@ -15,14 +17,15 @@ define double @test_vfrcz_sd_0(double %a) {
 }
 define float @test_vfrcz_ss_0(float %a) {
-; CHECK-LABEL: @test_vfrcz_ss_0
+; CHECK-LABEL: @test_vfrcz_ss_0(
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2
-; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3
-; CHECK-NEXT: %5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4)
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> [[TMP4]])
-; CHECK-NEXT: %6 = extractelement <4 x float> %5, i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP5]], i32 0
-; CHECK-NEXT: ret float %6
+; CHECK-NEXT:    ret float [[TMP6]]
 ;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@ -33,137 +36,153 @@ define float @test_vfrcz_ss_0(float %a) {
 }
 define <2 x i64> @cmp_slt_v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: @cmp_slt_v2i64
+; CHECK-LABEL: @cmp_slt_v2i64(
-; CHECK-NEXT: %1 = icmp slt <2 x i64> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i64> %a, %b
-; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> %2
+; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
 ;
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
 }
 define <2 x i64> @cmp_ult_v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: @cmp_ult_v2i64
+; CHECK-LABEL: @cmp_ult_v2i64(
-; CHECK-NEXT: %1 = icmp ult <2 x i64> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i64> %a, %b
-; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> %2
+; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
 ;
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
 }
 define <2 x i64> @cmp_sle_v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: @cmp_sle_v2i64
+; CHECK-LABEL: @cmp_sle_v2i64(
-; CHECK-NEXT: %1 = icmp sle <2 x i64> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sle <2 x i64> %a, %b
-; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> %2
+; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
 ;
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
 }
 define <2 x i64> @cmp_ule_v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: @cmp_ule_v2i64
+; CHECK-LABEL: @cmp_ule_v2i64(
-; CHECK-NEXT: %1 = icmp ule <2 x i64> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i64> %a, %b
-; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> %2
+; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
 ;
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
 }
 define <4 x i32> @cmp_sgt_v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: @cmp_sgt_v4i32
+; CHECK-LABEL: @cmp_sgt_v4i32(
-; CHECK-NEXT: %1 = icmp sgt <4 x i32> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> %a, %b
-; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
-; CHECK-NEXT: ret <4 x i32> %2
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
 }
 define <4 x i32> @cmp_ugt_v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: @cmp_ugt_v4i32
+; CHECK-LABEL: @cmp_ugt_v4i32(
-; CHECK-NEXT: %1 = icmp ugt <4 x i32> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i32> %a, %b
-; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
-; CHECK-NEXT: ret <4 x i32> %2
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
 }
 define <4 x i32> @cmp_sge_v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: @cmp_sge_v4i32
+; CHECK-LABEL: @cmp_sge_v4i32(
-; CHECK-NEXT: %1 = icmp sge <4 x i32> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sge <4 x i32> %a, %b
-; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
-; CHECK-NEXT: ret <4 x i32> %2
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
 }
 define <4 x i32> @cmp_uge_v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: @cmp_uge_v4i32
+; CHECK-LABEL: @cmp_uge_v4i32(
-; CHECK-NEXT: %1 = icmp uge <4 x i32> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge <4 x i32> %a, %b
-; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
-; CHECK-NEXT: ret <4 x i32> %2
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
 }
 define <8 x i16> @cmp_seq_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: @cmp_seq_v8i16
+; CHECK-LABEL: @cmp_seq_v8i16(
-; CHECK-NEXT: %1 = icmp eq <8 x i16> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i16> %a, %b
-; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
-; CHECK-NEXT: ret <8 x i16> %2
+; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
 ;
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
 }
 define <8 x i16> @cmp_ueq_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: @cmp_ueq_v8i16
+; CHECK-LABEL: @cmp_ueq_v8i16(
-; CHECK-NEXT: %1 = icmp eq <8 x i16> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i16> %a, %b
-; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
-; CHECK-NEXT: ret <8 x i16> %2
+; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
 ;
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
 }
 define <8 x i16> @cmp_sne_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: @cmp_sne_v8i16
+; CHECK-LABEL: @cmp_sne_v8i16(
-; CHECK-NEXT: %1 = icmp ne <8 x i16> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <8 x i16> %a, %b
-; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
-; CHECK-NEXT: ret <8 x i16> %2
+; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
 ;
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
 }
 define <8 x i16> @cmp_une_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: @cmp_une_v8i16
+; CHECK-LABEL: @cmp_une_v8i16(
-; CHECK-NEXT: %1 = icmp ne <8 x i16> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <8 x i16> %a, %b
-; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
-; CHECK-NEXT: ret <8 x i16> %2
+; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
 ;
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
 }
 define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: @cmp_strue_v16i8
+; CHECK-LABEL: @cmp_strue_v16i8(
-; CHECK-NEXT: ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
 ;
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
 }
 define <16 x i8> @cmp_utrue_v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: @cmp_utrue_v16i8
+; CHECK-LABEL: @cmp_utrue_v16i8(
-; CHECK-NEXT: ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
 ;
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
 }
 define <16 x i8> @cmp_sfalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: @cmp_sfalse_v16i8
+; CHECK-LABEL: @cmp_sfalse_v16i8(
-; CHECK-NEXT: ret <16 x i8> zeroinitializer
+; CHECK-NEXT:    ret <16 x i8> zeroinitializer
 ;
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
 }
 define <16 x i8> @cmp_ufalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: @cmp_ufalse_v16i8
+; CHECK-LABEL: @cmp_ufalse_v16i8(
-; CHECK-NEXT: ret <16 x i8> zeroinitializer
+; CHECK-NEXT:    ret <16 x i8> zeroinitializer
 ;
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
 }