diff --git a/test/CodeGen/X86/avx2-arith.ll b/test/CodeGen/X86/avx2-arith.ll
index aec74424b9b..017f54b40b2 100644
--- a/test/CodeGen/X86/avx2-arith.ll
+++ b/test/CodeGen/X86/avx2-arith.ll
@@ -1,15 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64

 define <4 x i64> @test_vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
 ; X32-LABEL: test_vpaddq:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpaddq %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_vpaddq:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpaddq %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %x = add <4 x i64> %i, %j
@@ -18,12 +18,12 @@ define <4 x i64> @test_vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {

 define <8 x i32> @test_vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
 ; X32-LABEL: test_vpaddd:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_vpaddd:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpaddd %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %x = add <8 x i32> %i, %j
@@ -32,12 +32,12 @@ define <8 x i32> @test_vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {

 define <16 x i16> @test_vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
 ; X32-LABEL: test_vpaddw:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpaddw %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_vpaddw:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpaddw %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %x = add <16 x i16> %i, %j
@@ -46,12 +46,12 @@ define <16 x i16> @test_vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {

 define <32 x i8> @test_vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
 ; X32-LABEL: test_vpaddb:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_vpaddb:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpaddb %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %x = add <32 x i8> %i, %j
@@ -60,12 +60,12 @@ define <32 x i8> @test_vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {

 define <4 x i64> @test_vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
 ; X32-LABEL: test_vpsubq:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsubq %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_vpsubq:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsubq %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %x = sub <4 x i64> %i, %j
@@ -74,12 +74,12 @@ define <4 x i64> @test_vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {

 define <8 x i32> @test_vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
 ; X32-LABEL: test_vpsubd:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsubd %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_vpsubd:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsubd %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %x = sub <8 x i32> %i, %j
@@ -88,12 +88,12 @@ define <8 x i32> @test_vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {

 define <16 x i16> @test_vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
 ; X32-LABEL: test_vpsubw:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsubw %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_vpsubw:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsubw %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %x = sub <16 x i16> %i, %j
@@ -102,12 +102,12 @@ define <16 x i16> @test_vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {

 define <32 x i8> @test_vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
 ; X32-LABEL: test_vpsubb:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsubb %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_vpsubb:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsubb %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %x = sub <32 x i8> %i, %j
@@ -116,12 +116,12 @@ define <32 x i8> @test_vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {

 define <8 x i32> @test_vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
 ; X32-LABEL: test_vpmulld:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpmulld %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_vpmulld:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpmulld %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %x = mul <8 x i32> %i, %j
@@ -130,12 +130,12 @@ define <8 x i32> @test_vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {

 define <16 x i16> @test_vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
 ; X32-LABEL: test_vpmullw:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpmullw %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_vpmullw:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpmullw %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %x = mul <16 x i16> %i, %j
@@ -144,7 +144,7 @@ define <16 x i16> @test_vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone

 define <16 x i8> @mul_v16i8(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
 ; X32-LABEL: mul_v16i8:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpmovsxbw %xmm1, %ymm1
 ; X32-NEXT: vpmovsxbw %xmm0, %ymm0
 ; X32-NEXT: vpmullw %ymm1, %ymm0, %ymm0
@@ -157,7 +157,7 @@ define <16 x i8> @mul_v16i8(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: mul_v16i8:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpmovsxbw %xmm1, %ymm1
 ; X64-NEXT: vpmovsxbw %xmm0, %ymm0
 ; X64-NEXT: vpmullw %ymm1, %ymm0, %ymm0
@@ -174,7 +174,7 @@ define <16 x i8> @mul_v16i8(<16 x i8> %i, <16 x i8> %j) nounwind readnone {

 define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
 ; X32-LABEL: mul_v32i8:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vextracti128 $1, %ymm1, %xmm2
 ; X32-NEXT: vpmovsxbw %xmm2, %ymm2
 ; X32-NEXT: vextracti128 $1, %ymm0, %xmm3
@@ -196,7 +196,7 @@ define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: mul_v32i8:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vextracti128 $1, %ymm1, %xmm2
 ; X64-NEXT: vpmovsxbw %xmm2, %ymm2
 ; X64-NEXT: vextracti128 $1, %ymm0, %xmm3
@@ -222,7 +222,7 @@ define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone {

 define <4 x i64> @mul_v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
 ; X32-LABEL: mul_v4i64:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsrlq $32, %ymm0, %ymm2
 ; X32-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
 ; X32-NEXT: vpsrlq $32, %ymm1, %ymm3
@@ -234,7 +234,7 @@ define <4 x i64> @mul_v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: mul_v4i64:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsrlq $32, %ymm0, %ymm2
 ; X64-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
 ; X64-NEXT: vpsrlq $32, %ymm1, %ymm3
@@ -250,12 +250,12 @@ define <4 x i64> @mul_v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {

 define <8 x i32> @mul_const1(<8 x i32> %x) {
 ; X32-LABEL: mul_const1:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpaddd %ymm0, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: mul_const1:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpaddd %ymm0, %ymm0, %ymm0
 ; X64-NEXT: retq
 %y = mul <8 x i32> %x,
@@ -264,12 +264,12 @@ define <8 x i32> @mul_const1(<8 x i32> %x) {

 define <4 x i64> @mul_const2(<4 x i64> %x) {
 ; X32-LABEL: mul_const2:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsllq $2, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: mul_const2:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsllq $2, %ymm0, %ymm0
 ; X64-NEXT: retq
 %y = mul <4 x i64> %x,
@@ -278,12 +278,12 @@ define <4 x i64> @mul_const2(<4 x i64> %x) {

 define <16 x i16> @mul_const3(<16 x i16> %x) {
 ; X32-LABEL: mul_const3:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsllw $3, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: mul_const3:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsllw $3, %ymm0, %ymm0
 ; X64-NEXT: retq
 %y = mul <16 x i16> %x,
@@ -292,13 +292,13 @@ define <16 x i16> @mul_const3(<16 x i16> %x) {

 define <4 x i64> @mul_const4(<4 x i64> %x) {
 ; X32-LABEL: mul_const4:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpxor %ymm1, %ymm1, %ymm1
 ; X32-NEXT: vpsubq %ymm0, %ymm1, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: mul_const4:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpxor %ymm1, %ymm1, %ymm1
 ; X64-NEXT: vpsubq %ymm0, %ymm1, %ymm0
 ; X64-NEXT: retq
@@ -308,12 +308,12 @@ define <4 x i64> @mul_const4(<4 x i64> %x) {

 define <8 x i32> @mul_const5(<8 x i32> %x) {
 ; X32-LABEL: mul_const5:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vxorps %ymm0, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: mul_const5:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vxorps %ymm0, %ymm0, %ymm0
 ; X64-NEXT: retq
 %y = mul <8 x i32> %x,
@@ -322,12 +322,12 @@ define <8 x i32> @mul_const5(<8 x i32> %x) {

 define <8 x i32> @mul_const6(<8 x i32> %x) {
 ; X32-LABEL: mul_const6:
-; X32: ## BB#0:
-; X32-NEXT: vpmulld LCPI18_0, %ymm0, %ymm0
+; X32: # BB#0:
+; X32-NEXT: vpmulld {{\.LCPI.*}}, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: mul_const6:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpmulld {{.*}}(%rip), %ymm0, %ymm0
 ; X64-NEXT: retq
 %y = mul <8 x i32> %x,
@@ -336,13 +336,13 @@ define <8 x i32> @mul_const6(<8 x i32> %x) {

 define <8 x i64> @mul_const7(<8 x i64> %x) {
 ; X32-LABEL: mul_const7:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpaddq %ymm0, %ymm0, %ymm0
 ; X32-NEXT: vpaddq %ymm1, %ymm1, %ymm1
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: mul_const7:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpaddq %ymm0, %ymm0, %ymm0
 ; X64-NEXT: vpaddq %ymm1, %ymm1, %ymm1
 ; X64-NEXT: retq
@@ -352,12 +352,12 @@ define <8 x i64> @mul_const7(<8 x i64> %x) {

 define <8 x i16> @mul_const8(<8 x i16> %x) {
 ; X32-LABEL: mul_const8:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsllw $3, %xmm0, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: mul_const8:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsllw $3, %xmm0, %xmm0
 ; X64-NEXT: retq
 %y = mul <8 x i16> %x,
@@ -366,14 +366,14 @@ define <8 x i16> @mul_const8(<8 x i16> %x) {

 define <8 x i32> @mul_const9(<8 x i32> %x) {
 ; X32-LABEL: mul_const9:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: movl $2, %eax
 ; X32-NEXT: vmovd %eax, %xmm1
 ; X32-NEXT: vpmulld %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: mul_const9:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: movl $2, %eax
 ; X64-NEXT: vmovd %eax, %xmm1
 ; X64-NEXT: vpmulld %ymm1, %ymm0, %ymm0
@@ -385,13 +385,13 @@ define <8 x i32> @mul_const9(<8 x i32> %x) {
 ; %x * 0x01010101
 define <4 x i32> @mul_const10(<4 x i32> %x) {
 ; X32-LABEL: mul_const10:
-; X32: ## BB#0:
-; X32-NEXT: vpbroadcastd LCPI22_0, %xmm1
+; X32: # BB#0:
+; X32-NEXT: vpbroadcastd {{\.LCPI.*}}, %xmm1
 ; X32-NEXT: vpmulld %xmm1, %xmm0, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: mul_const10:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
 ; X64-NEXT: vpmulld %xmm1, %xmm0, %xmm0
 ; X64-NEXT: retq
@@ -402,13 +402,13 @@ define <4 x i32> @mul_const10(<4 x i32> %x) {
 ; %x * 0x80808080
 define <4 x i32> @mul_const11(<4 x i32> %x) {
 ; X32-LABEL: mul_const11:
-; X32: ## BB#0:
-; X32-NEXT: vpbroadcastd LCPI23_0, %xmm1
+; X32: # BB#0:
+; X32-NEXT: vpbroadcastd {{\.LCPI.*}}, %xmm1
 ; X32-NEXT: vpmulld %xmm1, %xmm0, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: mul_const11:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
 ; X64-NEXT: vpmulld %xmm1, %xmm0, %xmm0
 ; X64-NEXT: retq
diff --git a/test/CodeGen/X86/avx2-cmp.ll b/test/CodeGen/X86/avx2-cmp.ll
index e2b550383c8..2369aa5ac9a 100644
--- a/test/CodeGen/X86/avx2-cmp.ll
+++ b/test/CodeGen/X86/avx2-cmp.ll
@@ -1,15 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64

 define <8 x i32> @v8i32_cmpgt(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
 ; X32-LABEL: v8i32_cmpgt:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: v8i32_cmpgt:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
 ; X64-NEXT: retq
 %bincmp = icmp slt <8 x i32> %i, %j
@@ -19,12 +19,12 @@ define <8 x i32> @v8i32_cmpgt(<8 x i32> %i, <8 x i32> %j) nounwind readnone {

 define <4 x i64> @v4i64_cmpgt(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
 ; X32-LABEL: v4i64_cmpgt:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: v4i64_cmpgt:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
 ; X64-NEXT: retq
 %bincmp = icmp slt <4 x i64> %i, %j
@@ -34,12 +34,12 @@ define <4 x i64> @v4i64_cmpgt(<4 x i64> %i, <4 x i64> %j) nounwind readnone {

 define <16 x i16> @v16i16_cmpgt(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
 ; X32-LABEL: v16i16_cmpgt:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: v16i16_cmpgt:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
 ; X64-NEXT: retq
 %bincmp = icmp slt <16 x i16> %i, %j
@@ -49,12 +49,12 @@ define <16 x i16> @v16i16_cmpgt(<16 x i16> %i, <16 x i16> %j) nounwind readnone

 define <32 x i8> @v32i8_cmpgt(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
 ; X32-LABEL: v32i8_cmpgt:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: v32i8_cmpgt:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
 ; X64-NEXT: retq
 %bincmp = icmp slt <32 x i8> %i, %j
@@ -64,12 +64,12 @@ define <32 x i8> @v32i8_cmpgt(<32 x i8> %i, <32 x i8> %j) nounwind readnone {

 define <8 x i32> @int256_cmpeq(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
 ; X32-LABEL: int256_cmpeq:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: int256_cmpeq:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %bincmp = icmp eq <8 x i32> %i, %j
@@ -79,12 +79,12 @@ define <8 x i32> @int256_cmpeq(<8 x i32> %i, <8 x i32> %j) nounwind readnone {

 define <4 x i64> @v4i64_cmpeq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
 ; X32-LABEL: v4i64_cmpeq:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: v4i64_cmpeq:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %bincmp = icmp eq <4 x i64> %i, %j
@@ -94,12 +94,12 @@ define <4 x i64> @v4i64_cmpeq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {

 define <16 x i16> @v16i16_cmpeq(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
 ; X32-LABEL: v16i16_cmpeq:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: v16i16_cmpeq:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %bincmp = icmp eq <16 x i16> %i, %j
@@ -109,12 +109,12 @@ define <16 x i16> @v16i16_cmpeq(<16 x i16> %i, <16 x i16> %j) nounwind readnone

 define <32 x i8> @v32i8_cmpeq(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
 ; X32-LABEL: v32i8_cmpeq:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: v32i8_cmpeq:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %bincmp = icmp eq <32 x i8> %i, %j
diff --git a/test/CodeGen/X86/avx2-conversions.ll b/test/CodeGen/X86/avx2-conversions.ll
index 26edafbdb64..60cc2cf199e 100644
--- a/test/CodeGen/X86/avx2-conversions.ll
+++ b/test/CodeGen/X86/avx2-conversions.ll
@@ -1,21 +1,21 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64

 define <4 x i32> @trunc4(<4 x i64> %A) nounwind {
 ; X32-LABEL: trunc4:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
 ; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X32-NEXT: ## kill: %XMM0 %XMM0 %YMM0
+; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0
 ; X32-NEXT: vzeroupper
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: trunc4:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
 ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X64-NEXT: ## kill: %XMM0 %XMM0 %YMM0
+; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0
 ; X64-NEXT: vzeroupper
 ; X64-NEXT: retq
 %B = trunc <4 x i64> %A to <4 x i32>
@@ -24,18 +24,18 @@ define <4 x i32> @trunc4(<4 x i64> %A) nounwind {

 define <8 x i16> @trunc8(<8 x i32> %A) nounwind {
 ; X32-LABEL: trunc8:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
 ; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X32-NEXT: ## kill: %XMM0 %XMM0 %YMM0
+; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0
 ; X32-NEXT: vzeroupper
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: trunc8:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
 ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X64-NEXT: ## kill: %XMM0 %XMM0 %YMM0
+; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0
 ; X64-NEXT: vzeroupper
 ; X64-NEXT: retq
 %B = trunc <8 x i32> %A to <8 x i16>
@@ -44,12 +44,12 @@ define <8 x i16> @trunc8(<8 x i32> %A) nounwind {

 define <4 x i64> @sext4(<4 x i32> %A) nounwind {
 ; X32-LABEL: sext4:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpmovsxdq %xmm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: sext4:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpmovsxdq %xmm0, %ymm0
 ; X64-NEXT: retq
 %B = sext <4 x i32> %A to <4 x i64>
@@ -58,12 +58,12 @@ define <4 x i64> @sext4(<4 x i32> %A) nounwind {

 define <8 x i32> @sext8(<8 x i16> %A) nounwind {
 ; X32-LABEL: sext8:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpmovsxwd %xmm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: sext8:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpmovsxwd %xmm0, %ymm0
 ; X64-NEXT: retq
 %B = sext <8 x i16> %A to <8 x i32>
@@ -72,12 +72,12 @@ define <8 x i32> @sext8(<8 x i16> %A) nounwind {

 define <4 x i64> @zext4(<4 x i32> %A) nounwind {
 ; X32-LABEL: zext4:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: zext4:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
 ; X64-NEXT: retq
 %B = zext <4 x i32> %A to <4 x i64>
@@ -86,12 +86,12 @@ define <4 x i64> @zext4(<4 x i32> %A) nounwind {

 define <8 x i32> @zext8(<8 x i16> %A) nounwind {
 ; X32-LABEL: zext8:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: zext8:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; X64-NEXT: retq
 %B = zext <8 x i16> %A to <8 x i32>
@@ -100,13 +100,13 @@ define <8 x i32> @zext8(<8 x i16> %A) nounwind {

 define <8 x i32> @zext_8i8_8i32(<8 x i8> %A) nounwind {
 ; X32-LABEL: zext_8i8_8i32:
-; X32: ## BB#0:
-; X32-NEXT: vpand LCPI6_0, %xmm0, %xmm0
+; X32: # BB#0:
+; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
 ; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: zext_8i8_8i32:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; X64-NEXT: retq
@@ -116,12 +116,12 @@ define <8 x i32> @zext_8i8_8i32(<8 x i8> %A) nounwind {

 define <16 x i16> @zext_16i8_16i16(<16 x i8> %z) {
 ; X32-LABEL: zext_16i8_16i16:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: zext_16i8_16i16:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
 ; X64-NEXT: retq
 %t = zext <16 x i8> %z to <16 x i16>
@@ -130,12 +130,12 @@ define <16 x i16> @zext_16i8_16i16(<16 x i8> %z) {

 define <16 x i16> @sext_16i8_16i16(<16 x i8> %z) {
 ; X32-LABEL: sext_16i8_16i16:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpmovsxbw %xmm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: sext_16i8_16i16:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpmovsxbw %xmm0, %ymm0
 ; X64-NEXT: retq
 %t = sext <16 x i8> %z to <16 x i16>
@@ -144,7 +144,7 @@ define <16 x i16> @sext_16i8_16i16(<16 x i8> %z) {

 define <16 x i8> @trunc_16i16_16i8(<16 x i16> %z) {
 ; X32-LABEL: trunc_16i16_16i8:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; X32-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
 ; X32-NEXT: vpshufb %xmm2, %xmm1, %xmm1
@@ -154,7 +154,7 @@ define <16 x i8> @trunc_16i16_16i8(<16 x i16> %z) {
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: trunc_16i16_16i8:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; X64-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
 ; X64-NEXT: vpshufb %xmm2, %xmm1, %xmm1
@@ -168,13 +168,13 @@ define <16 x i8> @trunc_16i16_16i8(<16 x i16> %z) {

 define <4 x i64> @load_sext_test1(<4 x i32> *%ptr) {
 ; X32-LABEL: load_sext_test1:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: vpmovsxdq (%eax), %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: load_sext_test1:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpmovsxdq (%rdi), %ymm0
 ; X64-NEXT: retq
 %X = load <4 x i32>, <4 x i32>* %ptr
@@ -184,13 +184,13 @@ define <4 x i64> @load_sext_test1(<4 x i32> *%ptr) {

 define <4 x i64> @load_sext_test2(<4 x i8> *%ptr) {
 ; X32-LABEL: load_sext_test2:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: vpmovsxbq (%eax), %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: load_sext_test2:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpmovsxbq (%rdi), %ymm0
 ; X64-NEXT: retq
 %X = load <4 x i8>, <4 x i8>* %ptr
@@ -200,13 +200,13 @@ define <4 x i64> @load_sext_test2(<4 x i8> *%ptr) {

 define <4 x i64> @load_sext_test3(<4 x i16> *%ptr) {
 ; X32-LABEL: load_sext_test3:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: vpmovsxwq (%eax), %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: load_sext_test3:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpmovsxwq (%rdi), %ymm0
 ; X64-NEXT: retq
 %X = load <4 x i16>, <4 x i16>* %ptr
@@ -216,13 +216,13 @@ define <4 x i64> @load_sext_test3(<4 x i16> *%ptr) {

 define <8 x i32> @load_sext_test4(<8 x i16> *%ptr) {
 ; X32-LABEL: load_sext_test4:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: vpmovsxwd (%eax), %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: load_sext_test4:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpmovsxwd (%rdi), %ymm0
 ; X64-NEXT: retq
 %X = load <8 x i16>, <8 x i16>* %ptr
@@ -232,13 +232,13 @@ define <8 x i32> @load_sext_test4(<8 x i16> *%ptr) {

 define <8 x i32> @load_sext_test5(<8 x i8> *%ptr) {
 ; X32-LABEL: load_sext_test5:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: vpmovsxbd (%eax), %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: load_sext_test5:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpmovsxbd (%rdi), %ymm0
 ; X64-NEXT: retq
 %X = load <8 x i8>, <8 x i8>* %ptr
diff --git a/test/CodeGen/X86/avx2-fma-fneg-combine.ll b/test/CodeGen/X86/avx2-fma-fneg-combine.ll
index 345943bd730..019593cc0f8 100644
--- a/test/CodeGen/X86/avx2-fma-fneg-combine.ll
+++ b/test/CodeGen/X86/avx2-fma-fneg-combine.ll
@@ -1,17 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2,+fma | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fma | FileCheck %s --check-prefix=X64

 ; This test checks combinations of FNEG and FMA intrinsics

 define <8 x float> @test1(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
 ; X32-LABEL: test1:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
 ; X32-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test1:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
 ; X64-NEXT: retq
 entry:
@@ -24,12 +24,12 @@ declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x f

 define <4 x float> @test2(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; X32-LABEL: test2:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
 ; X32-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test2:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
 ; X64-NEXT: retq
 entry:
@@ -42,14 +42,14 @@ declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x

 define <4 x float> @test3(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; X32-LABEL: test3:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
 ; X32-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
-; X32-NEXT: vbroadcastss LCPI2_0, %xmm1
+; X32-NEXT: vbroadcastss {{\.LCPI.*}}, %xmm1
 ; X32-NEXT: vxorps %xmm1, %xmm0, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test3:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
 ; X64-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
 ; X64-NEXT: vxorps %xmm1, %xmm0, %xmm0
@@ -64,12 +64,12 @@ declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4

 define <8 x float> @test4(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
 ; X32-LABEL: test4:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
 ; X32-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test4:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
 ; X64-NEXT: retq
 entry:
@@ -80,14 +80,14 @@ entry:

 define <8 x float> @test5(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
 ; X32-LABEL: test5:
-; X32: ## BB#0: ## %entry
-; X32-NEXT: vbroadcastss LCPI4_0, %ymm3
+; X32: # BB#0: # %entry
+; X32-NEXT: vbroadcastss {{\.LCPI.*}}, %ymm3
 ; X32-NEXT: vxorps %ymm3, %ymm2, %ymm2
 ; X32-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test5:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: vbroadcastss {{.*}}(%rip), %ymm3
 ; X64-NEXT: vxorps %ymm3, %ymm2, %ymm2
 ; X64-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
@@ -103,12 +103,12 @@ declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x f

 define <2 x double> @test6(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 ; X32-LABEL: test6:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
 ; X32-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test6:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
 ; X64-NEXT: retq
 entry:
diff --git a/test/CodeGen/X86/avx2-gather.ll b/test/CodeGen/X86/avx2-gather.ll
index d162b4755ee..64dd6fa0061 100644
--- a/test/CodeGen/X86/avx2-gather.ll
+++ b/test/CodeGen/X86/avx2-gather.ll
@@ -1,13 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64

 declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, <4 x i32>, <4 x float>, i8) nounwind readonly

 define <4 x float> @test_x86_avx2_gather_d_ps(i8* %a1, <4 x i32> %idx, <4 x float> %mask) {
 ; X32-LABEL: test_x86_avx2_gather_d_ps:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: vxorps %xmm2, %xmm2, %xmm2
 ; X32-NEXT: vgatherdps %xmm1, (%eax,%xmm0,2), %xmm2
@@ -15,7 +15,7 @@ define <4 x float> @test_x86_avx2_gather_d_ps(i8* %a1, <4 x i32> %idx, <4 x floa
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_x86_avx2_gather_d_ps:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2
 ; X64-NEXT: vgatherdps %xmm1, (%rdi,%xmm0,2), %xmm2
 ; X64-NEXT: vmovaps %xmm2, %xmm0
@@ -30,7 +30,7 @@ declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*,

 define <2 x double> @test_x86_avx2_gather_d_pd(i8* %a1, <4 x i32> %idx, <2 x double> %mask) {
 ; X32-LABEL: test_x86_avx2_gather_d_pd:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: vxorpd %xmm2, %xmm2, %xmm2
 ; X32-NEXT: vgatherdpd %xmm1, (%eax,%xmm0,2), %xmm2
@@ -38,7 +38,7 @@ define <2 x double> @test_x86_avx2_gather_d_pd(i8* %a1, <4 x i32> %idx, <2 x dou
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_x86_avx2_gather_d_pd:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vxorpd %xmm2, %xmm2, %xmm2
 ; X64-NEXT: vgatherdpd %xmm1, (%rdi,%xmm0,2), %xmm2
 ; X64-NEXT: vmovapd %xmm2, %xmm0
@@ -53,7 +53,7 @@ declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*,

 define <8 x float> @test_x86_avx2_gather_d_ps_256(i8* %a1, <8 x i32> %idx, <8 x float> %mask) {
 ; X32-LABEL: test_x86_avx2_gather_d_ps_256:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: vxorps %ymm2, %ymm2, %ymm2
 ; X32-NEXT: vgatherdps %ymm1, (%eax,%ymm0,4), %ymm2
@@ -61,7 +61,7 @@ define <8 x float> @test_x86_avx2_gather_d_ps_256(i8* %a1, <8 x i32> %idx, <8 x
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_x86_avx2_gather_d_ps_256:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vxorps %ymm2, %ymm2, %ymm2
 ; X64-NEXT: vgatherdps %ymm1, (%rdi,%ymm0,4), %ymm2
 ; X64-NEXT: vmovaps %ymm2, %ymm0
@@ -76,7 +76,7 @@ declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*,

 define <4 x double> @test_x86_avx2_gather_d_pd_256(i8* %a1, <4 x i32> %idx, <4 x double> %mask) {
 ; X32-LABEL: test_x86_avx2_gather_d_pd_256:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: vxorpd %ymm2, %ymm2, %ymm2
 ; X32-NEXT: vgatherdpd %ymm1, (%eax,%xmm0,8), %ymm2
@@ -84,7 +84,7 @@ define <4 x double> @test_x86_avx2_gather_d_pd_256(i8* %a1, <4 x i32> %idx, <4 x
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_x86_avx2_gather_d_pd_256:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vxorpd %ymm2, %ymm2, %ymm2
 ; X64-NEXT: vgatherdpd %ymm1, (%rdi,%xmm0,8), %ymm2
 ; X64-NEXT: vmovapd %ymm2, %ymm0
@@ -96,7 +96,7 @@ define <4 x double> @test_x86_avx2_gather_d_pd_256(i8* %a1, <4 x i32> %idx, <4 x

 define <2 x i64> @test_mm_i32gather_epi32(i32 *%a0, <2 x i64> %a1) {
 ; X32-LABEL: test_mm_i32gather_epi32:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
 ; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
@@ -105,7 +105,7 @@ define <2 x i64> @test_mm_i32gather_epi32(i32 *%a0, <2 x i64> %a1) {
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm_i32gather_epi32:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
 ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X64-NEXT: vpgatherdd %xmm2, (%rdi,%xmm0,2), %xmm1
@@ -122,7 +122,7 @@ declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32>

 define <2 x double> @test_mm_i32gather_pd(double *%a0, <2 x i64> %a1) {
 ; X32-LABEL: test_mm_i32gather_pd:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
 ; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
@@ -131,7 +131,7 @@ define <2 x double> @test_mm_i32gather_pd(double *%a0, <2 x i64> %a1) {
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm_i32gather_pd:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
 ; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
 ; X64-NEXT: vgatherdpd %xmm2, (%rdi,%xmm0,2), %xmm1
diff --git a/test/CodeGen/X86/avx2-logic.ll b/test/CodeGen/X86/avx2-logic.ll
index 9208d959a75..68d486699cb 100644
--- a/test/CodeGen/X86/avx2-logic.ll
+++ b/test/CodeGen/X86/avx2-logic.ll
@@ -1,17 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64

 define <4 x i64> @vpandn(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
 ; X32-LABEL: vpandn:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
 ; X32-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
 ; X32-NEXT: vpsubq %ymm1, %ymm0, %ymm1
 ; X32-NEXT: vpandn %ymm0, %ymm1, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: vpandn:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
 ; X64-NEXT: vpsubq %ymm1, %ymm0, %ymm1
 ; X64-NEXT: vpandn %ymm0, %ymm1, %ymm0
@@ -26,14 +26,14 @@ entry:

 define <4 x i64> @vpand(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
 ; X32-LABEL: vpand:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
 ; X32-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
 ; X32-NEXT: vpsubq %ymm2, %ymm0, %ymm0
 ; X32-NEXT: vpand %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: vpand:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
 ; X64-NEXT: vpsubq %ymm2, %ymm0, %ymm0
 ; X64-NEXT: vpand %ymm1, %ymm0, %ymm0
@@ -47,14 +47,14 @@ entry:

 define <4 x i64> @vpor(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
 ; X32-LABEL: vpor:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
 ; X32-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
 ; X32-NEXT: vpsubq %ymm2, %ymm0, %ymm0
 ; X32-NEXT: vpor %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: vpor:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
 ; X64-NEXT: vpsubq %ymm2, %ymm0, %ymm0
 ; X64-NEXT: vpor %ymm1, %ymm0, %ymm0
@@ -68,14 +68,14 @@ entry:

 define <4 x i64> @vpxor(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
 ; X32-LABEL: vpxor:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
 ; X32-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
 ; X32-NEXT: vpsubq %ymm2, %ymm0, %ymm0
 ; X32-NEXT: vpxor %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: vpxor:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
 ; X64-NEXT: vpsubq %ymm2, %ymm0, %ymm0
 ; X64-NEXT: vpxor %ymm1, %ymm0, %ymm0
@@ -89,14 +89,14 @@ entry:

 define <32 x i8> @vpblendvb(<32 x i1> %cond, <32 x i8> %x, <32 x i8> %y) {
 ; X32-LABEL: vpblendvb:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsllw $7, %ymm0, %ymm0
-; X32-NEXT: vpand LCPI4_0, %ymm0, %ymm0
+; X32-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm0
 ; X32-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: vpblendvb:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsllw $7, %ymm0, %ymm0
 ; X64-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
 ; X64-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
@@ -107,12 +107,12 @@ define <32 x i8> @vpblendvb(<32 x i1> %cond, <32 x i8> %x, <32 x i8> %y) {

 define <8 x i32> @allOnes() nounwind {
 ; X32-LABEL: allOnes:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: allOnes:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X64-NEXT: retq
 ret <8 x i32>
@@ -120,12 +120,12 @@ define <8 x i32> @allOnes() nounwind {

 define <16 x i16> @allOnes2() nounwind {
 ; X32-LABEL: allOnes2:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: allOnes2:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X64-NEXT: retq
 ret <16 x i16>
diff --git a/test/CodeGen/X86/avx2-phaddsub.ll b/test/CodeGen/X86/avx2-phaddsub.ll
index 9eafac902b8..232a3326fa1 100644
--- a/test/CodeGen/X86/avx2-phaddsub.ll
+++ b/test/CodeGen/X86/avx2-phaddsub.ll
@@ -1,15 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64

 define <16 x i16> @phaddw1(<16 x i16> %x, <16 x i16> %y) {
 ; X32-LABEL: phaddw1:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vphaddw %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: phaddw1:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vphaddw %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32>
@@ -20,12 +20,12 @@ define <16 x i16> @phaddw1(<16 x i16> %x, <16 x i16> %y) {

 define <16 x i16> @phaddw2(<16 x i16> %x, <16 x i16> %y) {
 ; X32-LABEL: phaddw2:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vphaddw %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: phaddw2:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vphaddw %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32>
@@ -36,12 +36,12 @@ define <16 x i16> @phaddw2(<16 x i16> %x, <16 x i16> %y) {

 define <8 x i32> @phaddd1(<8 x i32> %x, <8 x i32> %y) {
 ; X32-LABEL: phaddd1:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vphaddd %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: phaddd1:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vphaddd %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32>
@@ -52,12 +52,12 @@ define <8 x i32> @phaddd1(<8 x i32> %x, <8 x i32> %y) {

 define <8 x i32> @phaddd2(<8 x i32> %x, <8 x i32> %y) {
 ; X32-LABEL: phaddd2:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vphaddd %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: phaddd2:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vphaddd %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32>
@@ -68,12 +68,12 @@ define <8 x i32> @phaddd2(<8 x i32> %x, <8 x i32> %y) {

 define <8 x i32> @phaddd3(<8 x i32> %x) {
 ; X32-LABEL: phaddd3:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vphaddd %ymm0, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: phaddd3:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vphaddd %ymm0, %ymm0, %ymm0
 ; X64-NEXT: retq
 %a = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32>
@@ -84,12 +84,12 @@ define <8 x i32> @phaddd3(<8 x i32> %x) {

 define <16 x i16> @phsubw1(<16 x i16> %x, <16 x i16> %y) {
 ; X32-LABEL: phsubw1:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vphsubw %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: phsubw1:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vphsubw %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32>
@@ -100,12 +100,12 @@ define <16 x i16> @phsubw1(<16 x i16> %x, <16 x i16> %y) {

 define <8 x i32> @phsubd1(<8 x i32> %x, <8 x i32> %y) {
 ; X32-LABEL: phsubd1:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vphsubd %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: phsubd1:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vphsubd %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32>
@@ -116,12 +116,12 @@ define <8 x i32> @phsubd1(<8 x i32> %x, <8 x i32> %y) {

 define <8 x i32> @phsubd2(<8 x i32> %x, <8 x i32> %y) {
 ; X32-LABEL: phsubd2:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vphsubd %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: phsubd2:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vphsubd %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32>
diff --git a/test/CodeGen/X86/avx2-shift.ll b/test/CodeGen/X86/avx2-shift.ll
index 4345bd6f792..47bbba2c7e0 100644
--- a/test/CodeGen/X86/avx2-shift.ll
+++ b/test/CodeGen/X86/avx2-shift.ll
@@ -1,15 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64

 define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {
 ; X32-LABEL: variable_shl0:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: variable_shl0:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
 ; X64-NEXT: retq
 %k = shl <4 x i32> %x, %y
@@ -18,12 +18,12 @@ define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {

 define <8 x i32> @variable_shl1(<8 x i32> %x, <8 x i32> %y) {
 ; X32-LABEL: variable_shl1:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: variable_shl1:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %k = shl <8 x i32> %x, %y
@@ -32,12 +32,12 @@ define <8 x i32> @variable_shl1(<8 x i32> %x, <8 x i32> %y) {

 define <2 x i64> @variable_shl2(<2 x i64> %x, <2 x i64> %y) {
 ; X32-LABEL: variable_shl2:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: variable_shl2:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
 ; X64-NEXT: retq
 %k = shl <2 x i64> %x, %y
@@ -46,12 +46,12 @@ define <2 x i64> @variable_shl2(<2 x i64> %x, <2 x i64> %y) {

 define <4 x i64> @variable_shl3(<4 x i64> %x, <4 x i64> %y) {
 ; X32-LABEL: variable_shl3:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: variable_shl3:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %k = shl <4 x i64> %x, %y
@@ -60,12 +60,12 @@ define <4 x i64> @variable_shl3(<4 x i64> %x, <4 x i64> %y) {

 define <4 x i32> @variable_srl0(<4 x i32> %x, <4 x i32> %y) {
 ; X32-LABEL: variable_srl0:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: variable_srl0:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
 ; X64-NEXT: retq
 %k = lshr <4 x i32> %x, %y
@@ -74,12 +74,12 @@ define <4 x i32> @variable_srl0(<4 x i32> %x, <4 x i32> %y) {

 define <8 x i32> @variable_srl1(<8 x i32> %x, <8 x i32> %y) {
 ; X32-LABEL: variable_srl1:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: variable_srl1:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %k = lshr <8 x i32> %x, %y
@@ -88,12 +88,12 @@ define <8 x i32> @variable_srl1(<8 x i32> %x, <8 x i32> %y) {

 define <2 x i64> @variable_srl2(<2 x i64> %x, <2 x i64> %y) {
 ; X32-LABEL: variable_srl2:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: variable_srl2:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
 ; X64-NEXT: retq
 %k = lshr <2 x i64> %x, %y
@@ -102,12 +102,12 @@ define <2 x i64> @variable_srl2(<2 x i64> %x, <2 x i64> %y) {

 define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) {
 ; X32-LABEL: variable_srl3:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: variable_srl3:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %k = lshr <4 x i64> %x, %y
@@ -116,12 +116,12 @@ define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) {

 define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
 ; X32-LABEL: variable_sra0:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsravd %xmm1, %xmm0, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: variable_sra0:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsravd %xmm1, %xmm0, %xmm0
 ; X64-NEXT: retq
 %k = ashr <4 x i32> %x, %y
@@ -130,12 +130,12 @@ define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {

 define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
 ; X32-LABEL: variable_sra1:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: variable_sra1:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0
 ; X64-NEXT: retq
 %k = ashr <8 x i32> %x, %y
@@ -146,12 +146,12 @@ define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {

 define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
 ; X32-LABEL: vshift00:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpslld $2, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: vshift00:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpslld $2, %ymm0, %ymm0
 ; X64-NEXT: retq
 %s = shl <8 x i32> %a,
@@ -160,12 +160,12 @@ define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {

 define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
 ; X32-LABEL: vshift01:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsllw $2, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: vshift01:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsllw $2, %ymm0, %ymm0
 ; X64-NEXT: retq
 %s = shl <16 x i16> %a,
@@ -174,12 +174,12 @@ define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {

 define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
 ; X32-LABEL: vshift02:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsllq $2, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: vshift02:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsllq $2, %ymm0, %ymm0
 ; X64-NEXT: retq
 %s = shl <4 x i64> %a,
@@ -190,12 +190,12 @@ define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {

 define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
 ; X32-LABEL: vshift03:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsrld $2, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: vshift03:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsrld $2, %ymm0, %ymm0
 ; X64-NEXT: retq
 %s = lshr <8 x i32> %a,
@@ -204,12 +204,12 @@ define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {

 define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
 ; X32-LABEL: vshift04:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsrlw $2, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: vshift04:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsrlw $2, %ymm0, %ymm0
 ; X64-NEXT: retq
 %s = lshr <16 x i16> %a,
@@ -218,12 +218,12 @@ define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {

 define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
 ; X32-LABEL: vshift05:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsrlq $2, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: vshift05:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsrlq $2, %ymm0, %ymm0
 ; X64-NEXT: retq
 %s = lshr <4 x i64> %a,
@@ -234,12 +234,12 @@ define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {

 define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
 ; X32-LABEL: vshift06:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsrad $2, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: vshift06:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsrad $2, %ymm0, %ymm0
 ; X64-NEXT: retq
 %s = ashr <8 x i32> %a,
@@ -248,12 +248,12 @@ define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {

 define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
 ; X32-LABEL: vshift07:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsraw $2, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: vshift07:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsraw $2, %ymm0, %ymm0
 ; X64-NEXT: retq
 %s = ashr <16 x i16> %a,
@@ -262,13 +262,13 @@ define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {

 define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
 ; X32-LABEL: variable_sra0_load:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: vpsravd (%eax), %xmm0, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: variable_sra0_load:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsravd (%rdi), %xmm0, %xmm0
 ; X64-NEXT: retq
 %y1 = load <4 x i32>, <4 x i32>* %y
@@ -278,13 +278,13 @@ define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {

 define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
 ; X32-LABEL: variable_sra1_load:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: vpsravd (%eax), %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: variable_sra1_load:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsravd (%rdi), %ymm0, %ymm0
 ; X64-NEXT: retq
 %y1 = load <8 x i32>, <8 x i32>* %y
@@ -294,13 +294,13 @@ define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {

 define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
 ; X32-LABEL: variable_shl0_load:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: vpsllvd (%eax), %xmm0, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: variable_shl0_load:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsllvd (%rdi), %xmm0, %xmm0
 ; X64-NEXT: retq
 %y1 = load <4 x i32>, <4 x i32>* %y
@@ -310,13 +310,13 @@ define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {

 define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
 ; X32-LABEL: variable_shl1_load:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: vpsllvd (%eax), %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: variable_shl1_load:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsllvd (%rdi), %ymm0, %ymm0
 ; X64-NEXT: retq
 %y1 = load <8 x i32>, <8 x i32>* %y
@@ -326,13 +326,13 @@ define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {

 define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
 ; X32-LABEL: variable_shl2_load:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: vpsllvq (%eax), %xmm0, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: variable_shl2_load:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsllvq (%rdi), %xmm0, %xmm0
 ; X64-NEXT: retq
 %y1 = load <2 x i64>, <2 x i64>* %y
@@ -342,13 +342,13 @@ define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {

 define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
 ; X32-LABEL: variable_shl3_load:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: vpsllvq (%eax), %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: variable_shl3_load:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsllvq (%rdi), %ymm0, %ymm0
 ; X64-NEXT: retq
 %y1 = load <4 x i64>, <4 x i64>* %y
@@ -358,13 +358,13 @@ define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {

 define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
 ; X32-LABEL: variable_srl0_load:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: vpsrlvd (%eax), %xmm0, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: variable_srl0_load:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0
 ; X64-NEXT: retq
 %y1 = load <4 x i32>, <4 x i32>* %y
@@ -374,13 +374,13 @@ define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {

 define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
 ; X32-LABEL: variable_srl1_load:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: vpsrlvd (%eax), %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: variable_srl1_load:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0
 ; X64-NEXT: retq
 %y1 = load <8 x i32>, <8 x i32>* %y
@@ -390,13 +390,13 @@ define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {

 define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
 ; X32-LABEL: variable_srl2_load:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: vpsrlvq (%eax), %xmm0, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: variable_srl2_load:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0
 ; X64-NEXT: retq
 %y1 = load <2 x i64>, <2 x i64>* %y
@@ -406,13 +406,13 @@ define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {

 define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
 ; X32-LABEL: variable_srl3_load:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: vpsrlvq (%eax), %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: variable_srl3_load:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0
 ; X64-NEXT: retq
 %y1 = load <4 x i64>, <4 x i64>* %y
@@ -422,13 +422,13 @@ define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {

 define <32 x i8> @shl9(<32 x i8> %A) nounwind {
 ; X32-LABEL: shl9:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsllw $3, %ymm0, %ymm0
-; X32-NEXT: vpand LCPI28_0, %ymm0, %ymm0
+; X32-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: shl9:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsllw $3, %ymm0, %ymm0
 ; X64-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
 ; X64-NEXT: retq
@@ -438,13 +438,13 @@ define <32 x i8> @shl9(<32 x i8> %A) nounwind {

 define <32 x i8> @shr9(<32 x i8> %A) nounwind {
 ; X32-LABEL: shr9:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsrlw $3, %ymm0, %ymm0
-; X32-NEXT: vpand LCPI29_0, %ymm0, %ymm0
+; X32-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: shr9:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsrlw $3, %ymm0, %ymm0
 ; X64-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
 ; X64-NEXT: retq
@@ -454,13 +454,13 @@ define <32 x i8> @shr9(<32 x i8> %A) nounwind {

 define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
 ; X32-LABEL: sra_v32i8_7:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpxor %ymm1, %ymm1, %ymm1
 ; X32-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: sra_v32i8_7:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpxor %ymm1, %ymm1, %ymm1
 ; X64-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
 ; X64-NEXT: retq
@@ -470,16 +470,16 @@ define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {

 define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
 ; X32-LABEL: sra_v32i8:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsrlw $3, %ymm0, %ymm0
-; X32-NEXT: vpand LCPI31_0, %ymm0, %ymm0
+; X32-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm0
 ; X32-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
 ; X32-NEXT: vpxor %ymm1, %ymm0, %ymm0
 ; X32-NEXT: vpsubb %ymm1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: sra_v32i8:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsrlw $3, %ymm0, %ymm0
 ; X64-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
 ; X64-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
@@ -492,13 +492,13 @@ define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {

 define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
 ; X32-LABEL: sext_v16i16:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpsllw $8, %ymm0, %ymm0
 ; X32-NEXT: vpsraw $8, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: sext_v16i16:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpsllw $8, %ymm0, %ymm0
 ; X64-NEXT: vpsraw $8, %ymm0, %ymm0
 ; X64-NEXT: retq
@@ -509,13 +509,13 @@ define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {

 define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
 ; X32-LABEL: sext_v8i32:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpslld $16, %ymm0, %ymm0
 ; X32-NEXT: vpsrad $16, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: sext_v8i32:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpslld $16, %ymm0, %ymm0
 ; X64-NEXT: vpsrad $16, %ymm0, %ymm0
 ; X64-NEXT: retq
@@ -526,24 +526,24 @@ define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {

 define <8 x i16> @variable_shl16(<8 x i16> %lhs, <8 x i16> %rhs) {
 ; X32-LABEL: variable_shl16:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
 ; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
 ; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X32-NEXT: ## kill: %XMM0 %XMM0 %YMM0
+; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0
 ; X32-NEXT: vzeroupper
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: variable_shl16:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
 ; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
 ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X64-NEXT: ## kill: %XMM0 %XMM0 %YMM0
+; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0
 ; X64-NEXT: vzeroupper
 ; X64-NEXT: retq
 %res = shl <8 x i16> %lhs, %rhs
@@ -552,24 +552,24 @@ define <8 x i16> @variable_shl16(<8 x i16> %lhs, <8 x i16> %rhs) {

 define <8 x i16> @variable_ashr16(<8 x i16> %lhs, <8 x i16> %rhs) {
 ; X32-LABEL: variable_ashr16:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; X32-NEXT: vpmovsxwd %xmm0, %ymm0
 ; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0
 ; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
 ; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X32-NEXT: ## kill: %XMM0 %XMM0 %YMM0
+; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0
 ; X32-NEXT: vzeroupper
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: variable_ashr16:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; X64-NEXT: vpmovsxwd %xmm0, %ymm0
 ; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0
 ; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
 ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X64-NEXT: ## kill: %XMM0 %XMM0 %YMM0
+; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0
 ; X64-NEXT: vzeroupper
 ; X64-NEXT: retq
 %res = ashr <8 x i16> %lhs, %rhs
@@ -578,24 +578,24 @@ define <8 x i16> @variable_ashr16(<8 x i16> %lhs, <8 x i16> %rhs) {

 define <8 x i16> @variable_lshr16(<8 x i16> %lhs, <8 x i16> %rhs) {
 ; X32-LABEL: variable_lshr16:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
 ; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
 ; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X32-NEXT: ## kill: %XMM0 %XMM0 %YMM0
+; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0
 ; X32-NEXT: vzeroupper
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: variable_lshr16:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
 ; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
 ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; X64-NEXT: ## kill: %XMM0 %XMM0 %YMM0
+; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0
 ; X64-NEXT: vzeroupper
 ; X64-NEXT: retq
 %res = lshr <8 x i16> %lhs, %rhs
diff --git a/test/CodeGen/X86/avx2-vector-shifts.ll b/test/CodeGen/X86/avx2-vector-shifts.ll
index 958c7746e29..127726ea30d 100644
--- a/test/CodeGen/X86/avx2-vector-shifts.ll
+++ b/test/CodeGen/X86/avx2-vector-shifts.ll
@@ -1,16 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64

 ; AVX2 Logical Shift Left

 define <16 x i16> @test_sllw_1(<16 x i16> %InVec) {
 ; X32-LABEL: test_sllw_1:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_sllw_1:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: retq
 entry:
 %shl = shl <16 x i16> %InVec,
@@ -19,12 +19,12 @@ entry:

 define <16 x i16> @test_sllw_2(<16 x i16> %InVec) {
 ; X32-LABEL: test_sllw_2:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
 ; X32-NEXT: vpaddw %ymm0, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_sllw_2:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: vpaddw %ymm0, %ymm0, %ymm0
 ; X64-NEXT: retq
 entry:
@@ -34,12 +34,12 @@ entry:

 define <16 x i16> @test_sllw_3(<16 x i16> %InVec) {
 ; X32-LABEL: test_sllw_3:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
 ; X32-NEXT: vpsllw $15, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_sllw_3:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: vpsllw $15, %ymm0, %ymm0
 ; X64-NEXT: retq
 entry:
@@ -49,11 +49,11 @@ entry:

 define <8 x i32> @test_slld_1(<8 x i32> %InVec) {
 ; X32-LABEL: test_slld_1:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_slld_1:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: retq
 entry:
 %shl = shl <8 x i32> %InVec,
@@ -62,12 +62,12 @@ entry:

 define <8 x i32> @test_slld_2(<8 x i32> %InVec) {
 ; X32-LABEL: test_slld_2:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
 ; X32-NEXT: vpaddd %ymm0, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_slld_2:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: vpaddd %ymm0, %ymm0, %ymm0
 ; X64-NEXT: retq
 entry:
@@ -77,14 +77,14 @@ entry:

 define <8 x i32> @test_vpslld_var(i32 %shift) {
 ; X32-LABEL: test_vpslld_var:
-; X32: ## BB#0:
+; X32: # BB#0:
 ; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-NEXT: vmovdqa {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
 ; X32-NEXT: vpslld %xmm0, %ymm1, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_vpslld_var:
-; X64: ## BB#0:
+; X64: # BB#0:
 ; X64-NEXT: vmovd %edi, %xmm0
 ; X64-NEXT: vmovdqa {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
 ; X64-NEXT: vpslld %xmm0, %ymm1, %ymm0
@@ -96,12 +96,12 @@ define <8 x i32> @test_vpslld_var(i32 %shift) {

 define <8 x i32> @test_slld_3(<8 x i32> %InVec) {
 ; X32-LABEL: test_slld_3:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
 ; X32-NEXT: vpslld $31, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_slld_3:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: vpslld $31, %ymm0, %ymm0
 ; X64-NEXT: retq
 entry:
@@ -111,11 +111,11 @@ entry:

 define <4 x i64> @test_sllq_1(<4 x i64> %InVec) {
 ; X32-LABEL: test_sllq_1:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_sllq_1:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: retq
 entry:
 %shl = shl <4 x i64> %InVec,
@@ -124,12 +124,12 @@ entry:

 define <4 x i64> @test_sllq_2(<4 x i64> %InVec) {
 ; X32-LABEL: test_sllq_2:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
 ; X32-NEXT: vpaddq %ymm0, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_sllq_2:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: vpaddq %ymm0, %ymm0, %ymm0
 ; X64-NEXT: retq
 entry:
@@ -139,12 +139,12 @@ entry:

 define <4 x i64> @test_sllq_3(<4 x i64> %InVec) {
 ; X32-LABEL: test_sllq_3:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
 ; X32-NEXT: vpsllq $63, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_sllq_3:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: vpsllq $63, %ymm0, %ymm0
 ; X64-NEXT: retq
 entry:
@@ -156,11 +156,11 @@ entry:

 define <16 x i16> @test_sraw_1(<16 x i16> %InVec) {
 ; X32-LABEL: test_sraw_1:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_sraw_1:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: retq
 entry:
 %shl = ashr <16 x i16> %InVec,
@@ -169,12 +169,12 @@ entry:

 define <16 x i16> @test_sraw_2(<16 x i16> %InVec) {
 ; X32-LABEL: test_sraw_2:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
 ; X32-NEXT: vpsraw $1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_sraw_2:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: vpsraw $1, %ymm0, %ymm0
 ; X64-NEXT: retq
 entry:
@@ -184,12 +184,12 @@ entry:

 define <16 x i16> @test_sraw_3(<16 x i16> %InVec) {
 ; X32-LABEL: test_sraw_3:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
 ; X32-NEXT: vpsraw $15, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_sraw_3:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: vpsraw $15, %ymm0, %ymm0
 ; X64-NEXT: retq
 entry:
@@ -199,11 +199,11 @@ entry:

 define <8 x i32> @test_srad_1(<8 x i32> %InVec) {
 ; X32-LABEL: test_srad_1:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_srad_1:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: retq
 entry:
 %shl = ashr <8 x i32> %InVec,
@@ -212,12 +212,12 @@ entry:

 define <8 x i32> @test_srad_2(<8 x i32> %InVec) {
 ; X32-LABEL: test_srad_2:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
 ; X32-NEXT: vpsrad $1, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_srad_2:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: vpsrad $1, %ymm0, %ymm0
 ; X64-NEXT: retq
 entry:
@@ -227,12 +227,12 @@ entry:

 define <8 x i32> @test_srad_3(<8 x i32> %InVec) {
 ; X32-LABEL: test_srad_3:
-; X32: ## BB#0: ## %entry
+; X32: # BB#0: # %entry
 ; X32-NEXT: vpsrad $31, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_srad_3:
-; X64: ## BB#0: ## %entry
+; X64: # BB#0: # %entry
 ; X64-NEXT: vpsrad $31, %ymm0, %ymm0
 ; X64-NEXT: retq
 entry:
@@ -244,11 +244,11 @@ entry:

 define <16 x i16>
@test_srlw_1(<16 x i16> %InVec) { ; X32-LABEL: test_srlw_1: -; X32: ## BB#0: ## %entry +; X32: # BB#0: # %entry ; X32-NEXT: retl ; ; X64-LABEL: test_srlw_1: -; X64: ## BB#0: ## %entry +; X64: # BB#0: # %entry ; X64-NEXT: retq entry: %shl = lshr <16 x i16> %InVec, @@ -257,12 +257,12 @@ entry: define <16 x i16> @test_srlw_2(<16 x i16> %InVec) { ; X32-LABEL: test_srlw_2: -; X32: ## BB#0: ## %entry +; X32: # BB#0: # %entry ; X32-NEXT: vpsrlw $1, %ymm0, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: test_srlw_2: -; X64: ## BB#0: ## %entry +; X64: # BB#0: # %entry ; X64-NEXT: vpsrlw $1, %ymm0, %ymm0 ; X64-NEXT: retq entry: @@ -272,12 +272,12 @@ entry: define <16 x i16> @test_srlw_3(<16 x i16> %InVec) { ; X32-LABEL: test_srlw_3: -; X32: ## BB#0: ## %entry +; X32: # BB#0: # %entry ; X32-NEXT: vpsrlw $15, %ymm0, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: test_srlw_3: -; X64: ## BB#0: ## %entry +; X64: # BB#0: # %entry ; X64-NEXT: vpsrlw $15, %ymm0, %ymm0 ; X64-NEXT: retq entry: @@ -287,11 +287,11 @@ entry: define <8 x i32> @test_srld_1(<8 x i32> %InVec) { ; X32-LABEL: test_srld_1: -; X32: ## BB#0: ## %entry +; X32: # BB#0: # %entry ; X32-NEXT: retl ; ; X64-LABEL: test_srld_1: -; X64: ## BB#0: ## %entry +; X64: # BB#0: # %entry ; X64-NEXT: retq entry: %shl = lshr <8 x i32> %InVec, @@ -300,12 +300,12 @@ entry: define <8 x i32> @test_srld_2(<8 x i32> %InVec) { ; X32-LABEL: test_srld_2: -; X32: ## BB#0: ## %entry +; X32: # BB#0: # %entry ; X32-NEXT: vpsrld $1, %ymm0, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: test_srld_2: -; X64: ## BB#0: ## %entry +; X64: # BB#0: # %entry ; X64-NEXT: vpsrld $1, %ymm0, %ymm0 ; X64-NEXT: retq entry: @@ -315,12 +315,12 @@ entry: define <8 x i32> @test_srld_3(<8 x i32> %InVec) { ; X32-LABEL: test_srld_3: -; X32: ## BB#0: ## %entry +; X32: # BB#0: # %entry ; X32-NEXT: vpsrld $31, %ymm0, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: test_srld_3: -; X64: ## BB#0: ## %entry +; X64: # BB#0: # %entry ; X64-NEXT: vpsrld $31, %ymm0, %ymm0 ; X64-NEXT: retq entry: @@ -330,11 +330,11 @@ entry: define <4 x i64> @test_srlq_1(<4 x i64> %InVec) { ; X32-LABEL: test_srlq_1: -; X32: ## BB#0: ## %entry +; X32: # BB#0: # %entry ; X32-NEXT: retl ; ; X64-LABEL: test_srlq_1: -; X64: ## BB#0: ## %entry +; X64: # BB#0: # %entry ; X64-NEXT: retq entry: %shl = lshr <4 x i64> %InVec, @@ -343,12 +343,12 @@ entry: define <4 x i64> @test_srlq_2(<4 x i64> %InVec) { ; X32-LABEL: test_srlq_2: -; X32: ## BB#0: ## %entry +; X32: # BB#0: # %entry ; X32-NEXT: vpsrlq $1, %ymm0, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: test_srlq_2: -; X64: ## BB#0: ## %entry +; X64: # BB#0: # %entry ; X64-NEXT: vpsrlq $1, %ymm0, %ymm0 ; X64-NEXT: retq entry: @@ -358,12 +358,12 @@ entry: define <4 x i64> @test_srlq_3(<4 x i64> %InVec) { ; X32-LABEL: test_srlq_3: -; X32: ## BB#0: ## %entry +; X32: # BB#0: # %entry ; X32-NEXT: vpsrlq $63, %ymm0, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: test_srlq_3: -; X64: ## BB#0: ## %entry +; X64: # BB#0: # %entry ; X64-NEXT: vpsrlq $63, %ymm0, %ymm0 ; X64-NEXT: retq entry: @@ -373,18 +373,17 @@ entry: define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind { ; X32-LABEL: srl_trunc_and_v4i64: -; X32: ## BB#0: +; X32: # BB#0: ; X32-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7] ; X32-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] -; X32-NEXT: vpbroadcastd LCPI25_0, %xmm2 +; X32-NEXT: vpbroadcastd {{\.LCPI.*}}, %xmm2 ; X32-NEXT: vpand %xmm2, %xmm1, %xmm1 ; X32-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 ; X32-NEXT: vzeroupper ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: srl_trunc_and_v4i64: -; X64: ## 
BB#0: +; X64: # BB#0: ; X64-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7] ; X64-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] ; X64-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2 @@ -392,7 +391,6 @@ define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind { ; X64-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq -; X64-NEXT: ## -- End function %and = and <4 x i64> %y, %trunc = trunc <4 x i64> %and to <4 x i32> %sra = lshr <4 x i32> %x, %trunc @@ -405,85 +403,80 @@ define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind { define <8 x i16> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind { ; X32-LABEL: shl_8i16: -; X32: ## BB#0: +; X32: # BB#0: ; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero ; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 ; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31] ; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] -; X32-NEXT: ## kill: %XMM0 %XMM0 %YMM0 +; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0 ; X32-NEXT: vzeroupper ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: shl_8i16: -; X64: ## BB#0: +; X64: # BB#0: ; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero ; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 ; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31] ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] -; X64-NEXT: ## kill: %XMM0 %XMM0 %YMM0 +; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq -; X64-NEXT: ## -- End function %shl = shl <8 x i16> %r, %a ret <8 x i16> %shl } define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind { ; X32-LABEL: shl_16i16: -; X32: ## BB#0: +; X32: # BB#0: ; X32-NEXT: vpxor %ymm2, %ymm2, %ymm2 ; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] ; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] ; X32-NEXT: vpsllvd %ymm3, %ymm4, %ymm3 +; X32-NEXT: vpsrld $16, %ymm3, %ymm3 ; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] ; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] ; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 -; X32-NEXT: vpsrld $16, %ymm3, %ymm1 ; X32-NEXT: vpsrld $16, %ymm0, %ymm0 -; X32-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; X32-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: shl_16i16: -; X64: ## BB#0: +; X64: # BB#0: ; X64-NEXT: vpxor %ymm2, %ymm2, %ymm2 ; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = 
ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] ; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] ; X64-NEXT: vpsllvd %ymm3, %ymm4, %ymm3 +; X64-NEXT: vpsrld $16, %ymm3, %ymm3 ; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] ; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] ; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 -; X64-NEXT: vpsrld $16, %ymm3, %ymm1 ; X64-NEXT: vpsrld $16, %ymm0, %ymm0 -; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; X64-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 ; X64-NEXT: retq -; X64-NEXT: ## -- End function %shl = shl <16 x i16> %r, %a ret <16 x i16> %shl } define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind { ; X32-LABEL: shl_32i8: -; X32: ## BB#0: +; X32: # BB#0: ; X32-NEXT: vpsllw $5, %ymm1, %ymm1 ; X32-NEXT: vpsllw $4, %ymm0, %ymm2 -; X32-NEXT: vpand LCPI28_0, %ymm2, %ymm2 +; X32-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2 ; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; X32-NEXT: vpsllw $2, %ymm0, %ymm2 -; X32-NEXT: vpand LCPI28_1, %ymm2, %ymm2 +; X32-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2 ; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; X32-NEXT: vpaddb %ymm0, %ymm0, %ymm2 ; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: shl_32i8: -; X64: ## BB#0: +; X64: # BB#0: ; X64-NEXT: vpsllw $5, %ymm1, %ymm1 ; X64-NEXT: vpsllw $4, %ymm0, %ymm2 ; X64-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 @@ -496,76 +489,71 @@ define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind { ; X64-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; X64-NEXT: retq -; X64-NEXT: ## -- End function %shl = shl <32 x i8> %r, %a ret <32 x i8> %shl } define <8 x i16> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind { ; X32-LABEL: ashr_8i16: -; X32: ## BB#0: +; X32: # BB#0: ; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero ; X32-NEXT: vpmovsxwd %xmm0, %ymm0 ; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0 ; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31] ; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] -; X32-NEXT: ## kill: %XMM0 %XMM0 %YMM0 +; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0 ; X32-NEXT: vzeroupper ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: ashr_8i16: -; X64: ## BB#0: +; X64: # BB#0: ; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero ; X64-NEXT: vpmovsxwd %xmm0, %ymm0 ; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0 ; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31] ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] -; X64-NEXT: ## kill: %XMM0 %XMM0 %YMM0 +; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq -; X64-NEXT: ## -- End function %ashr = ashr <8 x i16> %r, %a ret <8 x i16> %ashr } define <16 x i16> 
@ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind { ; X32-LABEL: ashr_16i16: -; X32: ## BB#0: +; X32: # BB#0: ; X32-NEXT: vpxor %ymm2, %ymm2, %ymm2 ; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] ; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] ; X32-NEXT: vpsravd %ymm3, %ymm4, %ymm3 +; X32-NEXT: vpsrld $16, %ymm3, %ymm3 ; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] ; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] ; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0 -; X32-NEXT: vpsrld $16, %ymm3, %ymm1 ; X32-NEXT: vpsrld $16, %ymm0, %ymm0 -; X32-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; X32-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: ashr_16i16: -; X64: ## BB#0: +; X64: # BB#0: ; X64-NEXT: vpxor %ymm2, %ymm2, %ymm2 ; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] ; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] ; X64-NEXT: vpsravd %ymm3, %ymm4, %ymm3 +; X64-NEXT: vpsrld $16, %ymm3, %ymm3 ; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] ; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] ; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0 -; X64-NEXT: vpsrld $16, %ymm3, %ymm1 ; X64-NEXT: vpsrld $16, %ymm0, %ymm0 -; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; X64-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 ; X64-NEXT: retq -; X64-NEXT: ## -- End function %ashr = ashr <16 x i16> %r, %a ret <16 x i16> %ashr } define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind { ; X32-LABEL: ashr_32i8: -; X32: ## BB#0: +; X32: # BB#0: ; X32-NEXT: vpsllw $5, %ymm1, %ymm1 ; X32-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] ; X32-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] @@ -591,10 +579,9 @@ define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind { ; X32-NEXT: vpsrlw $8, %ymm0, %ymm0 ; X32-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: ashr_32i8: -; X64: ## BB#0: +; X64: # BB#0: ; X64-NEXT: vpsllw $5, %ymm1, %ymm1 ; X64-NEXT: vpunpckhbw {{.*#+}} ymm2 = 
ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] ; X64-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] @@ -620,93 +607,87 @@ define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind { ; X64-NEXT: vpsrlw $8, %ymm0, %ymm0 ; X64-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; X64-NEXT: retq -; X64-NEXT: ## -- End function %ashr = ashr <32 x i8> %r, %a ret <32 x i8> %ashr } define <8 x i16> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind { ; X32-LABEL: lshr_8i16: -; X32: ## BB#0: +; X32: # BB#0: ; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero ; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 ; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31] ; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] -; X32-NEXT: ## kill: %XMM0 %XMM0 %YMM0 +; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0 ; X32-NEXT: vzeroupper ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: lshr_8i16: -; X64: ## BB#0: +; X64: # BB#0: ; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero ; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 ; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31] ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] -; X64-NEXT: ## kill: %XMM0 %XMM0 %YMM0 +; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq -; X64-NEXT: ## -- End function %lshr = lshr <8 x i16> %r, %a ret <8 x i16> %lshr } define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind { ; X32-LABEL: lshr_16i16: -; X32: ## BB#0: +; X32: # BB#0: ; X32-NEXT: vpxor %ymm2, %ymm2, %ymm2 ; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] ; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] ; X32-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3 +; X32-NEXT: vpsrld $16, %ymm3, %ymm3 ; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] ; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] ; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 -; X32-NEXT: vpsrld $16, %ymm3, %ymm1 ; X32-NEXT: vpsrld $16, %ymm0, %ymm0 -; X32-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; X32-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: lshr_16i16: -; X64: ## BB#0: +; X64: # BB#0: ; X64-NEXT: vpxor %ymm2, %ymm2, %ymm2 ; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 
= ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] ; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] ; X64-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3 +; X64-NEXT: vpsrld $16, %ymm3, %ymm3 ; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] ; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] ; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 -; X64-NEXT: vpsrld $16, %ymm3, %ymm1 ; X64-NEXT: vpsrld $16, %ymm0, %ymm0 -; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; X64-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 ; X64-NEXT: retq -; X64-NEXT: ## -- End function %lshr = lshr <16 x i16> %r, %a ret <16 x i16> %lshr } define <32 x i8> @lshr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind { ; X32-LABEL: lshr_32i8: -; X32: ## BB#0: +; X32: # BB#0: ; X32-NEXT: vpsllw $5, %ymm1, %ymm1 ; X32-NEXT: vpsrlw $4, %ymm0, %ymm2 -; X32-NEXT: vpand LCPI34_0, %ymm2, %ymm2 +; X32-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2 ; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; X32-NEXT: vpsrlw $2, %ymm0, %ymm2 -; X32-NEXT: vpand LCPI34_1, %ymm2, %ymm2 +; X32-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2 ; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; X32-NEXT: vpsrlw $1, %ymm0, %ymm2 -; X32-NEXT: vpand LCPI34_2, %ymm2, %ymm2 +; X32-NEXT: vpand {{\.LCPI.*}}, %ymm2, %ymm2 ; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; X32-NEXT: retl -; X32-NEXT: ## -- End function ; ; X64-LABEL: lshr_32i8: -; X64: ## BB#0: +; X64: # BB#0: ; X64-NEXT: vpsllw $5, %ymm1, %ymm1 ; X64-NEXT: vpsrlw $4, %ymm0, %ymm2 ; X64-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 @@ -720,7 +701,6 @@ define <32 x i8> @lshr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind { ; X64-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; X64-NEXT: retq -; X64-NEXT: ## -- End function %lshr = lshr <32 x i8> %r, %a ret <32 x i8> %lshr } diff --git a/test/CodeGen/X86/avx2-vperm.ll b/test/CodeGen/X86/avx2-vperm.ll index d0e18550f6a..d57daafab24 100644 --- a/test/CodeGen/X86/avx2-vperm.ll +++ b/test/CodeGen/X86/avx2-vperm.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X32 -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64 define <8 x i32> @perm_cl_int_8x32(<8 x i32> %A) nounwind readnone { ; X32-LABEL: perm_cl_int_8x32: -; X32: ## BB#0: ## %entry +; X32: # BB#0: # %entry ; X32-NEXT: vmovdqa {{.*#+}} ymm1 = [0,7,2,1,2,7,6,0] ; X32-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: perm_cl_int_8x32: -; X64: ## BB#0: ## %entry +; X64: # BB#0: # %entry ; X64-NEXT: vmovdqa {{.*#+}} ymm1 = [0,7,2,1,2,7,6,0] ; X64-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq @@ -22,13 +22,13 @@ entry: define <8 x float> @perm_cl_fp_8x32(<8 x float> %A) nounwind readnone { ; X32-LABEL: 
perm_cl_fp_8x32: -; X32: ## BB#0: ## %entry +; X32: # BB#0: # %entry ; X32-NEXT: vmovaps {{.*#+}} ymm1 = ; X32-NEXT: vpermps %ymm0, %ymm1, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: perm_cl_fp_8x32: -; X64: ## BB#0: ## %entry +; X64: # BB#0: # %entry ; X64-NEXT: vmovaps {{.*#+}} ymm1 = ; X64-NEXT: vpermps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq @@ -39,12 +39,12 @@ entry: define <4 x i64> @perm_cl_int_4x64(<4 x i64> %A) nounwind readnone { ; X32-LABEL: perm_cl_int_4x64: -; X32: ## BB#0: ## %entry +; X32: # BB#0: # %entry ; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,1] ; X32-NEXT: retl ; ; X64-LABEL: perm_cl_int_4x64: -; X64: ## BB#0: ## %entry +; X64: # BB#0: # %entry ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,1] ; X64-NEXT: retq entry: @@ -54,12 +54,12 @@ entry: define <4 x double> @perm_cl_fp_4x64(<4 x double> %A) nounwind readnone { ; X32-LABEL: perm_cl_fp_4x64: -; X32: ## BB#0: ## %entry +; X32: # BB#0: # %entry ; X32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,1] ; X32-NEXT: retl ; ; X64-LABEL: perm_cl_fp_4x64: -; X64: ## BB#0: ## %entry +; X64: # BB#0: # %entry ; X64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,1] ; X64-NEXT: retq entry: