From a1dd87c298ac215792dbeacf6323b8cf2d531c72 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 3 Jul 2017 16:53:11 +0000 Subject: [PATCH] [X86][SSE4A] Add SSE4A shuffle tests on pre-SSSE3 hardware llvm-svn: 307042 --- test/CodeGen/X86/vector-shuffle-sse4a.ll | 71 ++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/test/CodeGen/X86/vector-shuffle-sse4a.ll b/test/CodeGen/X86/vector-shuffle-sse4a.ll index 138c421215f..03a1cefc069 100644 --- a/test/CodeGen/X86/vector-shuffle-sse4a.ll +++ b/test/CodeGen/X86/vector-shuffle-sse4a.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=AMD10H ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=BTVER1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=BTVER2 @@ -36,6 +37,11 @@ define <2 x i64> @extrqi_len32_idx48(<2 x i64> %a) { } define <16 x i8> @shuf_0zzzuuuuuuuuuuuu(<16 x i8> %a0) { +; AMD10H-LABEL: shuf_0zzzuuuuuuuuuuuu: +; AMD10H: # BB#0: +; AMD10H-NEXT: extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] +; AMD10H-NEXT: retq +; ; BTVER1-LABEL: shuf_0zzzuuuuuuuuuuuu: ; BTVER1: # BB#0: ; BTVER1-NEXT: extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] @@ -50,6 +56,14 @@ define <16 x i8> @shuf_0zzzuuuuuuuuuuuu(<16 x i8> %a0) { } define <16 x i8> @shuf_0zzzzzzz1zzzzzzz(<16 x i8> %a0) { +; AMD10H-LABEL: shuf_0zzzzzzz1zzzzzzz: +; AMD10H: # BB#0: +; AMD10H-NEXT: movdqa %xmm0, %xmm1 +; AMD10H-NEXT: extrq {{.*#+}} xmm1 = xmm1[1],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u] +; AMD10H-NEXT: extrq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] +; AMD10H-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AMD10H-NEXT: retq +; ; BTVER1-LABEL: shuf_0zzzzzzz1zzzzzzz: ; BTVER1: # BB#0: ; BTVER1-NEXT: movdqa %xmm0, %xmm1 @@ -67,6 +81,14 @@ define <16 x i8> @shuf_0zzzzzzz1zzzzzzz(<16 x i8> %a0) { } define <16 x i8> @shuf_2zzzzzzz3zzzzzzz(<16 x i8> %a0) { +; AMD10H-LABEL: shuf_2zzzzzzz3zzzzzzz: +; AMD10H: # BB#0: +; AMD10H-NEXT: movdqa %xmm0, %xmm1 +; AMD10H-NEXT: extrq {{.*#+}} xmm1 = xmm1[3],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u] +; AMD10H-NEXT: extrq {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] +; AMD10H-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AMD10H-NEXT: retq +; ; BTVER1-LABEL: shuf_2zzzzzzz3zzzzzzz: ; BTVER1: # BB#0: ; BTVER1-NEXT: movdqa %xmm0, %xmm1 @@ -85,6 +107,11 @@ define <16 x i8> @shuf_2zzzzzzz3zzzzzzz(<16 x i8> %a0) { } define <16 x i8> @shuf_01zzuuuuuuuuuuuu(<16 x i8> %a0) { +; AMD10H-LABEL: shuf_01zzuuuuuuuuuuuu: +; AMD10H: # BB#0: +; AMD10H-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] +; AMD10H-NEXT: retq +; ; BTVER1-LABEL: shuf_01zzuuuuuuuuuuuu: ; BTVER1: # BB#0: ; BTVER1-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] @@ -99,6 +126,14 @@ define <16 x i8> @shuf_01zzuuuuuuuuuuuu(<16 x i8> %a0) { } define <16 x i8> @shuf_01zzzzzz23zzzzzz(<16 x i8> %a0) { +; AMD10H-LABEL: shuf_01zzzzzz23zzzzzz: +; AMD10H: # BB#0: +; AMD10H-NEXT: movdqa %xmm0, %xmm1 +; AMD10H-NEXT: extrq {{.*#+}} xmm1 = xmm1[2,3],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u] +; AMD10H-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] +; AMD10H-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AMD10H-NEXT: retq +; ; BTVER1-LABEL: shuf_01zzzzzz23zzzzzz: ; BTVER1: # BB#0: ; BTVER1-NEXT: movdqa %xmm0, %xmm1 @@ -152,6 +187,14 @@ define <8 x i16> @shuf_012zuuuu(<8 x i16> %a0) { } define <8 x i16> @shuf_0zzz1zzz(<8 x i16> %a0) { +; AMD10H-LABEL: shuf_0zzz1zzz: +; AMD10H: # BB#0: +; AMD10H-NEXT: movdqa %xmm0, %xmm1 +; AMD10H-NEXT: extrq {{.*#+}} xmm1 = xmm1[2,3],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u] +; AMD10H-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] +; AMD10H-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AMD10H-NEXT: retq +; ; BTVER1-LABEL: shuf_0zzz1zzz: ; BTVER1: # BB#0: ; BTVER1-NEXT: movdqa %xmm0, %xmm1 @@ -169,6 +212,12 @@ define <8 x i16> @shuf_0zzz1zzz(<8 x i16> %a0) { } define <4 x i32> @shuf_0z1z(<4 x i32> %a0) { +; AMD10H-LABEL: shuf_0z1z: +; AMD10H: # BB#0: +; AMD10H-NEXT: pxor %xmm1, %xmm1 +; AMD10H-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AMD10H-NEXT: retq +; ; BTVER1-LABEL: shuf_0z1z: ; BTVER1: # BB#0: ; BTVER1-NEXT: pxor %xmm1, %xmm1 @@ -303,6 +352,15 @@ define <8 x i16> @shuf_089uuuuu(<8 x i16> %a0, <8 x i16> %a1) { ; Out of range. define <16 x i8> @shuffle_8_18_uuuuuuuuuuuuuu(<16 x i8> %a, <16 x i8> %b) { +; AMD10H-LABEL: shuffle_8_18_uuuuuuuuuuuuuu: +; AMD10H: # BB#0: +; AMD10H-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AMD10H-NEXT: andpd {{.*}}(%rip), %xmm0 +; AMD10H-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AMD10H-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7] +; AMD10H-NEXT: packuswb %xmm0, %xmm0 +; AMD10H-NEXT: retq +; ; BTVER1-LABEL: shuffle_8_18_uuuuuuuuuuuuuu: ; BTVER1: # BB#0: ; BTVER1-NEXT: psrld $16, %xmm1 @@ -321,6 +379,13 @@ define <16 x i8> @shuffle_8_18_uuuuuuuuuuuuuu(<16 x i8> %a, <16 x i8> %b) { } define <16 x i8> @shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) { +; AMD10H-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: +; AMD10H: # BB#0: +; AMD10H-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; AMD10H-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AMD10H-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7] +; AMD10H-NEXT: retq +; ; BTVER1-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: ; BTVER1: # BB#0: ; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,5,5,4,4,5,5,4,4,5,5,6,6,7,7] @@ -335,6 +400,12 @@ define <16 x i8> @shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8 } define <16 x i8> @shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) { +; AMD10H-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: +; AMD10H: # BB#0: +; AMD10H-NEXT: psrlq $16, %xmm0 +; AMD10H-NEXT: pand {{.*}}(%rip), %xmm0 +; AMD10H-NEXT: retq +; ; BTVER1-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: ; BTVER1: # BB#0: ; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u],zero,xmm0[4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]