[X86] Regenerate vector shift tests. NFCI.

Merge check prefixes where possible, and use 'X86' instead of 'X32' (which we try to use only for gnux32 triple tests).
Simon Pilgrim 2020-10-27 12:13:53 +00:00
parent 3e4f2ad439
commit 29e9d9a1e2
16 changed files with 532 additions and 715 deletions
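
For reference, these assertions come from the script named in each test's NOTE line; a typical regeneration invocation (the test path here is illustrative) is:

    python utils/update_llc_test_checks.py llvm/test/CodeGen/X86/vshift-1.ll

Given merged RUN lines such as:

    ; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
    ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64

the script emits a single set of checks under the shared CHECK prefix wherever both targets produce identical code, using regexes such as ret{{[l|q]}} to cover both retl and retq, and falls back to the X86/X64 prefixes only where the output differs.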


@@ -1,17 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64
define <2 x i64> @t1(<2 x i64> %b1, <2 x i64> %c) nounwind {
; X32-LABEL: t1:
; X32: # %bb.0: # %entry
; X32-NEXT: psllw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: t1:
; X64: # %bb.0: # %entry
; X64-NEXT: psllw %xmm1, %xmm0
; X64-NEXT: retq
; CHECK-LABEL: t1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: psllw %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
%tmp6 = bitcast <2 x i64> %c to <8 x i16> ; <<8 x i16>> [#uses=1]
%tmp8 = bitcast <2 x i64> %b1 to <8 x i16> ; <<8 x i16>> [#uses=1]
@@ -21,11 +16,11 @@ entry:
}
define <2 x i64> @t3(<2 x i64> %b1, i32 %c) nounwind {
; X32-LABEL: t3:
; X32: # %bb.0: # %entry
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: psraw %xmm1, %xmm0
; X32-NEXT: retl
; X86-LABEL: t3:
; X86: # %bb.0: # %entry
; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: psraw %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: t3:
; X64: # %bb.0: # %entry
@@ -44,15 +39,10 @@ entry:
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
define <2 x i64> @t2(<2 x i64> %b1, <2 x i64> %c) nounwind {
; X32-LABEL: t2:
; X32: # %bb.0: # %entry
; X32-NEXT: psrlq %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: t2:
; X64: # %bb.0: # %entry
; X64-NEXT: psrlq %xmm1, %xmm0
; X64-NEXT: retq
; CHECK-LABEL: t2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: psrlq %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
%tmp9 = tail call <2 x i64> @llvm.x86.sse2.psrl.q( <2 x i64> %b1, <2 x i64> %c ) nounwind readnone ; <<2 x i64>> [#uses=1]
ret <2 x i64> %tmp9


@@ -1,17 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64
define <2 x i64> @t1(<2 x i64> %b1, <2 x i64> %c) nounwind {
; X32-LABEL: t1:
; X32: # %bb.0:
; X32-NEXT: psrlw $14, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: t1:
; X64: # %bb.0:
; X64-NEXT: psrlw $14, %xmm0
; X64-NEXT: retq
; CHECK-LABEL: t1:
; CHECK: # %bb.0:
; CHECK-NEXT: psrlw $14, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%tmp1 = bitcast <2 x i64> %b1 to <8 x i16>
%tmp2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w( <8 x i16> %tmp1, <8 x i16> bitcast (<4 x i32> < i32 14, i32 undef, i32 undef, i32 undef > to <8 x i16>) ) nounwind readnone
%tmp3 = bitcast <8 x i16> %tmp2 to <2 x i64>
@@ -19,19 +14,12 @@ define <2 x i64> @t1(<2 x i64> %b1, <2 x i64> %c) nounwind {
}
define <4 x i32> @t2(<2 x i64> %b1, <2 x i64> %c) nounwind {
; X32-LABEL: t2:
; X32: # %bb.0:
; X32-NEXT: movl $14, %eax
; X32-NEXT: movd %eax, %xmm1
; X32-NEXT: pslld %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: t2:
; X64: # %bb.0:
; X64-NEXT: movl $14, %eax
; X64-NEXT: movd %eax, %xmm1
; X64-NEXT: pslld %xmm1, %xmm0
; X64-NEXT: retq
; CHECK-LABEL: t2:
; CHECK: # %bb.0:
; CHECK-NEXT: movl $14, %eax
; CHECK-NEXT: movd %eax, %xmm1
; CHECK-NEXT: pslld %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%tmp1 = bitcast <2 x i64> %b1 to <4 x i32>
%tmp2 = tail call <4 x i32> @llvm.x86.sse2.psll.d( <4 x i32> %tmp1, <4 x i32> < i32 14, i32 undef, i32 undef, i32 undef > ) nounwind readnone
ret <4 x i32> %tmp2


@@ -1,13 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64
define <2 x i64> @t1(<2 x i64> %x1, i32 %bits) nounwind {
; X32-LABEL: t1:
; X32: # %bb.0: # %entry
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: psllq %xmm1, %xmm0
; X32-NEXT: retl
; X86-LABEL: t1:
; X86: # %bb.0: # %entry
; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: psllq %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: t1:
; X64: # %bb.0: # %entry
@@ -20,26 +20,21 @@ entry:
}
define <2 x i64> @t2(<2 x i64> %x1) nounwind {
; X32-LABEL: t2:
; X32: # %bb.0: # %entry
; X32-NEXT: psllq $10, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: t2:
; X64: # %bb.0: # %entry
; X64-NEXT: psllq $10, %xmm0
; X64-NEXT: retq
; CHECK-LABEL: t2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: psllq $10, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
%tmp3 = tail call <2 x i64> @llvm.x86.sse2.pslli.q( <2 x i64> %x1, i32 10 ) nounwind readnone ; <<2 x i64>> [#uses=1]
ret <2 x i64> %tmp3
}
define <2 x i64> @t3(<2 x i64> %x1, i32 %bits) nounwind {
; X32-LABEL: t3:
; X32: # %bb.0: # %entry
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: psraw %xmm1, %xmm0
; X32-NEXT: retl
; X86-LABEL: t3:
; X86: # %bb.0: # %entry
; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: psraw %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: t3:
; X64: # %bb.0: # %entry


@@ -1,15 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,X64
define <2 x i64> @shl1(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
; X32-LABEL: shl1:
; X32: # %bb.0: # %entry
; X32-NEXT: pslld $23, %xmm1
; X32-NEXT: paddd {{\.LCPI.*}}, %xmm1
; X32-NEXT: cvttps2dq %xmm1, %xmm1
; X32-NEXT: pmulld %xmm1, %xmm0
; X32-NEXT: retl
; X86-LABEL: shl1:
; X86: # %bb.0: # %entry
; X86-NEXT: pslld $23, %xmm1
; X86-NEXT: paddd {{\.LCPI.*}}, %xmm1
; X86-NEXT: cvttps2dq %xmm1, %xmm1
; X86-NEXT: pmulld %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: shl1:
; X64: # %bb.0: # %entry
@@ -19,40 +19,34 @@ define <2 x i64> @shl1(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
; X64-NEXT: pmulld %xmm1, %xmm0
; X64-NEXT: retq
entry:
; CHECK-NOT: shll
; CHECK: pslld
; CHECK: paddd
; CHECK: cvttps2dq
; CHECK: pmulld
%shl = shl <4 x i32> %r, %a ; <<4 x i32>> [#uses=1]
%tmp2 = bitcast <4 x i32> %shl to <2 x i64> ; <<2 x i64>> [#uses=1]
ret <2 x i64> %tmp2
}
define <2 x i64> @shl2(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp {
; X32-LABEL: shl2:
; X32: # %bb.0: # %entry
; X32-NEXT: movdqa %xmm0, %xmm2
; X32-NEXT: psllw $5, %xmm1
; X32-NEXT: movdqa %xmm0, %xmm3
; X32-NEXT: psllw $4, %xmm3
; X32-NEXT: pand {{\.LCPI.*}}, %xmm3
; X32-NEXT: movdqa %xmm1, %xmm0
; X32-NEXT: pblendvb %xmm0, %xmm3, %xmm2
; X32-NEXT: movdqa %xmm2, %xmm3
; X32-NEXT: psllw $2, %xmm3
; X32-NEXT: pand {{\.LCPI.*}}, %xmm3
; X32-NEXT: paddb %xmm1, %xmm1
; X32-NEXT: movdqa %xmm1, %xmm0
; X32-NEXT: pblendvb %xmm0, %xmm3, %xmm2
; X32-NEXT: movdqa %xmm2, %xmm3
; X32-NEXT: paddb %xmm2, %xmm3
; X32-NEXT: paddb %xmm1, %xmm1
; X32-NEXT: movdqa %xmm1, %xmm0
; X32-NEXT: pblendvb %xmm0, %xmm3, %xmm2
; X32-NEXT: movdqa %xmm2, %xmm0
; X32-NEXT: retl
; X86-LABEL: shl2:
; X86: # %bb.0: # %entry
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: psllw $5, %xmm1
; X86-NEXT: movdqa %xmm0, %xmm3
; X86-NEXT: psllw $4, %xmm3
; X86-NEXT: pand {{\.LCPI.*}}, %xmm3
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: pblendvb %xmm0, %xmm3, %xmm2
; X86-NEXT: movdqa %xmm2, %xmm3
; X86-NEXT: psllw $2, %xmm3
; X86-NEXT: pand {{\.LCPI.*}}, %xmm3
; X86-NEXT: paddb %xmm1, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: pblendvb %xmm0, %xmm3, %xmm2
; X86-NEXT: movdqa %xmm2, %xmm3
; X86-NEXT: paddb %xmm2, %xmm3
; X86-NEXT: paddb %xmm1, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: pblendvb %xmm0, %xmm3, %xmm2
; X86-NEXT: movdqa %xmm2, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: shl2:
; X64: # %bb.0: # %entry
@@ -77,10 +71,6 @@ define <2 x i64> @shl2(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp {
; X64-NEXT: movdqa %xmm2, %xmm0
; X64-NEXT: retq
entry:
; CHECK-NOT: shlb
; CHECK: pblendvb
; CHECK: pblendvb
; CHECK: pblendvb
%shl = shl <16 x i8> %r, %a ; <<16 x i8>> [#uses=1]
%tmp2 = bitcast <16 x i8> %shl to <2 x i64> ; <<2 x i64>> [#uses=1]
ret <2 x i64> %tmp2


@@ -1,100 +1,70 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64
; Verify that we correctly fold target specific packed vector shifts by
; immediate count into a simple build_vector when the elements of the vector
; in input to the packed shift are all constants or undef.
define <8 x i16> @test1() {
; X32-LABEL: test1:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [8,16,32,64,8,16,32,64]
; X32-NEXT: retl
;
; X64-LABEL: test1:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [8,16,32,64,8,16,32,64]
; X64-NEXT: retq
; CHECK-LABEL: test1:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [8,16,32,64,8,16,32,64]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> <i16 1, i16 2, i16 4, i16 8, i16 1, i16 2, i16 4, i16 8>, i32 3)
ret <8 x i16> %1
}
define <8 x i16> @test2() {
; X32-LABEL: test2:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; X32-NEXT: retl
;
; X64-LABEL: test2:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; X64-NEXT: retq
; CHECK-LABEL: test2:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i16 8, i16 16, i16 32>, i32 3)
ret <8 x i16> %1
}
define <8 x i16> @test3() {
; X32-LABEL: test3:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; X32-NEXT: retl
;
; X64-LABEL: test3:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; X64-NEXT: retq
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i16 8, i16 16, i16 32>, i32 3)
ret <8 x i16> %1
}
define <4 x i32> @test4() {
; X32-LABEL: test4:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [8,16,32,64]
; X32-NEXT: retl
;
; X64-LABEL: test4:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [8,16,32,64]
; X64-NEXT: retq
; CHECK-LABEL: test4:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [8,16,32,64]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 1, i32 2, i32 4, i32 8>, i32 3)
ret <4 x i32> %1
}
define <4 x i32> @test5() {
; X32-LABEL: test5:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [0,1,2,4]
; X32-NEXT: retl
;
; X64-LABEL: test5:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [0,1,2,4]
; X64-NEXT: retq
; CHECK-LABEL: test5:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [0,1,2,4]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
ret <4 x i32> %1
}
define <4 x i32> @test6() {
; X32-LABEL: test6:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [0,1,2,4]
; X32-NEXT: retl
;
; X64-LABEL: test6:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [0,1,2,4]
; X64-NEXT: retq
; CHECK-LABEL: test6:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [0,1,2,4]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
ret <4 x i32> %1
}
define <2 x i64> @test7() {
; X32-LABEL: test7:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [8,0,16,0]
; X32-NEXT: retl
; X86-LABEL: test7:
; X86: # %bb.0:
; X86-NEXT: movaps {{.*#+}} xmm0 = [8,0,16,0]
; X86-NEXT: retl
;
; X64-LABEL: test7:
; X64: # %bb.0:
@@ -105,10 +75,10 @@ define <2 x i64> @test7() {
}
define <2 x i64> @test8() {
; X32-LABEL: test8:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [1,0,2,0]
; X32-NEXT: retl
; X86-LABEL: test8:
; X86: # %bb.0:
; X86-NEXT: movaps {{.*#+}} xmm0 = [1,0,2,0]
; X86-NEXT: retl
;
; X64-LABEL: test8:
; X64: # %bb.0:
@@ -119,38 +89,28 @@ define <2 x i64> @test8() {
}
define <8 x i16> @test9() {
; X32-LABEL: test9:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
; X32-NEXT: retl
;
; X64-LABEL: test9:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
; X64-NEXT: retq
; CHECK-LABEL: test9:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
ret <8 x i16> %1
}
define <4 x i32> @test10() {
; X32-LABEL: test10:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [0,1,0,4]
; X32-NEXT: retl
;
; X64-LABEL: test10:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [0,1,0,4]
; X64-NEXT: retq
; CHECK-LABEL: test10:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [0,1,0,4]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
ret <4 x i32> %1
}
define <2 x i64> @test11() {
; X32-LABEL: test11:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [0,0,3,0]
; X32-NEXT: retl
; X86-LABEL: test11:
; X86: # %bb.0:
; X86-NEXT: movaps {{.*#+}} xmm0 = [0,0,3,0]
; X86-NEXT: retl
;
; X64-LABEL: test11:
; X64: # %bb.0:
@@ -161,66 +121,46 @@ define <2 x i64> @test11() {
}
define <8 x i16> @test12() {
; X32-LABEL: test12:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
; X32-NEXT: retl
;
; X64-LABEL: test12:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
; X64-NEXT: retq
; CHECK-LABEL: test12:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
ret <8 x i16> %1
}
define <4 x i32> @test13() {
; X32-LABEL: test13:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [0,1,0,4]
; X32-NEXT: retl
;
; X64-LABEL: test13:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [0,1,0,4]
; X64-NEXT: retq
; CHECK-LABEL: test13:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [0,1,0,4]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
ret <4 x i32> %1
}
define <8 x i16> @test14() {
; X32-LABEL: test14:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
; X32-NEXT: retl
;
; X64-LABEL: test14:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
; X64-NEXT: retq
; CHECK-LABEL: test14:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
ret <8 x i16> %1
}
define <4 x i32> @test15() {
; X32-LABEL: test15:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [0,64,0,256]
; X32-NEXT: retl
;
; X64-LABEL: test15:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [0,64,0,256]
; X64-NEXT: retq
; CHECK-LABEL: test15:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [0,64,0,256]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
ret <4 x i32> %1
}
define <2 x i64> @test16() {
; X32-LABEL: test16:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [0,0,248,0]
; X32-NEXT: retl
; X86-LABEL: test16:
; X86: # %bb.0:
; X86-NEXT: movaps {{.*#+}} xmm0 = [0,0,248,0]
; X86-NEXT: retl
;
; X64-LABEL: test16:
; X64: # %bb.0:


@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx512f | FileCheck %s --check-prefixes=AVX,AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
; Verify that we don't scalarize a packed vector shift left of 16-bit
; signed integers if the amount is a constant build_vector.


@@ -1,16 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64
; Verify that we don't fail when shift by zero is encountered.
define i64 @test1(<2 x i64> %a) {
; X32-LABEL: test1:
; X32: # %bb.0: # %entry
; X32-NEXT: movd %xmm0, %eax
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X32-NEXT: movd %xmm0, %edx
; X32-NEXT: retl
; X86-LABEL: test1:
; X86: # %bb.0: # %entry
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-NEXT: movd %xmm0, %edx
; X86-NEXT: retl
;
; X64-LABEL: test1:
; X64: # %bb.0: # %entry


@@ -1,17 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64
; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same.
define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind {
; X32-LABEL: shift1a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: psllq $32, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift1a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: psllq $32, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift1a:
; X64: # %bb.0: # %entry
@@ -25,13 +25,13 @@ entry:
}
define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
; X32-LABEL: shift1b:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X32-NEXT: psllq %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift1b:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X86-NEXT: psllq %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift1b:
; X64: # %bb.0: # %entry
@@ -49,12 +49,12 @@ entry:
define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind {
; X32-LABEL: shift2a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: pslld $5, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift2a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: pslld $5, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift2a:
; X64: # %bb.0: # %entry
@@ -68,13 +68,13 @@ entry:
}
define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
; X32-LABEL: shift2b:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: pslld %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift2b:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: pslld %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift2b:
; X64: # %bb.0: # %entry
@@ -93,12 +93,12 @@ entry:
}
define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind {
; X32-LABEL: shift3a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: psllw $5, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift3a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: psllw $5, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift3a:
; X64: # %bb.0: # %entry
@@ -113,14 +113,14 @@ entry:
; Make sure the shift amount is properly zero extended.
define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
; X32-LABEL: shift3b:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movd %ecx, %xmm1
; X32-NEXT: psllw %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift3b:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movd %ecx, %xmm1
; X86-NEXT: psllw %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift3b:
; X64: # %bb.0: # %entry


@@ -1,17 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64
; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same.
define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind {
; X32-LABEL: shift1a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: psrlq $32, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift1a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: psrlq $32, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift1a:
; X64: # %bb.0: # %entry
@@ -25,13 +25,13 @@ entry:
}
define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
; X32-LABEL: shift1b:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X32-NEXT: psrlq %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift1b:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X86-NEXT: psrlq %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift1b:
; X64: # %bb.0: # %entry
@@ -48,12 +48,12 @@ entry:
}
define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind {
; X32-LABEL: shift2a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: psrld $17, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift2a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: psrld $17, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift2a:
; X64: # %bb.0: # %entry
@@ -67,13 +67,13 @@ entry:
}
define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
; X32-LABEL: shift2b:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: psrld %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift2b:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: psrld %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift2b:
; X64: # %bb.0: # %entry
@@ -93,12 +93,12 @@ entry:
define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind {
; X32-LABEL: shift3a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: psrlw $5, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift3a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: psrlw $5, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift3a:
; X64: # %bb.0: # %entry
@@ -113,14 +113,14 @@ entry:
; properly zero extend the shift amount
define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
; X32-LABEL: shift3b:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movd %ecx, %xmm1
; X32-NEXT: psrlw %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift3b:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movd %ecx, %xmm1
; X86-NEXT: psrlw %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift3b:
; X64: # %bb.0: # %entry


@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64
; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same.
@@ -8,15 +8,15 @@
; Note that x86 does have ashr
define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind {
; X32-LABEL: shift1a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; X32-NEXT: psrad $31, %xmm0
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-NEXT: movdqa %xmm1, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift1a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; X86-NEXT: psrad $31, %xmm0
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-NEXT: movdqa %xmm1, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift1a:
; X64: # %bb.0: # %entry
@@ -33,12 +33,12 @@ entry:
}
define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind {
; X32-LABEL: shift2a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: psrad $5, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift2a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: psrad $5, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift2a:
; X64: # %bb.0: # %entry
@@ -52,13 +52,13 @@ entry:
}
define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
; X32-LABEL: shift2b:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: psrad %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift2b:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: psrad %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift2b:
; X64: # %bb.0: # %entry
@@ -77,12 +77,12 @@ entry:
}
define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind {
; X32-LABEL: shift3a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: psraw $5, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift3a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: psraw $5, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift3a:
; X64: # %bb.0: # %entry
@@ -96,14 +96,14 @@ entry:
}
define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
; X32-LABEL: shift3b:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movd %ecx, %xmm1
; X32-NEXT: psraw %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift3b:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movd %ecx, %xmm1
; X86-NEXT: psraw %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift3b:
; X64: # %bb.0: # %entry


@@ -1,17 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64
; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same when using a shuffle splat.
define void @shift1a(<2 x i64> %val, <2 x i64>* %dst, <2 x i64> %sh) nounwind {
; X32-LABEL: shift1a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: psllq %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift1a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: psllq %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift1a:
; X64: # %bb.0: # %entry
@@ -27,16 +27,16 @@ entry:
; shift1b can't use a packed shift but can shift lanes separately and shuffle back together
define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, <2 x i64> %sh) nounwind {
; X32-LABEL: shift1b:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movdqa %xmm0, %xmm2
; X32-NEXT: psllq %xmm1, %xmm2
; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X32-NEXT: psllq %xmm1, %xmm0
; X32-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; X32-NEXT: movapd %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift1b:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: psllq %xmm1, %xmm2
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X86-NEXT: psllq %xmm1, %xmm0
; X86-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; X86-NEXT: movapd %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift1b:
; X64: # %bb.0: # %entry
@@ -55,13 +55,13 @@ entry:
}
define void @shift2a(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind {
; X32-LABEL: shift2a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: psrlq $32, %xmm1
; X32-NEXT: pslld %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift2a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: psrlq $32, %xmm1
; X86-NEXT: pslld %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift2a:
; X64: # %bb.0: # %entry
@@ -77,13 +77,13 @@ entry:
}
define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind {
; X32-LABEL: shift2b:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: psrlq $32, %xmm1
; X32-NEXT: pslld %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift2b:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: psrlq $32, %xmm1
; X86-NEXT: pslld %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift2b:
; X64: # %bb.0: # %entry
@@ -99,13 +99,13 @@ entry:
}
define void @shift2c(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind {
; X32-LABEL: shift2c:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: psrlq $32, %xmm1
; X32-NEXT: pslld %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift2c:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: psrlq $32, %xmm1
; X86-NEXT: pslld %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift2c:
; X64: # %bb.0: # %entry
@@ -121,14 +121,14 @@ entry:
}
define void @shift3a(<8 x i16> %val, <8 x i16>* %dst, <8 x i16> %amt) nounwind {
; X32-LABEL: shift3a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,6,6]
; X32-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X32-NEXT: psllw %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift3a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,6,6]
; X86-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-NEXT: psllw %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift3a:
; X64: # %bb.0: # %entry
@@ -145,14 +145,14 @@ entry:
}
define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
; X32-LABEL: shift3b:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movd %ecx, %xmm1
; X32-NEXT: psllw %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift3b:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movd %ecx, %xmm1
; X86-NEXT: psllw %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift3b:
; X64: # %bb.0: # %entry


@@ -1,18 +1,18 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64
; When loading the shift amount from memory, avoid generating the splat.
define void @shift5a(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind {
; X32-LABEL: shift5a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: pslld %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift5a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: pslld %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift5a:
; X64: # %bb.0: # %entry
@@ -31,14 +31,14 @@ entry:
define void @shift5b(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind {
; X32-LABEL: shift5b:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: psrad %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift5b:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: psrad %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift5b:
; X64: # %bb.0: # %entry
@@ -57,13 +57,13 @@ entry:
define void @shift5c(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
; X32-LABEL: shift5c:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: pslld %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift5c:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: pslld %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift5c:
; X64: # %bb.0: # %entry
@@ -81,13 +81,13 @@ entry:
define void @shift5d(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
; X32-LABEL: shift5d:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: psrad %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift5d:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: psrad %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift5d:
; X64: # %bb.0: # %entry


@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64
; This test makes sure that the compiler does not crash with an
; assertion failure when trying to fold a vector shift left
@@ -25,42 +25,42 @@
; 'count' is the vector shift count.
define <16 x i8> @do_not_crash(i8*, i32*, i64*, i32, i64, i8) {
; X32-LABEL: do_not_crash:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movb %al, (%ecx)
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: psllq $56, %xmm0
; X32-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
; X32-NEXT: movdqa %xmm2, %xmm1
; X32-NEXT: pandn %xmm0, %xmm1
; X32-NEXT: por %xmm2, %xmm1
; X32-NEXT: pcmpeqd %xmm2, %xmm2
; X32-NEXT: psllw $5, %xmm1
; X32-NEXT: pxor %xmm3, %xmm3
; X32-NEXT: pxor %xmm0, %xmm0
; X32-NEXT: pcmpgtb %xmm1, %xmm0
; X32-NEXT: pxor %xmm0, %xmm2
; X32-NEXT: pand {{\.LCPI.*}}, %xmm0
; X32-NEXT: por %xmm2, %xmm0
; X32-NEXT: paddb %xmm1, %xmm1
; X32-NEXT: pxor %xmm2, %xmm2
; X32-NEXT: pcmpgtb %xmm1, %xmm2
; X32-NEXT: movdqa %xmm2, %xmm4
; X32-NEXT: pandn %xmm0, %xmm4
; X32-NEXT: psllw $2, %xmm0
; X32-NEXT: pand %xmm2, %xmm0
; X32-NEXT: pand {{\.LCPI.*}}, %xmm0
; X32-NEXT: por %xmm4, %xmm0
; X32-NEXT: paddb %xmm1, %xmm1
; X32-NEXT: pcmpgtb %xmm1, %xmm3
; X32-NEXT: movdqa %xmm3, %xmm1
; X32-NEXT: pandn %xmm0, %xmm1
; X32-NEXT: paddb %xmm0, %xmm0
; X32-NEXT: pand %xmm3, %xmm0
; X32-NEXT: por %xmm1, %xmm0
; X32-NEXT: retl
; X86-LABEL: do_not_crash:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movb %al, (%ecx)
; X86-NEXT: movd %eax, %xmm0
; X86-NEXT: psllq $56, %xmm0
; X86-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
; X86-NEXT: movdqa %xmm2, %xmm1
; X86-NEXT: pandn %xmm0, %xmm1
; X86-NEXT: por %xmm2, %xmm1
; X86-NEXT: pcmpeqd %xmm2, %xmm2
; X86-NEXT: psllw $5, %xmm1
; X86-NEXT: pxor %xmm3, %xmm3
; X86-NEXT: pxor %xmm0, %xmm0
; X86-NEXT: pcmpgtb %xmm1, %xmm0
; X86-NEXT: pxor %xmm0, %xmm2
; X86-NEXT: pand {{\.LCPI.*}}, %xmm0
; X86-NEXT: por %xmm2, %xmm0
; X86-NEXT: paddb %xmm1, %xmm1
; X86-NEXT: pxor %xmm2, %xmm2
; X86-NEXT: pcmpgtb %xmm1, %xmm2
; X86-NEXT: movdqa %xmm2, %xmm4
; X86-NEXT: pandn %xmm0, %xmm4
; X86-NEXT: psllw $2, %xmm0
; X86-NEXT: pand %xmm2, %xmm0
; X86-NEXT: pand {{\.LCPI.*}}, %xmm0
; X86-NEXT: por %xmm4, %xmm0
; X86-NEXT: paddb %xmm1, %xmm1
; X86-NEXT: pcmpgtb %xmm1, %xmm3
; X86-NEXT: movdqa %xmm3, %xmm1
; X86-NEXT: pandn %xmm0, %xmm1
; X86-NEXT: paddb %xmm0, %xmm0
; X86-NEXT: pand %xmm3, %xmm0
; X86-NEXT: por %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: do_not_crash:
; X64: # %bb.0: # %entry


@@ -1,7 +1,15 @@
; RUN: llc < %s -mtriple=i686-- -mattr=+sse2
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 | FileCheck %s
; Example that requires splitting and expanding a vector shift.
define <2 x i64> @update(<2 x i64> %val) nounwind readnone {
; CHECK-LABEL: update:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlq $2, %xmm1
; CHECK-NEXT: psrlq $3, %xmm0
; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; CHECK-NEXT: retl
entry:
%shr = lshr <2 x i64> %val, < i64 2, i64 3 >
ret <2 x i64> %shr


@@ -1,8 +1,17 @@
; RUN: llc < %s -mtriple=i686-- -mcpu=yonah
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -mcpu=yonah | FileCheck %s
; Legalization example that requires splitting a large vector into smaller pieces.
define void @update(<8 x i32> %val, <8 x i32>* %dst) nounwind {
; CHECK-LABEL: update:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: psrad $2, %xmm0
; CHECK-NEXT: psrad $4, %xmm1
; CHECK-NEXT: movdqa %xmm1, 16(%eax)
; CHECK-NEXT: movdqa %xmm0, (%eax)
; CHECK-NEXT: retl
entry:
%shl = shl <8 x i32> %val, < i32 2, i32 2, i32 2, i32 2, i32 4, i32 4, i32 4, i32 4 >
%shr = ashr <8 x i32> %val, < i32 2, i32 2, i32 2, i32 2, i32 4, i32 4, i32 4, i32 4 >


@@ -1,25 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64
; Splat patterns below
define <4 x i32> @shl4(<4 x i32> %A) nounwind {
; X32-LABEL: shl4:
; X32: # %bb.0: # %entry
; X32-NEXT: movdqa %xmm0, %xmm1
; X32-NEXT: pslld $2, %xmm1
; X32-NEXT: paddd %xmm0, %xmm0
; X32-NEXT: pxor %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: shl4:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: pslld $2, %xmm1
; X64-NEXT: paddd %xmm0, %xmm0
; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: retq
; CHECK-LABEL: shl4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: pslld $2, %xmm1
; CHECK-NEXT: paddd %xmm0, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
%B = shl <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
%C = shl <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
@@ -28,21 +20,13 @@ entry:
}
define <4 x i32> @shr4(<4 x i32> %A) nounwind {
; X32-LABEL: shr4:
; X32: # %bb.0: # %entry
; X32-NEXT: movdqa %xmm0, %xmm1
; X32-NEXT: psrld $2, %xmm1
; X32-NEXT: psrld $1, %xmm0
; X32-NEXT: pxor %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: shr4:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrld $2, %xmm1
; X64-NEXT: psrld $1, %xmm0
; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: retq
; CHECK-LABEL: shr4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrld $2, %xmm1
; CHECK-NEXT: psrld $1, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
%B = lshr <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
%C = lshr <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
@@ -51,21 +35,13 @@ entry:
}
define <4 x i32> @sra4(<4 x i32> %A) nounwind {
; X32-LABEL: sra4:
; X32: # %bb.0: # %entry
; X32-NEXT: movdqa %xmm0, %xmm1
; X32-NEXT: psrad $2, %xmm1
; X32-NEXT: psrad $1, %xmm0
; X32-NEXT: pxor %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: sra4:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrad $2, %xmm1
; X64-NEXT: psrad $1, %xmm0
; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: retq
; CHECK-LABEL: sra4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrad $2, %xmm1
; CHECK-NEXT: psrad $1, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
%B = ashr <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
%C = ashr <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
@@ -74,21 +50,13 @@ entry:
}
define <2 x i64> @shl2(<2 x i64> %A) nounwind {
; X32-LABEL: shl2:
; X32: # %bb.0: # %entry
; X32-NEXT: movdqa %xmm0, %xmm1
; X32-NEXT: psllq $2, %xmm1
; X32-NEXT: psllq $9, %xmm0
; X32-NEXT: pxor %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: shl2:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psllq $2, %xmm1
; X64-NEXT: psllq $9, %xmm0
; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: retq
; CHECK-LABEL: shl2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psllq $2, %xmm1
; CHECK-NEXT: psllq $9, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
%B = shl <2 x i64> %A, < i64 2, i64 2>
%C = shl <2 x i64> %A, < i64 9, i64 9>
@@ -97,21 +65,13 @@ entry:
}
define <2 x i64> @shr2(<2 x i64> %A) nounwind {
; X32-LABEL: shr2:
; X32: # %bb.0: # %entry
; X32-NEXT: movdqa %xmm0, %xmm1
; X32-NEXT: psrlq $8, %xmm1
; X32-NEXT: psrlq $1, %xmm0
; X32-NEXT: pxor %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: shr2:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrlq $8, %xmm1
; X64-NEXT: psrlq $1, %xmm0
; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: retq
; CHECK-LABEL: shr2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlq $8, %xmm1
; CHECK-NEXT: psrlq $1, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
%B = lshr <2 x i64> %A, < i64 8, i64 8>
%C = lshr <2 x i64> %A, < i64 1, i64 1>
@@ -119,23 +79,14 @@ entry:
ret <2 x i64> %K
}
define <8 x i16> @shl8(<8 x i16> %A) nounwind {
; X32-LABEL: shl8:
; X32: # %bb.0: # %entry
; X32-NEXT: movdqa %xmm0, %xmm1
; X32-NEXT: psllw $2, %xmm1
; X32-NEXT: paddw %xmm0, %xmm0
; X32-NEXT: pxor %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: shl8:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psllw $2, %xmm1
; X64-NEXT: paddw %xmm0, %xmm0
; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: retq
; CHECK-LABEL: shl8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psllw $2, %xmm1
; CHECK-NEXT: paddw %xmm0, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
%B = shl <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
%C = shl <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -144,21 +95,13 @@ entry:
}
define <8 x i16> @shr8(<8 x i16> %A) nounwind {
; X32-LABEL: shr8:
; X32: # %bb.0: # %entry
; X32-NEXT: movdqa %xmm0, %xmm1
; X32-NEXT: psrlw $2, %xmm1
; X32-NEXT: psrlw $1, %xmm0
; X32-NEXT: pxor %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: shr8:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrlw $2, %xmm1
; X64-NEXT: psrlw $1, %xmm0
; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: retq
; CHECK-LABEL: shr8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlw $2, %xmm1
; CHECK-NEXT: psrlw $1, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
%B = lshr <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
%C = lshr <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -167,21 +110,13 @@ entry:
}
define <8 x i16> @sra8(<8 x i16> %A) nounwind {
; X32-LABEL: sra8:
; X32: # %bb.0: # %entry
; X32-NEXT: movdqa %xmm0, %xmm1
; X32-NEXT: psraw $2, %xmm1
; X32-NEXT: psraw $1, %xmm0
; X32-NEXT: pxor %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: sra8:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psraw $2, %xmm1
; X64-NEXT: psraw $1, %xmm0
; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: retq
; CHECK-LABEL: sra8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psraw $2, %xmm1
; CHECK-NEXT: psraw $1, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
%B = ashr <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
%C = ashr <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -191,15 +126,14 @@ entry:
; non-splat test
define <8 x i16> @sll8_nosplat(<8 x i16> %A) nounwind {
; X32-LABEL: sll8_nosplat:
; X32: # %bb.0: # %entry
; X32-NEXT: movdqa {{.*#+}} xmm1 = [2,4,8,64,4,4,4,4]
; X32-NEXT: pmullw %xmm0, %xmm1
; X32-NEXT: pmullw {{\.LCPI.*}}, %xmm0
; X32-NEXT: pxor %xmm1, %xmm0
; X32-NEXT: retl
; X86-LABEL: sll8_nosplat:
; X86: # %bb.0: # %entry
; X86-NEXT: movdqa {{.*#+}} xmm1 = [2,4,8,64,4,4,4,4]
; X86-NEXT: pmullw %xmm0, %xmm1
; X86-NEXT: pmullw {{\.LCPI.*}}, %xmm0
; X86-NEXT: pxor %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sll8_nosplat:
; X64: # %bb.0: # %entry
@@ -215,31 +149,18 @@ entry:
ret <8 x i16> %K
}
define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind {
; X32-LABEL: shr2_nosplat:
; X32: # %bb.0: # %entry
; X32-NEXT: movdqa %xmm0, %xmm2
; X32-NEXT: psrlq $8, %xmm2
; X32-NEXT: movdqa %xmm0, %xmm1
; X32-NEXT: psrlq $1, %xmm1
; X32-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3]
; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; X32-NEXT: xorps %xmm2, %xmm1
; X32-NEXT: movaps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: shr2_nosplat:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqa %xmm0, %xmm2
; X64-NEXT: psrlq $8, %xmm2
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrlq $1, %xmm1
; X64-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3]
; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; X64-NEXT: xorps %xmm2, %xmm1
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: retq
; CHECK-LABEL: shr2_nosplat:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movdqa %xmm0, %xmm2
; CHECK-NEXT: psrlq $8, %xmm2
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlq $1, %xmm1
; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3]
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT: xorps %xmm2, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
%B = lshr <2 x i64> %A, < i64 8, i64 1>
%C = lshr <2 x i64> %A, < i64 1, i64 0>
@@ -247,25 +168,16 @@ entry:
ret <2 x i64> %K
}
; Other shifts
define <2 x i32> @shl2_other(<2 x i32> %A) nounwind {
; X32-LABEL: shl2_other:
; X32: # %bb.0: # %entry
; X32-NEXT: movdqa %xmm0, %xmm1
; X32-NEXT: pslld $2, %xmm1
; X32-NEXT: pslld $9, %xmm0
; X32-NEXT: pxor %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: shl2_other:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: pslld $2, %xmm1
; X64-NEXT: pslld $9, %xmm0
; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: retq
; CHECK-LABEL: shl2_other:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: pslld $2, %xmm1
; CHECK-NEXT: pslld $9, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
%B = shl <2 x i32> %A, < i32 2, i32 2>
%C = shl <2 x i32> %A, < i32 9, i32 9>
@@ -274,21 +186,13 @@ entry:
}
define <2 x i32> @shr2_other(<2 x i32> %A) nounwind {
; X32-LABEL: shr2_other:
; X32: # %bb.0: # %entry
; X32-NEXT: movdqa %xmm0, %xmm1
; X32-NEXT: psrld $8, %xmm1
; X32-NEXT: psrld $1, %xmm0
; X32-NEXT: pxor %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: shr2_other:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrld $8, %xmm1
; X64-NEXT: psrld $1, %xmm0
; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: retq
; CHECK-LABEL: shr2_other:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrld $8, %xmm1
; CHECK-NEXT: psrld $1, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
%B = lshr <2 x i32> %A, < i32 8, i32 8>
%C = lshr <2 x i32> %A, < i32 1, i32 1>
@@ -297,11 +201,11 @@ entry:
}
define <16 x i8> @shl9(<16 x i8> %A) nounwind {
; X32-LABEL: shl9:
; X32: # %bb.0:
; X32-NEXT: psllw $3, %xmm0
; X32-NEXT: pand {{\.LCPI.*}}, %xmm0
; X32-NEXT: retl
; X86-LABEL: shl9:
; X86: # %bb.0:
; X86-NEXT: psllw $3, %xmm0
; X86-NEXT: pand {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: shl9:
; X64: # %bb.0:
@@ -313,11 +217,11 @@ define <16 x i8> @shl9(<16 x i8> %A) nounwind {
}
define <16 x i8> @shr9(<16 x i8> %A) nounwind {
; X32-LABEL: shr9:
; X32: # %bb.0:
; X32-NEXT: psrlw $3, %xmm0
; X32-NEXT: pand {{\.LCPI.*}}, %xmm0
; X32-NEXT: retl
; X86-LABEL: shr9:
; X86: # %bb.0:
; X86-NEXT: psrlw $3, %xmm0
; X86-NEXT: pand {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: shr9:
; X64: # %bb.0:
@@ -329,32 +233,25 @@ define <16 x i8> @shr9(<16 x i8> %A) nounwind {
}
define <16 x i8> @sra_v16i8_7(<16 x i8> %A) nounwind {
; X32-LABEL: sra_v16i8_7:
; X32: # %bb.0:
; X32-NEXT: pxor %xmm1, %xmm1
; X32-NEXT: pcmpgtb %xmm0, %xmm1
; X32-NEXT: movdqa %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: sra_v16i8_7:
; X64: # %bb.0:
; X64-NEXT: pxor %xmm1, %xmm1
; X64-NEXT: pcmpgtb %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
; CHECK-LABEL: sra_v16i8_7:
; CHECK: # %bb.0:
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: pcmpgtb %xmm0, %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
%B = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
ret <16 x i8> %B
}
define <16 x i8> @sra_v16i8(<16 x i8> %A) nounwind {
; X32-LABEL: sra_v16i8:
; X32: # %bb.0:
; X32-NEXT: psrlw $3, %xmm0
; X32-NEXT: pand {{\.LCPI.*}}, %xmm0
; X32-NEXT: movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; X32-NEXT: pxor %xmm1, %xmm0
; X32-NEXT: psubb %xmm1, %xmm0
; X32-NEXT: retl
; X86-LABEL: sra_v16i8:
; X86: # %bb.0:
; X86-NEXT: psrlw $3, %xmm0
; X86-NEXT: pand {{\.LCPI.*}}, %xmm0
; X86-NEXT: movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; X86-NEXT: pxor %xmm1, %xmm0
; X86-NEXT: psubb %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sra_v16i8:
; X64: # %bb.0: