mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
[X86][SSE] Regenerate vector shift codegen tests. NFCI.
This commit is contained in:
parent
b28aeaea9c
commit
ddf476d680
@ -1,15 +1,15 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X32,X32-SLOW
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=X32,X32-FAST
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X86,X86-SLOW
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=X86,X86-FAST
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64,X64-SLOW
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=X64,X64-FAST
|
||||
|
||||
; AVX2 Logical Shift Left
|
||||
|
||||
define <16 x i16> @test_sllw_1(<16 x i16> %InVec) {
|
||||
; X32-LABEL: test_sllw_1:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_sllw_1:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_sllw_1:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -20,10 +20,10 @@ entry:
|
||||
}
|
||||
|
||||
define <16 x i16> @test_sllw_2(<16 x i16> %InVec) {
|
||||
; X32-LABEL: test_sllw_2:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: vpaddw %ymm0, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_sllw_2:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: vpaddw %ymm0, %ymm0, %ymm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_sllw_2:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -35,10 +35,10 @@ entry:
|
||||
}
|
||||
|
||||
define <16 x i16> @test_sllw_3(<16 x i16> %InVec) {
|
||||
; X32-LABEL: test_sllw_3:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: vpsllw $15, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_sllw_3:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: vpsllw $15, %ymm0, %ymm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_sllw_3:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -50,9 +50,9 @@ entry:
|
||||
}
|
||||
|
||||
define <8 x i32> @test_slld_1(<8 x i32> %InVec) {
|
||||
; X32-LABEL: test_slld_1:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_slld_1:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_slld_1:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -63,10 +63,10 @@ entry:
|
||||
}
|
||||
|
||||
define <8 x i32> @test_slld_2(<8 x i32> %InVec) {
|
||||
; X32-LABEL: test_slld_2:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: vpaddd %ymm0, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_slld_2:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: vpaddd %ymm0, %ymm0, %ymm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_slld_2:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -78,12 +78,12 @@ entry:
|
||||
}
|
||||
|
||||
define <8 x i32> @test_vpslld_var(i32 %shift) {
|
||||
; X32-LABEL: test_vpslld_var:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-NEXT: vmovdqa {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
|
||||
; X32-NEXT: vpslld %xmm0, %ymm1, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_vpslld_var:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X86-NEXT: vmovdqa {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
|
||||
; X86-NEXT: vpslld %xmm0, %ymm1, %ymm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_vpslld_var:
|
||||
; X64: # %bb.0:
|
||||
@ -97,10 +97,10 @@ define <8 x i32> @test_vpslld_var(i32 %shift) {
|
||||
}
|
||||
|
||||
define <8 x i32> @test_slld_3(<8 x i32> %InVec) {
|
||||
; X32-LABEL: test_slld_3:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_slld_3:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: vpslld $31, %ymm0, %ymm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_slld_3:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -112,9 +112,9 @@ entry:
|
||||
}
|
||||
|
||||
define <4 x i64> @test_sllq_1(<4 x i64> %InVec) {
|
||||
; X32-LABEL: test_sllq_1:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_sllq_1:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_sllq_1:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -125,10 +125,10 @@ entry:
|
||||
}
|
||||
|
||||
define <4 x i64> @test_sllq_2(<4 x i64> %InVec) {
|
||||
; X32-LABEL: test_sllq_2:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: vpaddq %ymm0, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_sllq_2:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: vpaddq %ymm0, %ymm0, %ymm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_sllq_2:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -140,10 +140,10 @@ entry:
|
||||
}
|
||||
|
||||
define <4 x i64> @test_sllq_3(<4 x i64> %InVec) {
|
||||
; X32-LABEL: test_sllq_3:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: vpsllq $63, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_sllq_3:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: vpsllq $63, %ymm0, %ymm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_sllq_3:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -157,9 +157,9 @@ entry:
|
||||
; AVX2 Arithmetic Shift
|
||||
|
||||
define <16 x i16> @test_sraw_1(<16 x i16> %InVec) {
|
||||
; X32-LABEL: test_sraw_1:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_sraw_1:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_sraw_1:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -170,10 +170,10 @@ entry:
|
||||
}
|
||||
|
||||
define <16 x i16> @test_sraw_2(<16 x i16> %InVec) {
|
||||
; X32-LABEL: test_sraw_2:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: vpsraw $1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_sraw_2:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: vpsraw $1, %ymm0, %ymm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_sraw_2:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -185,10 +185,10 @@ entry:
|
||||
}
|
||||
|
||||
define <16 x i16> @test_sraw_3(<16 x i16> %InVec) {
|
||||
; X32-LABEL: test_sraw_3:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: vpsraw $15, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_sraw_3:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: vpsraw $15, %ymm0, %ymm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_sraw_3:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -200,9 +200,9 @@ entry:
|
||||
}
|
||||
|
||||
define <8 x i32> @test_srad_1(<8 x i32> %InVec) {
|
||||
; X32-LABEL: test_srad_1:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_srad_1:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_srad_1:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -213,10 +213,10 @@ entry:
|
||||
}
|
||||
|
||||
define <8 x i32> @test_srad_2(<8 x i32> %InVec) {
|
||||
; X32-LABEL: test_srad_2:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: vpsrad $1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_srad_2:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: vpsrad $1, %ymm0, %ymm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_srad_2:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -228,10 +228,10 @@ entry:
|
||||
}
|
||||
|
||||
define <8 x i32> @test_srad_3(<8 x i32> %InVec) {
|
||||
; X32-LABEL: test_srad_3:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: vpsrad $31, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_srad_3:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: vpsrad $31, %ymm0, %ymm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_srad_3:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -245,9 +245,9 @@ entry:
|
||||
; AVX2 Logical Shift Right
|
||||
|
||||
define <16 x i16> @test_srlw_1(<16 x i16> %InVec) {
|
||||
; X32-LABEL: test_srlw_1:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_srlw_1:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_srlw_1:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -258,10 +258,10 @@ entry:
|
||||
}
|
||||
|
||||
define <16 x i16> @test_srlw_2(<16 x i16> %InVec) {
|
||||
; X32-LABEL: test_srlw_2:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: vpsrlw $1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_srlw_2:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: vpsrlw $1, %ymm0, %ymm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_srlw_2:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -273,10 +273,10 @@ entry:
|
||||
}
|
||||
|
||||
define <16 x i16> @test_srlw_3(<16 x i16> %InVec) {
|
||||
; X32-LABEL: test_srlw_3:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: vpsrlw $15, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_srlw_3:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: vpsrlw $15, %ymm0, %ymm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_srlw_3:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -288,9 +288,9 @@ entry:
|
||||
}
|
||||
|
||||
define <8 x i32> @test_srld_1(<8 x i32> %InVec) {
|
||||
; X32-LABEL: test_srld_1:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_srld_1:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_srld_1:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -301,10 +301,10 @@ entry:
|
||||
}
|
||||
|
||||
define <8 x i32> @test_srld_2(<8 x i32> %InVec) {
|
||||
; X32-LABEL: test_srld_2:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: vpsrld $1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_srld_2:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: vpsrld $1, %ymm0, %ymm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_srld_2:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -316,10 +316,10 @@ entry:
|
||||
}
|
||||
|
||||
define <8 x i32> @test_srld_3(<8 x i32> %InVec) {
|
||||
; X32-LABEL: test_srld_3:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: vpsrld $31, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_srld_3:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: vpsrld $31, %ymm0, %ymm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_srld_3:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -331,9 +331,9 @@ entry:
|
||||
}
|
||||
|
||||
define <4 x i64> @test_srlq_1(<4 x i64> %InVec) {
|
||||
; X32-LABEL: test_srlq_1:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_srlq_1:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_srlq_1:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -344,10 +344,10 @@ entry:
|
||||
}
|
||||
|
||||
define <4 x i64> @test_srlq_2(<4 x i64> %InVec) {
|
||||
; X32-LABEL: test_srlq_2:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: vpsrlq $1, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_srlq_2:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: vpsrlq $1, %ymm0, %ymm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_srlq_2:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -359,10 +359,10 @@ entry:
|
||||
}
|
||||
|
||||
define <4 x i64> @test_srlq_3(<4 x i64> %InVec) {
|
||||
; X32-LABEL: test_srlq_3:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: vpsrlq $63, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_srlq_3:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: vpsrlq $63, %ymm0, %ymm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_srlq_3:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -374,25 +374,25 @@ entry:
|
||||
}
|
||||
|
||||
define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind {
|
||||
; X32-SLOW-LABEL: srl_trunc_and_v4i64:
|
||||
; X32-SLOW: # %bb.0:
|
||||
; X32-SLOW-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||
; X32-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
|
||||
; X32-SLOW-NEXT: vbroadcastss {{.*#+}} xmm2 = [8,8,8,8]
|
||||
; X32-SLOW-NEXT: vandps %xmm2, %xmm1, %xmm1
|
||||
; X32-SLOW-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
|
||||
; X32-SLOW-NEXT: vzeroupper
|
||||
; X32-SLOW-NEXT: retl
|
||||
; X86-SLOW-LABEL: srl_trunc_and_v4i64:
|
||||
; X86-SLOW: # %bb.0:
|
||||
; X86-SLOW-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||
; X86-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
|
||||
; X86-SLOW-NEXT: vbroadcastss {{.*#+}} xmm2 = [8,8,8,8]
|
||||
; X86-SLOW-NEXT: vandps %xmm2, %xmm1, %xmm1
|
||||
; X86-SLOW-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
|
||||
; X86-SLOW-NEXT: vzeroupper
|
||||
; X86-SLOW-NEXT: retl
|
||||
;
|
||||
; X32-FAST-LABEL: srl_trunc_and_v4i64:
|
||||
; X32-FAST: # %bb.0:
|
||||
; X32-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
|
||||
; X32-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
|
||||
; X32-FAST-NEXT: vpbroadcastd {{.*#+}} xmm2 = [8,8,8,8]
|
||||
; X32-FAST-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; X32-FAST-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
|
||||
; X32-FAST-NEXT: vzeroupper
|
||||
; X32-FAST-NEXT: retl
|
||||
; X86-FAST-LABEL: srl_trunc_and_v4i64:
|
||||
; X86-FAST: # %bb.0:
|
||||
; X86-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
|
||||
; X86-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
|
||||
; X86-FAST-NEXT: vpbroadcastd {{.*#+}} xmm2 = [8,8,8,8]
|
||||
; X86-FAST-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; X86-FAST-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
|
||||
; X86-FAST-NEXT: vzeroupper
|
||||
; X86-FAST-NEXT: retl
|
||||
;
|
||||
; X64-SLOW-LABEL: srl_trunc_and_v4i64:
|
||||
; X64-SLOW: # %bb.0:
|
||||
@ -424,16 +424,16 @@ define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind {
|
||||
;
|
||||
|
||||
define <8 x i16> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
|
||||
; X32-LABEL: shl_8i16:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
|
||||
; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; X32-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: shl_8i16:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; X86-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; X86-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
|
||||
; X86-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
|
||||
; X86-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; X86-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: shl_8i16:
|
||||
; X64: # %bb.0:
|
||||
@ -450,19 +450,19 @@ define <8 x i16> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
|
||||
}
|
||||
|
||||
define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
|
||||
; X32-LABEL: shl_16i16:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
||||
; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
||||
; X32-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
|
||||
; X32-NEXT: vpsrld $16, %ymm3, %ymm3
|
||||
; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
|
||||
; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
|
||||
; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: vpsrld $16, %ymm0, %ymm0
|
||||
; X32-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: shl_16i16:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; X86-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
||||
; X86-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
||||
; X86-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
|
||||
; X86-NEXT: vpsrld $16, %ymm3, %ymm3
|
||||
; X86-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
|
||||
; X86-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
|
||||
; X86-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
|
||||
; X86-NEXT: vpsrld $16, %ymm0, %ymm0
|
||||
; X86-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: shl_16i16:
|
||||
; X64: # %bb.0:
|
||||
@ -482,29 +482,29 @@ define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
|
||||
}
|
||||
|
||||
define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
|
||||
; X32-LABEL: shl_32i8:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; X32-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; X32-NEXT: vpand {{\.LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2
|
||||
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X32-NEXT: vpsllw $2, %ymm0, %ymm2
|
||||
; X32-NEXT: vpand {{\.LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2
|
||||
; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X32-NEXT: vpaddb %ymm0, %ymm0, %ymm2
|
||||
; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: shl_32i8:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; X86-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2
|
||||
; X86-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X86-NEXT: vpsllw $2, %ymm0, %ymm2
|
||||
; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2
|
||||
; X86-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; X86-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X86-NEXT: vpaddb %ymm0, %ymm0, %ymm2
|
||||
; X86-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; X86-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: shl_32i8:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; X64-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; X64-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X64-NEXT: vpsllw $2, %ymm0, %ymm2
|
||||
; X64-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; X64-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X64-NEXT: vpaddb %ymm0, %ymm0, %ymm2
|
||||
@ -516,15 +516,15 @@ define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
|
||||
}
|
||||
|
||||
define <8 x i16> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
|
||||
; X32-LABEL: ashr_8i16:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; X32-NEXT: vpmovsxwd %xmm0, %ymm0
|
||||
; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X32-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: ashr_8i16:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; X86-NEXT: vpmovsxwd %xmm0, %ymm0
|
||||
; X86-NEXT: vpsravd %ymm1, %ymm0, %ymm0
|
||||
; X86-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X86-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: ashr_8i16:
|
||||
; X64: # %bb.0:
|
||||
@ -540,19 +540,19 @@ define <8 x i16> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
|
||||
}
|
||||
|
||||
define <16 x i16> @ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
|
||||
; X32-LABEL: ashr_16i16:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
||||
; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
||||
; X32-NEXT: vpsravd %ymm3, %ymm4, %ymm3
|
||||
; X32-NEXT: vpsrld $16, %ymm3, %ymm3
|
||||
; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
|
||||
; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
|
||||
; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: vpsrld $16, %ymm0, %ymm0
|
||||
; X32-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: ashr_16i16:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; X86-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
||||
; X86-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
||||
; X86-NEXT: vpsravd %ymm3, %ymm4, %ymm3
|
||||
; X86-NEXT: vpsrld $16, %ymm3, %ymm3
|
||||
; X86-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
|
||||
; X86-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
|
||||
; X86-NEXT: vpsravd %ymm1, %ymm0, %ymm0
|
||||
; X86-NEXT: vpsrld $16, %ymm0, %ymm0
|
||||
; X86-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: ashr_16i16:
|
||||
; X64: # %bb.0:
|
||||
@ -572,33 +572,33 @@ define <16 x i16> @ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
|
||||
}
|
||||
|
||||
define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
|
||||
; X32-LABEL: ashr_32i8:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; X32-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; X32-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; X32-NEXT: vpsraw $4, %ymm3, %ymm4
|
||||
; X32-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
|
||||
; X32-NEXT: vpsraw $2, %ymm3, %ymm4
|
||||
; X32-NEXT: vpaddw %ymm2, %ymm2, %ymm2
|
||||
; X32-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
|
||||
; X32-NEXT: vpsraw $1, %ymm3, %ymm4
|
||||
; X32-NEXT: vpaddw %ymm2, %ymm2, %ymm2
|
||||
; X32-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
|
||||
; X32-NEXT: vpsrlw $8, %ymm2, %ymm2
|
||||
; X32-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; X32-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; X32-NEXT: vpsraw $4, %ymm0, %ymm3
|
||||
; X32-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
|
||||
; X32-NEXT: vpsraw $2, %ymm0, %ymm3
|
||||
; X32-NEXT: vpaddw %ymm1, %ymm1, %ymm1
|
||||
; X32-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
|
||||
; X32-NEXT: vpsraw $1, %ymm0, %ymm3
|
||||
; X32-NEXT: vpaddw %ymm1, %ymm1, %ymm1
|
||||
; X32-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
|
||||
; X32-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||
; X32-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: ashr_32i8:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; X86-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; X86-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; X86-NEXT: vpsraw $4, %ymm3, %ymm4
|
||||
; X86-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
|
||||
; X86-NEXT: vpsraw $2, %ymm3, %ymm4
|
||||
; X86-NEXT: vpaddw %ymm2, %ymm2, %ymm2
|
||||
; X86-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
|
||||
; X86-NEXT: vpsraw $1, %ymm3, %ymm4
|
||||
; X86-NEXT: vpaddw %ymm2, %ymm2, %ymm2
|
||||
; X86-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
|
||||
; X86-NEXT: vpsrlw $8, %ymm2, %ymm2
|
||||
; X86-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; X86-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; X86-NEXT: vpsraw $4, %ymm0, %ymm3
|
||||
; X86-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
|
||||
; X86-NEXT: vpsraw $2, %ymm0, %ymm3
|
||||
; X86-NEXT: vpaddw %ymm1, %ymm1, %ymm1
|
||||
; X86-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
|
||||
; X86-NEXT: vpsraw $1, %ymm0, %ymm3
|
||||
; X86-NEXT: vpaddw %ymm1, %ymm1, %ymm1
|
||||
; X86-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
|
||||
; X86-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||
; X86-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: ashr_32i8:
|
||||
; X64: # %bb.0:
|
||||
@ -632,15 +632,15 @@ define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
|
||||
}
|
||||
|
||||
define <8 x i16> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
|
||||
; X32-LABEL: lshr_8i16:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X32-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: lshr_8i16:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; X86-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; X86-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
|
||||
; X86-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; X86-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
|
||||
; X86-NEXT: vzeroupper
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: lshr_8i16:
|
||||
; X64: # %bb.0:
|
||||
@ -656,19 +656,19 @@ define <8 x i16> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
|
||||
}
|
||||
|
||||
define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
|
||||
; X32-LABEL: lshr_16i16:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
||||
; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
||||
; X32-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
|
||||
; X32-NEXT: vpsrld $16, %ymm3, %ymm3
|
||||
; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
|
||||
; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
|
||||
; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
|
||||
; X32-NEXT: vpsrld $16, %ymm0, %ymm0
|
||||
; X32-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: lshr_16i16:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; X86-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
||||
; X86-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
||||
; X86-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
|
||||
; X86-NEXT: vpsrld $16, %ymm3, %ymm3
|
||||
; X86-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
|
||||
; X86-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
|
||||
; X86-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
|
||||
; X86-NEXT: vpsrld $16, %ymm0, %ymm0
|
||||
; X86-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: lshr_16i16:
|
||||
; X64: # %bb.0:
|
||||
@ -688,34 +688,34 @@ define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
|
||||
}
|
||||
|
||||
define <32 x i8> @lshr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
|
||||
; X32-LABEL: lshr_32i8:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; X32-NEXT: vpsrlw $4, %ymm0, %ymm2
|
||||
; X32-NEXT: vpand {{\.LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2
|
||||
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X32-NEXT: vpsrlw $2, %ymm0, %ymm2
|
||||
; X32-NEXT: vpand {{\.LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2
|
||||
; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X32-NEXT: vpsrlw $1, %ymm0, %ymm2
|
||||
; X32-NEXT: vpand {{\.LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2
|
||||
; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: lshr_32i8:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; X86-NEXT: vpsrlw $4, %ymm0, %ymm2
|
||||
; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2
|
||||
; X86-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X86-NEXT: vpsrlw $2, %ymm0, %ymm2
|
||||
; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2
|
||||
; X86-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; X86-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X86-NEXT: vpsrlw $1, %ymm0, %ymm2
|
||||
; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2
|
||||
; X86-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; X86-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: lshr_32i8:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; X64-NEXT: vpsrlw $4, %ymm0, %ymm2
|
||||
; X64-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X64-NEXT: vpsrlw $2, %ymm0, %ymm2
|
||||
; X64-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; X64-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X64-NEXT: vpsrlw $1, %ymm0, %ymm2
|
||||
; X64-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; X64-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
|
@ -11,10 +11,10 @@ define <32 x i8> @var_shl_v32i8(<32 x i8> %a, <32 x i8> %b) {
|
||||
; AVX256: # %bb.0:
|
||||
; AVX256-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; AVX256-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX256-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX256-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX256-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX256-NEXT: vpsllw $2, %ymm0, %ymm2
|
||||
; AVX256-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX256-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX256-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; AVX256-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX256-NEXT: vpaddb %ymm0, %ymm0, %ymm2
|
||||
@ -34,10 +34,10 @@ define <32 x i8> @var_shl_v32i8(<32 x i8> %a, <32 x i8> %b) {
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2
|
||||
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2
|
||||
@ -115,10 +115,10 @@ define <16 x i8> @var_shl_v16i8(<16 x i8> %a, <16 x i8> %b) {
|
||||
; AVX256VL: # %bb.0:
|
||||
; AVX256VL-NEXT: vpsllw $5, %xmm1, %xmm1
|
||||
; AVX256VL-NEXT: vpsllw $4, %xmm0, %xmm2
|
||||
; AVX256VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX256VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX256VL-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX256VL-NEXT: vpsllw $2, %xmm0, %xmm2
|
||||
; AVX256VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX256VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX256VL-NEXT: vpaddb %xmm1, %xmm1, %xmm1
|
||||
; AVX256VL-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX256VL-NEXT: vpaddb %xmm0, %xmm0, %xmm2
|
||||
@ -153,14 +153,14 @@ define <32 x i8> @var_lshr_v32i8(<32 x i8> %a, <32 x i8> %b) {
|
||||
; AVX256: # %bb.0:
|
||||
; AVX256-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; AVX256-NEXT: vpsrlw $4, %ymm0, %ymm2
|
||||
; AVX256-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX256-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX256-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX256-NEXT: vpsrlw $2, %ymm0, %ymm2
|
||||
; AVX256-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX256-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX256-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; AVX256-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX256-NEXT: vpsrlw $1, %ymm0, %ymm2
|
||||
; AVX256-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX256-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX256-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; AVX256-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX256-NEXT: retq
|
||||
@ -177,14 +177,14 @@ define <32 x i8> @var_lshr_v32i8(<32 x i8> %a, <32 x i8> %b) {
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2
|
||||
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpsrlw $2, %ymm0, %ymm2
|
||||
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm2
|
||||
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
@ -259,14 +259,14 @@ define <16 x i8> @var_lshr_v16i8(<16 x i8> %a, <16 x i8> %b) {
|
||||
; AVX256VL: # %bb.0:
|
||||
; AVX256VL-NEXT: vpsllw $5, %xmm1, %xmm1
|
||||
; AVX256VL-NEXT: vpsrlw $4, %xmm0, %xmm2
|
||||
; AVX256VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX256VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX256VL-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX256VL-NEXT: vpsrlw $2, %xmm0, %xmm2
|
||||
; AVX256VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX256VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX256VL-NEXT: vpaddb %xmm1, %xmm1, %xmm1
|
||||
; AVX256VL-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX256VL-NEXT: vpsrlw $1, %xmm0, %xmm2
|
||||
; AVX256VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX256VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX256VL-NEXT: vpaddb %xmm1, %xmm1, %xmm1
|
||||
; AVX256VL-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX256VL-NEXT: retq
|
||||
|
@ -959,7 +959,7 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
|
||||
;
|
||||
; AVX2-LABEL: constant_shift_v2i64:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [4611686018427387904,72057594037927936]
|
||||
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm0
|
||||
@ -967,7 +967,7 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
|
||||
;
|
||||
; XOP-LABEL: constant_shift_v2i64:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshaq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constant_shift_v2i64:
|
||||
@ -981,7 +981,7 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
|
||||
;
|
||||
; AVX512VL-LABEL: constant_shift_v2i64:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsravq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpsravq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: constant_shift_v2i64:
|
||||
@ -1040,27 +1040,27 @@ define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) nounwind {
|
||||
;
|
||||
; AVX2-LABEL: constant_shift_v4i32:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: constant_shift_v4i32:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpshad {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshad {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
; XOPAVX2-LABEL: constant_shift_v4i32:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constant_shift_v4i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: constant_shift_v4i32:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: constant_shift_v4i32:
|
||||
@ -1110,7 +1110,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
|
||||
;
|
||||
; AVX-LABEL: constant_shift_v8i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpmulhw {{.*}}(%rip), %xmm0, %xmm1
|
||||
; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
|
||||
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; AVX-NEXT: vpsraw $1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3,4,5,6,7]
|
||||
@ -1118,13 +1118,13 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
|
||||
;
|
||||
; XOP-LABEL: constant_shift_v8i16:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshaw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshaw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constant_shift_v8i16:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpmovsxwd %xmm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
@ -1142,14 +1142,14 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
|
||||
; AVX512DQVL-LABEL: constant_shift_v8i16:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpmovsxwd %xmm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vzeroupper
|
||||
; AVX512DQVL-NEXT: retq
|
||||
;
|
||||
; AVX512BWVL-LABEL: constant_shift_v8i16:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpsravw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vpsravw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: constant_shift_v8i16:
|
||||
@ -1179,11 +1179,11 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; SSE-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
|
||||
; SSE-NEXT: psraw $8, %xmm1
|
||||
; SSE-NEXT: pmullw {{.*}}(%rip), %xmm1
|
||||
; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE-NEXT: psrlw $8, %xmm1
|
||||
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE-NEXT: psraw $8, %xmm0
|
||||
; SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE-NEXT: psrlw $8, %xmm0
|
||||
; SSE-NEXT: packuswb %xmm1, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
@ -1192,11 +1192,11 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX1-NEXT: vpsraw $8, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX1-NEXT: vpsraw $8, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
@ -1204,7 +1204,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; AVX2-LABEL: constant_shift_v16i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
@ -1213,13 +1213,13 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
;
|
||||
; XOP-LABEL: constant_shift_v16i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshab {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshab {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constant_shift_v16i8:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpsravd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
@ -1237,7 +1237,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; AVX512DQVL-LABEL: constant_shift_v16i8:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpsravd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vzeroupper
|
||||
; AVX512DQVL-NEXT: retq
|
||||
@ -1245,7 +1245,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; AVX512BWVL-LABEL: constant_shift_v16i8:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpsravw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpsravw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vzeroupper
|
||||
; AVX512BWVL-NEXT: retq
|
||||
@ -1255,11 +1255,11 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; X86-SSE-NEXT: movdqa %xmm0, %xmm1
|
||||
; X86-SSE-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
|
||||
; X86-SSE-NEXT: psraw $8, %xmm1
|
||||
; X86-SSE-NEXT: pmullw {{\.LCPI[0-9]+_[0-9]+}}, %xmm1
|
||||
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
|
||||
; X86-SSE-NEXT: psrlw $8, %xmm1
|
||||
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; X86-SSE-NEXT: psraw $8, %xmm0
|
||||
; X86-SSE-NEXT: pmullw {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: psrlw $8, %xmm0
|
||||
; X86-SSE-NEXT: packuswb %xmm1, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
@ -1306,7 +1306,7 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) nounwind {
|
||||
;
|
||||
; XOP-LABEL: splatconstant_shift_v2i64:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshaq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_shift_v2i64:
|
||||
@ -1407,7 +1407,7 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; SSE-LABEL: splatconstant_shift_v16i8:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: psrlw $3, %xmm0
|
||||
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; SSE-NEXT: pxor %xmm1, %xmm0
|
||||
; SSE-NEXT: psubb %xmm1, %xmm0
|
||||
@ -1416,7 +1416,7 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; AVX-LABEL: splatconstant_shift_v16i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpsrlw $3, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpsubb %xmm1, %xmm0, %xmm0
|
||||
@ -1424,13 +1424,13 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
;
|
||||
; XOP-LABEL: splatconstant_shift_v16i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshab {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshab {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_shift_v16i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsrlw $3, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0
|
||||
@ -1440,14 +1440,14 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsrlw $3, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VL-NEXT: vpternlogq $108, {{.*}}(%rip), %xmm1, %xmm0
|
||||
; AVX512VL-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
|
||||
; AVX512VL-NEXT: vpsubb %xmm1, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: splatconstant_shift_v16i8:
|
||||
; X86-SSE: # %bb.0:
|
||||
; X86-SSE-NEXT: psrlw $3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; X86-SSE-NEXT: pxor %xmm1, %xmm0
|
||||
; X86-SSE-NEXT: psubb %xmm1, %xmm0
|
||||
|
@ -1025,7 +1025,7 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
|
||||
;
|
||||
; AVX2-LABEL: constant_shift_v4i64:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [4611686018427387904,72057594037927936,4294967296,2]
|
||||
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm0
|
||||
@ -1033,15 +1033,15 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
|
||||
;
|
||||
; XOPAVX1-LABEL: constant_shift_v4i64:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpshaq {{.*}}(%rip), %xmm0, %xmm1
|
||||
; XOPAVX1-NEXT: vpshaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshaq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
; XOPAVX2-LABEL: constant_shift_v4i64:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [4611686018427387904,72057594037927936,4294967296,2]
|
||||
; XOPAVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm0
|
||||
@ -1057,7 +1057,7 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
|
||||
;
|
||||
; AVX512VL-LABEL: constant_shift_v4i64:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsravq {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpsravq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-AVX1-LABEL: constant_shift_v4i64:
|
||||
@ -1112,30 +1112,30 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
|
||||
;
|
||||
; AVX2-LABEL: constant_shift_v8i32:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: constant_shift_v8i32:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpshad {{.*}}(%rip), %xmm0, %xmm1
|
||||
; XOPAVX1-NEXT: vpshad {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshad {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshad {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
; XOPAVX2-LABEL: constant_shift_v8i32:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constant_shift_v8i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: constant_shift_v8i32:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-AVX1-LABEL: constant_shift_v8i32:
|
||||
@ -1158,7 +1158,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
|
||||
;
|
||||
; X86-AVX2-LABEL: constant_shift_v8i32:
|
||||
; X86-AVX2: # %bb.0:
|
||||
; X86-AVX2-NEXT: vpsravd {{\.LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: retl
|
||||
%shift = ashr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
|
||||
ret <8 x i32> %shift
|
||||
@ -1167,18 +1167,18 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
|
||||
define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
|
||||
; AVX1-LABEL: constant_shift_v16i16:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpmulhw {{.*}}(%rip), %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; AVX1-NEXT: vpsraw $1, %xmm0, %xmm2
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX1-NEXT: vpmulhw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: constant_shift_v16i16:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpmulhw {{.*}}(%rip), %ymm0, %ymm1
|
||||
; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; AVX2-NEXT: vpsraw $1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2,3,4,5,6,7]
|
||||
@ -1187,24 +1187,24 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
|
||||
;
|
||||
; XOPAVX1-LABEL: constant_shift_v16i16:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpshaw {{.*}}(%rip), %xmm0, %xmm1
|
||||
; XOPAVX1-NEXT: vpshaw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshaw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshaw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
; XOPAVX2-LABEL: constant_shift_v16i16:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpshaw {{.*}}(%rip), %xmm0, %xmm1
|
||||
; XOPAVX2-NEXT: vpshaw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
|
||||
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
|
||||
; XOPAVX2-NEXT: vpshaw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: vpshaw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constant_shift_v16i16:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpsravd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
@ -1219,29 +1219,29 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
|
||||
; AVX512DQVL-LABEL: constant_shift_v16i16:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpmovsxwd %ymm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpsravd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512DQVL-NEXT: retq
|
||||
;
|
||||
; AVX512BWVL-LABEL: constant_shift_v16i16:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpsravw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpsravw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: retq
|
||||
;
|
||||
; X86-AVX1-LABEL: constant_shift_v16i16:
|
||||
; X86-AVX1: # %bb.0:
|
||||
; X86-AVX1-NEXT: vpmulhw {{\.LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
|
||||
; X86-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
|
||||
; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; X86-AVX1-NEXT: vpsraw $1, %xmm0, %xmm2
|
||||
; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
|
||||
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; X86-AVX1-NEXT: vpmulhw {{\.LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
|
||||
; X86-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
|
||||
; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; X86-AVX1-NEXT: retl
|
||||
;
|
||||
; X86-AVX2-LABEL: constant_shift_v16i16:
|
||||
; X86-AVX2: # %bb.0:
|
||||
; X86-AVX2-NEXT: vpmulhw {{\.LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm1
|
||||
; X86-AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm1
|
||||
; X86-AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; X86-AVX2-NEXT: vpsraw $1, %xmm0, %xmm0
|
||||
; X86-AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2,3,4,5,6,7]
|
||||
@ -1282,11 +1282,11 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX2-NEXT: vpsraw $8, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX2-NEXT: vpsraw $8, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
@ -1313,11 +1313,11 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX512DQ-NEXT: vpsraw $8, %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vpsrlw $8, %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX512DQ-NEXT: vpsraw $8, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
@ -1325,7 +1325,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; AVX512BW-LABEL: constant_shift_v32i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
|
||||
; AVX512BW-NEXT: vpsravw {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpsravw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
@ -1333,11 +1333,11 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; AVX512DQVL-NEXT: vpsraw $8, %ymm1, %ymm1
|
||||
; AVX512DQVL-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
|
||||
; AVX512DQVL-NEXT: vpsrlw $8, %ymm1, %ymm1
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; AVX512DQVL-NEXT: vpsraw $8, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: retq
|
||||
@ -1345,7 +1345,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; AVX512BWVL-LABEL: constant_shift_v32i8:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpmovsxbw %ymm0, %zmm0
|
||||
; AVX512BWVL-NEXT: vpsravw {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BWVL-NEXT: vpsravw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
|
||||
; AVX512BWVL-NEXT: retq
|
||||
;
|
||||
@ -1379,11 +1379,11 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; X86-AVX2: # %bb.0:
|
||||
; X86-AVX2-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
|
||||
; X86-AVX2-NEXT: vpsraw $8, %ymm1, %ymm1
|
||||
; X86-AVX2-NEXT: vpmullw {{\.LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
|
||||
; X86-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
|
||||
; X86-AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
|
||||
; X86-AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
|
||||
; X86-AVX2-NEXT: vpsraw $8, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpmullw {{\.LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: retl
|
||||
@ -1591,7 +1591,7 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; AVX2-LABEL: splatconstant_shift_v32i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpsrlw $3, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsubb %ymm1, %ymm0, %ymm0
|
||||
@ -1609,7 +1609,7 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; XOPAVX2-LABEL: splatconstant_shift_v32i8:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpsrlw $3, %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; XOPAVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vpsubb %ymm1, %ymm0, %ymm0
|
||||
@ -1618,7 +1618,7 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; AVX512-LABEL: splatconstant_shift_v32i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsrlw $3, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpsubb %ymm1, %ymm0, %ymm0
|
||||
@ -1628,7 +1628,7 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsrlw $3, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VL-NEXT: vpternlogq $108, {{.*}}(%rip), %ymm1, %ymm0
|
||||
; AVX512VL-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
|
||||
; AVX512VL-NEXT: vpsubb %ymm1, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
@ -1651,7 +1651,7 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; X86-AVX2-LABEL: splatconstant_shift_v32i8:
|
||||
; X86-AVX2: # %bb.0:
|
||||
; X86-AVX2-NEXT: vpsrlw $3, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpand {{\.LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; X86-AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpsubb %ymm1, %ymm0, %ymm0
|
||||
|
@ -233,7 +233,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
|
||||
define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
|
||||
; ALL-LABEL: constant_shift_v8i64:
|
||||
; ALL: # %bb.0:
|
||||
; ALL-NEXT: vpsravq {{.*}}(%rip), %zmm0, %zmm0
|
||||
; ALL-NEXT: vpsravq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; ALL-NEXT: retq
|
||||
%shift = ashr <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
|
||||
ret <8 x i64> %shift
|
||||
@ -242,7 +242,7 @@ define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
|
||||
define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
|
||||
; ALL-LABEL: constant_shift_v16i32:
|
||||
; ALL: # %bb.0:
|
||||
; ALL-NEXT: vpsravd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; ALL-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; ALL-NEXT: retq
|
||||
%shift = ashr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
|
||||
ret <16 x i32> %shift
|
||||
@ -264,7 +264,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
|
||||
;
|
||||
; AVX512BW-LABEL: constant_shift_v32i16:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpsravw {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpsravw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%shift = ashr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
|
||||
ret <32 x i16> %shift
|
||||
@ -303,11 +303,11 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm1 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
|
||||
; AVX512BW-NEXT: vpsraw $8, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
|
||||
; AVX512BW-NEXT: vpsraw $8, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
@ -375,7 +375,7 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512BW-NEXT: vpternlogq $108, {{.*}}(%rip), %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
|
||||
|
@ -1741,27 +1741,27 @@ define <2 x i32> @constant_shift_v2i32(<2 x i32> %a) nounwind {
|
||||
;
|
||||
; AVX2-LABEL: constant_shift_v2i32:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: constant_shift_v2i32:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpshad {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshad {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
; XOPAVX2-LABEL: constant_shift_v2i32:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constant_shift_v2i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: constant_shift_v2i32:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: constant_shift_v2i32:
|
||||
@ -1803,7 +1803,7 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind {
|
||||
;
|
||||
; AVX-LABEL: constant_shift_v4i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpmulhw {{.*}}(%rip), %xmm0, %xmm1
|
||||
; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
|
||||
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; AVX-NEXT: vpsraw $1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3,4,5,6,7]
|
||||
@ -1811,13 +1811,13 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind {
|
||||
;
|
||||
; XOP-LABEL: constant_shift_v4i16:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshaw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshaw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constant_shift_v4i16:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpmovsxwd %xmm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
@ -1835,14 +1835,14 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind {
|
||||
; AVX512DQVL-LABEL: constant_shift_v4i16:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpmovsxwd %xmm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vzeroupper
|
||||
; AVX512DQVL-NEXT: retq
|
||||
;
|
||||
; AVX512BWVL-LABEL: constant_shift_v4i16:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpsravw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vpsravw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: constant_shift_v4i16:
|
||||
@ -1891,7 +1891,7 @@ define <2 x i16> @constant_shift_v2i16(<2 x i16> %a) nounwind {
|
||||
;
|
||||
; XOP-LABEL: constant_shift_v2i16:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshaw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshaw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constant_shift_v2i16:
|
||||
@ -1919,7 +1919,7 @@ define <2 x i16> @constant_shift_v2i16(<2 x i16> %a) nounwind {
|
||||
;
|
||||
; AVX512BWVL-LABEL: constant_shift_v2i16:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpsravw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vpsravw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: constant_shift_v2i16:
|
||||
@ -1944,7 +1944,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
|
||||
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE-NEXT: psraw $8, %xmm0
|
||||
; SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE-NEXT: psrlw $8, %xmm0
|
||||
; SSE-NEXT: packuswb %xmm2, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
@ -1955,7 +1955,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
|
||||
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX1-NEXT: vpsraw $8, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
@ -1963,7 +1963,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; AVX2-LABEL: constant_shift_v8i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
@ -1972,13 +1972,13 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
;
|
||||
; XOP-LABEL: constant_shift_v8i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshab {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshab {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constant_shift_v8i8:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpsravd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
@ -1996,7 +1996,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; AVX512DQVL-LABEL: constant_shift_v8i8:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpsravd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vzeroupper
|
||||
; AVX512DQVL-NEXT: retq
|
||||
@ -2004,7 +2004,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; AVX512BWVL-LABEL: constant_shift_v8i8:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpsravw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpsravw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vzeroupper
|
||||
; AVX512BWVL-NEXT: retq
|
||||
@ -2016,7 +2016,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; X86-SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
|
||||
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; X86-SSE-NEXT: psraw $8, %xmm0
|
||||
; X86-SSE-NEXT: pmullw {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: psrlw $8, %xmm0
|
||||
; X86-SSE-NEXT: packuswb %xmm2, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
@ -2032,7 +2032,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
|
||||
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE-NEXT: psraw $8, %xmm0
|
||||
; SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE-NEXT: psrlw $8, %xmm0
|
||||
; SSE-NEXT: packuswb %xmm2, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
@ -2043,7 +2043,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
|
||||
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX1-NEXT: vpsraw $8, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
@ -2051,7 +2051,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; AVX2-LABEL: constant_shift_v4i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
@ -2060,13 +2060,13 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
;
|
||||
; XOP-LABEL: constant_shift_v4i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshab {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshab {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constant_shift_v4i8:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpsravd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
@ -2084,7 +2084,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; AVX512DQVL-LABEL: constant_shift_v4i8:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpsravd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vzeroupper
|
||||
; AVX512DQVL-NEXT: retq
|
||||
@ -2092,7 +2092,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; AVX512BWVL-LABEL: constant_shift_v4i8:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpsravw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpsravw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vzeroupper
|
||||
; AVX512BWVL-NEXT: retq
|
||||
@ -2104,7 +2104,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; X86-SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
|
||||
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; X86-SSE-NEXT: psraw $8, %xmm0
|
||||
; X86-SSE-NEXT: pmullw {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: psrlw $8, %xmm0
|
||||
; X86-SSE-NEXT: packuswb %xmm2, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
@ -2120,7 +2120,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
|
||||
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE-NEXT: psraw $8, %xmm0
|
||||
; SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE-NEXT: psrlw $8, %xmm0
|
||||
; SSE-NEXT: packuswb %xmm2, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
@ -2131,7 +2131,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
|
||||
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; AVX1-NEXT: vpsraw $8, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
@ -2139,7 +2139,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; AVX2-LABEL: constant_shift_v2i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
@ -2148,13 +2148,13 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
;
|
||||
; XOP-LABEL: constant_shift_v2i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshab {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshab {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constant_shift_v2i8:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpsravd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
@ -2172,7 +2172,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; AVX512DQVL-LABEL: constant_shift_v2i8:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpsravd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vzeroupper
|
||||
; AVX512DQVL-NEXT: retq
|
||||
@ -2180,7 +2180,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; AVX512BWVL-LABEL: constant_shift_v2i8:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpsravw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpsravw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vzeroupper
|
||||
; AVX512BWVL-NEXT: retq
|
||||
@ -2192,7 +2192,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; X86-SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
|
||||
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; X86-SSE-NEXT: psraw $8, %xmm0
|
||||
; X86-SSE-NEXT: pmullw {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: psrlw $8, %xmm0
|
||||
; X86-SSE-NEXT: packuswb %xmm2, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
@ -2310,7 +2310,7 @@ define <8 x i8> @splatconstant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; SSE-LABEL: splatconstant_shift_v8i8:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: psrlw $3, %xmm0
|
||||
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; SSE-NEXT: pxor %xmm1, %xmm0
|
||||
; SSE-NEXT: psubb %xmm1, %xmm0
|
||||
@ -2319,7 +2319,7 @@ define <8 x i8> @splatconstant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; AVX-LABEL: splatconstant_shift_v8i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpsrlw $3, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpsubb %xmm1, %xmm0, %xmm0
|
||||
@ -2327,13 +2327,13 @@ define <8 x i8> @splatconstant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
;
|
||||
; XOP-LABEL: splatconstant_shift_v8i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshab {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshab {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_shift_v8i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsrlw $3, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0
|
||||
@ -2343,14 +2343,14 @@ define <8 x i8> @splatconstant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsrlw $3, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VL-NEXT: vpternlogq $108, {{.*}}(%rip), %xmm1, %xmm0
|
||||
; AVX512VL-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
|
||||
; AVX512VL-NEXT: vpsubb %xmm1, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: splatconstant_shift_v8i8:
|
||||
; X86-SSE: # %bb.0:
|
||||
; X86-SSE-NEXT: psrlw $3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; X86-SSE-NEXT: pxor %xmm1, %xmm0
|
||||
; X86-SSE-NEXT: psubb %xmm1, %xmm0
|
||||
@ -2363,7 +2363,7 @@ define <4 x i8> @splatconstant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; SSE-LABEL: splatconstant_shift_v4i8:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: psrlw $3, %xmm0
|
||||
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; SSE-NEXT: pxor %xmm1, %xmm0
|
||||
; SSE-NEXT: psubb %xmm1, %xmm0
|
||||
@ -2372,7 +2372,7 @@ define <4 x i8> @splatconstant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; AVX-LABEL: splatconstant_shift_v4i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpsrlw $3, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpsubb %xmm1, %xmm0, %xmm0
|
||||
@ -2380,13 +2380,13 @@ define <4 x i8> @splatconstant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
;
|
||||
; XOP-LABEL: splatconstant_shift_v4i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshab {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshab {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_shift_v4i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsrlw $3, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0
|
||||
@ -2396,14 +2396,14 @@ define <4 x i8> @splatconstant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsrlw $3, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VL-NEXT: vpternlogq $108, {{.*}}(%rip), %xmm1, %xmm0
|
||||
; AVX512VL-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
|
||||
; AVX512VL-NEXT: vpsubb %xmm1, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: splatconstant_shift_v4i8:
|
||||
; X86-SSE: # %bb.0:
|
||||
; X86-SSE-NEXT: psrlw $3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; X86-SSE-NEXT: pxor %xmm1, %xmm0
|
||||
; X86-SSE-NEXT: psubb %xmm1, %xmm0
|
||||
@ -2416,7 +2416,7 @@ define <2 x i8> @splatconstant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; SSE-LABEL: splatconstant_shift_v2i8:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: psrlw $3, %xmm0
|
||||
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; SSE-NEXT: pxor %xmm1, %xmm0
|
||||
; SSE-NEXT: psubb %xmm1, %xmm0
|
||||
@ -2425,7 +2425,7 @@ define <2 x i8> @splatconstant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; AVX-LABEL: splatconstant_shift_v2i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpsrlw $3, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpsubb %xmm1, %xmm0, %xmm0
|
||||
@ -2433,13 +2433,13 @@ define <2 x i8> @splatconstant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
;
|
||||
; XOP-LABEL: splatconstant_shift_v2i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshab {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshab {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_shift_v2i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsrlw $3, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0
|
||||
@ -2449,14 +2449,14 @@ define <2 x i8> @splatconstant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsrlw $3, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VL-NEXT: vpternlogq $108, {{.*}}(%rip), %xmm1, %xmm0
|
||||
; AVX512VL-NEXT: vpternlogq $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
|
||||
; AVX512VL-NEXT: vpsubb %xmm1, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: splatconstant_shift_v2i8:
|
||||
; X86-SSE: # %bb.0:
|
||||
; X86-SSE-NEXT: psrlw $3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; X86-SSE-NEXT: pxor %xmm1, %xmm0
|
||||
; X86-SSE-NEXT: psubb %xmm1, %xmm0
|
||||
|
@ -373,7 +373,7 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
|
||||
; SSE2-NEXT: pandn %xmm0, %xmm4
|
||||
; SSE2-NEXT: psrlw $4, %xmm0
|
||||
; SSE2-NEXT: pand %xmm3, %xmm0
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: por %xmm4, %xmm0
|
||||
; SSE2-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE2-NEXT: pxor %xmm3, %xmm3
|
||||
@ -382,7 +382,7 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
|
||||
; SSE2-NEXT: pandn %xmm0, %xmm4
|
||||
; SSE2-NEXT: psrlw $2, %xmm0
|
||||
; SSE2-NEXT: pand %xmm3, %xmm0
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: por %xmm4, %xmm0
|
||||
; SSE2-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
|
||||
@ -390,7 +390,7 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
|
||||
; SSE2-NEXT: pandn %xmm0, %xmm1
|
||||
; SSE2-NEXT: psrlw $1, %xmm0
|
||||
; SSE2-NEXT: pand %xmm2, %xmm0
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: por %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -400,18 +400,18 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
|
||||
; SSE41-NEXT: psllw $5, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE41-NEXT: psrlw $4, %xmm3
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSE41-NEXT: psrlw $2, %xmm3
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
|
||||
; SSE41-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSE41-NEXT: psrlw $1, %xmm3
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
|
||||
; SSE41-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
|
||||
@ -422,14 +422,14 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpsrlw $4, %xmm0, %xmm2
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpsrlw $2, %xmm0, %xmm2
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm2
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
@ -488,7 +488,7 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
|
||||
; X86-SSE-NEXT: pandn %xmm0, %xmm4
|
||||
; X86-SSE-NEXT: psrlw $4, %xmm0
|
||||
; X86-SSE-NEXT: pand %xmm3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: por %xmm4, %xmm0
|
||||
; X86-SSE-NEXT: paddb %xmm1, %xmm1
|
||||
; X86-SSE-NEXT: pxor %xmm3, %xmm3
|
||||
@ -497,7 +497,7 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
|
||||
; X86-SSE-NEXT: pandn %xmm0, %xmm4
|
||||
; X86-SSE-NEXT: psrlw $2, %xmm0
|
||||
; X86-SSE-NEXT: pand %xmm3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: por %xmm4, %xmm0
|
||||
; X86-SSE-NEXT: paddb %xmm1, %xmm1
|
||||
; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm2
|
||||
@ -505,7 +505,7 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
|
||||
; X86-SSE-NEXT: pandn %xmm0, %xmm1
|
||||
; X86-SSE-NEXT: psrlw $1, %xmm0
|
||||
; X86-SSE-NEXT: pand %xmm2, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: por %xmm1, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
%shift = lshr <16 x i8> %a, %b
|
||||
@ -796,27 +796,27 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
|
||||
;
|
||||
; AVX2-LABEL: constant_shift_v2i64:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: constant_shift_v2i64:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpshlq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
; XOPAVX2-LABEL: constant_shift_v2i64:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constant_shift_v2i64:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: constant_shift_v2i64:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: constant_shift_v2i64:
|
||||
@ -872,27 +872,27 @@ define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) nounwind {
|
||||
;
|
||||
; AVX2-LABEL: constant_shift_v4i32:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: constant_shift_v4i32:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
; XOPAVX2-LABEL: constant_shift_v4i32:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constant_shift_v4i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: constant_shift_v4i32:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: constant_shift_v4i32:
|
||||
@ -918,7 +918,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535]
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; SSE2-NEXT: pandn %xmm0, %xmm2
|
||||
; SSE2-NEXT: pmulhuw {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand %xmm1, %xmm0
|
||||
; SSE2-NEXT: por %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
@ -932,18 +932,18 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
|
||||
;
|
||||
; AVX-LABEL: constant_shift_v8i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
|
||||
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
|
||||
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: constant_shift_v8i16:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constant_shift_v8i16:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
|
||||
; AVX512DQ-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
|
||||
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
@ -958,13 +958,13 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
|
||||
;
|
||||
; AVX512DQVL-LABEL: constant_shift_v8i16:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
|
||||
; AVX512DQVL-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
|
||||
; AVX512DQVL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; AVX512DQVL-NEXT: retq
|
||||
;
|
||||
; AVX512BWVL-LABEL: constant_shift_v8i16:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpsrlvw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: constant_shift_v8i16:
|
||||
@ -972,7 +972,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
|
||||
; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535]
|
||||
; X86-SSE-NEXT: movdqa %xmm1, %xmm2
|
||||
; X86-SSE-NEXT: pandn %xmm0, %xmm2
|
||||
; X86-SSE-NEXT: pmulhuw {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand %xmm1, %xmm0
|
||||
; X86-SSE-NEXT: por %xmm2, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
@ -986,10 +986,10 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
|
||||
; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm2
|
||||
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
|
||||
; SSE2-NEXT: psrlw $8, %xmm2
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: psrlw $8, %xmm0
|
||||
; SSE2-NEXT: packuswb %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
@ -999,9 +999,9 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; SSE41-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
|
||||
; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm0
|
||||
; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE41-NEXT: psrlw $8, %xmm0
|
||||
; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm1
|
||||
; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE41-NEXT: psrlw $8, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
@ -1011,10 +1011,10 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
@ -1022,7 +1022,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; AVX2-LABEL: constant_shift_v16i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
@ -1031,13 +1031,13 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
;
|
||||
; XOP-LABEL: constant_shift_v16i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constant_shift_v16i8:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512DQ-NEXT: vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
@ -1055,7 +1055,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; AVX512DQVL-LABEL: constant_shift_v16i8:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512DQVL-NEXT: vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vzeroupper
|
||||
; AVX512DQVL-NEXT: retq
|
||||
@ -1063,7 +1063,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; AVX512BWVL-LABEL: constant_shift_v16i8:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512BWVL-NEXT: vpsrlvw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vzeroupper
|
||||
; AVX512BWVL-NEXT: retq
|
||||
@ -1073,10 +1073,10 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; X86-SSE-NEXT: pxor %xmm1, %xmm1
|
||||
; X86-SSE-NEXT: movdqa %xmm0, %xmm2
|
||||
; X86-SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
|
||||
; X86-SSE-NEXT: pmullw {{\.LCPI[0-9]+_[0-9]+}}, %xmm2
|
||||
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
|
||||
; X86-SSE-NEXT: psrlw $8, %xmm2
|
||||
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; X86-SSE-NEXT: pmullw {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: psrlw $8, %xmm0
|
||||
; X86-SSE-NEXT: packuswb %xmm2, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
@ -1194,36 +1194,36 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; SSE-LABEL: splatconstant_shift_v16i8:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: psrlw $3, %xmm0
|
||||
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: splatconstant_shift_v16i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpsrlw $3, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: splatconstant_shift_v16i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_shift_v16i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsrlw $3, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatconstant_shift_v16i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsrlw $3, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: splatconstant_shift_v16i8:
|
||||
; X86-SSE: # %bb.0:
|
||||
; X86-SSE-NEXT: psrlw $3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
%shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
|
||||
ret <16 x i8> %shift
|
||||
|
@ -382,14 +382,14 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm2
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrlw $2, %ymm0, %ymm2
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm2
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
@ -422,14 +422,14 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm2
|
||||
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsrlw $2, %ymm0, %ymm2
|
||||
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsrlw $1, %ymm0, %ymm2
|
||||
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
@ -446,14 +446,14 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; AVX512DQVL-NEXT: vpsrlw $4, %ymm0, %ymm2
|
||||
; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpsrlw $2, %ymm0, %ymm2
|
||||
; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpsrlw $1, %ymm0, %ymm2
|
||||
; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: retq
|
||||
@ -504,14 +504,14 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
|
||||
; X86-AVX2: # %bb.0:
|
||||
; X86-AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; X86-AVX2-NEXT: vpsrlw $4, %ymm0, %ymm2
|
||||
; X86-AVX2-NEXT: vpand {{\.LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2
|
||||
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2
|
||||
; X86-AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpsrlw $2, %ymm0, %ymm2
|
||||
; X86-AVX2-NEXT: vpand {{\.LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2
|
||||
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2
|
||||
; X86-AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; X86-AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpsrlw $1, %ymm0, %ymm2
|
||||
; X86-AVX2-NEXT: vpand {{\.LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2
|
||||
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2
|
||||
; X86-AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; X86-AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: retl
|
||||
@ -835,30 +835,30 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
|
||||
;
|
||||
; AVX2-LABEL: constant_shift_v4i64:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: constant_shift_v4i64:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpshlq {{.*}}(%rip), %xmm0, %xmm1
|
||||
; XOPAVX1-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshlq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
; XOPAVX2-LABEL: constant_shift_v4i64:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constant_shift_v4i64:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: constant_shift_v4i64:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-AVX1-LABEL: constant_shift_v4i64:
|
||||
@ -875,7 +875,7 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
|
||||
;
|
||||
; X86-AVX2-LABEL: constant_shift_v4i64:
|
||||
; X86-AVX2: # %bb.0:
|
||||
; X86-AVX2-NEXT: vpsrlvq {{\.LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: retl
|
||||
%shift = lshr <4 x i64> %a, <i64 1, i64 7, i64 31, i64 62>
|
||||
ret <4 x i64> %shift
|
||||
@ -902,30 +902,30 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
|
||||
;
|
||||
; AVX2-LABEL: constant_shift_v8i32:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: constant_shift_v8i32:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm1
|
||||
; XOPAVX1-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
; XOPAVX2-LABEL: constant_shift_v8i32:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constant_shift_v8i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: constant_shift_v8i32:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-AVX1-LABEL: constant_shift_v8i32:
|
||||
@ -948,7 +948,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
|
||||
;
|
||||
; X86-AVX2-LABEL: constant_shift_v8i32:
|
||||
; X86-AVX2: # %bb.0:
|
||||
; X86-AVX2-NEXT: vpsrlvd {{\.LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: retl
|
||||
%shift = lshr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
|
||||
ret <8 x i32> %shift
|
||||
@ -957,38 +957,38 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
|
||||
define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
|
||||
; AVX1-LABEL: constant_shift_v16i16:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX1-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: constant_shift_v16i16:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpmulhuw {{.*}}(%rip), %ymm0, %ymm1
|
||||
; AVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: constant_shift_v16i16:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm1
|
||||
; XOPAVX1-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
; XOPAVX2-LABEL: constant_shift_v16i16:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpmulhuw {{.*}}(%rip), %ymm0, %ymm1
|
||||
; XOPAVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
|
||||
; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
||||
; XOPAVX2-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constant_shift_v16i16:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpmulhuw {{.*}}(%rip), %ymm0, %ymm1
|
||||
; AVX512DQ-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
|
||||
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
||||
; AVX512DQ-NEXT: retq
|
||||
@ -1003,28 +1003,28 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
|
||||
;
|
||||
; AVX512DQVL-LABEL: constant_shift_v16i16:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpmulhuw {{.*}}(%rip), %ymm0, %ymm1
|
||||
; AVX512DQVL-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
|
||||
; AVX512DQVL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; AVX512DQVL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
||||
; AVX512DQVL-NEXT: retq
|
||||
;
|
||||
; AVX512BWVL-LABEL: constant_shift_v16i16:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpsrlvw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: retq
|
||||
;
|
||||
; X86-AVX1-LABEL: constant_shift_v16i16:
|
||||
; X86-AVX1: # %bb.0:
|
||||
; X86-AVX1-NEXT: vpmulhuw {{\.LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
|
||||
; X86-AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
|
||||
; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; X86-AVX1-NEXT: vpmulhuw {{\.LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
|
||||
; X86-AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
|
||||
; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; X86-AVX1-NEXT: retl
|
||||
;
|
||||
; X86-AVX2-LABEL: constant_shift_v16i16:
|
||||
; X86-AVX2: # %bb.0:
|
||||
; X86-AVX2-NEXT: vpmulhuw {{\.LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm1
|
||||
; X86-AVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm1
|
||||
; X86-AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; X86-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
||||
; X86-AVX2-NEXT: retl
|
||||
@ -1060,10 +1060,10 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
|
||||
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
|
||||
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
@ -1090,10 +1090,10 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
|
||||
; AVX512DQ-NEXT: vpmullw {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpsrlw $8, %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
|
||||
; AVX512DQ-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
@ -1101,7 +1101,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; AVX512BW-LABEL: constant_shift_v32i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
; AVX512BW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
@ -1109,10 +1109,10 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
|
||||
; AVX512DQVL-NEXT: vpmullw {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpsrlw $8, %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
|
||||
; AVX512DQVL-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: retq
|
||||
@ -1120,7 +1120,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; AVX512BWVL-LABEL: constant_shift_v32i8:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
; AVX512BWVL-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BWVL-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
|
||||
; AVX512BWVL-NEXT: retq
|
||||
;
|
||||
@ -1151,10 +1151,10 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; X86-AVX2: # %bb.0:
|
||||
; X86-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; X86-AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
|
||||
; X86-AVX2-NEXT: vpmullw {{\.LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2
|
||||
; X86-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2
|
||||
; X86-AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
|
||||
; X86-AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
|
||||
; X86-AVX2-NEXT: vpmullw {{\.LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: retl
|
||||
@ -1340,7 +1340,7 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; AVX2-LABEL: splatconstant_shift_v32i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpsrlw $3, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: splatconstant_shift_v32i8:
|
||||
@ -1355,19 +1355,19 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; XOPAVX2-LABEL: splatconstant_shift_v32i8:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpsrlw $3, %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_shift_v32i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsrlw $3, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatconstant_shift_v32i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsrlw $3, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-AVX1-LABEL: splatconstant_shift_v32i8:
|
||||
@ -1384,7 +1384,7 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; X86-AVX2-LABEL: splatconstant_shift_v32i8:
|
||||
; X86-AVX2: # %bb.0:
|
||||
; X86-AVX2-NEXT: vpsrlw $3, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpand {{\.LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: retl
|
||||
%shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
|
||||
ret <32 x i8> %shift
|
||||
@ -1451,7 +1451,7 @@ define <4 x i32> @sh_trunc_sh_vec(<4 x i64> %x) {
|
||||
; AVX1-NEXT: vpsrlq $36, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsrlq $36, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
|
||||
; AVX1-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vzeroupper
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
@ -1471,7 +1471,7 @@ define <4 x i32> @sh_trunc_sh_vec(<4 x i64> %x) {
|
||||
; XOPAVX1-NEXT: vpsrlq $36, %xmm1, %xmm1
|
||||
; XOPAVX1-NEXT: vpsrlq $36, %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
|
||||
; XOPAVX1-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vzeroupper
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
@ -1498,7 +1498,7 @@ define <4 x i32> @sh_trunc_sh_vec(<4 x i64> %x) {
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsrlq $36, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0
|
||||
; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vzeroupper
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
@ -1508,7 +1508,7 @@ define <4 x i32> @sh_trunc_sh_vec(<4 x i64> %x) {
|
||||
; X86-AVX1-NEXT: vpsrlq $36, %xmm1, %xmm1
|
||||
; X86-AVX1-NEXT: vpsrlq $36, %xmm0, %xmm0
|
||||
; X86-AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
|
||||
; X86-AVX1-NEXT: vandps {{\.LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
|
||||
; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
|
||||
; X86-AVX1-NEXT: vzeroupper
|
||||
; X86-AVX1-NEXT: retl
|
||||
;
|
||||
|
@ -86,17 +86,17 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
|
||||
; AVX512BW-LABEL: var_shift_v64i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm2
|
||||
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
|
||||
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
|
||||
; AVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm2
|
||||
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
|
||||
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
|
||||
; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm2
|
||||
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
|
||||
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
|
||||
@ -188,7 +188,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
|
||||
define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
|
||||
; ALL-LABEL: constant_shift_v8i64:
|
||||
; ALL: # %bb.0:
|
||||
; ALL-NEXT: vpsrlvq {{.*}}(%rip), %zmm0, %zmm0
|
||||
; ALL-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; ALL-NEXT: retq
|
||||
%shift = lshr <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
|
||||
ret <8 x i64> %shift
|
||||
@ -197,7 +197,7 @@ define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
|
||||
define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
|
||||
; ALL-LABEL: constant_shift_v16i32:
|
||||
; ALL: # %bb.0:
|
||||
; ALL-NEXT: vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; ALL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; ALL-NEXT: retq
|
||||
%shift = lshr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
|
||||
ret <16 x i32> %shift
|
||||
@ -219,7 +219,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
|
||||
;
|
||||
; AVX512BW-LABEL: constant_shift_v32i16:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
|
||||
ret <32 x i16> %shift
|
||||
@ -255,10 +255,10 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
|
||||
; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
|
||||
; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
@ -312,13 +312,13 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
|
||||
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsrlw $3, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
|
||||
; AVX512DQ-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: splatconstant_shift_v64i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
|
||||
ret <64 x i8> %shift
|
||||
|
@ -480,7 +480,7 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
|
||||
; SSE2-NEXT: pandn %xmm0, %xmm4
|
||||
; SSE2-NEXT: psrlw $4, %xmm0
|
||||
; SSE2-NEXT: pand %xmm3, %xmm0
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: por %xmm4, %xmm0
|
||||
; SSE2-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE2-NEXT: pxor %xmm3, %xmm3
|
||||
@ -489,7 +489,7 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
|
||||
; SSE2-NEXT: pandn %xmm0, %xmm4
|
||||
; SSE2-NEXT: psrlw $2, %xmm0
|
||||
; SSE2-NEXT: pand %xmm3, %xmm0
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: por %xmm4, %xmm0
|
||||
; SSE2-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
|
||||
@ -497,7 +497,7 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
|
||||
; SSE2-NEXT: pandn %xmm0, %xmm1
|
||||
; SSE2-NEXT: psrlw $1, %xmm0
|
||||
; SSE2-NEXT: pand %xmm2, %xmm0
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: por %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -507,18 +507,18 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
|
||||
; SSE41-NEXT: psllw $5, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE41-NEXT: psrlw $4, %xmm3
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSE41-NEXT: psrlw $2, %xmm3
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
|
||||
; SSE41-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSE41-NEXT: psrlw $1, %xmm3
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
|
||||
; SSE41-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
|
||||
@ -529,14 +529,14 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpsrlw $4, %xmm0, %xmm2
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpsrlw $2, %xmm0, %xmm2
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm2
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
@ -595,7 +595,7 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
|
||||
; X86-SSE-NEXT: pandn %xmm0, %xmm4
|
||||
; X86-SSE-NEXT: psrlw $4, %xmm0
|
||||
; X86-SSE-NEXT: pand %xmm3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: por %xmm4, %xmm0
|
||||
; X86-SSE-NEXT: paddb %xmm1, %xmm1
|
||||
; X86-SSE-NEXT: pxor %xmm3, %xmm3
|
||||
@ -604,7 +604,7 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
|
||||
; X86-SSE-NEXT: pandn %xmm0, %xmm4
|
||||
; X86-SSE-NEXT: psrlw $2, %xmm0
|
||||
; X86-SSE-NEXT: pand %xmm3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: por %xmm4, %xmm0
|
||||
; X86-SSE-NEXT: paddb %xmm1, %xmm1
|
||||
; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm2
|
||||
@ -612,7 +612,7 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
|
||||
; X86-SSE-NEXT: pandn %xmm0, %xmm1
|
||||
; X86-SSE-NEXT: psrlw $1, %xmm0
|
||||
; X86-SSE-NEXT: pand %xmm2, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: por %xmm1, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
%shift = lshr <8 x i8> %a, %b
|
||||
@ -630,7 +630,7 @@ define <4 x i8> @var_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
|
||||
; SSE2-NEXT: pandn %xmm0, %xmm4
|
||||
; SSE2-NEXT: psrlw $4, %xmm0
|
||||
; SSE2-NEXT: pand %xmm3, %xmm0
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: por %xmm4, %xmm0
|
||||
; SSE2-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE2-NEXT: pxor %xmm3, %xmm3
|
||||
@ -639,7 +639,7 @@ define <4 x i8> @var_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
|
||||
; SSE2-NEXT: pandn %xmm0, %xmm4
|
||||
; SSE2-NEXT: psrlw $2, %xmm0
|
||||
; SSE2-NEXT: pand %xmm3, %xmm0
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: por %xmm4, %xmm0
|
||||
; SSE2-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
|
||||
@ -647,7 +647,7 @@ define <4 x i8> @var_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
|
||||
; SSE2-NEXT: pandn %xmm0, %xmm1
|
||||
; SSE2-NEXT: psrlw $1, %xmm0
|
||||
; SSE2-NEXT: pand %xmm2, %xmm0
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: por %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -657,18 +657,18 @@ define <4 x i8> @var_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
|
||||
; SSE41-NEXT: psllw $5, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE41-NEXT: psrlw $4, %xmm3
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSE41-NEXT: psrlw $2, %xmm3
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
|
||||
; SSE41-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSE41-NEXT: psrlw $1, %xmm3
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
|
||||
; SSE41-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
|
||||
@ -679,14 +679,14 @@ define <4 x i8> @var_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpsrlw $4, %xmm0, %xmm2
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpsrlw $2, %xmm0, %xmm2
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm2
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
@ -745,7 +745,7 @@ define <4 x i8> @var_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
|
||||
; X86-SSE-NEXT: pandn %xmm0, %xmm4
|
||||
; X86-SSE-NEXT: psrlw $4, %xmm0
|
||||
; X86-SSE-NEXT: pand %xmm3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: por %xmm4, %xmm0
|
||||
; X86-SSE-NEXT: paddb %xmm1, %xmm1
|
||||
; X86-SSE-NEXT: pxor %xmm3, %xmm3
|
||||
@ -754,7 +754,7 @@ define <4 x i8> @var_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
|
||||
; X86-SSE-NEXT: pandn %xmm0, %xmm4
|
||||
; X86-SSE-NEXT: psrlw $2, %xmm0
|
||||
; X86-SSE-NEXT: pand %xmm3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: por %xmm4, %xmm0
|
||||
; X86-SSE-NEXT: paddb %xmm1, %xmm1
|
||||
; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm2
|
||||
@ -762,7 +762,7 @@ define <4 x i8> @var_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
|
||||
; X86-SSE-NEXT: pandn %xmm0, %xmm1
|
||||
; X86-SSE-NEXT: psrlw $1, %xmm0
|
||||
; X86-SSE-NEXT: pand %xmm2, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: por %xmm1, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
%shift = lshr <4 x i8> %a, %b
|
||||
@ -780,7 +780,7 @@ define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
|
||||
; SSE2-NEXT: pandn %xmm0, %xmm4
|
||||
; SSE2-NEXT: psrlw $4, %xmm0
|
||||
; SSE2-NEXT: pand %xmm3, %xmm0
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: por %xmm4, %xmm0
|
||||
; SSE2-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE2-NEXT: pxor %xmm3, %xmm3
|
||||
@ -789,7 +789,7 @@ define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
|
||||
; SSE2-NEXT: pandn %xmm0, %xmm4
|
||||
; SSE2-NEXT: psrlw $2, %xmm0
|
||||
; SSE2-NEXT: pand %xmm3, %xmm0
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: por %xmm4, %xmm0
|
||||
; SSE2-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
|
||||
@ -797,7 +797,7 @@ define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
|
||||
; SSE2-NEXT: pandn %xmm0, %xmm1
|
||||
; SSE2-NEXT: psrlw $1, %xmm0
|
||||
; SSE2-NEXT: pand %xmm2, %xmm0
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: por %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -807,18 +807,18 @@ define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
|
||||
; SSE41-NEXT: psllw $5, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE41-NEXT: psrlw $4, %xmm3
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSE41-NEXT: psrlw $2, %xmm3
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
|
||||
; SSE41-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSE41-NEXT: psrlw $1, %xmm3
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
|
||||
; SSE41-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
|
||||
@ -829,14 +829,14 @@ define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpsrlw $4, %xmm0, %xmm2
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpsrlw $2, %xmm0, %xmm2
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm2
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
@ -895,7 +895,7 @@ define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
|
||||
; X86-SSE-NEXT: pandn %xmm0, %xmm4
|
||||
; X86-SSE-NEXT: psrlw $4, %xmm0
|
||||
; X86-SSE-NEXT: pand %xmm3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: por %xmm4, %xmm0
|
||||
; X86-SSE-NEXT: paddb %xmm1, %xmm1
|
||||
; X86-SSE-NEXT: pxor %xmm3, %xmm3
|
||||
@ -904,7 +904,7 @@ define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
|
||||
; X86-SSE-NEXT: pandn %xmm0, %xmm4
|
||||
; X86-SSE-NEXT: psrlw $2, %xmm0
|
||||
; X86-SSE-NEXT: pand %xmm3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: por %xmm4, %xmm0
|
||||
; X86-SSE-NEXT: paddb %xmm1, %xmm1
|
||||
; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm2
|
||||
@ -912,7 +912,7 @@ define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
|
||||
; X86-SSE-NEXT: pandn %xmm0, %xmm1
|
||||
; X86-SSE-NEXT: psrlw $1, %xmm0
|
||||
; X86-SSE-NEXT: pand %xmm2, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: por %xmm1, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
%shift = lshr <2 x i8> %a, %b
|
||||
@ -1447,27 +1447,27 @@ define <2 x i32> @constant_shift_v2i32(<2 x i32> %a) nounwind {
|
||||
;
|
||||
; AVX2-LABEL: constant_shift_v2i32:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: constant_shift_v2i32:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
; XOPAVX2-LABEL: constant_shift_v2i32:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constant_shift_v2i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: constant_shift_v2i32:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: constant_shift_v2i32:
|
||||
@ -1489,7 +1489,7 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind {
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535]
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; SSE2-NEXT: pandn %xmm0, %xmm2
|
||||
; SSE2-NEXT: pmulhuw {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand %xmm1, %xmm0
|
||||
; SSE2-NEXT: por %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
@ -1503,18 +1503,18 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind {
|
||||
;
|
||||
; AVX-LABEL: constant_shift_v4i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
|
||||
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
|
||||
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: constant_shift_v4i16:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constant_shift_v4i16:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
|
||||
; AVX512DQ-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
|
||||
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
@ -1529,13 +1529,13 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind {
|
||||
;
|
||||
; AVX512DQVL-LABEL: constant_shift_v4i16:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
|
||||
; AVX512DQVL-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
|
||||
; AVX512DQVL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; AVX512DQVL-NEXT: retq
|
||||
;
|
||||
; AVX512BWVL-LABEL: constant_shift_v4i16:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpsrlvw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: constant_shift_v4i16:
|
||||
@ -1543,7 +1543,7 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind {
|
||||
; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535]
|
||||
; X86-SSE-NEXT: movdqa %xmm1, %xmm2
|
||||
; X86-SSE-NEXT: pandn %xmm0, %xmm2
|
||||
; X86-SSE-NEXT: pmulhuw {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand %xmm1, %xmm0
|
||||
; X86-SSE-NEXT: por %xmm2, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
@ -1580,7 +1580,7 @@ define <2 x i16> @constant_shift_v2i16(<2 x i16> %a) nounwind {
|
||||
;
|
||||
; XOP-LABEL: constant_shift_v2i16:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constant_shift_v2i16:
|
||||
@ -1608,7 +1608,7 @@ define <2 x i16> @constant_shift_v2i16(<2 x i16> %a) nounwind {
|
||||
;
|
||||
; AVX512BWVL-LABEL: constant_shift_v2i16:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpsrlvw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: constant_shift_v2i16:
|
||||
@ -1632,7 +1632,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: psrlw $8, %xmm0
|
||||
; SSE2-NEXT: packuswb %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
@ -1642,7 +1642,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; SSE41-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
|
||||
; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm1
|
||||
; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE41-NEXT: psrlw $8, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
@ -1653,7 +1653,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
|
||||
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
@ -1661,7 +1661,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; AVX2-LABEL: constant_shift_v8i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
@ -1670,13 +1670,13 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
;
|
||||
; XOP-LABEL: constant_shift_v8i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constant_shift_v8i8:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512DQ-NEXT: vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
@ -1694,7 +1694,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; AVX512DQVL-LABEL: constant_shift_v8i8:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512DQVL-NEXT: vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vzeroupper
|
||||
; AVX512DQVL-NEXT: retq
|
||||
@ -1702,7 +1702,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; AVX512BWVL-LABEL: constant_shift_v8i8:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512BWVL-NEXT: vpsrlvw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vzeroupper
|
||||
; AVX512BWVL-NEXT: retq
|
||||
@ -1713,7 +1713,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; X86-SSE-NEXT: movdqa %xmm0, %xmm2
|
||||
; X86-SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
|
||||
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; X86-SSE-NEXT: pmullw {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: psrlw $8, %xmm0
|
||||
; X86-SSE-NEXT: packuswb %xmm2, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
@ -1728,7 +1728,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: psrlw $8, %xmm0
|
||||
; SSE2-NEXT: packuswb %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
@ -1738,7 +1738,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; SSE41-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
|
||||
; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm1
|
||||
; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE41-NEXT: psrlw $8, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
@ -1749,7 +1749,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
|
||||
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
@ -1757,7 +1757,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; AVX2-LABEL: constant_shift_v4i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
@ -1766,13 +1766,13 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
;
|
||||
; XOP-LABEL: constant_shift_v4i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constant_shift_v4i8:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512DQ-NEXT: vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
@ -1790,7 +1790,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; AVX512DQVL-LABEL: constant_shift_v4i8:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512DQVL-NEXT: vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vzeroupper
|
||||
; AVX512DQVL-NEXT: retq
|
||||
@ -1798,7 +1798,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; AVX512BWVL-LABEL: constant_shift_v4i8:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512BWVL-NEXT: vpsrlvw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vzeroupper
|
||||
; AVX512BWVL-NEXT: retq
|
||||
@ -1809,7 +1809,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; X86-SSE-NEXT: movdqa %xmm0, %xmm2
|
||||
; X86-SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
|
||||
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; X86-SSE-NEXT: pmullw {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: psrlw $8, %xmm0
|
||||
; X86-SSE-NEXT: packuswb %xmm2, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
@ -1824,7 +1824,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: psrlw $8, %xmm0
|
||||
; SSE2-NEXT: packuswb %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
@ -1834,7 +1834,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; SSE41-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
|
||||
; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm1
|
||||
; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE41-NEXT: psrlw $8, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
@ -1845,7 +1845,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
|
||||
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
@ -1853,7 +1853,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; AVX2-LABEL: constant_shift_v2i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
@ -1862,13 +1862,13 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
;
|
||||
; XOP-LABEL: constant_shift_v2i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constant_shift_v2i8:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512DQ-NEXT: vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
@ -1886,7 +1886,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; AVX512DQVL-LABEL: constant_shift_v2i8:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512DQVL-NEXT: vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vzeroupper
|
||||
; AVX512DQVL-NEXT: retq
|
||||
@ -1894,7 +1894,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; AVX512BWVL-LABEL: constant_shift_v2i8:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512BWVL-NEXT: vpsrlvw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vzeroupper
|
||||
; AVX512BWVL-NEXT: retq
|
||||
@ -1905,7 +1905,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; X86-SSE-NEXT: movdqa %xmm0, %xmm2
|
||||
; X86-SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
|
||||
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; X86-SSE-NEXT: pmullw {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: psrlw $8, %xmm0
|
||||
; X86-SSE-NEXT: packuswb %xmm2, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
@ -2023,36 +2023,36 @@ define <8 x i8> @splatconstant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; SSE-LABEL: splatconstant_shift_v8i8:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: psrlw $3, %xmm0
|
||||
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: splatconstant_shift_v8i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpsrlw $3, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: splatconstant_shift_v8i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_shift_v8i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsrlw $3, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatconstant_shift_v8i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsrlw $3, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: splatconstant_shift_v8i8:
|
||||
; X86-SSE: # %bb.0:
|
||||
; X86-SSE-NEXT: psrlw $3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
%shift = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
|
||||
ret <8 x i8> %shift
|
||||
@ -2062,36 +2062,36 @@ define <4 x i8> @splatconstant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; SSE-LABEL: splatconstant_shift_v4i8:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: psrlw $3, %xmm0
|
||||
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: splatconstant_shift_v4i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpsrlw $3, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: splatconstant_shift_v4i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_shift_v4i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsrlw $3, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatconstant_shift_v4i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsrlw $3, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: splatconstant_shift_v4i8:
|
||||
; X86-SSE: # %bb.0:
|
||||
; X86-SSE-NEXT: psrlw $3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
%shift = lshr <4 x i8> %a, <i8 3, i8 3, i8 3, i8 3>
|
||||
ret <4 x i8> %shift
|
||||
@ -2101,36 +2101,36 @@ define <2 x i8> @splatconstant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; SSE-LABEL: splatconstant_shift_v2i8:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: psrlw $3, %xmm0
|
||||
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: splatconstant_shift_v2i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpsrlw $3, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: splatconstant_shift_v2i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_shift_v2i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsrlw $3, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatconstant_shift_v2i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsrlw $3, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: splatconstant_shift_v2i8:
|
||||
; X86-SSE: # %bb.0:
|
||||
; X86-SSE-NEXT: psrlw $3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
%shift = lshr <2 x i8> %a, <i8 3, i8 3>
|
||||
ret <2 x i8> %shift
|
||||
|
@ -85,7 +85,7 @@ define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
|
||||
; SSE2-LABEL: var_shift_v4i32:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pslld $23, %xmm1
|
||||
; SSE2-NEXT: paddd {{.*}}(%rip), %xmm1
|
||||
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
|
||||
; SSE2-NEXT: pmuludq %xmm1, %xmm0
|
||||
@ -99,7 +99,7 @@ define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
|
||||
; SSE41-LABEL: var_shift_v4i32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pslld $23, %xmm1
|
||||
; SSE41-NEXT: paddd {{.*}}(%rip), %xmm1
|
||||
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
|
||||
; SSE41-NEXT: pmulld %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
@ -107,7 +107,7 @@ define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
|
||||
; AVX1-LABEL: var_shift_v4i32:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
@ -140,7 +140,7 @@ define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
|
||||
; X86-SSE-LABEL: var_shift_v4i32:
|
||||
; X86-SSE: # %bb.0:
|
||||
; X86-SSE-NEXT: pslld $23, %xmm1
|
||||
; X86-SSE-NEXT: paddd {{\.LCPI[0-9]+_[0-9]+}}, %xmm1
|
||||
; X86-SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
|
||||
; X86-SSE-NEXT: cvttps2dq %xmm1, %xmm1
|
||||
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
|
||||
; X86-SSE-NEXT: pmuludq %xmm1, %xmm0
|
||||
@ -292,7 +292,7 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
|
||||
; SSE2-NEXT: pandn %xmm0, %xmm4
|
||||
; SSE2-NEXT: psllw $4, %xmm0
|
||||
; SSE2-NEXT: pand %xmm3, %xmm0
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: por %xmm4, %xmm0
|
||||
; SSE2-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE2-NEXT: pxor %xmm3, %xmm3
|
||||
@ -301,7 +301,7 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
|
||||
; SSE2-NEXT: pandn %xmm0, %xmm4
|
||||
; SSE2-NEXT: psllw $2, %xmm0
|
||||
; SSE2-NEXT: pand %xmm3, %xmm0
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: por %xmm4, %xmm0
|
||||
; SSE2-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
|
||||
@ -318,12 +318,12 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
|
||||
; SSE41-NEXT: psllw $5, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE41-NEXT: psllw $4, %xmm3
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSE41-NEXT: psllw $2, %xmm3
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
|
||||
; SSE41-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
|
||||
@ -339,10 +339,10 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpsllw $4, %xmm0, %xmm2
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpsllw $2, %xmm0, %xmm2
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpaddb %xmm0, %xmm0, %xmm2
|
||||
@ -402,7 +402,7 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
|
||||
; X86-SSE-NEXT: pandn %xmm0, %xmm4
|
||||
; X86-SSE-NEXT: psllw $4, %xmm0
|
||||
; X86-SSE-NEXT: pand %xmm3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: por %xmm4, %xmm0
|
||||
; X86-SSE-NEXT: paddb %xmm1, %xmm1
|
||||
; X86-SSE-NEXT: pxor %xmm3, %xmm3
|
||||
@ -411,7 +411,7 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
|
||||
; X86-SSE-NEXT: pandn %xmm0, %xmm4
|
||||
; X86-SSE-NEXT: psllw $2, %xmm0
|
||||
; X86-SSE-NEXT: pand %xmm3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: por %xmm4, %xmm0
|
||||
; X86-SSE-NEXT: paddb %xmm1, %xmm1
|
||||
; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm2
|
||||
@ -705,27 +705,27 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
|
||||
;
|
||||
; AVX2-LABEL: constant_shift_v2i64:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: constant_shift_v2i64:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpshlq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
; XOPAVX2-LABEL: constant_shift_v2i64:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constant_shift_v2i64:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: constant_shift_v2i64:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: constant_shift_v2i64:
|
||||
@ -754,37 +754,37 @@ define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) nounwind {
|
||||
;
|
||||
; SSE41-LABEL: constant_shift_v4i32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
|
||||
; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: constant_shift_v4i32:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: constant_shift_v4i32:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: constant_shift_v4i32:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
; XOPAVX2-LABEL: constant_shift_v4i32:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constant_shift_v4i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: constant_shift_v4i32:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: constant_shift_v4i32:
|
||||
@ -805,22 +805,22 @@ define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) nounwind {
|
||||
define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
|
||||
; SSE-LABEL: constant_shift_v8i16:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: constant_shift_v8i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: constant_shift_v8i16:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constant_shift_v8i16:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: constant_shift_v8i16:
|
||||
@ -834,17 +834,17 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
|
||||
;
|
||||
; AVX512DQVL-LABEL: constant_shift_v8i16:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: retq
|
||||
;
|
||||
; AVX512BWVL-LABEL: constant_shift_v8i16:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpsllvw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: constant_shift_v8i16:
|
||||
; X86-SSE: # %bb.0:
|
||||
; X86-SSE-NEXT: pmullw {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
%shift = shl <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
|
||||
ret <8 x i16> %shift
|
||||
@ -855,11 +855,11 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm1
|
||||
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
|
||||
; SSE2-NEXT: pand %xmm2, %xmm1
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand %xmm2, %xmm0
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
@ -868,10 +868,10 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm0
|
||||
; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
|
||||
; SSE41-NEXT: pand %xmm2, %xmm0
|
||||
; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm1
|
||||
; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE41-NEXT: pand %xmm2, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm0, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
@ -880,11 +880,11 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; AVX1-LABEL: constant_shift_v16i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
|
||||
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
@ -892,8 +892,8 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; AVX2-LABEL: constant_shift_v16i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vzeroupper
|
||||
@ -901,13 +901,13 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
;
|
||||
; XOP-LABEL: constant_shift_v16i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constant_shift_v16i8:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512DQ-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
@ -925,7 +925,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; AVX512DQVL-LABEL: constant_shift_v16i8:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512DQVL-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vzeroupper
|
||||
; AVX512DQVL-NEXT: retq
|
||||
@ -933,7 +933,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; AVX512BWVL-LABEL: constant_shift_v16i8:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512BWVL-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vzeroupper
|
||||
; AVX512BWVL-NEXT: retq
|
||||
@ -942,11 +942,11 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; X86-SSE: # %bb.0:
|
||||
; X86-SSE-NEXT: movdqa %xmm0, %xmm1
|
||||
; X86-SSE-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; X86-SSE-NEXT: pmullw {{\.LCPI[0-9]+_[0-9]+}}, %xmm1
|
||||
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
|
||||
; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
|
||||
; X86-SSE-NEXT: pand %xmm2, %xmm1
|
||||
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; X86-SSE-NEXT: pmullw {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand %xmm2, %xmm0
|
||||
; X86-SSE-NEXT: packuswb %xmm1, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
@ -1064,36 +1064,36 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
|
||||
; SSE-LABEL: splatconstant_shift_v16i8:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: psllw $3, %xmm0
|
||||
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: splatconstant_shift_v16i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: splatconstant_shift_v16i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_shift_v16i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatconstant_shift_v16i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: splatconstant_shift_v16i8:
|
||||
; X86-SSE: # %bb.0:
|
||||
; X86-SSE-NEXT: psllw $3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
%shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
|
||||
ret <16 x i8> %shift
|
||||
|
@ -325,10 +325,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsllw $2, %ymm0, %ymm2
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm2
|
||||
@ -358,10 +358,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm2
|
||||
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpaddb %ymm0, %ymm0, %ymm2
|
||||
@ -381,10 +381,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; AVX512DQVL-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpsllw $2, %ymm0, %ymm2
|
||||
; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; AVX512DQVL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpaddb %ymm0, %ymm0, %ymm2
|
||||
@ -435,10 +435,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
|
||||
; X86-AVX2: # %bb.0:
|
||||
; X86-AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
|
||||
; X86-AVX2-NEXT: vpsllw $4, %ymm0, %ymm2
|
||||
; X86-AVX2-NEXT: vpand {{\.LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2
|
||||
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2
|
||||
; X86-AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpsllw $2, %ymm0, %ymm2
|
||||
; X86-AVX2-NEXT: vpand {{\.LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2
|
||||
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2
|
||||
; X86-AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||
; X86-AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm2
|
||||
@ -760,30 +760,30 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
|
||||
;
|
||||
; AVX2-LABEL: constant_shift_v4i64:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: constant_shift_v4i64:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpshlq {{.*}}(%rip), %xmm0, %xmm1
|
||||
; XOPAVX1-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshlq {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
; XOPAVX2-LABEL: constant_shift_v4i64:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constant_shift_v4i64:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: constant_shift_v4i64:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-AVX1-LABEL: constant_shift_v4i64:
|
||||
@ -800,7 +800,7 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
|
||||
;
|
||||
; X86-AVX2-LABEL: constant_shift_v4i64:
|
||||
; X86-AVX2: # %bb.0:
|
||||
; X86-AVX2-NEXT: vpsllvq {{\.LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: retl
|
||||
%shift = shl <4 x i64> %a, <i64 1, i64 7, i64 31, i64 62>
|
||||
ret <4 x i64> %shift
|
||||
@ -809,51 +809,51 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
|
||||
define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
|
||||
; AVX1-LABEL: constant_shift_v8i32:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: constant_shift_v8i32:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: constant_shift_v8i32:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm1
|
||||
; XOPAVX1-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
; XOPAVX2-LABEL: constant_shift_v8i32:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constant_shift_v8i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: constant_shift_v8i32:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-AVX1-LABEL: constant_shift_v8i32:
|
||||
; X86-AVX1: # %bb.0:
|
||||
; X86-AVX1-NEXT: vpmulld {{\.LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
|
||||
; X86-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
|
||||
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; X86-AVX1-NEXT: vpmulld {{\.LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
|
||||
; X86-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
|
||||
; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; X86-AVX1-NEXT: retl
|
||||
;
|
||||
; X86-AVX2-LABEL: constant_shift_v8i32:
|
||||
; X86-AVX2: # %bb.0:
|
||||
; X86-AVX2-NEXT: vpsllvd {{\.LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: retl
|
||||
%shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
|
||||
ret <8 x i32> %shift
|
||||
@ -862,33 +862,33 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
|
||||
define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
|
||||
; AVX1-LABEL: constant_shift_v16i16:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: constant_shift_v16i16:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: constant_shift_v16i16:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm1
|
||||
; XOPAVX1-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
|
||||
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
; XOPAVX2-LABEL: constant_shift_v16i16:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constant_shift_v16i16:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: constant_shift_v16i16:
|
||||
@ -901,25 +901,25 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
|
||||
;
|
||||
; AVX512DQVL-LABEL: constant_shift_v16i16:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: retq
|
||||
;
|
||||
; AVX512BWVL-LABEL: constant_shift_v16i16:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: retq
|
||||
;
|
||||
; X86-AVX1-LABEL: constant_shift_v16i16:
|
||||
; X86-AVX1: # %bb.0:
|
||||
; X86-AVX1-NEXT: vpmullw {{\.LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
|
||||
; X86-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
|
||||
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; X86-AVX1-NEXT: vpmullw {{\.LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
|
||||
; X86-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
|
||||
; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; X86-AVX1-NEXT: retl
|
||||
;
|
||||
; X86-AVX2-LABEL: constant_shift_v16i16:
|
||||
; X86-AVX2: # %bb.0:
|
||||
; X86-AVX2-NEXT: vpmullw {{\.LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: retl
|
||||
%shift = shl <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
|
||||
ret <16 x i16> %shift
|
||||
@ -952,12 +952,12 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; AVX2-LABEL: constant_shift_v32i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpsllw $4, %ymm0, %ymm1
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
|
||||
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
|
||||
; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
|
||||
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpsllw $2, %ymm0, %ymm1
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2
|
||||
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm1
|
||||
@ -986,12 +986,12 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; AVX512DQ-LABEL: constant_shift_v32i8:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm1
|
||||
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
|
||||
; AVX512DQ-NEXT: # ymm2 = mem[0,1,0,1]
|
||||
; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm1
|
||||
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
|
||||
; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm2
|
||||
; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpaddb %ymm0, %ymm0, %ymm1
|
||||
@ -1002,19 +1002,19 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; AVX512BW-LABEL: constant_shift_v32i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512DQVL-LABEL: constant_shift_v32i8:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpsllw $4, %ymm0, %ymm1
|
||||
; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
|
||||
; AVX512DQVL-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
|
||||
; AVX512DQVL-NEXT: # ymm2 = mem[0,1,0,1]
|
||||
; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpsllw $2, %ymm0, %ymm1
|
||||
; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
|
||||
; AVX512DQVL-NEXT: vpaddb %ymm2, %ymm2, %ymm2
|
||||
; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: vpaddb %ymm0, %ymm0, %ymm1
|
||||
@ -1025,7 +1025,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; AVX512BWVL-LABEL: constant_shift_v32i8:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
; AVX512BWVL-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
|
||||
; AVX512BWVL-NEXT: retq
|
||||
;
|
||||
@ -1055,12 +1055,12 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; X86-AVX2-LABEL: constant_shift_v32i8:
|
||||
; X86-AVX2: # %bb.0:
|
||||
; X86-AVX2-NEXT: vpsllw $4, %ymm0, %ymm1
|
||||
; X86-AVX2-NEXT: vpand {{\.LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
|
||||
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
|
||||
; X86-AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
|
||||
; X86-AVX2-NEXT: # ymm2 = mem[0,1,0,1]
|
||||
; X86-AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpsllw $2, %ymm0, %ymm1
|
||||
; X86-AVX2-NEXT: vpand {{\.LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
|
||||
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
|
||||
; X86-AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2
|
||||
; X86-AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm1
|
||||
@ -1249,7 +1249,7 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; AVX2-LABEL: splatconstant_shift_v32i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpsllw $3, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: splatconstant_shift_v32i8:
|
||||
@ -1264,19 +1264,19 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; XOPAVX2-LABEL: splatconstant_shift_v32i8:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpsllw $3, %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_shift_v32i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsllw $3, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatconstant_shift_v32i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsllw $3, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-AVX1-LABEL: splatconstant_shift_v32i8:
|
||||
@ -1293,7 +1293,7 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
|
||||
; X86-AVX2-LABEL: splatconstant_shift_v32i8:
|
||||
; X86-AVX2: # %bb.0:
|
||||
; X86-AVX2-NEXT: vpsllw $3, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpand {{\.LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
|
||||
; X86-AVX2-NEXT: retl
|
||||
%shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
|
||||
ret <32 x i8> %shift
|
||||
|
@ -83,12 +83,12 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
|
||||
; AVX512BW-LABEL: var_shift_v64i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
|
||||
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
|
||||
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
|
||||
; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm2
|
||||
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
|
||||
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
|
||||
@ -181,7 +181,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
|
||||
define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
|
||||
; ALL-LABEL: constant_shift_v8i64:
|
||||
; ALL: # %bb.0:
|
||||
; ALL-NEXT: vpsllvq {{.*}}(%rip), %zmm0, %zmm0
|
||||
; ALL-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; ALL-NEXT: retq
|
||||
%shift = shl <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
|
||||
ret <8 x i64> %shift
|
||||
@ -190,7 +190,7 @@ define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
|
||||
define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
|
||||
; ALL-LABEL: constant_shift_v16i32:
|
||||
; ALL: # %bb.0:
|
||||
; ALL-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; ALL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; ALL-NEXT: retq
|
||||
%shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
|
||||
ret <16 x i32> %shift
|
||||
@ -208,7 +208,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
|
||||
;
|
||||
; AVX512BW-LABEL: constant_shift_v32i16:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%shift = shl <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
|
||||
ret <32 x i16> %shift
|
||||
@ -249,10 +249,10 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
|
||||
; AVX512BW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
||||
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
|
||||
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
|
||||
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
|
||||
; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm2
|
||||
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
|
||||
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
|
||||
@ -310,13 +310,13 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
|
||||
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
|
||||
; AVX512DQ-NEXT: vpsllw $3, %ymm0, %ymm0
|
||||
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
|
||||
; AVX512DQ-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: splatconstant_shift_v64i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpsllw $3, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
|
||||
ret <64 x i8> %shift
|
||||
|
@ -21,7 +21,7 @@ define <2 x i32> @var_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
|
||||
; SSE2-LABEL: var_shift_v2i32:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pslld $23, %xmm1
|
||||
; SSE2-NEXT: paddd {{.*}}(%rip), %xmm1
|
||||
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
|
||||
; SSE2-NEXT: pmuludq %xmm1, %xmm0
|
||||
@ -35,7 +35,7 @@ define <2 x i32> @var_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
|
||||
; SSE41-LABEL: var_shift_v2i32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pslld $23, %xmm1
|
||||
; SSE41-NEXT: paddd {{.*}}(%rip), %xmm1
|
||||
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
|
||||
; SSE41-NEXT: pmulld %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
@ -43,7 +43,7 @@ define <2 x i32> @var_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
|
||||
; AVX1-LABEL: var_shift_v2i32:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
@ -76,7 +76,7 @@ define <2 x i32> @var_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
|
||||
; X86-SSE-LABEL: var_shift_v2i32:
|
||||
; X86-SSE: # %bb.0:
|
||||
; X86-SSE-NEXT: pslld $23, %xmm1
|
||||
; X86-SSE-NEXT: paddd {{\.LCPI[0-9]+_[0-9]+}}, %xmm1
|
||||
; X86-SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
|
||||
; X86-SSE-NEXT: cvttps2dq %xmm1, %xmm1
|
||||
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
|
||||
; X86-SSE-NEXT: pmuludq %xmm1, %xmm0
|
||||
@ -355,7 +355,7 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
|
||||
; SSE2-NEXT: pandn %xmm0, %xmm4
|
||||
; SSE2-NEXT: psllw $4, %xmm0
|
||||
; SSE2-NEXT: pand %xmm3, %xmm0
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: por %xmm4, %xmm0
|
||||
; SSE2-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE2-NEXT: pxor %xmm3, %xmm3
|
||||
@ -364,7 +364,7 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
|
||||
; SSE2-NEXT: pandn %xmm0, %xmm4
|
||||
; SSE2-NEXT: psllw $2, %xmm0
|
||||
; SSE2-NEXT: pand %xmm3, %xmm0
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: por %xmm4, %xmm0
|
||||
; SSE2-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
|
||||
@ -381,12 +381,12 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
|
||||
; SSE41-NEXT: psllw $5, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE41-NEXT: psllw $4, %xmm3
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSE41-NEXT: psllw $2, %xmm3
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
|
||||
; SSE41-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
|
||||
@ -402,10 +402,10 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpsllw $4, %xmm0, %xmm2
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpsllw $2, %xmm0, %xmm2
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpaddb %xmm0, %xmm0, %xmm2
|
||||
@ -465,7 +465,7 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
|
||||
; X86-SSE-NEXT: pandn %xmm0, %xmm4
|
||||
; X86-SSE-NEXT: psllw $4, %xmm0
|
||||
; X86-SSE-NEXT: pand %xmm3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: por %xmm4, %xmm0
|
||||
; X86-SSE-NEXT: paddb %xmm1, %xmm1
|
||||
; X86-SSE-NEXT: pxor %xmm3, %xmm3
|
||||
@ -474,7 +474,7 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
|
||||
; X86-SSE-NEXT: pandn %xmm0, %xmm4
|
||||
; X86-SSE-NEXT: psllw $2, %xmm0
|
||||
; X86-SSE-NEXT: pand %xmm3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: por %xmm4, %xmm0
|
||||
; X86-SSE-NEXT: paddb %xmm1, %xmm1
|
||||
; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm2
|
||||
@ -499,7 +499,7 @@ define <4 x i8> @var_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
|
||||
; SSE2-NEXT: pandn %xmm0, %xmm4
|
||||
; SSE2-NEXT: psllw $4, %xmm0
|
||||
; SSE2-NEXT: pand %xmm3, %xmm0
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: por %xmm4, %xmm0
|
||||
; SSE2-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE2-NEXT: pxor %xmm3, %xmm3
|
||||
@ -508,7 +508,7 @@ define <4 x i8> @var_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
|
||||
; SSE2-NEXT: pandn %xmm0, %xmm4
|
||||
; SSE2-NEXT: psllw $2, %xmm0
|
||||
; SSE2-NEXT: pand %xmm3, %xmm0
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: por %xmm4, %xmm0
|
||||
; SSE2-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
|
||||
@ -525,12 +525,12 @@ define <4 x i8> @var_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
|
||||
; SSE41-NEXT: psllw $5, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE41-NEXT: psllw $4, %xmm3
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSE41-NEXT: psllw $2, %xmm3
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
|
||||
; SSE41-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
|
||||
@ -546,10 +546,10 @@ define <4 x i8> @var_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpsllw $4, %xmm0, %xmm2
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpsllw $2, %xmm0, %xmm2
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpaddb %xmm0, %xmm0, %xmm2
|
||||
@ -609,7 +609,7 @@ define <4 x i8> @var_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
|
||||
; X86-SSE-NEXT: pandn %xmm0, %xmm4
|
||||
; X86-SSE-NEXT: psllw $4, %xmm0
|
||||
; X86-SSE-NEXT: pand %xmm3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: por %xmm4, %xmm0
|
||||
; X86-SSE-NEXT: paddb %xmm1, %xmm1
|
||||
; X86-SSE-NEXT: pxor %xmm3, %xmm3
|
||||
@ -618,7 +618,7 @@ define <4 x i8> @var_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
|
||||
; X86-SSE-NEXT: pandn %xmm0, %xmm4
|
||||
; X86-SSE-NEXT: psllw $2, %xmm0
|
||||
; X86-SSE-NEXT: pand %xmm3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: por %xmm4, %xmm0
|
||||
; X86-SSE-NEXT: paddb %xmm1, %xmm1
|
||||
; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm2
|
||||
@ -643,7 +643,7 @@ define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
|
||||
; SSE2-NEXT: pandn %xmm0, %xmm4
|
||||
; SSE2-NEXT: psllw $4, %xmm0
|
||||
; SSE2-NEXT: pand %xmm3, %xmm0
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: por %xmm4, %xmm0
|
||||
; SSE2-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE2-NEXT: pxor %xmm3, %xmm3
|
||||
@ -652,7 +652,7 @@ define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
|
||||
; SSE2-NEXT: pandn %xmm0, %xmm4
|
||||
; SSE2-NEXT: psllw $2, %xmm0
|
||||
; SSE2-NEXT: pand %xmm3, %xmm0
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: por %xmm4, %xmm0
|
||||
; SSE2-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
|
||||
@ -669,12 +669,12 @@ define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
|
||||
; SSE41-NEXT: psllw $5, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE41-NEXT: psllw $4, %xmm3
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
|
||||
; SSE41-NEXT: movdqa %xmm2, %xmm3
|
||||
; SSE41-NEXT: psllw $2, %xmm3
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
|
||||
; SSE41-NEXT: paddb %xmm1, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
|
||||
@ -690,10 +690,10 @@ define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpsllw $4, %xmm0, %xmm2
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpsllw $2, %xmm0, %xmm2
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
|
||||
; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpaddb %xmm0, %xmm0, %xmm2
|
||||
@ -753,7 +753,7 @@ define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
|
||||
; X86-SSE-NEXT: pandn %xmm0, %xmm4
|
||||
; X86-SSE-NEXT: psllw $4, %xmm0
|
||||
; X86-SSE-NEXT: pand %xmm3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: por %xmm4, %xmm0
|
||||
; X86-SSE-NEXT: paddb %xmm1, %xmm1
|
||||
; X86-SSE-NEXT: pxor %xmm3, %xmm3
|
||||
@ -762,7 +762,7 @@ define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
|
||||
; X86-SSE-NEXT: pandn %xmm0, %xmm4
|
||||
; X86-SSE-NEXT: psllw $2, %xmm0
|
||||
; X86-SSE-NEXT: pand %xmm3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: por %xmm4, %xmm0
|
||||
; X86-SSE-NEXT: paddb %xmm1, %xmm1
|
||||
; X86-SSE-NEXT: pcmpgtb %xmm1, %xmm2
|
||||
@ -1291,27 +1291,27 @@ define <2 x i32> @constant_shift_v2i32(<2 x i32> %a) nounwind {
|
||||
;
|
||||
; AVX2-LABEL: constant_shift_v2i32:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: constant_shift_v2i32:
|
||||
; XOPAVX1: # %bb.0:
|
||||
; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX1-NEXT: retq
|
||||
;
|
||||
; XOPAVX2-LABEL: constant_shift_v2i32:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOPAVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: constant_shift_v2i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: constant_shift_v2i32:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: constant_shift_v2i32:
|
||||
@ -1330,22 +1330,22 @@ define <2 x i32> @constant_shift_v2i32(<2 x i32> %a) nounwind {
|
||||
define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind {
|
||||
; SSE-LABEL: constant_shift_v4i16:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: constant_shift_v4i16:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: constant_shift_v4i16:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constant_shift_v4i16:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: constant_shift_v4i16:
|
||||
@ -1359,17 +1359,17 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind {
|
||||
;
|
||||
; AVX512DQVL-LABEL: constant_shift_v4i16:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: retq
|
||||
;
|
||||
; AVX512BWVL-LABEL: constant_shift_v4i16:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpsllvw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: constant_shift_v4i16:
|
||||
; X86-SSE: # %bb.0:
|
||||
; X86-SSE-NEXT: pmullw {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
%shift = shl <4 x i16> %a, <i16 0, i16 1, i16 2, i16 3>
|
||||
ret <4 x i16> %shift
|
||||
@ -1378,7 +1378,7 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind {
|
||||
define <2 x i16> @constant_shift_v2i16(<2 x i16> %a) nounwind {
|
||||
; SSE2-LABEL: constant_shift_v2i16:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: constant_shift_v2i16:
|
||||
@ -1398,7 +1398,7 @@ define <2 x i16> @constant_shift_v2i16(<2 x i16> %a) nounwind {
|
||||
;
|
||||
; XOP-LABEL: constant_shift_v2i16:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constant_shift_v2i16:
|
||||
@ -1426,12 +1426,12 @@ define <2 x i16> @constant_shift_v2i16(<2 x i16> %a) nounwind {
|
||||
;
|
||||
; AVX512BWVL-LABEL: constant_shift_v2i16:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpsllvw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: constant_shift_v2i16:
|
||||
; X86-SSE: # %bb.0:
|
||||
; X86-SSE-NEXT: pmullw {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
%shift = shl <2 x i16> %a, <i16 2, i16 3>
|
||||
ret <2 x i16> %shift
|
||||
@ -1441,8 +1441,8 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; SSE2-LABEL: constant_shift_v8i8:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
@ -1450,8 +1450,8 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; SSE41-LABEL: constant_shift_v8i8:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm0
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
@ -1459,8 +1459,8 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; AVX1-LABEL: constant_shift_v8i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
@ -1468,8 +1468,8 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; AVX2-LABEL: constant_shift_v8i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vzeroupper
|
||||
@ -1477,13 +1477,13 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
;
|
||||
; XOP-LABEL: constant_shift_v8i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constant_shift_v8i8:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512DQ-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
@ -1501,7 +1501,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; AVX512DQVL-LABEL: constant_shift_v8i8:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512DQVL-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vzeroupper
|
||||
; AVX512DQVL-NEXT: retq
|
||||
@ -1509,7 +1509,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; AVX512BWVL-LABEL: constant_shift_v8i8:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512BWVL-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vzeroupper
|
||||
; AVX512BWVL-NEXT: retq
|
||||
@ -1517,8 +1517,8 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; X86-SSE-LABEL: constant_shift_v8i8:
|
||||
; X86-SSE: # %bb.0:
|
||||
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; X86-SSE-NEXT: pmullw {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pxor %xmm1, %xmm1
|
||||
; X86-SSE-NEXT: packuswb %xmm1, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
@ -1530,8 +1530,8 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; SSE2-LABEL: constant_shift_v4i8:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
@ -1539,8 +1539,8 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; SSE41-LABEL: constant_shift_v4i8:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm0
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
@ -1548,8 +1548,8 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; AVX1-LABEL: constant_shift_v4i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
@ -1557,8 +1557,8 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; AVX2-LABEL: constant_shift_v4i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vzeroupper
|
||||
@ -1566,13 +1566,13 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
;
|
||||
; XOP-LABEL: constant_shift_v4i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constant_shift_v4i8:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512DQ-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
@ -1590,7 +1590,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; AVX512DQVL-LABEL: constant_shift_v4i8:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512DQVL-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vzeroupper
|
||||
; AVX512DQVL-NEXT: retq
|
||||
@ -1598,7 +1598,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; AVX512BWVL-LABEL: constant_shift_v4i8:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512BWVL-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vzeroupper
|
||||
; AVX512BWVL-NEXT: retq
|
||||
@ -1606,8 +1606,8 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; X86-SSE-LABEL: constant_shift_v4i8:
|
||||
; X86-SSE: # %bb.0:
|
||||
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; X86-SSE-NEXT: pmullw {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pxor %xmm1, %xmm1
|
||||
; X86-SSE-NEXT: packuswb %xmm1, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
@ -1619,8 +1619,8 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; SSE2-LABEL: constant_shift_v2i8:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: pmullw {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE2-NEXT: packuswb %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
@ -1628,8 +1628,8 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; SSE41-LABEL: constant_shift_v2i8:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm0
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE41-NEXT: packuswb %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
@ -1637,8 +1637,8 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; AVX1-LABEL: constant_shift_v2i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
@ -1646,8 +1646,8 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; AVX2-LABEL: constant_shift_v2i8:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vzeroupper
|
||||
@ -1655,13 +1655,13 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
;
|
||||
; XOP-LABEL: constant_shift_v2i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: constant_shift_v2i8:
|
||||
; AVX512DQ: # %bb.0:
|
||||
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512DQ-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
@ -1679,7 +1679,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; AVX512DQVL-LABEL: constant_shift_v2i8:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512DQVL-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: vzeroupper
|
||||
; AVX512DQVL-NEXT: retq
|
||||
@ -1687,7 +1687,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; AVX512BWVL-LABEL: constant_shift_v2i8:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512BWVL-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vzeroupper
|
||||
; AVX512BWVL-NEXT: retq
|
||||
@ -1695,8 +1695,8 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; X86-SSE-LABEL: constant_shift_v2i8:
|
||||
; X86-SSE: # %bb.0:
|
||||
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; X86-SSE-NEXT: pmullw {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pxor %xmm1, %xmm1
|
||||
; X86-SSE-NEXT: packuswb %xmm1, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
@ -1814,36 +1814,36 @@ define <8 x i8> @splatconstant_shift_v8i8(<8 x i8> %a) nounwind {
|
||||
; SSE-LABEL: splatconstant_shift_v8i8:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: psllw $3, %xmm0
|
||||
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: splatconstant_shift_v8i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: splatconstant_shift_v8i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_shift_v8i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatconstant_shift_v8i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: splatconstant_shift_v8i8:
|
||||
; X86-SSE: # %bb.0:
|
||||
; X86-SSE-NEXT: psllw $3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
%shift = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
|
||||
ret <8 x i8> %shift
|
||||
@ -1853,36 +1853,36 @@ define <4 x i8> @splatconstant_shift_v4i8(<4 x i8> %a) nounwind {
|
||||
; SSE-LABEL: splatconstant_shift_v4i8:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: psllw $3, %xmm0
|
||||
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: splatconstant_shift_v4i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: splatconstant_shift_v4i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_shift_v4i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatconstant_shift_v4i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: splatconstant_shift_v4i8:
|
||||
; X86-SSE: # %bb.0:
|
||||
; X86-SSE-NEXT: psllw $3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
%shift = shl <4 x i8> %a, <i8 3, i8 3, i8 3, i8 3>
|
||||
ret <4 x i8> %shift
|
||||
@ -1892,36 +1892,36 @@ define <2 x i8> @splatconstant_shift_v2i8(<2 x i8> %a) nounwind {
|
||||
; SSE-LABEL: splatconstant_shift_v2i8:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: psllw $3, %xmm0
|
||||
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: splatconstant_shift_v2i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: splatconstant_shift_v2i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_shift_v2i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: splatconstant_shift_v2i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpsllw $3, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; X86-SSE-LABEL: splatconstant_shift_v2i8:
|
||||
; X86-SSE: # %bb.0:
|
||||
; X86-SSE-NEXT: psllw $3, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
|
||||
; X86-SSE-NEXT: retl
|
||||
%shift = shl <2 x i8> %a, <i8 3, i8 3>
|
||||
ret <2 x i8> %shift
|
||||
|
Loading…
Reference in New Issue
Block a user