
[X86] Add avx512f only command lines to the vector add/sub saturation tests. NFC

Gives us coverage of splitting v32i16/v64i8 operations when we have
avx512f but not avx512bw.

We are considering making v32i16/v64i8 legal types on avx512f, which
will need this test coverage.
Craig Topper 2020-03-14 16:28:28 -07:00
parent 16ceeef4af
commit fa8604e589
4 changed files with 560 additions and 247 deletions
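To make the new coverage concrete, here is a reduced, illustrative test in the same shape as the files below, assembled from the new avx512f RUN line and the v32i16 function in the first diff (a sketch for the reader, not part of the commit):

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512F

declare <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16>, <32 x i16>)

define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
; With avx512f but without avx512bw there is no 512-bit word-granularity
; saturating add, so the operation is split into two 256-bit vpaddsw
; halves and reassembled:
; AVX512F-LABEL: v32i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT:    vpaddsw %ymm2, %ymm3, %ymm2
; AVX512F-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT:    retq
  %z = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
  ret <32 x i16> %z
}

Under the avx512bw RUN line the same function instead selects a single vpaddsw %zmm1, %zmm0, %zmm0; that delta is exactly what the new AVX512F/AVX512BW check prefixes pin down.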

test/CodeGen/X86/sadd_sat_vec.ll

@@ -4,7 +4,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW

 declare <1 x i8> @llvm.sadd.sat.v1i8(<1 x i8>, <1 x i8>)
 declare <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>)
@@ -111,10 +112,19 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
 ; AVX2-NEXT:    vpaddsb %ymm3, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v64i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpaddsb %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v64i8:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpaddsb %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v64i8:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpaddsb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    retq
   %z = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
   ret <64 x i8> %z
 }
@@ -191,10 +201,19 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
 ; AVX2-NEXT:    vpaddsw %ymm3, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v32i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v32i16:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpaddsw %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v32i16:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    retq
   %z = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
   ret <32 x i16> %z
 }
@@ -551,15 +570,28 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
 ; AVX2-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v16i1:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsllw $7, %xmm1, %xmm1
-; AVX512-NEXT:    vpmovb2m %xmm1, %k0
-; AVX512-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX512-NEXT:    vpmovb2m %xmm0, %k1
-; AVX512-NEXT:    korw %k0, %k1, %k0
-; AVX512-NEXT:    vpmovm2b %k0, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v16i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
+; AVX512F-NEXT:    vpslld $31, %zmm1, %zmm1
+; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
+; AVX512F-NEXT:    vpord %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v16i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpsllw $7, %xmm1, %xmm1
+; AVX512BW-NEXT:    vpmovb2m %xmm1, %k0
+; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpmovb2m %xmm0, %k1
+; AVX512BW-NEXT:    korw %k0, %k1, %k0
+; AVX512BW-NEXT:    vpmovm2b %k0, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <16 x i1> @llvm.sadd.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
   ret <16 x i1> %z
 }
@@ -639,19 +671,30 @@ define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
 ; AVX2-NEXT:    vblendvps %xmm0, %xmm3, %xmm2, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v2i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm2, %k0
-; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
-; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %k1
-; AVX512-NEXT:    kxorw %k1, %k0, %k1
-; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm2, %k2
-; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
-; AVX512-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
-; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm1 {%k1}
-; AVX512-NEXT:    vmovdqa %xmm1, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v2i32:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpaddd %xmm1, %xmm0, %xmm2
+; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm3 = [2147483647,2147483647,2147483647,2147483647]
+; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
+; AVX512F-NEXT:    vblendvps %xmm2, %xmm3, %xmm4, %xmm3
+; AVX512F-NEXT:    vpcmpgtd %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT:    vpxor %xmm0, %xmm1, %xmm0
+; AVX512F-NEXT:    vblendvps %xmm0, %xmm3, %xmm2, %xmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v2i32:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm2, %k0
+; AVX512BW-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
+; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm0, %k1
+; AVX512BW-NEXT:    kxorw %k1, %k0, %k1
+; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm2, %k2
+; AVX512BW-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
+; AVX512BW-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
+; AVX512BW-NEXT:    vmovdqa32 %xmm0, %xmm1 {%k1}
+; AVX512BW-NEXT:    vmovdqa %xmm1, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %x, <2 x i32> %y)
   ret <2 x i32> %z
 }
@@ -729,19 +772,30 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; AVX2-NEXT:    vblendvps %xmm0, %xmm3, %xmm2, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v4i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm2, %k0
-; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
-; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %k1
-; AVX512-NEXT:    kxorw %k1, %k0, %k1
-; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm2, %k2
-; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
-; AVX512-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
-; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm1 {%k1}
-; AVX512-NEXT:    vmovdqa %xmm1, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v4i32:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpaddd %xmm1, %xmm0, %xmm2
+; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm3 = [2147483647,2147483647,2147483647,2147483647]
+; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
+; AVX512F-NEXT:    vblendvps %xmm2, %xmm3, %xmm4, %xmm3
+; AVX512F-NEXT:    vpcmpgtd %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT:    vpxor %xmm0, %xmm1, %xmm0
+; AVX512F-NEXT:    vblendvps %xmm0, %xmm3, %xmm2, %xmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v4i32:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm2, %k0
+; AVX512BW-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
+; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm0, %k1
+; AVX512BW-NEXT:    kxorw %k1, %k0, %k1
+; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm2, %k2
+; AVX512BW-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
+; AVX512BW-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
+; AVX512BW-NEXT:    vmovdqa32 %xmm0, %xmm1 {%k1}
+; AVX512BW-NEXT:    vmovdqa %xmm1, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
   ret <4 x i32> %z
 }
@@ -866,19 +920,30 @@ define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
 ; AVX2-NEXT:    vblendvps %ymm0, %ymm3, %ymm2, %ymm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v8i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm2, %k0
-; AVX512-NEXT:    vpaddd %ymm1, %ymm0, %ymm1
-; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1
-; AVX512-NEXT:    kxorw %k1, %k0, %k1
-; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm2, %k2
-; AVX512-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
-; AVX512-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm0 {%k2}
-; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k1}
-; AVX512-NEXT:    vmovdqa %ymm1, %ymm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v8i32:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpaddd %ymm1, %ymm0, %ymm2
+; AVX512F-NEXT:    vbroadcastss {{.*#+}} ymm3 = [2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647]
+; AVX512F-NEXT:    vbroadcastss {{.*#+}} ymm4 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
+; AVX512F-NEXT:    vblendvps %ymm2, %ymm3, %ymm4, %ymm3
+; AVX512F-NEXT:    vpcmpgtd %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT:    vpxor %ymm0, %ymm1, %ymm0
+; AVX512F-NEXT:    vblendvps %ymm0, %ymm3, %ymm2, %ymm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v8i32:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpcmpgtd %ymm1, %ymm2, %k0
+; AVX512BW-NEXT:    vpaddd %ymm1, %ymm0, %ymm1
+; AVX512BW-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1
+; AVX512BW-NEXT:    kxorw %k1, %k0, %k1
+; AVX512BW-NEXT:    vpcmpgtd %ymm1, %ymm2, %k2
+; AVX512BW-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
+; AVX512BW-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm0 {%k2}
+; AVX512BW-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k1}
+; AVX512BW-NEXT:    vmovdqa %ymm1, %ymm0
+; AVX512BW-NEXT:    retq
   %z = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> %x, <8 x i32> %y)
   ret <8 x i32> %z
 }
@@ -1221,19 +1286,29 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
 ; AVX2-NEXT:    vblendvpd %xmm0, %xmm3, %xmm2, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v2i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vpcmpgtq %xmm1, %xmm2, %k0
-; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm1
-; AVX512-NEXT:    vpcmpgtq %xmm1, %xmm0, %k1
-; AVX512-NEXT:    kxorw %k1, %k0, %k1
-; AVX512-NEXT:    vpcmpgtq %xmm1, %xmm2, %k2
-; AVX512-NEXT:    vmovdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
-; AVX512-NEXT:    vmovdqa64 {{.*}}(%rip), %xmm0 {%k2}
-; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm1 {%k1}
-; AVX512-NEXT:    vmovdqa %xmm1, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v2i64:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpaddq %xmm1, %xmm0, %xmm2
+; AVX512F-NEXT:    vmovapd {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX512F-NEXT:    vblendvpd %xmm2, {{.*}}(%rip), %xmm3, %xmm3
+; AVX512F-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT:    vpxor %xmm0, %xmm1, %xmm0
+; AVX512F-NEXT:    vblendvpd %xmm0, %xmm3, %xmm2, %xmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v2i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpcmpgtq %xmm1, %xmm2, %k0
+; AVX512BW-NEXT:    vpaddq %xmm1, %xmm0, %xmm1
+; AVX512BW-NEXT:    vpcmpgtq %xmm1, %xmm0, %k1
+; AVX512BW-NEXT:    kxorw %k1, %k0, %k1
+; AVX512BW-NEXT:    vpcmpgtq %xmm1, %xmm2, %k2
+; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; AVX512BW-NEXT:    vmovdqa64 {{.*}}(%rip), %xmm0 {%k2}
+; AVX512BW-NEXT:    vmovdqa64 %xmm0, %xmm1 {%k1}
+; AVX512BW-NEXT:    vmovdqa %xmm1, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
   ret <2 x i64> %z
 }
@@ -1426,19 +1501,30 @@ define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
 ; AVX2-NEXT:    vblendvpd %ymm0, %ymm3, %ymm2, %ymm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v4i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm2, %k0
-; AVX512-NEXT:    vpaddq %ymm1, %ymm0, %ymm1
-; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
-; AVX512-NEXT:    kxorw %k1, %k0, %k1
-; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm2, %k2
-; AVX512-NEXT:    vpbroadcastq {{.*#+}} ymm0 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX512-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm0 {%k2}
-; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm1 {%k1}
-; AVX512-NEXT:    vmovdqa %ymm1, %ymm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v4i64:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpaddq %ymm1, %ymm0, %ymm2
+; AVX512F-NEXT:    vbroadcastsd {{.*#+}} ymm3 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807]
+; AVX512F-NEXT:    vbroadcastsd {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX512F-NEXT:    vblendvpd %ymm2, %ymm3, %ymm4, %ymm3
+; AVX512F-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT:    vpxor %ymm0, %ymm1, %ymm0
+; AVX512F-NEXT:    vblendvpd %ymm0, %ymm3, %ymm2, %ymm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v4i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpcmpgtq %ymm1, %ymm2, %k0
+; AVX512BW-NEXT:    vpaddq %ymm1, %ymm0, %ymm1
+; AVX512BW-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
+; AVX512BW-NEXT:    kxorw %k1, %k0, %k1
+; AVX512BW-NEXT:    vpcmpgtq %ymm1, %ymm2, %k2
+; AVX512BW-NEXT:    vpbroadcastq {{.*#+}} ymm0 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX512BW-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm0 {%k2}
+; AVX512BW-NEXT:    vmovdqa64 %ymm0, %ymm1 {%k1}
+; AVX512BW-NEXT:    vmovdqa %ymm1, %ymm0
+; AVX512BW-NEXT:    retq
   %z = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
   ret <4 x i64> %z
 }

test/CodeGen/X86/ssub_sat_vec.ll

@@ -4,7 +4,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW

 declare <1 x i8> @llvm.ssub.sat.v1i8(<1 x i8>, <1 x i8>)
 declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>)
@@ -111,10 +112,19 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
 ; AVX2-NEXT:    vpsubsb %ymm3, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v64i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsubsb %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v64i8:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpsubsb %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v64i8:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpsubsb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    retq
   %z = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
   ret <64 x i8> %z
 }
@@ -191,10 +201,19 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
 ; AVX2-NEXT:    vpsubsw %ymm3, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v32i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v32i16:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpsubsw %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v32i16:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    retq
   %z = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
   ret <32 x i16> %z
 }
@@ -547,15 +566,28 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
 ; AVX2-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v16i1:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX512-NEXT:    vpmovb2m %xmm0, %k0
-; AVX512-NEXT:    vpsllw $7, %xmm1, %xmm0
-; AVX512-NEXT:    vpmovb2m %xmm0, %k1
-; AVX512-NEXT:    kandnw %k0, %k1, %k0
-; AVX512-NEXT:    vpmovm2b %k0, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v16i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
+; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
+; AVX512F-NEXT:    vpslld $31, %zmm1, %zmm1
+; AVX512F-NEXT:    vptestnmd %zmm1, %zmm1, %k1
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1 {%k1}
+; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v16i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpmovb2m %xmm0, %k0
+; AVX512BW-NEXT:    vpsllw $7, %xmm1, %xmm0
+; AVX512BW-NEXT:    vpmovb2m %xmm0, %k1
+; AVX512BW-NEXT:    kandnw %k0, %k1, %k0
+; AVX512BW-NEXT:    vpmovm2b %k0, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <16 x i1> @llvm.ssub.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
   ret <16 x i1> %z
 }
@@ -641,19 +673,32 @@ define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
 ; AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v2i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vpcmpgtd %xmm2, %xmm1, %k0
-; AVX512-NEXT:    vpsubd %xmm1, %xmm0, %xmm1
-; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %k1
-; AVX512-NEXT:    kxorw %k1, %k0, %k1
-; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm2, %k2
-; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
-; AVX512-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
-; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm1 {%k1}
-; AVX512-NEXT:    vmovdqa %xmm1, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v2i32:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512F-NEXT:    vpcmpgtd %xmm2, %xmm1, %xmm2
+; AVX512F-NEXT:    vpsubd %xmm1, %xmm0, %xmm1
+; AVX512F-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpxor %xmm0, %xmm2, %xmm0
+; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
+; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
+; AVX512F-NEXT:    vblendvps %xmm1, %xmm2, %xmm3, %xmm2
+; AVX512F-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v2i32:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpcmpgtd %xmm2, %xmm1, %k0
+; AVX512BW-NEXT:    vpsubd %xmm1, %xmm0, %xmm1
+; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm0, %k1
+; AVX512BW-NEXT:    kxorw %k1, %k0, %k1
+; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm2, %k2
+; AVX512BW-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
+; AVX512BW-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
+; AVX512BW-NEXT:    vmovdqa32 %xmm0, %xmm1 {%k1}
+; AVX512BW-NEXT:    vmovdqa %xmm1, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %x, <2 x i32> %y)
   ret <2 x i32> %z
 }
@@ -737,19 +782,32 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v4i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vpcmpgtd %xmm2, %xmm1, %k0
-; AVX512-NEXT:    vpsubd %xmm1, %xmm0, %xmm1
-; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %k1
-; AVX512-NEXT:    kxorw %k1, %k0, %k1
-; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm2, %k2
-; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
-; AVX512-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
-; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm1 {%k1}
-; AVX512-NEXT:    vmovdqa %xmm1, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v4i32:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512F-NEXT:    vpcmpgtd %xmm2, %xmm1, %xmm2
+; AVX512F-NEXT:    vpsubd %xmm1, %xmm0, %xmm1
+; AVX512F-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpxor %xmm0, %xmm2, %xmm0
+; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
+; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
+; AVX512F-NEXT:    vblendvps %xmm1, %xmm2, %xmm3, %xmm2
+; AVX512F-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v4i32:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpcmpgtd %xmm2, %xmm1, %k0
+; AVX512BW-NEXT:    vpsubd %xmm1, %xmm0, %xmm1
+; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm0, %k1
+; AVX512BW-NEXT:    kxorw %k1, %k0, %k1
+; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm2, %k2
+; AVX512BW-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
+; AVX512BW-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
+; AVX512BW-NEXT:    vmovdqa32 %xmm0, %xmm1 {%k1}
+; AVX512BW-NEXT:    vmovdqa %xmm1, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
   ret <4 x i32> %z
 }
@@ -883,19 +941,32 @@ define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
 ; AVX2-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v8i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vpcmpgtd %ymm2, %ymm1, %k0
-; AVX512-NEXT:    vpsubd %ymm1, %ymm0, %ymm1
-; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1
-; AVX512-NEXT:    kxorw %k1, %k0, %k1
-; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm2, %k2
-; AVX512-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
-; AVX512-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm0 {%k2}
-; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k1}
-; AVX512-NEXT:    vmovdqa %ymm1, %ymm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v8i32:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512F-NEXT:    vpcmpgtd %ymm2, %ymm1, %ymm2
+; AVX512F-NEXT:    vpsubd %ymm1, %ymm0, %ymm1
+; AVX512F-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vpxor %ymm0, %ymm2, %ymm0
+; AVX512F-NEXT:    vbroadcastss {{.*#+}} ymm2 = [2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647]
+; AVX512F-NEXT:    vbroadcastss {{.*#+}} ymm3 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
+; AVX512F-NEXT:    vblendvps %ymm1, %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v8i32:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpcmpgtd %ymm2, %ymm1, %k0
+; AVX512BW-NEXT:    vpsubd %ymm1, %ymm0, %ymm1
+; AVX512BW-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1
+; AVX512BW-NEXT:    kxorw %k1, %k0, %k1
+; AVX512BW-NEXT:    vpcmpgtd %ymm1, %ymm2, %k2
+; AVX512BW-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
+; AVX512BW-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm0 {%k2}
+; AVX512BW-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k1}
+; AVX512BW-NEXT:    vmovdqa %ymm1, %ymm0
+; AVX512BW-NEXT:    retq
   %z = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %x, <8 x i32> %y)
   ret <8 x i32> %z
 }
@@ -1280,19 +1351,31 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
 ; AVX2-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v2i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vpcmpgtq %xmm2, %xmm1, %k0
-; AVX512-NEXT:    vpsubq %xmm1, %xmm0, %xmm1
-; AVX512-NEXT:    vpcmpgtq %xmm1, %xmm0, %k1
-; AVX512-NEXT:    kxorw %k1, %k0, %k1
-; AVX512-NEXT:    vpcmpgtq %xmm1, %xmm2, %k2
-; AVX512-NEXT:    vmovdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
-; AVX512-NEXT:    vmovdqa64 {{.*}}(%rip), %xmm0 {%k2}
-; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm1 {%k1}
-; AVX512-NEXT:    vmovdqa %xmm1, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v2i64:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512F-NEXT:    vpcmpgtq %xmm2, %xmm1, %xmm2
+; AVX512F-NEXT:    vpsubq %xmm1, %xmm0, %xmm1
+; AVX512F-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpxor %xmm0, %xmm2, %xmm0
+; AVX512F-NEXT:    vmovapd {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX512F-NEXT:    vblendvpd %xmm1, {{.*}}(%rip), %xmm2, %xmm2
+; AVX512F-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v2i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpcmpgtq %xmm2, %xmm1, %k0
+; AVX512BW-NEXT:    vpsubq %xmm1, %xmm0, %xmm1
+; AVX512BW-NEXT:    vpcmpgtq %xmm1, %xmm0, %k1
+; AVX512BW-NEXT:    kxorw %k1, %k0, %k1
+; AVX512BW-NEXT:    vpcmpgtq %xmm1, %xmm2, %k2
+; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; AVX512BW-NEXT:    vmovdqa64 {{.*}}(%rip), %xmm0 {%k2}
+; AVX512BW-NEXT:    vmovdqa64 %xmm0, %xmm1 {%k1}
+; AVX512BW-NEXT:    vmovdqa %xmm1, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
   ret <2 x i64> %z
 }
@@ -1532,19 +1615,32 @@ define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
 ; AVX2-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v4i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vpcmpgtq %ymm2, %ymm1, %k0
-; AVX512-NEXT:    vpsubq %ymm1, %ymm0, %ymm1
-; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
-; AVX512-NEXT:    kxorw %k1, %k0, %k1
-; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm2, %k2
-; AVX512-NEXT:    vpbroadcastq {{.*#+}} ymm0 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX512-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm0 {%k2}
-; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm1 {%k1}
-; AVX512-NEXT:    vmovdqa %ymm1, %ymm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v4i64:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512F-NEXT:    vpcmpgtq %ymm2, %ymm1, %ymm2
+; AVX512F-NEXT:    vpsubq %ymm1, %ymm0, %ymm1
+; AVX512F-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vpxor %ymm0, %ymm2, %ymm0
+; AVX512F-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807]
+; AVX512F-NEXT:    vbroadcastsd {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX512F-NEXT:    vblendvpd %ymm1, %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v4i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpcmpgtq %ymm2, %ymm1, %k0
+; AVX512BW-NEXT:    vpsubq %ymm1, %ymm0, %ymm1
+; AVX512BW-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
+; AVX512BW-NEXT:    kxorw %k1, %k0, %k1
+; AVX512BW-NEXT:    vpcmpgtq %ymm1, %ymm2, %k2
+; AVX512BW-NEXT:    vpbroadcastq {{.*#+}} ymm0 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX512BW-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm0 {%k2}
+; AVX512BW-NEXT:    vmovdqa64 %ymm0, %ymm1 {%k1}
+; AVX512BW-NEXT:    vmovdqa %ymm1, %ymm0
+; AVX512BW-NEXT:    retq
   %z = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
   ret <4 x i64> %z
 }

test/CodeGen/X86/uadd_sat_vec.ll

@@ -4,7 +4,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW

 declare <1 x i8> @llvm.uadd.sat.v1i8(<1 x i8>, <1 x i8>)
 declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>)
@@ -111,10 +112,19 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
 ; AVX2-NEXT:    vpaddusb %ymm3, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v64i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpaddusb %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v64i8:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpaddusb %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v64i8:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpaddusb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    retq
   %z = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
   ret <64 x i8> %z
 }
@@ -191,10 +201,19 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
 ; AVX2-NEXT:    vpaddusw %ymm3, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v32i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v32i16:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpaddusw %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v32i16:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    retq
   %z = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
   ret <32 x i16> %z
 }
@@ -524,15 +543,28 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
 ; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v16i1:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsllw $7, %xmm1, %xmm1
-; AVX512-NEXT:    vpmovb2m %xmm1, %k0
-; AVX512-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX512-NEXT:    vpmovb2m %xmm0, %k1
-; AVX512-NEXT:    korw %k0, %k1, %k0
-; AVX512-NEXT:    vpmovm2b %k0, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v16i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
+; AVX512F-NEXT:    vpslld $31, %zmm1, %zmm1
+; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
+; AVX512F-NEXT:    vpord %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v16i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpsllw $7, %xmm1, %xmm1
+; AVX512BW-NEXT:    vpmovb2m %xmm1, %k0
+; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpmovb2m %xmm0, %k1
+; AVX512BW-NEXT:    korw %k0, %k1, %k0
+; AVX512BW-NEXT:    vpmovm2b %k0, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <16 x i1> @llvm.uadd.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
   ret <16 x i1> %z
 }
@@ -584,13 +616,23 @@ define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
 ; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v2i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
-; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
-; AVX512-NEXT:    vpminud %xmm2, %xmm0, %xmm0
-; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v2i32:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512F-NEXT:    vmovdqa64 %zmm1, %zmm2
+; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm2
+; AVX512F-NEXT:    vpminud %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v2i32:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vmovdqa %xmm1, %xmm2
+; AVX512BW-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
+; AVX512BW-NEXT:    vpminud %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %x, <2 x i32> %y)
   ret <2 x i32> %z
 }
@@ -640,13 +682,23 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v4i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
-; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
-; AVX512-NEXT:    vpminud %xmm2, %xmm0, %xmm0
-; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v4i32:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512F-NEXT:    vmovdqa64 %zmm1, %zmm2
+; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm2
+; AVX512F-NEXT:    vpminud %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v4i32:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vmovdqa %xmm1, %xmm2
+; AVX512BW-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
+; AVX512BW-NEXT:    vpminud %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
   ret <4 x i32> %z
 }
@@ -719,13 +771,22 @@ define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
 ; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v8i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vmovdqa %ymm1, %ymm2
-; AVX512-NEXT:    vpternlogq $15, %ymm1, %ymm1, %ymm2
-; AVX512-NEXT:    vpminud %ymm2, %ymm0, %ymm0
-; AVX512-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v8i32:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512F-NEXT:    vmovdqa64 %zmm1, %zmm2
+; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm2
+; AVX512F-NEXT:    vpminud %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v8i32:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vmovdqa %ymm1, %ymm2
+; AVX512BW-NEXT:    vpternlogq $15, %ymm1, %ymm1, %ymm2
+; AVX512BW-NEXT:    vpminud %ymm2, %ymm0, %ymm0
+; AVX512BW-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT:    retq
   %z = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> %x, <8 x i32> %y)
   ret <8 x i32> %z
 }
@@ -926,13 +987,24 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
 ; AVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v2i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vmovdqa %xmm1, %xmm2
-; AVX512-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
-; AVX512-NEXT:    vpminuq %xmm2, %xmm0, %xmm0
-; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v2i64:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT:    vmovdqa64 %zmm1, %zmm2
+; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm2
+; AVX512F-NEXT:    vpminuq %zmm2, %zmm0, %zmm0
+; AVX512F-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v2i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vmovdqa %xmm1, %xmm2
+; AVX512BW-NEXT:    vpternlogq $15, %xmm1, %xmm1, %xmm2
+; AVX512BW-NEXT:    vpminuq %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
   ret <2 x i64> %z
 }
@@ -1063,13 +1135,23 @@ define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
 ; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v4i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vmovdqa %ymm1, %ymm2
-; AVX512-NEXT:    vpternlogq $15, %ymm1, %ymm1, %ymm2
-; AVX512-NEXT:    vpminuq %ymm2, %ymm0, %ymm0
-; AVX512-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v4i64:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT:    vmovdqa64 %zmm1, %zmm2
+; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm2
+; AVX512F-NEXT:    vpminuq %zmm2, %zmm0, %zmm0
+; AVX512F-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v4i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vmovdqa %ymm1, %ymm2
+; AVX512BW-NEXT:    vpternlogq $15, %ymm1, %ymm1, %ymm2
+; AVX512BW-NEXT:    vpminuq %ymm2, %ymm0, %ymm0
+; AVX512BW-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT:    retq
   %z = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
   ret <4 x i64> %z
 }

test/CodeGen/X86/usub_sat_vec.ll

@@ -4,7 +4,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW

 declare <1 x i8> @llvm.usub.sat.v1i8(<1 x i8>, <1 x i8>)
 declare <2 x i8> @llvm.usub.sat.v2i8(<2 x i8>, <2 x i8>)
@@ -111,10 +112,19 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
 ; AVX2-NEXT:    vpsubusb %ymm3, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v64i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsubusb %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v64i8:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpsubusb %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v64i8:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpsubusb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    retq
   %z = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
   ret <64 x i8> %z
 }
@@ -191,10 +201,19 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
 ; AVX2-NEXT:    vpsubusw %ymm3, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v32i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v32i16:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpsubusw %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v32i16:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    retq
   %z = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
   ret <32 x i16> %z
 }
@@ -524,15 +543,28 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
 ; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v16i1:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX512-NEXT:    vpmovb2m %xmm0, %k0
-; AVX512-NEXT:    vpsllw $7, %xmm1, %xmm0
-; AVX512-NEXT:    vpmovb2m %xmm0, %k1
-; AVX512-NEXT:    kandnw %k0, %k1, %k0
-; AVX512-NEXT:    vpmovm2b %k0, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v16i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
+; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
+; AVX512F-NEXT:    vpslld $31, %zmm1, %zmm1
+; AVX512F-NEXT:    vptestnmd %zmm1, %zmm1, %k1
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1 {%k1}
+; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v16i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpmovb2m %xmm0, %k0
+; AVX512BW-NEXT:    vpsllw $7, %xmm1, %xmm0
+; AVX512BW-NEXT:    vpmovb2m %xmm0, %k1
+; AVX512BW-NEXT:    kandnw %k0, %k1, %k0
+; AVX512BW-NEXT:    vpmovm2b %k0, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <16 x i1> @llvm.usub.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
   ret <16 x i1> %z
 }
@@ -866,11 +898,20 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
 ; AVX2-NEXT:    vpand %xmm0, %xmm2, %xmm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v2i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v2i64:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v2i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT:    retq
   %z = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
   ret <2 x i64> %z
 }
@@ -998,11 +1039,19 @@ define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
 ; AVX2-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: v4i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpmaxuq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: v4i64:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: v4i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpmaxuq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT:    retq
   %z = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
   ret <4 x i64> %z
 }