mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-21 18:22:53 +01:00
[X86] Add avx512f-only command lines to the vector add/sub saturation tests. NFC
This gives us coverage of splitting v32i16/v64i8 operations when we have avx512f but not avx512bw. We are considering making v32i16/v64i8 legal types on avx512f, which needs this test coverage.
This commit is contained in:
parent
16ceeef4af
commit
fa8604e589
@ -4,7 +4,8 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
|
||||
|
||||
declare <1 x i8> @llvm.sadd.sat.v1i8(<1 x i8>, <1 x i8>)
|
||||
declare <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>)
|
||||
@ -111,10 +112,19 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
|
||||
; AVX2-NEXT: vpaddsb %ymm3, %ymm1, %ymm1
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v64i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpaddsb %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v64i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
|
||||
; AVX512F-NEXT: vpaddsb %ymm2, %ymm3, %ymm2
|
||||
; AVX512F-NEXT: vpaddsb %ymm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v64i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpaddsb %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
|
||||
ret <64 x i8> %z
|
||||
}
|
||||
@ -191,10 +201,19 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
|
||||
; AVX2-NEXT: vpaddsw %ymm3, %ymm1, %ymm1
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v32i16:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpaddsw %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v32i16:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
|
||||
; AVX512F-NEXT: vpaddsw %ymm2, %ymm3, %ymm2
|
||||
; AVX512F-NEXT: vpaddsw %ymm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v32i16:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
|
||||
ret <32 x i16> %z
|
||||
}
|
||||
@ -551,15 +570,28 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
|
||||
; AVX2-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v16i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsllw $7, %xmm1, %xmm1
|
||||
; AVX512-NEXT: vpmovb2m %xmm1, %k0
|
||||
; AVX512-NEXT: vpsllw $7, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovb2m %xmm0, %k1
|
||||
; AVX512-NEXT: korw %k0, %k1, %k0
|
||||
; AVX512-NEXT: vpmovm2b %k0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v16i1:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
|
||||
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
|
||||
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v16i1:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpsllw $7, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpmovb2m %xmm1, %k0
|
||||
; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpmovb2m %xmm0, %k1
|
||||
; AVX512BW-NEXT: korw %k0, %k1, %k0
|
||||
; AVX512BW-NEXT: vpmovm2b %k0, %xmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <16 x i1> @llvm.sadd.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
|
||||
ret <16 x i1> %z
|
||||
}
|
||||
@ -639,19 +671,30 @@ define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
|
||||
; AVX2-NEXT: vblendvps %xmm0, %xmm3, %xmm2, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v2i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm2, %k0
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
|
||||
; AVX512-NEXT: kxorw %k1, %k0, %k1
|
||||
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm2, %k2
|
||||
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
|
||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
|
||||
; AVX512-NEXT: vmovdqa %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v2i32:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpaddd %xmm1, %xmm0, %xmm2
|
||||
; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483647,2147483647,2147483647,2147483647]
|
||||
; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; AVX512F-NEXT: vblendvps %xmm2, %xmm3, %xmm4, %xmm3
|
||||
; AVX512F-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpxor %xmm0, %xmm1, %xmm0
|
||||
; AVX512F-NEXT: vblendvps %xmm0, %xmm3, %xmm2, %xmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v2i32:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm2, %k0
|
||||
; AVX512BW-NEXT: vpaddd %xmm1, %xmm0, %xmm1
|
||||
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
|
||||
; AVX512BW-NEXT: kxorw %k1, %k0, %k1
|
||||
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm2, %k2
|
||||
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; AVX512BW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
|
||||
; AVX512BW-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
|
||||
; AVX512BW-NEXT: vmovdqa %xmm1, %xmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %x, <2 x i32> %y)
|
||||
ret <2 x i32> %z
|
||||
}
|
||||
@ -729,19 +772,30 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||
; AVX2-NEXT: vblendvps %xmm0, %xmm3, %xmm2, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v4i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm2, %k0
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
|
||||
; AVX512-NEXT: kxorw %k1, %k0, %k1
|
||||
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm2, %k2
|
||||
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
|
||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
|
||||
; AVX512-NEXT: vmovdqa %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v4i32:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpaddd %xmm1, %xmm0, %xmm2
|
||||
; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483647,2147483647,2147483647,2147483647]
|
||||
; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; AVX512F-NEXT: vblendvps %xmm2, %xmm3, %xmm4, %xmm3
|
||||
; AVX512F-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpxor %xmm0, %xmm1, %xmm0
|
||||
; AVX512F-NEXT: vblendvps %xmm0, %xmm3, %xmm2, %xmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v4i32:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm2, %k0
|
||||
; AVX512BW-NEXT: vpaddd %xmm1, %xmm0, %xmm1
|
||||
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
|
||||
; AVX512BW-NEXT: kxorw %k1, %k0, %k1
|
||||
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm2, %k2
|
||||
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; AVX512BW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
|
||||
; AVX512BW-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
|
||||
; AVX512BW-NEXT: vmovdqa %xmm1, %xmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
|
||||
ret <4 x i32> %z
|
||||
}
|
||||
@ -866,19 +920,30 @@ define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
|
||||
; AVX2-NEXT: vblendvps %ymm0, %ymm3, %ymm2, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v8i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm2, %k0
|
||||
; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm1
|
||||
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k1
|
||||
; AVX512-NEXT: kxorw %k1, %k0, %k1
|
||||
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm2, %k2
|
||||
; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
|
||||
; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k2}
|
||||
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
|
||||
; AVX512-NEXT: vmovdqa %ymm1, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v8i32:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpaddd %ymm1, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vbroadcastss {{.*#+}} ymm3 = [2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647]
|
||||
; AVX512F-NEXT: vbroadcastss {{.*#+}} ymm4 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
|
||||
; AVX512F-NEXT: vblendvps %ymm2, %ymm3, %ymm4, %ymm3
|
||||
; AVX512F-NEXT: vpcmpgtd %ymm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpxor %ymm0, %ymm1, %ymm0
|
||||
; AVX512F-NEXT: vblendvps %ymm0, %ymm3, %ymm2, %ymm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v8i32:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpcmpgtd %ymm1, %ymm2, %k0
|
||||
; AVX512BW-NEXT: vpaddd %ymm1, %ymm0, %ymm1
|
||||
; AVX512BW-NEXT: vpcmpgtd %ymm1, %ymm0, %k1
|
||||
; AVX512BW-NEXT: kxorw %k1, %k0, %k1
|
||||
; AVX512BW-NEXT: vpcmpgtd %ymm1, %ymm2, %k2
|
||||
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
|
||||
; AVX512BW-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k2}
|
||||
; AVX512BW-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
|
||||
; AVX512BW-NEXT: vmovdqa %ymm1, %ymm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> %x, <8 x i32> %y)
|
||||
ret <8 x i32> %z
|
||||
}
|
||||
@ -1221,19 +1286,29 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
|
||||
; AVX2-NEXT: vblendvpd %xmm0, %xmm3, %xmm2, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v2i64:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512-NEXT: vpcmpgtq %xmm1, %xmm2, %k0
|
||||
; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
|
||||
; AVX512-NEXT: kxorw %k1, %k0, %k1
|
||||
; AVX512-NEXT: vpcmpgtq %xmm1, %xmm2, %k2
|
||||
; AVX512-NEXT: vmovdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
|
||||
; AVX512-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k2}
|
||||
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1}
|
||||
; AVX512-NEXT: vmovdqa %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v2i64:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpaddq %xmm1, %xmm0, %xmm2
|
||||
; AVX512F-NEXT: vmovapd {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
|
||||
; AVX512F-NEXT: vblendvpd %xmm2, {{.*}}(%rip), %xmm3, %xmm3
|
||||
; AVX512F-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpxor %xmm0, %xmm1, %xmm0
|
||||
; AVX512F-NEXT: vblendvpd %xmm0, %xmm3, %xmm2, %xmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v2i64:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm2, %k0
|
||||
; AVX512BW-NEXT: vpaddq %xmm1, %xmm0, %xmm1
|
||||
; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
|
||||
; AVX512BW-NEXT: kxorw %k1, %k0, %k1
|
||||
; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm2, %k2
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
|
||||
; AVX512BW-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k2}
|
||||
; AVX512BW-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1}
|
||||
; AVX512BW-NEXT: vmovdqa %xmm1, %xmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
|
||||
ret <2 x i64> %z
|
||||
}
|
||||
@ -1426,19 +1501,30 @@ define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
|
||||
; AVX2-NEXT: vblendvpd %ymm0, %ymm3, %ymm2, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v4i64:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm2, %k0
|
||||
; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm1
|
||||
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
|
||||
; AVX512-NEXT: kxorw %k1, %k0, %k1
|
||||
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm2, %k2
|
||||
; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm0 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
|
||||
; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm0 {%k2}
|
||||
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
|
||||
; AVX512-NEXT: vmovdqa %ymm1, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v4i64:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpaddq %ymm1, %ymm0, %ymm2
|
||||
; AVX512F-NEXT: vbroadcastsd {{.*#+}} ymm3 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807]
|
||||
; AVX512F-NEXT: vbroadcastsd {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
|
||||
; AVX512F-NEXT: vblendvpd %ymm2, %ymm3, %ymm4, %ymm3
|
||||
; AVX512F-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpxor %ymm0, %ymm1, %ymm0
|
||||
; AVX512F-NEXT: vblendvpd %ymm0, %ymm3, %ymm2, %ymm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v4i64:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpcmpgtq %ymm1, %ymm2, %k0
|
||||
; AVX512BW-NEXT: vpaddq %ymm1, %ymm0, %ymm1
|
||||
; AVX512BW-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
|
||||
; AVX512BW-NEXT: kxorw %k1, %k0, %k1
|
||||
; AVX512BW-NEXT: vpcmpgtq %ymm1, %ymm2, %k2
|
||||
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm0 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
|
||||
; AVX512BW-NEXT: vpbroadcastq {{.*}}(%rip), %ymm0 {%k2}
|
||||
; AVX512BW-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
|
||||
; AVX512BW-NEXT: vmovdqa %ymm1, %ymm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
|
||||
ret <4 x i64> %z
|
||||
}
|
||||
|
@ -4,7 +4,8 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
|
||||
|
||||
declare <1 x i8> @llvm.ssub.sat.v1i8(<1 x i8>, <1 x i8>)
|
||||
declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>)
|
||||
@ -111,10 +112,19 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
|
||||
; AVX2-NEXT: vpsubsb %ymm3, %ymm1, %ymm1
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v64i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsubsb %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v64i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
|
||||
; AVX512F-NEXT: vpsubsb %ymm2, %ymm3, %ymm2
|
||||
; AVX512F-NEXT: vpsubsb %ymm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v64i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpsubsb %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
|
||||
ret <64 x i8> %z
|
||||
}
|
||||
@ -191,10 +201,19 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
|
||||
; AVX2-NEXT: vpsubsw %ymm3, %ymm1, %ymm1
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v32i16:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsubsw %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v32i16:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
|
||||
; AVX512F-NEXT: vpsubsw %ymm2, %ymm3, %ymm2
|
||||
; AVX512F-NEXT: vpsubsw %ymm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v32i16:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
|
||||
ret <32 x i16> %z
|
||||
}
|
||||
@ -547,15 +566,28 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
|
||||
; AVX2-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v16i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsllw $7, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovb2m %xmm0, %k0
|
||||
; AVX512-NEXT: vpsllw $7, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vpmovb2m %xmm0, %k1
|
||||
; AVX512-NEXT: kandnw %k0, %k1, %k0
|
||||
; AVX512-NEXT: vpmovm2b %k0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v16i1:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
|
||||
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
|
||||
; AVX512F-NEXT: vptestnmd %zmm1, %zmm1, %k1
|
||||
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
|
||||
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v16i1:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpmovb2m %xmm0, %k0
|
||||
; AVX512BW-NEXT: vpsllw $7, %xmm1, %xmm0
|
||||
; AVX512BW-NEXT: vpmovb2m %xmm0, %k1
|
||||
; AVX512BW-NEXT: kandnw %k0, %k1, %k0
|
||||
; AVX512BW-NEXT: vpmovm2b %k0, %xmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <16 x i1> @llvm.ssub.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
|
||||
ret <16 x i1> %z
|
||||
}
|
||||
@ -641,19 +673,32 @@ define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
|
||||
; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v2i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512-NEXT: vpcmpgtd %xmm2, %xmm1, %k0
|
||||
; AVX512-NEXT: vpsubd %xmm1, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
|
||||
; AVX512-NEXT: kxorw %k1, %k0, %k1
|
||||
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm2, %k2
|
||||
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
|
||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
|
||||
; AVX512-NEXT: vmovdqa %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v2i32:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm2
|
||||
; AVX512F-NEXT: vpsubd %xmm1, %xmm0, %xmm1
|
||||
; AVX512F-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpxor %xmm0, %xmm2, %xmm0
|
||||
; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
|
||||
; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; AVX512F-NEXT: vblendvps %xmm1, %xmm2, %xmm3, %xmm2
|
||||
; AVX512F-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v2i32:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpcmpgtd %xmm2, %xmm1, %k0
|
||||
; AVX512BW-NEXT: vpsubd %xmm1, %xmm0, %xmm1
|
||||
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
|
||||
; AVX512BW-NEXT: kxorw %k1, %k0, %k1
|
||||
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm2, %k2
|
||||
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; AVX512BW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
|
||||
; AVX512BW-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
|
||||
; AVX512BW-NEXT: vmovdqa %xmm1, %xmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %x, <2 x i32> %y)
|
||||
ret <2 x i32> %z
|
||||
}
|
||||
@ -737,19 +782,32 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||
; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v4i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512-NEXT: vpcmpgtd %xmm2, %xmm1, %k0
|
||||
; AVX512-NEXT: vpsubd %xmm1, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
|
||||
; AVX512-NEXT: kxorw %k1, %k0, %k1
|
||||
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm2, %k2
|
||||
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
|
||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
|
||||
; AVX512-NEXT: vmovdqa %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v4i32:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm2
|
||||
; AVX512F-NEXT: vpsubd %xmm1, %xmm0, %xmm1
|
||||
; AVX512F-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpxor %xmm0, %xmm2, %xmm0
|
||||
; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
|
||||
; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; AVX512F-NEXT: vblendvps %xmm1, %xmm2, %xmm3, %xmm2
|
||||
; AVX512F-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v4i32:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpcmpgtd %xmm2, %xmm1, %k0
|
||||
; AVX512BW-NEXT: vpsubd %xmm1, %xmm0, %xmm1
|
||||
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
|
||||
; AVX512BW-NEXT: kxorw %k1, %k0, %k1
|
||||
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm2, %k2
|
||||
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
|
||||
; AVX512BW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
|
||||
; AVX512BW-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
|
||||
; AVX512BW-NEXT: vmovdqa %xmm1, %xmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
|
||||
ret <4 x i32> %z
|
||||
}
|
||||
@ -883,19 +941,32 @@ define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
|
||||
; AVX2-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v8i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512-NEXT: vpcmpgtd %ymm2, %ymm1, %k0
|
||||
; AVX512-NEXT: vpsubd %ymm1, %ymm0, %ymm1
|
||||
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k1
|
||||
; AVX512-NEXT: kxorw %k1, %k0, %k1
|
||||
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm2, %k2
|
||||
; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
|
||||
; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k2}
|
||||
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
|
||||
; AVX512-NEXT: vmovdqa %ymm1, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v8i32:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vpcmpgtd %ymm2, %ymm1, %ymm2
|
||||
; AVX512F-NEXT: vpsubd %ymm1, %ymm0, %ymm1
|
||||
; AVX512F-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpxor %ymm0, %ymm2, %ymm0
|
||||
; AVX512F-NEXT: vbroadcastss {{.*#+}} ymm2 = [2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647]
|
||||
; AVX512F-NEXT: vbroadcastss {{.*#+}} ymm3 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
|
||||
; AVX512F-NEXT: vblendvps %ymm1, %ymm2, %ymm3, %ymm2
|
||||
; AVX512F-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v8i32:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpcmpgtd %ymm2, %ymm1, %k0
|
||||
; AVX512BW-NEXT: vpsubd %ymm1, %ymm0, %ymm1
|
||||
; AVX512BW-NEXT: vpcmpgtd %ymm1, %ymm0, %k1
|
||||
; AVX512BW-NEXT: kxorw %k1, %k0, %k1
|
||||
; AVX512BW-NEXT: vpcmpgtd %ymm1, %ymm2, %k2
|
||||
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
|
||||
; AVX512BW-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k2}
|
||||
; AVX512BW-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
|
||||
; AVX512BW-NEXT: vmovdqa %ymm1, %ymm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %x, <8 x i32> %y)
|
||||
ret <8 x i32> %z
|
||||
}
|
||||
@ -1280,19 +1351,31 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
|
||||
; AVX2-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v2i64:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512-NEXT: vpcmpgtq %xmm2, %xmm1, %k0
|
||||
; AVX512-NEXT: vpsubq %xmm1, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
|
||||
; AVX512-NEXT: kxorw %k1, %k0, %k1
|
||||
; AVX512-NEXT: vpcmpgtq %xmm1, %xmm2, %k2
|
||||
; AVX512-NEXT: vmovdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
|
||||
; AVX512-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k2}
|
||||
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1}
|
||||
; AVX512-NEXT: vmovdqa %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v2i64:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm2
|
||||
; AVX512F-NEXT: vpsubq %xmm1, %xmm0, %xmm1
|
||||
; AVX512F-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpxor %xmm0, %xmm2, %xmm0
|
||||
; AVX512F-NEXT: vmovapd {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||
; AVX512F-NEXT: vblendvpd %xmm1, {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v2i64:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpcmpgtq %xmm2, %xmm1, %k0
|
||||
; AVX512BW-NEXT: vpsubq %xmm1, %xmm0, %xmm1
|
||||
; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
|
||||
; AVX512BW-NEXT: kxorw %k1, %k0, %k1
|
||||
; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm2, %k2
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
|
||||
; AVX512BW-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k2}
|
||||
; AVX512BW-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1}
|
||||
; AVX512BW-NEXT: vmovdqa %xmm1, %xmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
|
||||
ret <2 x i64> %z
|
||||
}
|
||||
@ -1532,19 +1615,32 @@ define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
|
||||
; AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v4i64:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512-NEXT: vpcmpgtq %ymm2, %ymm1, %k0
|
||||
; AVX512-NEXT: vpsubq %ymm1, %ymm0, %ymm1
|
||||
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
|
||||
; AVX512-NEXT: kxorw %k1, %k0, %k1
|
||||
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm2, %k2
|
||||
; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm0 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
|
||||
; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm0 {%k2}
|
||||
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
|
||||
; AVX512-NEXT: vmovdqa %ymm1, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v4i64:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm2
|
||||
; AVX512F-NEXT: vpsubq %ymm1, %ymm0, %ymm1
|
||||
; AVX512F-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpxor %ymm0, %ymm2, %ymm0
|
||||
; AVX512F-NEXT: vbroadcastsd {{.*#+}} ymm2 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807]
|
||||
; AVX512F-NEXT: vbroadcastsd {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
|
||||
; AVX512F-NEXT: vblendvpd %ymm1, %ymm2, %ymm3, %ymm2
|
||||
; AVX512F-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v4i64:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpcmpgtq %ymm2, %ymm1, %k0
|
||||
; AVX512BW-NEXT: vpsubq %ymm1, %ymm0, %ymm1
|
||||
; AVX512BW-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
|
||||
; AVX512BW-NEXT: kxorw %k1, %k0, %k1
|
||||
; AVX512BW-NEXT: vpcmpgtq %ymm1, %ymm2, %k2
|
||||
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm0 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
|
||||
; AVX512BW-NEXT: vpbroadcastq {{.*}}(%rip), %ymm0 {%k2}
|
||||
; AVX512BW-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
|
||||
; AVX512BW-NEXT: vmovdqa %ymm1, %ymm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
|
||||
ret <4 x i64> %z
|
||||
}
|
||||
|
@ -4,7 +4,8 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
|
||||
|
||||
declare <1 x i8> @llvm.uadd.sat.v1i8(<1 x i8>, <1 x i8>)
|
||||
declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>)
|
||||
@ -111,10 +112,19 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
|
||||
; AVX2-NEXT: vpaddusb %ymm3, %ymm1, %ymm1
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v64i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpaddusb %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v64i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
|
||||
; AVX512F-NEXT: vpaddusb %ymm2, %ymm3, %ymm2
|
||||
; AVX512F-NEXT: vpaddusb %ymm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v64i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpaddusb %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
|
||||
ret <64 x i8> %z
|
||||
}
|
||||
@ -191,10 +201,19 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
|
||||
; AVX2-NEXT: vpaddusw %ymm3, %ymm1, %ymm1
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v32i16:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpaddusw %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v32i16:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
|
||||
; AVX512F-NEXT: vpaddusw %ymm2, %ymm3, %ymm2
|
||||
; AVX512F-NEXT: vpaddusw %ymm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v32i16:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
|
||||
ret <32 x i16> %z
|
||||
}
|
||||
@ -524,15 +543,28 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v16i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsllw $7, %xmm1, %xmm1
|
||||
; AVX512-NEXT: vpmovb2m %xmm1, %k0
|
||||
; AVX512-NEXT: vpsllw $7, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovb2m %xmm0, %k1
|
||||
; AVX512-NEXT: korw %k0, %k1, %k0
|
||||
; AVX512-NEXT: vpmovm2b %k0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v16i1:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
|
||||
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
|
||||
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v16i1:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpsllw $7, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpmovb2m %xmm1, %k0
|
||||
; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpmovb2m %xmm0, %k1
|
||||
; AVX512BW-NEXT: korw %k0, %k1, %k0
|
||||
; AVX512BW-NEXT: vpmovm2b %k0, %xmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <16 x i1> @llvm.uadd.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
|
||||
ret <16 x i1> %z
|
||||
}
|
||||
@ -584,13 +616,23 @@ define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v2i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovdqa %xmm1, %xmm2
|
||||
; AVX512-NEXT: vpternlogq $15, %xmm1, %xmm1, %xmm2
|
||||
; AVX512-NEXT: vpminud %xmm2, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v2i32:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
|
||||
; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm2
|
||||
; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm2
|
||||
; AVX512F-NEXT: vpminud %xmm2, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v2i32:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa %xmm1, %xmm2
|
||||
; AVX512BW-NEXT: vpternlogq $15, %xmm1, %xmm1, %xmm2
|
||||
; AVX512BW-NEXT: vpminud %xmm2, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %x, <2 x i32> %y)
|
||||
ret <2 x i32> %z
|
||||
}
|
||||
@ -640,13 +682,23 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v4i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovdqa %xmm1, %xmm2
|
||||
; AVX512-NEXT: vpternlogq $15, %xmm1, %xmm1, %xmm2
|
||||
; AVX512-NEXT: vpminud %xmm2, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v4i32:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
|
||||
; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm2
|
||||
; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm2
|
||||
; AVX512F-NEXT: vpminud %xmm2, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v4i32:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa %xmm1, %xmm2
|
||||
; AVX512BW-NEXT: vpternlogq $15, %xmm1, %xmm1, %xmm2
|
||||
; AVX512BW-NEXT: vpminud %xmm2, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
|
||||
ret <4 x i32> %z
|
||||
}
|
||||
@ -719,13 +771,22 @@ define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
|
||||
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v8i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovdqa %ymm1, %ymm2
|
||||
; AVX512-NEXT: vpternlogq $15, %ymm1, %ymm1, %ymm2
|
||||
; AVX512-NEXT: vpminud %ymm2, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v8i32:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
|
||||
; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm2
|
||||
; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm2
|
||||
; AVX512F-NEXT: vpminud %ymm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpaddd %ymm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v8i32:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa %ymm1, %ymm2
|
||||
; AVX512BW-NEXT: vpternlogq $15, %ymm1, %ymm1, %ymm2
|
||||
; AVX512BW-NEXT: vpminud %ymm2, %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: vpaddd %ymm1, %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> %x, <8 x i32> %y)
|
||||
ret <8 x i32> %z
|
||||
}
|
||||
@ -926,13 +987,24 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
|
||||
; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v2i64:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovdqa %xmm1, %xmm2
|
||||
; AVX512-NEXT: vpternlogq $15, %xmm1, %xmm1, %xmm2
|
||||
; AVX512-NEXT: vpminuq %xmm2, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v2i64:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm2
|
||||
; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm2
|
||||
; AVX512F-NEXT: vpminuq %zmm2, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v2i64:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa %xmm1, %xmm2
|
||||
; AVX512BW-NEXT: vpternlogq $15, %xmm1, %xmm1, %xmm2
|
||||
; AVX512BW-NEXT: vpminuq %xmm2, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpaddq %xmm1, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
|
||||
ret <2 x i64> %z
|
||||
}
|
||||
@ -1063,13 +1135,23 @@ define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
|
||||
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v4i64:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovdqa %ymm1, %ymm2
|
||||
; AVX512-NEXT: vpternlogq $15, %ymm1, %ymm1, %ymm2
|
||||
; AVX512-NEXT: vpminuq %ymm2, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v4i64:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
|
||||
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
||||
; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm2
|
||||
; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm2
|
||||
; AVX512F-NEXT: vpminuq %zmm2, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpaddq %ymm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v4i64:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovdqa %ymm1, %ymm2
|
||||
; AVX512BW-NEXT: vpternlogq $15, %ymm1, %ymm1, %ymm2
|
||||
; AVX512BW-NEXT: vpminuq %ymm2, %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: vpaddq %ymm1, %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
|
||||
ret <4 x i64> %z
|
||||
}
|
||||
|
@ -4,7 +4,8 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
|
||||
|
||||
declare <1 x i8> @llvm.usub.sat.v1i8(<1 x i8>, <1 x i8>)
|
||||
declare <2 x i8> @llvm.usub.sat.v2i8(<2 x i8>, <2 x i8>)
|
||||
@ -111,10 +112,19 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
|
||||
; AVX2-NEXT: vpsubusb %ymm3, %ymm1, %ymm1
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v64i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsubusb %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v64i8:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
|
||||
; AVX512F-NEXT: vpsubusb %ymm2, %ymm3, %ymm2
|
||||
; AVX512F-NEXT: vpsubusb %ymm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v64i8:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpsubusb %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
|
||||
ret <64 x i8> %z
|
||||
}
|
||||
@ -191,10 +201,19 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
|
||||
; AVX2-NEXT: vpsubusw %ymm3, %ymm1, %ymm1
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v32i16:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v32i16:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
|
||||
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
|
||||
; AVX512F-NEXT: vpsubusw %ymm2, %ymm3, %ymm2
|
||||
; AVX512F-NEXT: vpsubusw %ymm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v32i16:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
|
||||
ret <32 x i16> %z
|
||||
}
|
||||
@ -524,15 +543,28 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v16i1:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpsllw $7, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpmovb2m %xmm0, %k0
|
||||
; AVX512-NEXT: vpsllw $7, %xmm1, %xmm0
|
||||
; AVX512-NEXT: vpmovb2m %xmm0, %k1
|
||||
; AVX512-NEXT: kandnw %k0, %k1, %k0
|
||||
; AVX512-NEXT: vpmovm2b %k0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v16i1:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
|
||||
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
|
||||
; AVX512F-NEXT: vptestnmd %zmm1, %zmm1, %k1
|
||||
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
|
||||
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v16i1:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpmovb2m %xmm0, %k0
|
||||
; AVX512BW-NEXT: vpsllw $7, %xmm1, %xmm0
|
||||
; AVX512BW-NEXT: vpmovb2m %xmm0, %k1
|
||||
; AVX512BW-NEXT: kandnw %k0, %k1, %k0
|
||||
; AVX512BW-NEXT: vpmovm2b %k0, %xmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <16 x i1> @llvm.usub.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
|
||||
ret <16 x i1> %z
|
||||
}
|
||||
@ -866,11 +898,20 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
|
||||
; AVX2-NEXT: vpand %xmm0, %xmm2, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v2i64:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpsubq %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v2i64:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512F-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpsubq %xmm1, %xmm0, %xmm0
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v2i64:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpsubq %xmm1, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
|
||||
ret <2 x i64> %z
|
||||
}
|
||||
@ -998,11 +1039,19 @@ define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
|
||||
; AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: v4i64:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpsubq %ymm1, %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: v4i64:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
|
||||
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
|
||||
; AVX512F-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpsubq %ymm1, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: v4i64:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: vpsubq %ymm1, %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%z = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
|
||||
ret <4 x i64> %z
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user