1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00
llvm-mirror/test/CodeGen/X86/vector-arith-sat.ll
Petar Jovanovic 9686e666ef Correct dwarf unwind information in function epilogue
This patch aims to provide correct dwarf unwind information in function
epilogue for X86.
It consists of two parts. The first part inserts CFI instructions that set
appropriate cfa offset and cfa register in emitEpilogue() in
X86FrameLowering. This part is X86 specific.

The second part is platform independent and ensures that:

* CFI instructions do not affect code generation (they are not counted as
  instructions when tail duplicating or tail merging)
* Unwind information remains correct when a function is modified by
  different passes. This is done in a late pass by analyzing information
  about cfa offset and cfa register in BBs and inserting additional CFI
  directives where necessary.

Added CFIInstrInserter pass:

* analyzes each basic block to determine which cfa offset and register are
  valid at its entry and exit
* verifies that outgoing cfa offset and register of predecessor blocks match
  incoming values of their successors
* inserts additional CFI directives at basic block beginning to correct the
  rule for calculating CFA

Having CFI instructions in function epilogue can cause incorrect CFA
calculation rule for some basic blocks. This can happen if, due to basic
block reordering, or the existence of multiple epilogue blocks, some of the
blocks have wrong cfa offset and register values set by the epilogue block
above them.
CFIInstrInserter is currently run only on X86, but can be used by any target
that implements support for adding CFI instructions in epilogue.

Patch by Violeta Vukobrat.

Differential Revision: https://reviews.llvm.org/D42848

llvm-svn: 330706
2018-04-24 10:32:08 +00:00

3026 lines
193 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx2 | FileCheck %s --check-prefix=AVX2 --check-prefix=X86 --check-prefix=X86-AVX
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq -show-mc-encoding | FileCheck %s --check-prefix=AVX512VL --check-prefix=X86 --check-prefix=X86-AVX512VL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 | FileCheck %s --check-prefix=AVX2 --check-prefix=X64 --check-prefix=X64-AVX
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq -show-mc-encoding | FileCheck %s --check-prefix=AVX512VL --check-prefix=X64 --check-prefix=X64-AVX512VL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding | FileCheck %s
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=SSE
define <32 x i8> @test_x86_avx2_padds_b(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_x86_avx2_padds_b:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpaddsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: test_x86_avx2_padds_b:
; AVX512VL: ## %bb.0:
; AVX512VL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
%1 = sext <32 x i8> %a0 to <32 x i16>
%2 = sext <32 x i8> %a1 to <32 x i16>
%3 = add nsw <32 x i16> %1, %2
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <32 x i16> %7 to <32 x i8>
ret <32 x i8> %8
}
; Signed saturating add of two <16 x i16> vectors in generic IR: operands are
; sign-extended to i32, added, clamped to [-32768, 32767] with two icmp/select
; pairs, and truncated back to i16.
; The AVX2 and AVX512VL checks expect a single vpaddsw on ymm registers.
define <16 x i16> @test_x86_avx2_padds_w(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_padds_w:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpaddsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: test_x86_avx2_padds_w:
; AVX512VL: ## %bb.0:
; AVX512VL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
%1 = sext <16 x i16> %a0 to <16 x i32>
%2 = sext <16 x i16> %a1 to <16 x i32>
%3 = add nsw <16 x i32> %1, %2
; Upper clamp: signed min of the widened sum and 32767.
%4 = icmp slt <16 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
; Lower clamp: signed max of the previous result and -32768.
%6 = icmp sgt <16 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <16 x i1> %6, <16 x i32> %5, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <16 x i32> %7 to <16 x i16>
ret <16 x i16> %8
}
define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; X86-AVX-LABEL: test_mask_adds_epi16_rr_512:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpaddsw %ymm2, %ymm0, %ymm0
; X86-AVX-NEXT: vpaddsw %ymm3, %ymm1, %ymm1
; X86-AVX-NEXT: retl
;
; X86-AVX512VL-LABEL: test_mask_adds_epi16_rr_512:
; X86-AVX512VL: ## %bb.0:
; X86-AVX512VL-NEXT: vpaddsw %zmm1, %zmm0, %zmm0
; X86-AVX512VL-NEXT: retl
;
; X64-AVX-LABEL: test_mask_adds_epi16_rr_512:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vpaddsw %ymm2, %ymm0, %ymm0
; X64-AVX-NEXT: vpaddsw %ymm3, %ymm1, %ymm1
; X64-AVX-NEXT: retq
;
; X64-AVX512VL-LABEL: test_mask_adds_epi16_rr_512:
; X64-AVX512VL: ## %bb.0:
; X64-AVX512VL-NEXT: vpaddsw %zmm1, %zmm0, %zmm0
; X64-AVX512VL-NEXT: retq
%1 = sext <32 x i16> %a to <32 x i32>
%2 = sext <32 x i16> %b to <32 x i32>
%3 = add nsw <32 x i32> %1, %2
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <32 x i32> %7 to <32 x i16>
ret <32 x i16> %8
}
; Unsigned saturating add of two <32 x i8> vectors in generic IR: operands are
; zero-extended to i16, added, clamped to at most 255 with an unsigned
; icmp/select pair, and truncated back to i8.
; The AVX2 and AVX512VL checks expect a single vpaddusb on ymm registers.
define <32 x i8> @test_x86_avx2_paddus_b(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_x86_avx2_paddus_b:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpaddusb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: test_x86_avx2_paddus_b:
; AVX512VL: ## %bb.0:
; AVX512VL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
%1 = zext <32 x i8> %a0 to <32 x i16>
%2 = zext <32 x i8> %a1 to <32 x i16>
%3 = add nsw <32 x i16> %1, %2
; Upper clamp: unsigned min of the widened sum and 255.
%4 = icmp ult <32 x i16> %3, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
%6 = trunc <32 x i16> %5 to <32 x i8>
ret <32 x i8> %6
}
; Unsigned saturating add of two <16 x i16> vectors in generic IR: operands
; are zero-extended to i32, added, clamped to at most 65535 with an unsigned
; icmp/select pair, and truncated back to i16.
; The AVX2 and AVX512VL checks expect a single vpaddusw on ymm registers.
define <16 x i16> @test_x86_avx2_paddus_w(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_paddus_w:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpaddusw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: test_x86_avx2_paddus_w:
; AVX512VL: ## %bb.0:
; AVX512VL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
%1 = zext <16 x i16> %a0 to <16 x i32>
%2 = zext <16 x i16> %a1 to <16 x i32>
%3 = add nsw <16 x i32> %1, %2
; Upper clamp: unsigned min of the widened sum and 65535.
%4 = icmp ult <16 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%6 = trunc <16 x i32> %5 to <16 x i16>
ret <16 x i16> %6
}
define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; X86-AVX-LABEL: test_mask_adds_epu16_rr_512:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpaddusw %ymm2, %ymm0, %ymm0
; X86-AVX-NEXT: vpaddusw %ymm3, %ymm1, %ymm1
; X86-AVX-NEXT: retl
;
; X86-AVX512VL-LABEL: test_mask_adds_epu16_rr_512:
; X86-AVX512VL: ## %bb.0:
; X86-AVX512VL-NEXT: vpaddusw %zmm1, %zmm0, %zmm0
; X86-AVX512VL-NEXT: retl
;
; X64-AVX-LABEL: test_mask_adds_epu16_rr_512:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vpaddusw %ymm2, %ymm0, %ymm0
; X64-AVX-NEXT: vpaddusw %ymm3, %ymm1, %ymm1
; X64-AVX-NEXT: retq
;
; X64-AVX512VL-LABEL: test_mask_adds_epu16_rr_512:
; X64-AVX512VL: ## %bb.0:
; X64-AVX512VL-NEXT: vpaddusw %zmm1, %zmm0, %zmm0
; X64-AVX512VL-NEXT: retq
%1 = zext <32 x i16> %a to <32 x i32>
%2 = zext <32 x i16> %b to <32 x i32>
%3 = add nsw <32 x i32> %1, %2
%4 = icmp ult <32 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%6 = trunc <32 x i32> %5 to <32 x i16>
ret <32 x i16> %6
}
define <32 x i8> @test_x86_avx2_psubs_b(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_x86_avx2_psubs_b:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpsubsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: test_x86_avx2_psubs_b:
; AVX512VL: ## %bb.0:
; AVX512VL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
%1 = sext <32 x i8> %a0 to <32 x i16>
%2 = sext <32 x i8> %a1 to <32 x i16>
%3 = sub nsw <32 x i16> %1, %2
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <32 x i16> %7 to <32 x i8>
ret <32 x i8> %8
}
; Signed saturating subtract of two <16 x i16> vectors in generic IR: operands
; are sign-extended to i32, subtracted, clamped to [-32768, 32767] with two
; icmp/select pairs, and truncated back to i16.
; The AVX2 and AVX512VL checks expect a single vpsubsw on ymm registers.
define <16 x i16> @test_x86_avx2_psubs_w(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_psubs_w:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpsubsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: test_x86_avx2_psubs_w:
; AVX512VL: ## %bb.0:
; AVX512VL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
%1 = sext <16 x i16> %a0 to <16 x i32>
%2 = sext <16 x i16> %a1 to <16 x i32>
%3 = sub nsw <16 x i32> %1, %2
; Upper clamp: signed min of the widened difference and 32767.
%4 = icmp slt <16 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
; Lower clamp: signed max of the previous result and -32768.
%6 = icmp sgt <16 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <16 x i1> %6, <16 x i32> %5, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <16 x i32> %7 to <16 x i16>
ret <16 x i16> %8
}
define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; X86-AVX-LABEL: test_mask_subs_epi16_rr_512:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpsubsw %ymm2, %ymm0, %ymm0
; X86-AVX-NEXT: vpsubsw %ymm3, %ymm1, %ymm1
; X86-AVX-NEXT: retl
;
; X86-AVX512VL-LABEL: test_mask_subs_epi16_rr_512:
; X86-AVX512VL: ## %bb.0:
; X86-AVX512VL-NEXT: vpsubsw %zmm1, %zmm0, %zmm0
; X86-AVX512VL-NEXT: retl
;
; X64-AVX-LABEL: test_mask_subs_epi16_rr_512:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vpsubsw %ymm2, %ymm0, %ymm0
; X64-AVX-NEXT: vpsubsw %ymm3, %ymm1, %ymm1
; X64-AVX-NEXT: retq
;
; X64-AVX512VL-LABEL: test_mask_subs_epi16_rr_512:
; X64-AVX512VL: ## %bb.0:
; X64-AVX512VL-NEXT: vpsubsw %zmm1, %zmm0, %zmm0
; X64-AVX512VL-NEXT: retq
%1 = sext <32 x i16> %a to <32 x i32>
%2 = sext <32 x i16> %b to <32 x i32>
%3 = sub nsw <32 x i32> %1, %2
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <32 x i32> %7 to <32 x i16>
ret <32 x i16> %8
}
; Unsigned saturating subtract of two <32 x i8> vectors, written without any
; widening: the unsigned maximum of %a0 and %a1 is selected and %a1 is
; subtracted from it, so each lane is %a0 - %a1 when %a0 > %a1 and 0 otherwise.
; The AVX2 and AVX512VL checks expect a single vpsubusb on ymm registers.
define <32 x i8> @test_x86_avx2_psubus_b(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_x86_avx2_psubus_b:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpsubusb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: test_x86_avx2_psubus_b:
; AVX512VL: ## %bb.0:
; AVX512VL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
; %sel is the unsigned max of the two operands.
%cmp = icmp ugt <32 x i8> %a0, %a1
%sel = select <32 x i1> %cmp, <32 x i8> %a0, <32 x i8> %a1
%sub = sub <32 x i8> %sel, %a1
ret <32 x i8> %sub
}
; Unsigned saturating subtract of two <16 x i16> vectors, written without any
; widening: the unsigned maximum of %a0 and %a1 is selected and %a1 is
; subtracted from it, so each lane is %a0 - %a1 when %a0 > %a1 and 0 otherwise.
; The AVX2 and AVX512VL checks expect a single vpsubusw on ymm registers.
define <16 x i16> @test_x86_avx2_psubus_w(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_psubus_w:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpsubusw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: test_x86_avx2_psubus_w:
; AVX512VL: ## %bb.0:
; AVX512VL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
; %sel is the unsigned max of the two operands.
%cmp = icmp ugt <16 x i16> %a0, %a1
%sel = select <16 x i1> %cmp, <16 x i16> %a0, <16 x i16> %a1
%sub = sub <16 x i16> %sel, %a1
ret <16 x i16> %sub
}
; Unsigned saturating subtract of two <32 x i16> vectors (512 bits wide),
; written as umax(%a, %b) - %b, so each lane is %a - %b when %a > %b and 0
; otherwise.
; The X86-AVX and X64-AVX checks expect two vpsubusw instructions on ymm
; registers; the X86-AVX512VL and X64-AVX512VL checks expect a single
; vpsubusw on zmm registers.
define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; X86-AVX-LABEL: test_mask_subs_epu16_rr_512:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpsubusw %ymm2, %ymm0, %ymm0
; X86-AVX-NEXT: vpsubusw %ymm3, %ymm1, %ymm1
; X86-AVX-NEXT: retl
;
; X86-AVX512VL-LABEL: test_mask_subs_epu16_rr_512:
; X86-AVX512VL: ## %bb.0:
; X86-AVX512VL-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
; X86-AVX512VL-NEXT: retl
;
; X64-AVX-LABEL: test_mask_subs_epu16_rr_512:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vpsubusw %ymm2, %ymm0, %ymm0
; X64-AVX-NEXT: vpsubusw %ymm3, %ymm1, %ymm1
; X64-AVX-NEXT: retq
;
; X64-AVX512VL-LABEL: test_mask_subs_epu16_rr_512:
; X64-AVX512VL: ## %bb.0:
; X64-AVX512VL-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
; X64-AVX512VL-NEXT: retq
; %sel is the unsigned max of the two operands.
%cmp = icmp ugt <32 x i16> %a, %b
%sel = select <32 x i1> %cmp, <32 x i16> %a, <32 x i16> %b
%sub = sub <32 x i16> %sel, %b
ret <32 x i16> %sub
}
define <32 x i16> @test_mask_adds_epi16_rr_512_avx512(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_mask_adds_epi16_rr_512_avx512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epi16_rr_512_avx512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%1 = sext <32 x i16> %a to <32 x i32>
%2 = sext <32 x i16> %b to <32 x i32>
%3 = add nsw <32 x i32> %1, %2
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <32 x i32> %7 to <32 x i16>
ret <32 x i16> %8
}
define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epi16_rrk_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epi16_rrk_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%1 = sext <32 x i16> %a to <32 x i32>
%2 = sext <32 x i16> %b to <32 x i32>
%3 = add nsw <32 x i32> %1, %2
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <32 x i32> %7 to <32 x i16>
%9 = bitcast i32 %mask to <32 x i1>
%10 = select <32 x i1> %9, <32 x i16> %8, <32 x i16> %passThru
ret <32 x i16> %10
}
define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epi16_rrkz_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epi16_rrkz_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
%1 = sext <32 x i16> %a to <32 x i32>
%2 = sext <32 x i16> %b to <32 x i32>
%3 = add nsw <32 x i32> %1, %2
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <32 x i32> %7 to <32 x i16>
%9 = bitcast i32 %mask to <32 x i1>
%10 = select <32 x i1> %9, <32 x i16> %8, <32 x i16> zeroinitializer
ret <32 x i16> %10
}
define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; AVX512BW-LABEL: test_mask_adds_epi16_rm_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epi16_rm_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%1 = sext <32 x i16> %a to <32 x i32>
%2 = sext <32 x i16> %b to <32 x i32>
%3 = add nsw <32 x i32> %1, %2
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <32 x i32> %7 to <32 x i16>
ret <32 x i16> %8
}
define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epi16_rmk_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epi16_rmk_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%1 = sext <32 x i16> %a to <32 x i32>
%2 = sext <32 x i16> %b to <32 x i32>
%3 = add nsw <32 x i32> %1, %2
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <32 x i32> %7 to <32 x i16>
%9 = bitcast i32 %mask to <32 x i1>
%10 = select <32 x i1> %9, <32 x i16> %8, <32 x i16> %passThru
ret <32 x i16> %10
}
define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epi16_rmkz_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epi16_rmkz_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%1 = sext <32 x i16> %a to <32 x i32>
%2 = sext <32 x i16> %b to <32 x i32>
%3 = add nsw <32 x i32> %1, %2
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <32 x i32> %7 to <32 x i16>
%9 = bitcast i32 %mask to <32 x i1>
%10 = select <32 x i1> %9, <32 x i16> %8, <32 x i16> zeroinitializer
ret <32 x i16> %10
}
define <64 x i16> @test_mask_adds_epi16_rr_1024(<64 x i16> %a, <64 x i16> %b) {
; AVX512BW-LABEL: test_mask_adds_epi16_rr_1024:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpaddsw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddsw %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epi16_rr_1024:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: pushl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl %esp, %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT: andl $-64, %esp
; AVX512F-32-NEXT: subl $64, %esp
; AVX512F-32-NEXT: vpaddsw %zmm2, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddsw 8(%ebp), %zmm1, %zmm1
; AVX512F-32-NEXT: movl %ebp, %esp
; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT: retl
%1 = sext <64 x i16> %a to <64 x i32>
%2 = sext <64 x i16> %b to <64 x i32>
%3 = add nsw <64 x i32> %1, %2
%4 = icmp slt <64 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <64 x i1> %4, <64 x i32> %3, <64 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <64 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <64 x i1> %6, <64 x i32> %5, <64 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <64 x i32> %7 to <64 x i16>
ret <64 x i16> %8
}
define <32 x i16> @test_mask_subs_epi16_rr_512_avx512(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_mask_subs_epi16_rr_512_avx512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rr_512_avx512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%1 = sext <32 x i16> %a to <32 x i32>
%2 = sext <32 x i16> %b to <32 x i32>
%3 = sub nsw <32 x i32> %1, %2
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <32 x i32> %7 to <32 x i16>
ret <32 x i16> %8
}
define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epi16_rrk_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rrk_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%1 = sext <32 x i16> %a to <32 x i32>
%2 = sext <32 x i16> %b to <32 x i32>
%3 = sub nsw <32 x i32> %1, %2
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <32 x i32> %7 to <32 x i16>
%9 = bitcast i32 %mask to <32 x i1>
%10 = select <32 x i1> %9, <32 x i16> %8, <32 x i16> %passThru
ret <32 x i16> %10
}
define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epi16_rrkz_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rrkz_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
%1 = sext <32 x i16> %a to <32 x i32>
%2 = sext <32 x i16> %b to <32 x i32>
%3 = sub nsw <32 x i32> %1, %2
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <32 x i32> %7 to <32 x i16>
%9 = bitcast i32 %mask to <32 x i1>
%10 = select <32 x i1> %9, <32 x i16> %8, <32 x i16> zeroinitializer
ret <32 x i16> %10
}
define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; AVX512BW-LABEL: test_mask_subs_epi16_rm_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rm_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%1 = sext <32 x i16> %a to <32 x i32>
%2 = sext <32 x i16> %b to <32 x i32>
%3 = sub nsw <32 x i32> %1, %2
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <32 x i32> %7 to <32 x i16>
ret <32 x i16> %8
}
define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epi16_rmk_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rmk_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%1 = sext <32 x i16> %a to <32 x i32>
%2 = sext <32 x i16> %b to <32 x i32>
%3 = sub nsw <32 x i32> %1, %2
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <32 x i32> %7 to <32 x i16>
%9 = bitcast i32 %mask to <32 x i1>
%10 = select <32 x i1> %9, <32 x i16> %8, <32 x i16> %passThru
ret <32 x i16> %10
}
define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epi16_rmkz_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rmkz_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%1 = sext <32 x i16> %a to <32 x i32>
%2 = sext <32 x i16> %b to <32 x i32>
%3 = sub nsw <32 x i32> %1, %2
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <32 x i32> %7 to <32 x i16>
%9 = bitcast i32 %mask to <32 x i1>
%10 = select <32 x i1> %9, <32 x i16> %8, <32 x i16> zeroinitializer
ret <32 x i16> %10
}
define <64 x i16> @test_mask_subs_epi16_rr_1024(<64 x i16> %a, <64 x i16> %b) {
; AVX512BW-LABEL: test_mask_subs_epi16_rr_1024:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsubsw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpsubsw %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rr_1024:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: pushl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl %esp, %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT: andl $-64, %esp
; AVX512F-32-NEXT: subl $64, %esp
; AVX512F-32-NEXT: vpsubsw %zmm2, %zmm0, %zmm0
; AVX512F-32-NEXT: vpsubsw 8(%ebp), %zmm1, %zmm1
; AVX512F-32-NEXT: movl %ebp, %esp
; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT: retl
%1 = sext <64 x i16> %a to <64 x i32>
%2 = sext <64 x i16> %b to <64 x i32>
%3 = sub nsw <64 x i32> %1, %2
%4 = icmp slt <64 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <64 x i1> %4, <64 x i32> %3, <64 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <64 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <64 x i1> %6, <64 x i32> %5, <64 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <64 x i32> %7 to <64 x i16>
ret <64 x i16> %8
}
define <32 x i16> @test_mask_adds_epu16_rr_512_avx512(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_mask_adds_epu16_rr_512_avx512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rr_512_avx512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%1 = zext <32 x i16> %a to <32 x i32>
%2 = zext <32 x i16> %b to <32 x i32>
%3 = add nsw <32 x i32> %1, %2
%4 = icmp ult <32 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%6 = trunc <32 x i32> %5 to <32 x i16>
ret <32 x i16> %6
}
define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epu16_rrk_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rrk_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
%1 = zext <32 x i16> %a to <32 x i32>
%2 = zext <32 x i16> %b to <32 x i32>
%3 = add nsw <32 x i32> %1, %2
%4 = icmp ult <32 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%6 = trunc <32 x i32> %5 to <32 x i16>
%7 = bitcast i32 %mask to <32 x i1>
%8 = select <32 x i1> %7, <32 x i16> %6, <32 x i16> %passThru
ret <32 x i16> %8
}
define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epu16_rrkz_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rrkz_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
%1 = zext <32 x i16> %a to <32 x i32>
%2 = zext <32 x i16> %b to <32 x i32>
%3 = add nsw <32 x i32> %1, %2
%4 = icmp ult <32 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%6 = trunc <32 x i32> %5 to <32 x i16>
%7 = bitcast i32 %mask to <32 x i1>
%8 = select <32 x i1> %7, <32 x i16> %6, <32 x i16> zeroinitializer
ret <32 x i16> %8
}
define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; AVX512BW-LABEL: test_mask_adds_epu16_rm_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rm_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%1 = zext <32 x i16> %a to <32 x i32>
%2 = zext <32 x i16> %b to <32 x i32>
%3 = add nsw <32 x i32> %1, %2
%4 = icmp ult <32 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%6 = trunc <32 x i32> %5 to <32 x i16>
ret <32 x i16> %6
}
define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epu16_rmk_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rmk_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%1 = zext <32 x i16> %a to <32 x i32>
%2 = zext <32 x i16> %b to <32 x i32>
%3 = add nsw <32 x i32> %1, %2
%4 = icmp ult <32 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%6 = trunc <32 x i32> %5 to <32 x i16>
%7 = bitcast i32 %mask to <32 x i1>
%8 = select <32 x i1> %7, <32 x i16> %6, <32 x i16> %passThru
ret <32 x i16> %8
}
define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epu16_rmkz_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rmkz_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%1 = zext <32 x i16> %a to <32 x i32>
%2 = zext <32 x i16> %b to <32 x i32>
%3 = add nsw <32 x i32> %1, %2
%4 = icmp ult <32 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%6 = trunc <32 x i32> %5 to <32 x i16>
%7 = bitcast i32 %mask to <32 x i1>
%8 = select <32 x i1> %7, <32 x i16> %6, <32 x i16> zeroinitializer
ret <32 x i16> %8
}
define <64 x i16> @test_mask_adds_epu16_rr_1024(<64 x i16> %a, <64 x i16> %b) {
; AVX512BW-LABEL: test_mask_adds_epu16_rr_1024:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpaddusw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddusw %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rr_1024:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: pushl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl %esp, %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT: andl $-64, %esp
; AVX512F-32-NEXT: subl $64, %esp
; AVX512F-32-NEXT: vpaddusw %zmm2, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddusw 8(%ebp), %zmm1, %zmm1
; AVX512F-32-NEXT: movl %ebp, %esp
; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT: retl
%1 = zext <64 x i16> %a to <64 x i32>
%2 = zext <64 x i16> %b to <64 x i32>
%3 = add nsw <64 x i32> %1, %2
%4 = icmp ult <64 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%5 = select <64 x i1> %4, <64 x i32> %3, <64 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%6 = trunc <64 x i32> %5 to <64 x i16>
ret <64 x i16> %6
}
; Unmasked unsigned saturating i16 subtract on <32 x i16>, register operands.
; The assertions below expect a single vpsubusw.
define <32 x i16> @test_mask_subs_epu16_rr_512_avx512(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_mask_subs_epu16_rr_512_avx512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rr_512_avx512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
; umax(%a, %b) - %b, so lanes where %a is not above %b yield zero.
%cmp = icmp ugt <32 x i16> %a, %b
%sel = select <32 x i1> %cmp, <32 x i16> %a, <32 x i16> %b
%sub = sub <32 x i16> %sel, %b
ret <32 x i16> %sub
}
; Unsigned saturating i16 subtract on <32 x i16>, register operands, with the
; result merge-masked into %passThru. The assertions below expect one masked
; vpsubusw writing the %passThru register.
define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epu16_rrk_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rrk_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
; umax(%a, %b) - %b, so lanes where %a is not above %b yield zero.
%cmp = icmp ugt <32 x i16> %a, %b
%sel = select <32 x i1> %cmp, <32 x i16> %a, <32 x i16> %b
%sub = sub <32 x i16> %sel, %b
; Reinterpret %mask as 32 per-lane predicates; clear lanes take %passThru.
%bc = bitcast i32 %mask to <32 x i1>
%res = select <32 x i1> %bc, <32 x i16> %sub, <32 x i16> %passThru
ret <32 x i16> %res
}
; Unsigned saturating i16 subtract on <32 x i16>, register operands, with the
; result zero-masked by %mask. The assertions below expect one vpsubusw with
; the {z} zeroing modifier.
define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epu16_rrkz_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rrkz_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
; umax(%a, %b) - %b, so lanes where %a is not above %b yield zero.
%cmp = icmp ugt <32 x i16> %a, %b
%sel = select <32 x i1> %cmp, <32 x i16> %a, <32 x i16> %b
%sub = sub <32 x i16> %sel, %b
; Reinterpret %mask as 32 per-lane predicates; clear lanes become zero.
%bc = bitcast i32 %mask to <32 x i1>
%res = select <32 x i1> %bc, <32 x i16> %sub, <32 x i16> zeroinitializer
ret <32 x i16> %res
}
; Unmasked unsigned saturating i16 subtract on <32 x i16>, with %b loaded from
; %ptr_b. The assertions below expect a single vpsubusw with the load folded
; into its memory operand.
define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; AVX512BW-LABEL: test_mask_subs_epu16_rm_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rm_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0
; AVX512F-32-NEXT: retl
; umax(%a, %b) - %b, so lanes where %a is not above %b yield zero.
%b = load <32 x i16>, <32 x i16>* %ptr_b
%cmp = icmp ugt <32 x i16> %a, %b
%sel = select <32 x i1> %cmp, <32 x i16> %a, <32 x i16> %b
%sub = sub <32 x i16> %sel, %b
ret <32 x i16> %sub
}
; Unsigned saturating i16 subtract on <32 x i16>, with %b loaded from %ptr_b
; and the result merge-masked into %passThru. The assertions below expect one
; masked vpsubusw with a folded memory operand.
define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epu16_rmk_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rmk_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
; umax(%a, %b) - %b, so lanes where %a is not above %b yield zero.
%b = load <32 x i16>, <32 x i16>* %ptr_b
%cmp = icmp ugt <32 x i16> %a, %b
%sel = select <32 x i1> %cmp, <32 x i16> %a, <32 x i16> %b
%sub = sub <32 x i16> %sel, %b
; Reinterpret %mask as 32 per-lane predicates; clear lanes take %passThru.
%bc = bitcast i32 %mask to <32 x i1>
%res = select <32 x i1> %bc, <32 x i16> %sub, <32 x i16> %passThru
ret <32 x i16> %res
}
; Unsigned saturating i16 subtract on <32 x i16>, with %b loaded from %ptr_b
; and the result zero-masked by %mask. The assertions below expect one
; vpsubusw with a folded memory operand and the {z} zeroing modifier.
define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epu16_rmkz_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rmkz_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
; umax(%a, %b) - %b, so lanes where %a is not above %b yield zero.
%b = load <32 x i16>, <32 x i16>* %ptr_b
%cmp = icmp ugt <32 x i16> %a, %b
%sel = select <32 x i1> %cmp, <32 x i16> %a, <32 x i16> %b
%sub = sub <32 x i16> %sel, %b
; Reinterpret %mask as 32 per-lane predicates; clear lanes become zero.
%bc = bitcast i32 %mask to <32 x i1>
%res = select <32 x i1> %bc, <32 x i16> %sub, <32 x i16> zeroinitializer
ret <32 x i16> %res
}
; Unmasked unsigned saturating i16 subtract on <64 x i16>. The assertions below
; expect the operation to be emitted as two 512-bit vpsubusw instructions. In
; the 32-bit output the second one takes its source operand from 8(%ebp), after
; a prologue that realigns the stack with andl $-64.
define <64 x i16> @test_mask_subs_epu16_rr_1024(<64 x i16> %a, <64 x i16> %b) {
; AVX512BW-LABEL: test_mask_subs_epu16_rr_1024:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsubusw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpsubusw %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rr_1024:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: pushl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl %esp, %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT: andl $-64, %esp
; AVX512F-32-NEXT: subl $64, %esp
; AVX512F-32-NEXT: vpsubusw %zmm2, %zmm0, %zmm0
; AVX512F-32-NEXT: vpsubusw 8(%ebp), %zmm1, %zmm1
; AVX512F-32-NEXT: movl %ebp, %esp
; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT: retl
; umax(%a, %b) - %b, so lanes where %a is not above %b yield zero.
%cmp = icmp ugt <64 x i16> %a, %b
%sel = select <64 x i1> %cmp, <64 x i16> %a, <64 x i16> %b
%sub = sub <64 x i16> %sel, %b
ret <64 x i16> %sub
}
; Unmasked signed saturating i16 add on <8 x i16>, register operands. The
; assertions below expect a single vpaddsw.
define <8 x i16> @test_mask_adds_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epi16_rr_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
; Sign-extend to i32, add, clamp to [-32768, 32767] (signed min, then signed
; max), narrow to i16.
%1 = sext <8 x i16> %a to <8 x i32>
%2 = sext <8 x i16> %b to <8 x i32>
%3 = add nsw <8 x i32> %1, %2
%4 = icmp slt <8 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <8 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <8 x i1> %6, <8 x i32> %5, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <8 x i32> %7 to <8 x i16>
ret <8 x i16> %8
}
; Signed saturating i16 add on <8 x i16>, register operands, with the result
; merge-masked into %passThru. The assertions below expect one masked vpaddsw
; writing the %passThru register.
define <8 x i16> @test_mask_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: retq
; Sign-extend to i32, add, clamp to [-32768, 32767] (signed min, then signed
; max), narrow to i16.
%1 = sext <8 x i16> %a to <8 x i32>
%2 = sext <8 x i16> %b to <8 x i32>
%3 = add nsw <8 x i32> %1, %2
%4 = icmp slt <8 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <8 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <8 x i1> %6, <8 x i32> %5, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <8 x i32> %7 to <8 x i16>
; Reinterpret %mask as 8 per-lane predicates; clear lanes take %passThru.
%9 = bitcast i8 %mask to <8 x i1>
%10 = select <8 x i1> %9, <8 x i16> %8, <8 x i16> %passThru
ret <8 x i16> %10
}
; Signed saturating i16 add on <8 x i16>, register operands, with the result
; zero-masked by %mask. The assertions below expect one vpaddsw with the {z}
; zeroing modifier.
define <8 x i16> @test_mask_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
; Sign-extend to i32, add, clamp to [-32768, 32767] (signed min, then signed
; max), narrow to i16.
%1 = sext <8 x i16> %a to <8 x i32>
%2 = sext <8 x i16> %b to <8 x i32>
%3 = add nsw <8 x i32> %1, %2
%4 = icmp slt <8 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <8 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <8 x i1> %6, <8 x i32> %5, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <8 x i32> %7 to <8 x i16>
; Reinterpret %mask as 8 per-lane predicates; clear lanes become zero.
%9 = bitcast i8 %mask to <8 x i1>
%10 = select <8 x i1> %9, <8 x i16> %8, <8 x i16> zeroinitializer
ret <8 x i16> %10
}
; Unmasked signed saturating i16 add on <8 x i16>, with %b loaded from %ptr_b.
; The assertions below expect a single vpaddsw with the load folded into its
; memory operand.
define <8 x i16> @test_mask_adds_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epi16_rm_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddsw (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
; Sign-extend to i32, add, clamp to [-32768, 32767] (signed min, then signed
; max), narrow to i16.
%b = load <8 x i16>, <8 x i16>* %ptr_b
%1 = sext <8 x i16> %a to <8 x i32>
%2 = sext <8 x i16> %b to <8 x i32>
%3 = add nsw <8 x i32> %1, %2
%4 = icmp slt <8 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <8 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <8 x i1> %6, <8 x i32> %5, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <8 x i32> %7 to <8 x i16>
ret <8 x i16> %8
}
; Signed saturating i16 add on <8 x i16>, with %b loaded from %ptr_b and the
; result merge-masked into %passThru. The assertions below expect one masked
; vpaddsw with a folded memory operand.
define <8 x i16> @test_mask_adds_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpaddsw (%rdi), %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
; Sign-extend to i32, add, clamp to [-32768, 32767] (signed min, then signed
; max), narrow to i16.
%b = load <8 x i16>, <8 x i16>* %ptr_b
%1 = sext <8 x i16> %a to <8 x i32>
%2 = sext <8 x i16> %b to <8 x i32>
%3 = add nsw <8 x i32> %1, %2
%4 = icmp slt <8 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <8 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <8 x i1> %6, <8 x i32> %5, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <8 x i32> %7 to <8 x i16>
; Reinterpret %mask as 8 per-lane predicates; clear lanes take %passThru.
%9 = bitcast i8 %mask to <8 x i1>
%10 = select <8 x i1> %9, <8 x i16> %8, <8 x i16> %passThru
ret <8 x i16> %10
}
; Signed saturating i16 add on <8 x i16>, with %b loaded from %ptr_b and the
; result zero-masked by %mask. The assertions below expect one vpaddsw with a
; folded memory operand and the {z} zeroing modifier.
define <8 x i16> @test_mask_adds_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
; Sign-extend to i32, add, clamp to [-32768, 32767] (signed min, then signed
; max), narrow to i16.
%b = load <8 x i16>, <8 x i16>* %ptr_b
%1 = sext <8 x i16> %a to <8 x i32>
%2 = sext <8 x i16> %b to <8 x i32>
%3 = add nsw <8 x i32> %1, %2
%4 = icmp slt <8 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <8 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <8 x i1> %6, <8 x i32> %5, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <8 x i32> %7 to <8 x i16>
; Reinterpret %mask as 8 per-lane predicates; clear lanes become zero.
%9 = bitcast i8 %mask to <8 x i1>
%10 = select <8 x i1> %9, <8 x i16> %8, <8 x i16> zeroinitializer
ret <8 x i16> %10
}
; Unmasked signed saturating i16 add on <16 x i16>, register operands. The
; assertions below expect a single vpaddsw on ymm registers.
define <16 x i16> @test_mask_adds_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epi16_rr_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddsw %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
; Sign-extend to i32, add, clamp to [-32768, 32767] (signed min, then signed
; max), narrow to i16.
%1 = sext <16 x i16> %a to <16 x i32>
%2 = sext <16 x i16> %b to <16 x i32>
%3 = add nsw <16 x i32> %1, %2
%4 = icmp slt <16 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <16 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <16 x i1> %6, <16 x i32> %5, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <16 x i32> %7 to <16 x i16>
ret <16 x i16> %8
}
; Signed saturating i16 add on <16 x i16>, register operands, with the result
; merge-masked into %passThru. The assertions below expect one masked vpaddsw
; writing the %passThru register.
define <16 x i16> @test_mask_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrk_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddsw %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: retq
; Sign-extend to i32, add, clamp to [-32768, 32767] (signed min, then signed
; max), narrow to i16.
%1 = sext <16 x i16> %a to <16 x i32>
%2 = sext <16 x i16> %b to <16 x i32>
%3 = add nsw <16 x i32> %1, %2
%4 = icmp slt <16 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <16 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <16 x i1> %6, <16 x i32> %5, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <16 x i32> %7 to <16 x i16>
; Reinterpret %mask as 16 per-lane predicates; clear lanes take %passThru.
%9 = bitcast i16 %mask to <16 x i1>
%10 = select <16 x i1> %9, <16 x i16> %8, <16 x i16> %passThru
ret <16 x i16> %10
}
; Signed saturating i16 add on <16 x i16>, register operands, with the result
; zero-masked by %mask. The assertions below expect one vpaddsw with the {z}
; zeroing modifier.
define <16 x i16> @test_mask_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrkz_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
; Sign-extend to i32, add, clamp to [-32768, 32767] (signed min, then signed
; max), narrow to i16.
%1 = sext <16 x i16> %a to <16 x i32>
%2 = sext <16 x i16> %b to <16 x i32>
%3 = add nsw <16 x i32> %1, %2
%4 = icmp slt <16 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <16 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <16 x i1> %6, <16 x i32> %5, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <16 x i32> %7 to <16 x i16>
; Reinterpret %mask as 16 per-lane predicates; clear lanes become zero.
%9 = bitcast i16 %mask to <16 x i1>
%10 = select <16 x i1> %9, <16 x i16> %8, <16 x i16> zeroinitializer
ret <16 x i16> %10
}
; Unmasked signed saturating i16 add on <16 x i16>, with %b loaded from %ptr_b.
; The assertions below expect a single vpaddsw with the load folded into its
; memory operand.
define <16 x i16> @test_mask_adds_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epi16_rm_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddsw (%rdi), %ymm0, %ymm0
; CHECK-NEXT: retq
; Sign-extend to i32, add, clamp to [-32768, 32767] (signed min, then signed
; max), narrow to i16.
%b = load <16 x i16>, <16 x i16>* %ptr_b
%1 = sext <16 x i16> %a to <16 x i32>
%2 = sext <16 x i16> %b to <16 x i32>
%3 = add nsw <16 x i32> %1, %2
%4 = icmp slt <16 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <16 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <16 x i1> %6, <16 x i32> %5, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <16 x i32> %7 to <16 x i16>
ret <16 x i16> %8
}
; Signed saturating i16 add on <16 x i16>, with %b loaded from %ptr_b and the
; result merge-masked into %passThru. The assertions below expect one masked
; vpaddsw with a folded memory operand.
define <16 x i16> @test_mask_adds_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmk_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpaddsw (%rdi), %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
; Sign-extend to i32, add, clamp to [-32768, 32767] (signed min, then signed
; max), narrow to i16.
%b = load <16 x i16>, <16 x i16>* %ptr_b
%1 = sext <16 x i16> %a to <16 x i32>
%2 = sext <16 x i16> %b to <16 x i32>
%3 = add nsw <16 x i32> %1, %2
%4 = icmp slt <16 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <16 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <16 x i1> %6, <16 x i32> %5, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <16 x i32> %7 to <16 x i16>
; Reinterpret %mask as 16 per-lane predicates; clear lanes take %passThru.
%9 = bitcast i16 %mask to <16 x i1>
%10 = select <16 x i1> %9, <16 x i16> %8, <16 x i16> %passThru
ret <16 x i16> %10
}
; Signed saturating i16 add on <16 x i16>, with %b loaded from %ptr_b and the
; result zero-masked by %mask. The assertions below expect one vpaddsw with a
; folded memory operand and the {z} zeroing modifier.
define <16 x i16> @test_mask_adds_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmkz_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
; Sign-extend to i32, add, clamp to [-32768, 32767] (signed min, then signed
; max), narrow to i16.
%b = load <16 x i16>, <16 x i16>* %ptr_b
%1 = sext <16 x i16> %a to <16 x i32>
%2 = sext <16 x i16> %b to <16 x i32>
%3 = add nsw <16 x i32> %1, %2
%4 = icmp slt <16 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <16 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <16 x i1> %6, <16 x i32> %5, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <16 x i32> %7 to <16 x i16>
; Reinterpret %mask as 16 per-lane predicates; clear lanes become zero.
%9 = bitcast i16 %mask to <16 x i1>
%10 = select <16 x i1> %9, <16 x i16> %8, <16 x i16> zeroinitializer
ret <16 x i16> %10
}
; Unmasked signed saturating i16 subtract on <8 x i16>, register operands. The
; assertions below expect a single vpsubsw.
define <8 x i16> @test_mask_subs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epi16_rr_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
; Sign-extend to i32, subtract, clamp to [-32768, 32767] (signed min, then
; signed max), narrow to i16.
%1 = sext <8 x i16> %a to <8 x i32>
%2 = sext <8 x i16> %b to <8 x i32>
%3 = sub nsw <8 x i32> %1, %2
%4 = icmp slt <8 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <8 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <8 x i1> %6, <8 x i32> %5, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <8 x i32> %7 to <8 x i16>
ret <8 x i16> %8
}
; Signed saturating i16 subtract on <8 x i16>, register operands, with the
; result merge-masked into %passThru. The assertions below expect one masked
; vpsubsw writing the %passThru register.
define <8 x i16> @test_mask_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: retq
; Sign-extend to i32, subtract, clamp to [-32768, 32767] (signed min, then
; signed max), narrow to i16.
%1 = sext <8 x i16> %a to <8 x i32>
%2 = sext <8 x i16> %b to <8 x i32>
%3 = sub nsw <8 x i32> %1, %2
%4 = icmp slt <8 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <8 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <8 x i1> %6, <8 x i32> %5, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <8 x i32> %7 to <8 x i16>
; Reinterpret %mask as 8 per-lane predicates; clear lanes take %passThru.
%9 = bitcast i8 %mask to <8 x i1>
%10 = select <8 x i1> %9, <8 x i16> %8, <8 x i16> %passThru
ret <8 x i16> %10
}
; Signed saturating i16 subtract on <8 x i16>, register operands, with the
; result zero-masked by %mask. The assertions below expect one vpsubsw with the
; {z} zeroing modifier.
define <8 x i16> @test_mask_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
; Sign-extend to i32, subtract, clamp to [-32768, 32767] (signed min, then
; signed max), narrow to i16.
%1 = sext <8 x i16> %a to <8 x i32>
%2 = sext <8 x i16> %b to <8 x i32>
%3 = sub nsw <8 x i32> %1, %2
%4 = icmp slt <8 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <8 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <8 x i1> %6, <8 x i32> %5, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <8 x i32> %7 to <8 x i16>
; Reinterpret %mask as 8 per-lane predicates; clear lanes become zero.
%9 = bitcast i8 %mask to <8 x i1>
%10 = select <8 x i1> %9, <8 x i16> %8, <8 x i16> zeroinitializer
ret <8 x i16> %10
}
; Unmasked signed saturating i16 subtract on <8 x i16>, with %b loaded from
; %ptr_b. The assertions below expect a single vpsubsw with the load folded
; into its memory operand.
define <8 x i16> @test_mask_subs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epi16_rm_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubsw (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
; Sign-extend to i32, subtract, clamp to [-32768, 32767] (signed min, then
; signed max), narrow to i16.
%b = load <8 x i16>, <8 x i16>* %ptr_b
%1 = sext <8 x i16> %a to <8 x i32>
%2 = sext <8 x i16> %b to <8 x i32>
%3 = sub nsw <8 x i32> %1, %2
%4 = icmp slt <8 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <8 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <8 x i1> %6, <8 x i32> %5, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <8 x i32> %7 to <8 x i16>
ret <8 x i16> %8
}
; Signed saturating i16 subtract on <8 x i16>, with %b loaded from %ptr_b and
; the result merge-masked into %passThru. The assertions below expect one
; masked vpsubsw with a folded memory operand.
define <8 x i16> @test_mask_subs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpsubsw (%rdi), %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
; Sign-extend to i32, subtract, clamp to [-32768, 32767] (signed min, then
; signed max), narrow to i16.
%b = load <8 x i16>, <8 x i16>* %ptr_b
%1 = sext <8 x i16> %a to <8 x i32>
%2 = sext <8 x i16> %b to <8 x i32>
%3 = sub nsw <8 x i32> %1, %2
%4 = icmp slt <8 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <8 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <8 x i1> %6, <8 x i32> %5, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <8 x i32> %7 to <8 x i16>
; Reinterpret %mask as 8 per-lane predicates; clear lanes take %passThru.
%9 = bitcast i8 %mask to <8 x i1>
%10 = select <8 x i1> %9, <8 x i16> %8, <8 x i16> %passThru
ret <8 x i16> %10
}
; Signed saturating i16 subtract on <8 x i16>, with %b loaded from %ptr_b and
; the result zero-masked by %mask. The assertions below expect one vpsubsw with
; a folded memory operand and the {z} zeroing modifier.
define <8 x i16> @test_mask_subs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
; Sign-extend to i32, subtract, clamp to [-32768, 32767] (signed min, then
; signed max), narrow to i16.
%b = load <8 x i16>, <8 x i16>* %ptr_b
%1 = sext <8 x i16> %a to <8 x i32>
%2 = sext <8 x i16> %b to <8 x i32>
%3 = sub nsw <8 x i32> %1, %2
%4 = icmp slt <8 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <8 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <8 x i1> %6, <8 x i32> %5, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <8 x i32> %7 to <8 x i16>
; Reinterpret %mask as 8 per-lane predicates; clear lanes become zero.
%9 = bitcast i8 %mask to <8 x i1>
%10 = select <8 x i1> %9, <8 x i16> %8, <8 x i16> zeroinitializer
ret <8 x i16> %10
}
; Unmasked signed saturating i16 subtract on <16 x i16>, register operands. The
; assertions below expect a single vpsubsw on ymm registers.
define <16 x i16> @test_mask_subs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epi16_rr_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubsw %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
; Sign-extend to i32, subtract, clamp to [-32768, 32767] (signed min, then
; signed max), narrow to i16.
%1 = sext <16 x i16> %a to <16 x i32>
%2 = sext <16 x i16> %b to <16 x i32>
%3 = sub nsw <16 x i32> %1, %2
%4 = icmp slt <16 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <16 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <16 x i1> %6, <16 x i32> %5, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <16 x i32> %7 to <16 x i16>
ret <16 x i16> %8
}
; Signed saturating 16-bit subtract, 16 lanes, register-register form with
; merge masking. %mask is bitcast to <16 x i1> and selects per lane between
; the saturated difference and %passThru. The assertions below expect the
; mask in %k1, a masked vpsubsw writing %ymm2, and a copy of %ymm2 to %ymm0.
define <16 x i16> @test_mask_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrk_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpsubsw %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: retq
%1 = sext <16 x i16> %a to <16 x i32>
%2 = sext <16 x i16> %b to <16 x i32>
%3 = sub nsw <16 x i32> %1, %2
; Clamp the widened difference to [-32768, 32767] before truncating.
%4 = icmp slt <16 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <16 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <16 x i1> %6, <16 x i32> %5, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <16 x i32> %7 to <16 x i16>
; Per-lane merge: mask bit set -> saturated result, clear -> %passThru.
%9 = bitcast i16 %mask to <16 x i1>
%10 = select <16 x i1> %9, <16 x i16> %8, <16 x i16> %passThru
ret <16 x i16> %10
}
; Signed saturating 16-bit subtract, 16 lanes, register-register form with
; zero masking. %mask is bitcast to <16 x i1> and selects per lane between
; the saturated difference and zero. The assertions below expect the mask in
; %k1 and a single vpsubsw carrying the {%k1} {z} modifiers.
define <16 x i16> @test_mask_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrkz_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = sext <16 x i16> %a to <16 x i32>
%2 = sext <16 x i16> %b to <16 x i32>
%3 = sub nsw <16 x i32> %1, %2
; Clamp the widened difference to [-32768, 32767] before truncating.
%4 = icmp slt <16 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <16 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <16 x i1> %6, <16 x i32> %5, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <16 x i32> %7 to <16 x i16>
; Per-lane zeroing: mask bit set -> saturated result, clear -> 0.
%9 = bitcast i16 %mask to <16 x i1>
%10 = select <16 x i1> %9, <16 x i16> %8, <16 x i16> zeroinitializer
ret <16 x i16> %10
}
; Signed saturating 16-bit subtract, 16 lanes, register-memory form.
; The second operand is loaded from %ptr_b; the assertions below expect the
; load to be folded into vpsubsw as a (%rdi) memory operand.
define <16 x i16> @test_mask_subs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epi16_rm_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubsw (%rdi), %ymm0, %ymm0
; CHECK-NEXT: retq
%b = load <16 x i16>, <16 x i16>* %ptr_b
%1 = sext <16 x i16> %a to <16 x i32>
%2 = sext <16 x i16> %b to <16 x i32>
%3 = sub nsw <16 x i32> %1, %2
; Clamp the widened difference to [-32768, 32767] before truncating.
%4 = icmp slt <16 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <16 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <16 x i1> %6, <16 x i32> %5, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <16 x i32> %7 to <16 x i16>
ret <16 x i16> %8
}
; Signed saturating 16-bit subtract, 16 lanes, register-memory form with
; merge masking. The second operand is loaded from %ptr_b and %mask selects
; per lane between the saturated difference and %passThru. The assertions
; below expect the mask in %k1, a masked vpsubsw with a (%rdi) memory operand
; writing %ymm1, and a copy of %ymm1 to %ymm0.
define <16 x i16> @test_mask_subs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmk_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpsubsw (%rdi), %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%b = load <16 x i16>, <16 x i16>* %ptr_b
%1 = sext <16 x i16> %a to <16 x i32>
%2 = sext <16 x i16> %b to <16 x i32>
%3 = sub nsw <16 x i32> %1, %2
; Clamp the widened difference to [-32768, 32767] before truncating.
%4 = icmp slt <16 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <16 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <16 x i1> %6, <16 x i32> %5, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <16 x i32> %7 to <16 x i16>
; Per-lane merge: mask bit set -> saturated result, clear -> %passThru.
%9 = bitcast i16 %mask to <16 x i1>
%10 = select <16 x i1> %9, <16 x i16> %8, <16 x i16> %passThru
ret <16 x i16> %10
}
; Signed saturating 16-bit subtract, 16 lanes, register-memory form with
; zero masking. The second operand is loaded from %ptr_b and %mask selects
; per lane between the saturated difference and zero. The assertions below
; expect the mask in %k1 and one vpsubsw with a (%rdi) memory operand and
; the {%k1} {z} modifiers.
define <16 x i16> @test_mask_subs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmkz_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%b = load <16 x i16>, <16 x i16>* %ptr_b
%1 = sext <16 x i16> %a to <16 x i32>
%2 = sext <16 x i16> %b to <16 x i32>
%3 = sub nsw <16 x i32> %1, %2
; Clamp the widened difference to [-32768, 32767] before truncating.
%4 = icmp slt <16 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%6 = icmp sgt <16 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%7 = select <16 x i1> %6, <16 x i32> %5, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%8 = trunc <16 x i32> %7 to <16 x i16>
; Per-lane zeroing: mask bit set -> saturated result, clear -> 0.
%9 = bitcast i16 %mask to <16 x i1>
%10 = select <16 x i1> %9, <16 x i16> %8, <16 x i16> zeroinitializer
ret <16 x i16> %10
}
; Unsigned saturating 16-bit add, 8 lanes, register-register form.
; Both operands are zero-extended to i32, added, capped at 65535 and
; truncated back to i16. The assertions below expect a single vpaddusw on
; xmm registers.
define <8 x i16> @test_mask_adds_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epu16_rr_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = zext <8 x i16> %a to <8 x i32>
%2 = zext <8 x i16> %b to <8 x i32>
%3 = add nsw <8 x i32> %1, %2
; Upper cap: keep %3 where it is below 65535 (unsigned), otherwise use 65535.
%4 = icmp ult <8 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%6 = trunc <8 x i32> %5 to <8 x i16>
ret <8 x i16> %6
}
; Unsigned saturating 16-bit add, 8 lanes, register-register form with merge
; masking. %mask is bitcast to <8 x i1> and selects per lane between the
; saturated sum and %passThru. The assertions below expect the mask in %k1,
; a masked vpaddusw writing %xmm2, and a copy of %xmm2 to %xmm0.
define <8 x i16> @test_mask_adds_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: retq
%1 = zext <8 x i16> %a to <8 x i32>
%2 = zext <8 x i16> %b to <8 x i32>
%3 = add nsw <8 x i32> %1, %2
; Cap the widened sum at 65535 before truncating.
%4 = icmp ult <8 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%6 = trunc <8 x i32> %5 to <8 x i16>
; Per-lane merge: mask bit set -> saturated result, clear -> %passThru.
%7 = bitcast i8 %mask to <8 x i1>
%8 = select <8 x i1> %7, <8 x i16> %6, <8 x i16> %passThru
ret <8 x i16> %8
}
; Unsigned saturating 16-bit add, 8 lanes, register-register form with zero
; masking. %mask is bitcast to <8 x i1> and selects per lane between the
; saturated sum and zero. The assertions below expect the mask in %k1 and a
; single vpaddusw carrying the {%k1} {z} modifiers.
define <8 x i16> @test_mask_adds_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = zext <8 x i16> %a to <8 x i32>
%2 = zext <8 x i16> %b to <8 x i32>
%3 = add nsw <8 x i32> %1, %2
; Cap the widened sum at 65535 before truncating.
%4 = icmp ult <8 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%6 = trunc <8 x i32> %5 to <8 x i16>
; Per-lane zeroing: mask bit set -> saturated result, clear -> 0.
%7 = bitcast i8 %mask to <8 x i1>
%8 = select <8 x i1> %7, <8 x i16> %6, <8 x i16> zeroinitializer
ret <8 x i16> %8
}
; Unsigned saturating 16-bit add, 8 lanes, register-memory form.
; The second operand is loaded from %ptr_b; the assertions below expect the
; load to be folded into vpaddusw as a (%rdi) memory operand.
define <8 x i16> @test_mask_adds_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epu16_rm_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddusw (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
%b = load <8 x i16>, <8 x i16>* %ptr_b
%1 = zext <8 x i16> %a to <8 x i32>
%2 = zext <8 x i16> %b to <8 x i32>
%3 = add nsw <8 x i32> %1, %2
; Cap the widened sum at 65535 before truncating.
%4 = icmp ult <8 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%6 = trunc <8 x i32> %5 to <8 x i16>
ret <8 x i16> %6
}
; Unsigned saturating 16-bit add, 8 lanes, register-memory form with merge
; masking. The second operand is loaded from %ptr_b and %mask selects per
; lane between the saturated sum and %passThru. The assertions below expect
; the mask in %k1, a masked vpaddusw with a (%rdi) memory operand writing
; %xmm1, and a copy of %xmm1 to %xmm0.
define <8 x i16> @test_mask_adds_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpaddusw (%rdi), %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%b = load <8 x i16>, <8 x i16>* %ptr_b
%1 = zext <8 x i16> %a to <8 x i32>
%2 = zext <8 x i16> %b to <8 x i32>
%3 = add nsw <8 x i32> %1, %2
; Cap the widened sum at 65535 before truncating.
%4 = icmp ult <8 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%6 = trunc <8 x i32> %5 to <8 x i16>
; Per-lane merge: mask bit set -> saturated result, clear -> %passThru.
%7 = bitcast i8 %mask to <8 x i1>
%8 = select <8 x i1> %7, <8 x i16> %6, <8 x i16> %passThru
ret <8 x i16> %8
}
; Unsigned saturating 16-bit add, 8 lanes, register-memory form with zero
; masking. The second operand is loaded from %ptr_b and %mask selects per
; lane between the saturated sum and zero. The assertions below expect the
; mask in %k1 and one vpaddusw with a (%rdi) memory operand and the
; {%k1} {z} modifiers.
define <8 x i16> @test_mask_adds_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%b = load <8 x i16>, <8 x i16>* %ptr_b
%1 = zext <8 x i16> %a to <8 x i32>
%2 = zext <8 x i16> %b to <8 x i32>
%3 = add nsw <8 x i32> %1, %2
; Cap the widened sum at 65535 before truncating.
%4 = icmp ult <8 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%6 = trunc <8 x i32> %5 to <8 x i16>
; Per-lane zeroing: mask bit set -> saturated result, clear -> 0.
%7 = bitcast i8 %mask to <8 x i1>
%8 = select <8 x i1> %7, <8 x i16> %6, <8 x i16> zeroinitializer
ret <8 x i16> %8
}
; Unsigned saturating 16-bit add, 16 lanes, register-register form.
; Both operands are zero-extended to i32, added, capped at 65535 and
; truncated back to i16. The assertions below expect a single vpaddusw on
; ymm registers.
define <16 x i16> @test_mask_adds_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epu16_rr_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddusw %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = zext <16 x i16> %a to <16 x i32>
%2 = zext <16 x i16> %b to <16 x i32>
%3 = add nsw <16 x i32> %1, %2
; Upper cap: keep %3 where it is below 65535 (unsigned), otherwise use 65535.
%4 = icmp ult <16 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%6 = trunc <16 x i32> %5 to <16 x i16>
ret <16 x i16> %6
}
; Unsigned saturating 16-bit add, 16 lanes, register-register form with
; merge masking. %mask is bitcast to <16 x i1> and selects per lane between
; the saturated sum and %passThru. The assertions below expect the mask in
; %k1, a masked vpaddusw writing %ymm2, and a copy of %ymm2 to %ymm0.
define <16 x i16> @test_mask_adds_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrk_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddusw %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: retq
%1 = zext <16 x i16> %a to <16 x i32>
%2 = zext <16 x i16> %b to <16 x i32>
%3 = add nsw <16 x i32> %1, %2
; Cap the widened sum at 65535 before truncating.
%4 = icmp ult <16 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%6 = trunc <16 x i32> %5 to <16 x i16>
; Per-lane merge: mask bit set -> saturated result, clear -> %passThru.
%7 = bitcast i16 %mask to <16 x i1>
%8 = select <16 x i1> %7, <16 x i16> %6, <16 x i16> %passThru
ret <16 x i16> %8
}
; Unsigned saturating 16-bit add, 16 lanes, register-register form with zero
; masking. %mask is bitcast to <16 x i1> and selects per lane between the
; saturated sum and zero. The assertions below expect the mask in %k1 and a
; single vpaddusw carrying the {%k1} {z} modifiers.
define <16 x i16> @test_mask_adds_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrkz_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = zext <16 x i16> %a to <16 x i32>
%2 = zext <16 x i16> %b to <16 x i32>
%3 = add nsw <16 x i32> %1, %2
; Cap the widened sum at 65535 before truncating.
%4 = icmp ult <16 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%6 = trunc <16 x i32> %5 to <16 x i16>
; Per-lane zeroing: mask bit set -> saturated result, clear -> 0.
%7 = bitcast i16 %mask to <16 x i1>
%8 = select <16 x i1> %7, <16 x i16> %6, <16 x i16> zeroinitializer
ret <16 x i16> %8
}
; Unsigned saturating 16-bit add, 16 lanes, register-memory form.
; The second operand is loaded from %ptr_b; the assertions below expect the
; load to be folded into vpaddusw as a (%rdi) memory operand.
define <16 x i16> @test_mask_adds_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epu16_rm_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddusw (%rdi), %ymm0, %ymm0
; CHECK-NEXT: retq
%b = load <16 x i16>, <16 x i16>* %ptr_b
%1 = zext <16 x i16> %a to <16 x i32>
%2 = zext <16 x i16> %b to <16 x i32>
%3 = add nsw <16 x i32> %1, %2
; Cap the widened sum at 65535 before truncating.
%4 = icmp ult <16 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%6 = trunc <16 x i32> %5 to <16 x i16>
ret <16 x i16> %6
}
; Unsigned saturating 16-bit add, 16 lanes, register-memory form with merge
; masking. The second operand is loaded from %ptr_b and %mask selects per
; lane between the saturated sum and %passThru. The assertions below expect
; the mask in %k1, a masked vpaddusw with a (%rdi) memory operand writing
; %ymm1, and a copy of %ymm1 to %ymm0.
define <16 x i16> @test_mask_adds_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmk_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpaddusw (%rdi), %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%b = load <16 x i16>, <16 x i16>* %ptr_b
%1 = zext <16 x i16> %a to <16 x i32>
%2 = zext <16 x i16> %b to <16 x i32>
%3 = add nsw <16 x i32> %1, %2
; Cap the widened sum at 65535 before truncating.
%4 = icmp ult <16 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%6 = trunc <16 x i32> %5 to <16 x i16>
; Per-lane merge: mask bit set -> saturated result, clear -> %passThru.
%7 = bitcast i16 %mask to <16 x i1>
%8 = select <16 x i1> %7, <16 x i16> %6, <16 x i16> %passThru
ret <16 x i16> %8
}
; Unsigned saturating 16-bit add, 16 lanes, register-memory form with zero
; masking. The second operand is loaded from %ptr_b and %mask selects per
; lane between the saturated sum and zero. The assertions below expect the
; mask in %k1 and one vpaddusw with a (%rdi) memory operand and the
; {%k1} {z} modifiers.
define <16 x i16> @test_mask_adds_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmkz_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%b = load <16 x i16>, <16 x i16>* %ptr_b
%1 = zext <16 x i16> %a to <16 x i32>
%2 = zext <16 x i16> %b to <16 x i32>
%3 = add nsw <16 x i32> %1, %2
; Cap the widened sum at 65535 before truncating.
%4 = icmp ult <16 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%6 = trunc <16 x i32> %5 to <16 x i16>
; Per-lane zeroing: mask bit set -> saturated result, clear -> 0.
%7 = bitcast i16 %mask to <16 x i1>
%8 = select <16 x i1> %7, <16 x i16> %6, <16 x i16> zeroinitializer
ret <16 x i16> %8
}
; Unsigned saturating 16-bit subtract, 8 lanes, register-register form.
; The select picks the unsigned larger of %a and %b per lane, so subtracting
; %b gives %a - %b where %a > %b and 0 elsewhere. The assertions below expect
; a single vpsubusw on xmm registers.
define <8 x i16> @test_mask_subs_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epu16_rr_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%cmp = icmp ugt <8 x i16> %a, %b
%sel = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
%sub = sub <8 x i16> %sel, %b
ret <8 x i16> %sub
}
; Unsigned saturating 16-bit subtract, 8 lanes, register-register form with
; merge masking. The difference is computed as max(%a, %b) - %b (unsigned),
; then %mask selects per lane between it and %passThru. The assertions below
; expect the mask in %k1, a masked vpsubusw writing %xmm2, and a copy of
; %xmm2 to %xmm0.
define <8 x i16> @test_mask_subs_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: retq
%cmp = icmp ugt <8 x i16> %a, %b
%sel = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
%sub = sub <8 x i16> %sel, %b
; Per-lane merge: mask bit set -> saturated result, clear -> %passThru.
%bc = bitcast i8 %mask to <8 x i1>
%res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> %passThru
ret <8 x i16> %res
}
; Unsigned saturating 16-bit subtract, 8 lanes, register-register form with
; zero masking. The difference is computed as max(%a, %b) - %b (unsigned),
; then %mask selects per lane between it and zero. The assertions below
; expect the mask in %k1 and a single vpsubusw carrying {%k1} {z}.
define <8 x i16> @test_mask_subs_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%cmp = icmp ugt <8 x i16> %a, %b
%sel = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
%sub = sub <8 x i16> %sel, %b
; Per-lane zeroing: mask bit set -> saturated result, clear -> 0.
%bc = bitcast i8 %mask to <8 x i1>
%res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> zeroinitializer
ret <8 x i16> %res
}
; Unsigned saturating 16-bit subtract, 8 lanes, register-memory form.
; %b is loaded from %ptr_b and the result is max(%a, %b) - %b (unsigned).
; The assertions below expect the load to be folded into vpsubusw as a
; (%rdi) memory operand.
define <8 x i16> @test_mask_subs_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epu16_rm_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubusw (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
%b = load <8 x i16>, <8 x i16>* %ptr_b
%cmp = icmp ugt <8 x i16> %a, %b
%sel = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
%sub = sub <8 x i16> %sel, %b
ret <8 x i16> %sub
}
; Unsigned saturating 16-bit subtract, 8 lanes, register-memory form with
; merge masking. %b is loaded from %ptr_b, the difference is
; max(%a, %b) - %b (unsigned), and %mask selects per lane between it and
; %passThru. The assertions below expect the mask in %k1, a masked vpsubusw
; with a (%rdi) memory operand writing %xmm1, and a copy of %xmm1 to %xmm0.
define <8 x i16> @test_mask_subs_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpsubusw (%rdi), %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%b = load <8 x i16>, <8 x i16>* %ptr_b
%cmp = icmp ugt <8 x i16> %a, %b
%sel = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
%sub = sub <8 x i16> %sel, %b
; Per-lane merge: mask bit set -> saturated result, clear -> %passThru.
%bc = bitcast i8 %mask to <8 x i1>
%res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> %passThru
ret <8 x i16> %res
}
; Unsigned saturating 16-bit subtract, 8 lanes, register-memory form with
; zero masking. %b is loaded from %ptr_b, the difference is
; max(%a, %b) - %b (unsigned), and %mask selects per lane between it and
; zero. The assertions below expect the mask in %k1 and one vpsubusw with a
; (%rdi) memory operand and the {%k1} {z} modifiers.
define <8 x i16> @test_mask_subs_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%b = load <8 x i16>, <8 x i16>* %ptr_b
%cmp = icmp ugt <8 x i16> %a, %b
%sel = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
%sub = sub <8 x i16> %sel, %b
; Per-lane zeroing: mask bit set -> saturated result, clear -> 0.
%bc = bitcast i8 %mask to <8 x i1>
%res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> zeroinitializer
ret <8 x i16> %res
}
; Unsigned saturating 16-bit subtract, 16 lanes, register-register form.
; The select picks the unsigned larger of %a and %b per lane, so subtracting
; %b gives %a - %b where %a > %b and 0 elsewhere. The assertions below expect
; a single vpsubusw on ymm registers.
define <16 x i16> @test_mask_subs_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epu16_rr_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubusw %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%cmp = icmp ugt <16 x i16> %a, %b
%sel = select <16 x i1> %cmp, <16 x i16> %a, <16 x i16> %b
%sub = sub <16 x i16> %sel, %b
ret <16 x i16> %sub
}
; Unsigned saturating 16-bit subtract, 16 lanes, register-register form with
; merge masking. The difference is computed as max(%a, %b) - %b (unsigned),
; then %mask selects per lane between it and %passThru. The assertions below
; expect the mask in %k1, a masked vpsubusw writing %ymm2, and a copy of
; %ymm2 to %ymm0.
define <16 x i16> @test_mask_subs_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrk_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpsubusw %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: retq
%cmp = icmp ugt <16 x i16> %a, %b
%sel = select <16 x i1> %cmp, <16 x i16> %a, <16 x i16> %b
%sub = sub <16 x i16> %sel, %b
; Per-lane merge: mask bit set -> saturated result, clear -> %passThru.
%bc = bitcast i16 %mask to <16 x i1>
%res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> %passThru
ret <16 x i16> %res
}
; Unsigned saturating 16-bit subtract, 16 lanes, register-register form with
; zero masking. The difference is computed as max(%a, %b) - %b (unsigned),
; then %mask selects per lane between it and zero. The assertions below
; expect the mask in %k1 and a single vpsubusw carrying {%k1} {z}.
define <16 x i16> @test_mask_subs_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrkz_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%cmp = icmp ugt <16 x i16> %a, %b
%sel = select <16 x i1> %cmp, <16 x i16> %a, <16 x i16> %b
%sub = sub <16 x i16> %sel, %b
; Per-lane zeroing: mask bit set -> saturated result, clear -> 0.
%bc = bitcast i16 %mask to <16 x i1>
%res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> zeroinitializer
ret <16 x i16> %res
}
; Unsigned saturating 16-bit subtract, 16 lanes, register-memory form.
; %b is loaded from %ptr_b and the result is max(%a, %b) - %b (unsigned).
; The assertions below expect the load to be folded into vpsubusw as a
; (%rdi) memory operand.
define <16 x i16> @test_mask_subs_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epu16_rm_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubusw (%rdi), %ymm0, %ymm0
; CHECK-NEXT: retq
%b = load <16 x i16>, <16 x i16>* %ptr_b
%cmp = icmp ugt <16 x i16> %a, %b
%sel = select <16 x i1> %cmp, <16 x i16> %a, <16 x i16> %b
%sub = sub <16 x i16> %sel, %b
ret <16 x i16> %sub
}
; Unsigned saturating 16-bit subtract, 16 lanes, register-memory form with
; merge masking. %b is loaded from %ptr_b, the difference is
; max(%a, %b) - %b (unsigned), and %mask selects per lane between it and
; %passThru. The assertions below expect the mask in %k1, a masked vpsubusw
; with a (%rdi) memory operand writing %ymm1, and a copy of %ymm1 to %ymm0.
define <16 x i16> @test_mask_subs_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmk_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpsubusw (%rdi), %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%b = load <16 x i16>, <16 x i16>* %ptr_b
%cmp = icmp ugt <16 x i16> %a, %b
%sel = select <16 x i1> %cmp, <16 x i16> %a, <16 x i16> %b
%sub = sub <16 x i16> %sel, %b
; Per-lane merge: mask bit set -> saturated result, clear -> %passThru.
%bc = bitcast i16 %mask to <16 x i1>
%res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> %passThru
ret <16 x i16> %res
}
; Unsigned saturating 16-bit subtract, 16 lanes, register-memory form with
; zero masking. %b is loaded from %ptr_b, the difference is
; max(%a, %b) - %b (unsigned), and %mask selects per lane between it and
; zero. The assertions below expect the mask in %k1 and one vpsubusw with a
; (%rdi) memory operand and the {%k1} {z} modifiers.
define <16 x i16> @test_mask_subs_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmkz_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%b = load <16 x i16>, <16 x i16>* %ptr_b
%cmp = icmp ugt <16 x i16> %a, %b
%sel = select <16 x i1> %cmp, <16 x i16> %a, <16 x i16> %b
%sub = sub <16 x i16> %sel, %b
; Per-lane zeroing: mask bit set -> saturated result, clear -> 0.
%bc = bitcast i16 %mask to <16 x i1>
%res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> zeroinitializer
ret <16 x i16> %res
}
; Signed saturating 8-bit add, 16 lanes, register-register form.
; Both operands are sign-extended to i16, added, clamped to [-128, 127] and
; truncated back to i8. The assertions below expect the whole sequence to be
; emitted as a single vpaddsb on xmm registers.
define <16 x i8> @test_mask_adds_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epi8_rr_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = sext <16 x i8> %a to <16 x i16>
%2 = sext <16 x i8> %b to <16 x i16>
%3 = add nsw <16 x i16> %1, %2
; Upper clamp: keep %3 where it is below 127, otherwise use 127.
%4 = icmp slt <16 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
; Lower clamp: keep %5 where it is above -128, otherwise use -128.
%6 = icmp sgt <16 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <16 x i1> %6, <16 x i16> %5, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <16 x i16> %7 to <16 x i8>
ret <16 x i8> %8
}
; Signed saturating 8-bit add, 16 lanes, register-register form with merge
; masking. %mask is bitcast to <16 x i1> and selects per lane between the
; saturated sum and %passThru. The assertions below expect the mask in %k1,
; a masked vpaddsb writing %xmm2, and a copy of %xmm2 to %xmm0.
define <16 x i8> @test_mask_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rrk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: retq
%1 = sext <16 x i8> %a to <16 x i16>
%2 = sext <16 x i8> %b to <16 x i16>
%3 = add nsw <16 x i16> %1, %2
; Clamp the widened sum to [-128, 127] before truncating.
%4 = icmp slt <16 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%6 = icmp sgt <16 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <16 x i1> %6, <16 x i16> %5, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <16 x i16> %7 to <16 x i8>
; Per-lane merge: mask bit set -> saturated result, clear -> %passThru.
%9 = bitcast i16 %mask to <16 x i1>
%10 = select <16 x i1> %9, <16 x i8> %8, <16 x i8> %passThru
ret <16 x i8> %10
}
; Signed saturating 8-bit add, 16 lanes, register-register form with zero
; masking. %mask is bitcast to <16 x i1> and selects per lane between the
; saturated sum and zero. The assertions below expect the mask in %k1 and a
; single vpaddsb carrying the {%k1} {z} modifiers.
define <16 x i8> @test_mask_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rrkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = sext <16 x i8> %a to <16 x i16>
%2 = sext <16 x i8> %b to <16 x i16>
%3 = add nsw <16 x i16> %1, %2
; Clamp the widened sum to [-128, 127] before truncating.
%4 = icmp slt <16 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%6 = icmp sgt <16 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <16 x i1> %6, <16 x i16> %5, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <16 x i16> %7 to <16 x i8>
; Per-lane zeroing: mask bit set -> saturated result, clear -> 0.
%9 = bitcast i16 %mask to <16 x i1>
%10 = select <16 x i1> %9, <16 x i8> %8, <16 x i8> zeroinitializer
ret <16 x i8> %10
}
; Signed saturating 8-bit add, 16 lanes, register-memory form.
; The second operand is loaded from %ptr_b; the assertions below expect the
; load to be folded into vpaddsb as a (%rdi) memory operand.
define <16 x i8> @test_mask_adds_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epi8_rm_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddsb (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
%b = load <16 x i8>, <16 x i8>* %ptr_b
%1 = sext <16 x i8> %a to <16 x i16>
%2 = sext <16 x i8> %b to <16 x i16>
%3 = add nsw <16 x i16> %1, %2
; Clamp the widened sum to [-128, 127] before truncating.
%4 = icmp slt <16 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%6 = icmp sgt <16 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <16 x i1> %6, <16 x i16> %5, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <16 x i16> %7 to <16 x i8>
ret <16 x i8> %8
}
; Signed saturating 8-bit add, 16 lanes, register-memory form with merge
; masking. The second operand is loaded from %ptr_b and %mask selects per
; lane between the saturated sum and %passThru. The assertions below expect
; the mask in %k1, a masked vpaddsb with a (%rdi) memory operand writing
; %xmm1, and a copy of %xmm1 to %xmm0.
define <16 x i8> @test_mask_adds_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rmk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpaddsb (%rdi), %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%b = load <16 x i8>, <16 x i8>* %ptr_b
%1 = sext <16 x i8> %a to <16 x i16>
%2 = sext <16 x i8> %b to <16 x i16>
%3 = add nsw <16 x i16> %1, %2
; Clamp the widened sum to [-128, 127] before truncating.
%4 = icmp slt <16 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%6 = icmp sgt <16 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <16 x i1> %6, <16 x i16> %5, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <16 x i16> %7 to <16 x i8>
; Per-lane merge: mask bit set -> saturated result, clear -> %passThru.
%9 = bitcast i16 %mask to <16 x i1>
%10 = select <16 x i1> %9, <16 x i8> %8, <16 x i8> %passThru
ret <16 x i8> %10
}
; Signed saturating 8-bit add, 16 lanes, register-memory form with zero
; masking. The second operand is loaded from %ptr_b and %mask selects per
; lane between the saturated sum and zero. The assertions below expect the
; mask in %k1 and one vpaddsb with a (%rdi) memory operand and the
; {%k1} {z} modifiers.
define <16 x i8> @test_mask_adds_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rmkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%b = load <16 x i8>, <16 x i8>* %ptr_b
%1 = sext <16 x i8> %a to <16 x i16>
%2 = sext <16 x i8> %b to <16 x i16>
%3 = add nsw <16 x i16> %1, %2
; Clamp the widened sum to [-128, 127] before truncating.
%4 = icmp slt <16 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%6 = icmp sgt <16 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <16 x i1> %6, <16 x i16> %5, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <16 x i16> %7 to <16 x i8>
; Per-lane zeroing: mask bit set -> saturated result, clear -> 0.
%9 = bitcast i16 %mask to <16 x i1>
%10 = select <16 x i1> %9, <16 x i8> %8, <16 x i8> zeroinitializer
ret <16 x i8> %10
}
define <32 x i8> @test_mask_adds_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epi8_rr_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddsb %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = sext <32 x i8> %a to <32 x i16>
%2 = sext <32 x i8> %b to <32 x i16>
%3 = add nsw <32 x i16> %1, %2
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <32 x i16> %7 to <32 x i8>
ret <32 x i8> %8
}
define <32 x i8> @test_mask_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rrk_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddsb %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: retq
%1 = sext <32 x i8> %a to <32 x i16>
%2 = sext <32 x i8> %b to <32 x i16>
%3 = add nsw <32 x i16> %1, %2
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <32 x i16> %7 to <32 x i8>
%9 = bitcast i32 %mask to <32 x i1>
%10 = select <32 x i1> %9, <32 x i8> %8, <32 x i8> %passThru
ret <32 x i8> %10
}
define <32 x i8> @test_mask_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rrkz_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = sext <32 x i8> %a to <32 x i16>
%2 = sext <32 x i8> %b to <32 x i16>
%3 = add nsw <32 x i16> %1, %2
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <32 x i16> %7 to <32 x i8>
%9 = bitcast i32 %mask to <32 x i1>
%10 = select <32 x i1> %9, <32 x i8> %8, <32 x i8> zeroinitializer
ret <32 x i8> %10
}
define <32 x i8> @test_mask_adds_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epi8_rm_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddsb (%rdi), %ymm0, %ymm0
; CHECK-NEXT: retq
%b = load <32 x i8>, <32 x i8>* %ptr_b
%1 = sext <32 x i8> %a to <32 x i16>
%2 = sext <32 x i8> %b to <32 x i16>
%3 = add nsw <32 x i16> %1, %2
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <32 x i16> %7 to <32 x i8>
ret <32 x i8> %8
}
define <32 x i8> @test_mask_adds_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rmk_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpaddsb (%rdi), %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%b = load <32 x i8>, <32 x i8>* %ptr_b
%1 = sext <32 x i8> %a to <32 x i16>
%2 = sext <32 x i8> %b to <32 x i16>
%3 = add nsw <32 x i16> %1, %2
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <32 x i16> %7 to <32 x i8>
%9 = bitcast i32 %mask to <32 x i1>
%10 = select <32 x i1> %9, <32 x i8> %8, <32 x i8> %passThru
ret <32 x i8> %10
}
define <32 x i8> @test_mask_adds_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rmkz_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%b = load <32 x i8>, <32 x i8>* %ptr_b
%1 = sext <32 x i8> %a to <32 x i16>
%2 = sext <32 x i8> %b to <32 x i16>
%3 = add nsw <32 x i16> %1, %2
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <32 x i16> %7 to <32 x i8>
%9 = bitcast i32 %mask to <32 x i1>
%10 = select <32 x i1> %9, <32 x i8> %8, <32 x i8> zeroinitializer
ret <32 x i8> %10
}
; Unmasked signed saturating subtract of two <16 x i8> vectors, written in
; generic IR: sign-extend both operands to i16, subtract, clamp to
; [-128, 127], truncate.
; The assertions below expect a single vpsubsb on xmm registers.
define <16 x i8> @test_mask_subs_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_subs_epi8_rr_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = sext <16 x i8> %a to <16 x i16>
%2 = sext <16 x i8> %b to <16 x i16>
%3 = sub nsw <16 x i16> %1, %2
; Upper clamp: keep the difference where it is below 127, otherwise use 127.
%4 = icmp slt <16 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
; Lower clamp: keep the value where it is above -128, otherwise use -128.
%6 = icmp sgt <16 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <16 x i1> %6, <16 x i16> %5, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <16 x i16> %7 to <16 x i8>
ret <16 x i8> %8
}
; Merge-masked signed saturating subtract of two <16 x i8> vectors. The
; saturated difference (sign-extend, subtract, clamp to [-128, 127], truncate)
; is blended with %passThru under the 16-bit %mask, one mask bit per lane.
; The assertions below expect the mask moved into k1, a vpsubsb writing xmm2
; under {%k1}, and a copy of xmm2 into xmm0.
define <16 x i8> @test_mask_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rrk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: retq
%1 = sext <16 x i8> %a to <16 x i16>
%2 = sext <16 x i8> %b to <16 x i16>
%3 = sub nsw <16 x i16> %1, %2
; Upper clamp: keep the difference where it is below 127, otherwise use 127.
%4 = icmp slt <16 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
; Lower clamp: keep the value where it is above -128, otherwise use -128.
%6 = icmp sgt <16 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <16 x i1> %6, <16 x i16> %5, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <16 x i16> %7 to <16 x i8>
; Per-lane blend: saturated result where the mask bit is set, %passThru elsewhere.
%9 = bitcast i16 %mask to <16 x i1>
%10 = select <16 x i1> %9, <16 x i8> %8, <16 x i8> %passThru
ret <16 x i8> %10
}
; Zero-masked signed saturating subtract of two <16 x i8> vectors. The
; saturated difference (sign-extend, subtract, clamp to [-128, 127], truncate)
; is kept in lanes whose %mask bit is set; all other lanes are zero.
; The assertions below expect the mask moved into k1 and a vpsubsb with
; {%k1} {z}.
define <16 x i8> @test_mask_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rrkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = sext <16 x i8> %a to <16 x i16>
%2 = sext <16 x i8> %b to <16 x i16>
%3 = sub nsw <16 x i16> %1, %2
; Upper clamp: keep the difference where it is below 127, otherwise use 127.
%4 = icmp slt <16 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
; Lower clamp: keep the value where it is above -128, otherwise use -128.
%6 = icmp sgt <16 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <16 x i1> %6, <16 x i16> %5, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <16 x i16> %7 to <16 x i8>
; Per-lane blend: saturated result where the mask bit is set, zero elsewhere.
%9 = bitcast i16 %mask to <16 x i1>
%10 = select <16 x i1> %9, <16 x i8> %8, <16 x i8> zeroinitializer
ret <16 x i8> %10
}
; Unmasked signed saturating subtract of a <16 x i8> value loaded from %ptr_b
; from a <16 x i8> register operand (sign-extend, subtract, clamp to
; [-128, 127], truncate).
; The assertions below expect a single vpsubsb taking (%rdi) as its memory
; operand.
define <16 x i8> @test_mask_subs_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epi8_rm_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubsb (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
%b = load <16 x i8>, <16 x i8>* %ptr_b
%1 = sext <16 x i8> %a to <16 x i16>
%2 = sext <16 x i8> %b to <16 x i16>
%3 = sub nsw <16 x i16> %1, %2
; Upper clamp: keep the difference where it is below 127, otherwise use 127.
%4 = icmp slt <16 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
; Lower clamp: keep the value where it is above -128, otherwise use -128.
%6 = icmp sgt <16 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <16 x i1> %6, <16 x i16> %5, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <16 x i16> %7 to <16 x i8>
ret <16 x i8> %8
}
; Merge-masked signed saturating subtract of a <16 x i8> value loaded from
; %ptr_b from a <16 x i8> register operand. The saturated difference is
; blended with %passThru under the 16-bit %mask, one mask bit per lane.
; The assertions below expect the mask moved from esi into k1, a vpsubsb with
; (%rdi) as memory operand writing xmm1 under {%k1}, and a copy of xmm1 into
; xmm0.
define <16 x i8> @test_mask_subs_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rmk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpsubsb (%rdi), %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%b = load <16 x i8>, <16 x i8>* %ptr_b
%1 = sext <16 x i8> %a to <16 x i16>
%2 = sext <16 x i8> %b to <16 x i16>
%3 = sub nsw <16 x i16> %1, %2
; Upper clamp: keep the difference where it is below 127, otherwise use 127.
%4 = icmp slt <16 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
; Lower clamp: keep the value where it is above -128, otherwise use -128.
%6 = icmp sgt <16 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <16 x i1> %6, <16 x i16> %5, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <16 x i16> %7 to <16 x i8>
; Per-lane blend: saturated result where the mask bit is set, %passThru elsewhere.
%9 = bitcast i16 %mask to <16 x i1>
%10 = select <16 x i1> %9, <16 x i8> %8, <16 x i8> %passThru
ret <16 x i8> %10
}
; Zero-masked signed saturating subtract of a <16 x i8> value loaded from
; %ptr_b from a <16 x i8> register operand. Lanes whose %mask bit is clear are
; zero.
; The assertions below expect the mask moved from esi into k1 and a vpsubsb
; with (%rdi) as memory operand and {%k1} {z}.
define <16 x i8> @test_mask_subs_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rmkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%b = load <16 x i8>, <16 x i8>* %ptr_b
%1 = sext <16 x i8> %a to <16 x i16>
%2 = sext <16 x i8> %b to <16 x i16>
%3 = sub nsw <16 x i16> %1, %2
; Upper clamp: keep the difference where it is below 127, otherwise use 127.
%4 = icmp slt <16 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
; Lower clamp: keep the value where it is above -128, otherwise use -128.
%6 = icmp sgt <16 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <16 x i1> %6, <16 x i16> %5, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <16 x i16> %7 to <16 x i8>
; Per-lane blend: saturated result where the mask bit is set, zero elsewhere.
%9 = bitcast i16 %mask to <16 x i1>
%10 = select <16 x i1> %9, <16 x i8> %8, <16 x i8> zeroinitializer
ret <16 x i8> %10
}
define <32 x i8> @test_mask_subs_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_mask_subs_epi8_rr_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubsb %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = sext <32 x i8> %a to <32 x i16>
%2 = sext <32 x i8> %b to <32 x i16>
%3 = sub nsw <32 x i16> %1, %2
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <32 x i16> %7 to <32 x i8>
ret <32 x i8> %8
}
define <32 x i8> @test_mask_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rrk_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpsubsb %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: retq
%1 = sext <32 x i8> %a to <32 x i16>
%2 = sext <32 x i8> %b to <32 x i16>
%3 = sub nsw <32 x i16> %1, %2
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <32 x i16> %7 to <32 x i8>
%9 = bitcast i32 %mask to <32 x i1>
%10 = select <32 x i1> %9, <32 x i8> %8, <32 x i8> %passThru
ret <32 x i8> %10
}
define <32 x i8> @test_mask_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rrkz_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = sext <32 x i8> %a to <32 x i16>
%2 = sext <32 x i8> %b to <32 x i16>
%3 = sub nsw <32 x i16> %1, %2
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <32 x i16> %7 to <32 x i8>
%9 = bitcast i32 %mask to <32 x i1>
%10 = select <32 x i1> %9, <32 x i8> %8, <32 x i8> zeroinitializer
ret <32 x i8> %10
}
define <32 x i8> @test_mask_subs_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epi8_rm_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubsb (%rdi), %ymm0, %ymm0
; CHECK-NEXT: retq
%b = load <32 x i8>, <32 x i8>* %ptr_b
%1 = sext <32 x i8> %a to <32 x i16>
%2 = sext <32 x i8> %b to <32 x i16>
%3 = sub nsw <32 x i16> %1, %2
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <32 x i16> %7 to <32 x i8>
ret <32 x i8> %8
}
define <32 x i8> @test_mask_subs_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rmk_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpsubsb (%rdi), %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%b = load <32 x i8>, <32 x i8>* %ptr_b
%1 = sext <32 x i8> %a to <32 x i16>
%2 = sext <32 x i8> %b to <32 x i16>
%3 = sub nsw <32 x i16> %1, %2
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <32 x i16> %7 to <32 x i8>
%9 = bitcast i32 %mask to <32 x i1>
%10 = select <32 x i1> %9, <32 x i8> %8, <32 x i8> %passThru
ret <32 x i8> %10
}
define <32 x i8> @test_mask_subs_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rmkz_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%b = load <32 x i8>, <32 x i8>* %ptr_b
%1 = sext <32 x i8> %a to <32 x i16>
%2 = sext <32 x i8> %b to <32 x i16>
%3 = sub nsw <32 x i16> %1, %2
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
%8 = trunc <32 x i16> %7 to <32 x i8>
%9 = bitcast i32 %mask to <32 x i1>
%10 = select <32 x i1> %9, <32 x i8> %8, <32 x i8> zeroinitializer
ret <32 x i8> %10
}
; Unmasked unsigned saturating add of two <16 x i8> vectors, written in
; generic IR: zero-extend both operands to i16, add, clamp to at most 255 with
; an unsigned compare, truncate.
; The assertions below expect a single vpaddusb on xmm registers.
define <16 x i8> @test_mask_adds_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epu8_rr_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = zext <16 x i8> %a to <16 x i16>
%2 = zext <16 x i8> %b to <16 x i16>
%3 = add nsw <16 x i16> %1, %2
; Upper clamp: keep the sum where it is below 255, otherwise use 255.
%4 = icmp ult <16 x i16> %3, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
%6 = trunc <16 x i16> %5 to <16 x i8>
ret <16 x i8> %6
}
; Merge-masked unsigned saturating add of two <16 x i8> vectors. The saturated
; sum (zero-extend, add, clamp to at most 255, truncate) is blended with
; %passThru under the 16-bit %mask, one mask bit per lane.
; The assertions below expect the mask moved into k1, a vpaddusb writing xmm2
; under {%k1}, and a copy of xmm2 into xmm0.
define <16 x i8> @test_mask_adds_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rrk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: retq
%1 = zext <16 x i8> %a to <16 x i16>
%2 = zext <16 x i8> %b to <16 x i16>
%3 = add nsw <16 x i16> %1, %2
; Upper clamp: keep the sum where it is below 255, otherwise use 255.
%4 = icmp ult <16 x i16> %3, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
%6 = trunc <16 x i16> %5 to <16 x i8>
; Per-lane blend: saturated result where the mask bit is set, %passThru elsewhere.
%7 = bitcast i16 %mask to <16 x i1>
%8 = select <16 x i1> %7, <16 x i8> %6, <16 x i8> %passThru
ret <16 x i8> %8
}
; Zero-masked unsigned saturating add of two <16 x i8> vectors. The saturated
; sum (zero-extend, add, clamp to at most 255, truncate) is kept in lanes whose
; %mask bit is set; all other lanes are zero.
; The assertions below expect the mask moved into k1 and a vpaddusb with
; {%k1} {z}.
define <16 x i8> @test_mask_adds_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rrkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%1 = zext <16 x i8> %a to <16 x i16>
%2 = zext <16 x i8> %b to <16 x i16>
%3 = add nsw <16 x i16> %1, %2
; Upper clamp: keep the sum where it is below 255, otherwise use 255.
%4 = icmp ult <16 x i16> %3, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
%6 = trunc <16 x i16> %5 to <16 x i8>
; Per-lane blend: saturated result where the mask bit is set, zero elsewhere.
%7 = bitcast i16 %mask to <16 x i1>
%8 = select <16 x i1> %7, <16 x i8> %6, <16 x i8> zeroinitializer
ret <16 x i8> %8
}
; Unmasked unsigned saturating add of a <16 x i8> register operand and a
; <16 x i8> value loaded from %ptr_b (zero-extend, add, clamp to at most 255,
; truncate).
; The assertions below expect a single vpaddusb taking (%rdi) as its memory
; operand.
define <16 x i8> @test_mask_adds_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epu8_rm_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddusb (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
%b = load <16 x i8>, <16 x i8>* %ptr_b
%1 = zext <16 x i8> %a to <16 x i16>
%2 = zext <16 x i8> %b to <16 x i16>
%3 = add nsw <16 x i16> %1, %2
; Upper clamp: keep the sum where it is below 255, otherwise use 255.
%4 = icmp ult <16 x i16> %3, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
%6 = trunc <16 x i16> %5 to <16 x i8>
ret <16 x i8> %6
}
; Unsigned saturating byte add with a memory operand, merged into %passThru
; under %mask. The assertions expect a merge-masked vpaddusb with the load
; folded in.
define <16 x i8> @test_mask_adds_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rmk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpaddusb (%rdi), %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %a.wide = zext <16 x i8> %a to <16 x i16>
  %b.wide = zext <16 x i8> %b to <16 x i16>
  %sum = add nsw <16 x i16> %a.wide, %b.wide
  ; umin(%sum, 255) written as compare + select
  %fits = icmp ult <16 x i16> %sum, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %clamped = select <16 x i1> %fits, <16 x i16> %sum, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %sat = trunc <16 x i16> %clamped to <16 x i8>
  %mask.bits = bitcast i16 %mask to <16 x i1>
  %merged = select <16 x i1> %mask.bits, <16 x i8> %sat, <16 x i8> %passThru
  ret <16 x i8> %merged
}
; Unsigned saturating byte add with a memory operand and zero-masking. The
; assertions expect one vpaddusb from memory with {%k1} {z}.
define <16 x i8> @test_mask_adds_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rmkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %a.wide = zext <16 x i8> %a to <16 x i16>
  %b.wide = zext <16 x i8> %b to <16 x i16>
  %sum = add nsw <16 x i16> %a.wide, %b.wide
  ; umin(%sum, 255) written as compare + select
  %fits = icmp ult <16 x i16> %sum, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %clamped = select <16 x i1> %fits, <16 x i16> %sum, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %sat = trunc <16 x i16> %clamped to <16 x i8>
  %mask.bits = bitcast i16 %mask to <16 x i1>
  %zeroed = select <16 x i1> %mask.bits, <16 x i8> %sat, <16 x i8> zeroinitializer
  ret <16 x i8> %zeroed
}
; 256-bit unmasked unsigned saturating byte add, register operands. The
; assertions expect a single ymm vpaddusb.
define <32 x i8> @test_mask_adds_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epu8_rr_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddusb %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
  %a.wide = zext <32 x i8> %a to <32 x i16>
  %b.wide = zext <32 x i8> %b to <32 x i16>
  %sum = add nsw <32 x i16> %a.wide, %b.wide
  ; umin(%sum, 255) written as compare + select
  %fits = icmp ult <32 x i16> %sum, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %clamped = select <32 x i1> %fits, <32 x i16> %sum, <32 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %sat = trunc <32 x i16> %clamped to <32 x i8>
  ret <32 x i8> %sat
}
; 256-bit unsigned saturating byte add merged into %passThru under a 32-bit
; %mask. The assertions expect a merge-masked ymm vpaddusb plus a register copy.
define <32 x i8> @test_mask_adds_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rrk_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddusb %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: retq
  %a.wide = zext <32 x i8> %a to <32 x i16>
  %b.wide = zext <32 x i8> %b to <32 x i16>
  %sum = add nsw <32 x i16> %a.wide, %b.wide
  ; umin(%sum, 255) written as compare + select
  %fits = icmp ult <32 x i16> %sum, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %clamped = select <32 x i1> %fits, <32 x i16> %sum, <32 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %sat = trunc <32 x i16> %clamped to <32 x i8>
  %mask.bits = bitcast i32 %mask to <32 x i1>
  %merged = select <32 x i1> %mask.bits, <32 x i8> %sat, <32 x i8> %passThru
  ret <32 x i8> %merged
}
; 256-bit unsigned saturating byte add with zero-masking under a 32-bit %mask.
; The assertions expect one ymm vpaddusb with {%k1} {z}.
define <32 x i8> @test_mask_adds_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rrkz_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %a.wide = zext <32 x i8> %a to <32 x i16>
  %b.wide = zext <32 x i8> %b to <32 x i16>
  %sum = add nsw <32 x i16> %a.wide, %b.wide
  ; umin(%sum, 255) written as compare + select
  %fits = icmp ult <32 x i16> %sum, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %clamped = select <32 x i1> %fits, <32 x i16> %sum, <32 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %sat = trunc <32 x i16> %clamped to <32 x i8>
  %mask.bits = bitcast i32 %mask to <32 x i1>
  %zeroed = select <32 x i1> %mask.bits, <32 x i8> %sat, <32 x i8> zeroinitializer
  ret <32 x i8> %zeroed
}
; 256-bit unmasked unsigned saturating byte add with the second operand loaded
; from %ptr_b. The assertions expect the load folded into a ymm vpaddusb.
define <32 x i8> @test_mask_adds_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epu8_rm_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddusb (%rdi), %ymm0, %ymm0
; CHECK-NEXT: retq
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %a.wide = zext <32 x i8> %a to <32 x i16>
  %b.wide = zext <32 x i8> %b to <32 x i16>
  %sum = add nsw <32 x i16> %a.wide, %b.wide
  ; umin(%sum, 255) written as compare + select
  %fits = icmp ult <32 x i16> %sum, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %clamped = select <32 x i1> %fits, <32 x i16> %sum, <32 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %sat = trunc <32 x i16> %clamped to <32 x i8>
  ret <32 x i8> %sat
}
; 256-bit unsigned saturating byte add with a memory operand, merged into
; %passThru under %mask. The assertions expect a merge-masked ymm vpaddusb
; with the load folded in.
define <32 x i8> @test_mask_adds_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rmk_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpaddusb (%rdi), %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %a.wide = zext <32 x i8> %a to <32 x i16>
  %b.wide = zext <32 x i8> %b to <32 x i16>
  %sum = add nsw <32 x i16> %a.wide, %b.wide
  ; umin(%sum, 255) written as compare + select
  %fits = icmp ult <32 x i16> %sum, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %clamped = select <32 x i1> %fits, <32 x i16> %sum, <32 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %sat = trunc <32 x i16> %clamped to <32 x i8>
  %mask.bits = bitcast i32 %mask to <32 x i1>
  %merged = select <32 x i1> %mask.bits, <32 x i8> %sat, <32 x i8> %passThru
  ret <32 x i8> %merged
}
; 256-bit unsigned saturating byte add with a memory operand and zero-masking.
; The assertions expect one ymm vpaddusb from memory with {%k1} {z}.
define <32 x i8> @test_mask_adds_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rmkz_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %a.wide = zext <32 x i8> %a to <32 x i16>
  %b.wide = zext <32 x i8> %b to <32 x i16>
  %sum = add nsw <32 x i16> %a.wide, %b.wide
  ; umin(%sum, 255) written as compare + select
  %fits = icmp ult <32 x i16> %sum, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %clamped = select <32 x i1> %fits, <32 x i16> %sum, <32 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %sat = trunc <32 x i16> %clamped to <32 x i8>
  %mask.bits = bitcast i32 %mask to <32 x i1>
  %zeroed = select <32 x i1> %mask.bits, <32 x i8> %sat, <32 x i8> zeroinitializer
  ret <32 x i8> %zeroed
}
; Unsigned saturating byte subtract written as umax(%a, %b) - %b, so lanes
; where %a <= %b yield zero. The assertions expect a single vpsubusb.
define <16 x i8> @test_mask_subs_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_subs_epu8_rr_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %a.above.b = icmp ugt <16 x i8> %a, %b
  %umax = select <16 x i1> %a.above.b, <16 x i8> %a, <16 x i8> %b
  %diff = sub <16 x i8> %umax, %b
  ret <16 x i8> %diff
}
; Unsigned saturating byte subtract (umax(%a, %b) - %b) merged into %passThru
; under %mask. The assertions expect a merge-masked vpsubusb plus a copy.
define <16 x i8> @test_mask_subs_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rrk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: retq
  %a.above.b = icmp ugt <16 x i8> %a, %b
  %umax = select <16 x i1> %a.above.b, <16 x i8> %a, <16 x i8> %b
  %diff = sub <16 x i8> %umax, %b
  ; one mask bit per byte lane; clear bits keep the %passThru lane
  %mask.bits = bitcast i16 %mask to <16 x i1>
  %merged = select <16 x i1> %mask.bits, <16 x i8> %diff, <16 x i8> %passThru
  ret <16 x i8> %merged
}
; Unsigned saturating byte subtract with zero-masking under %mask. The
; assertions expect one vpsubusb with {%k1} {z}.
define <16 x i8> @test_mask_subs_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rrkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %a.above.b = icmp ugt <16 x i8> %a, %b
  %umax = select <16 x i1> %a.above.b, <16 x i8> %a, <16 x i8> %b
  %diff = sub <16 x i8> %umax, %b
  %mask.bits = bitcast i16 %mask to <16 x i1>
  %zeroed = select <16 x i1> %mask.bits, <16 x i8> %diff, <16 x i8> zeroinitializer
  ret <16 x i8> %zeroed
}
; Unmasked unsigned saturating byte subtract with the subtrahend loaded from
; %ptr_b. The assertions expect the load folded into vpsubusb.
define <16 x i8> @test_mask_subs_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epu8_rm_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubusb (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %a.above.b = icmp ugt <16 x i8> %a, %b
  %umax = select <16 x i1> %a.above.b, <16 x i8> %a, <16 x i8> %b
  %diff = sub <16 x i8> %umax, %b
  ret <16 x i8> %diff
}
; Unsigned saturating byte subtract with a memory subtrahend, merged into
; %passThru under %mask. The assertions expect a merge-masked vpsubusb with
; the load folded in.
define <16 x i8> @test_mask_subs_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rmk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpsubusb (%rdi), %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %a.above.b = icmp ugt <16 x i8> %a, %b
  %umax = select <16 x i1> %a.above.b, <16 x i8> %a, <16 x i8> %b
  %diff = sub <16 x i8> %umax, %b
  %mask.bits = bitcast i16 %mask to <16 x i1>
  %merged = select <16 x i1> %mask.bits, <16 x i8> %diff, <16 x i8> %passThru
  ret <16 x i8> %merged
}
; Unsigned saturating byte subtract with a memory subtrahend and zero-masking.
; The assertions expect one vpsubusb from memory with {%k1} {z}.
define <16 x i8> @test_mask_subs_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rmkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %b = load <16 x i8>, <16 x i8>* %ptr_b
  %a.above.b = icmp ugt <16 x i8> %a, %b
  %umax = select <16 x i1> %a.above.b, <16 x i8> %a, <16 x i8> %b
  %diff = sub <16 x i8> %umax, %b
  %mask.bits = bitcast i16 %mask to <16 x i1>
  %zeroed = select <16 x i1> %mask.bits, <16 x i8> %diff, <16 x i8> zeroinitializer
  ret <16 x i8> %zeroed
}
; 256-bit unsigned saturating byte subtract written as umax(%a, %b) - %b.
; The assertions expect a single ymm vpsubusb.
define <32 x i8> @test_mask_subs_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_mask_subs_epu8_rr_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubusb %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
  %a.above.b = icmp ugt <32 x i8> %a, %b
  %umax = select <32 x i1> %a.above.b, <32 x i8> %a, <32 x i8> %b
  %diff = sub <32 x i8> %umax, %b
  ret <32 x i8> %diff
}
; 256-bit unsigned saturating byte subtract merged into %passThru under a
; 32-bit %mask. The assertions expect a merge-masked ymm vpsubusb plus a copy.
define <32 x i8> @test_mask_subs_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rrk_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpsubusb %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: retq
  %a.above.b = icmp ugt <32 x i8> %a, %b
  %umax = select <32 x i1> %a.above.b, <32 x i8> %a, <32 x i8> %b
  %diff = sub <32 x i8> %umax, %b
  %mask.bits = bitcast i32 %mask to <32 x i1>
  %merged = select <32 x i1> %mask.bits, <32 x i8> %diff, <32 x i8> %passThru
  ret <32 x i8> %merged
}
; 256-bit unsigned saturating byte subtract with zero-masking under a 32-bit
; %mask. The assertions expect one ymm vpsubusb with {%k1} {z}.
define <32 x i8> @test_mask_subs_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rrkz_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %a.above.b = icmp ugt <32 x i8> %a, %b
  %umax = select <32 x i1> %a.above.b, <32 x i8> %a, <32 x i8> %b
  %diff = sub <32 x i8> %umax, %b
  %mask.bits = bitcast i32 %mask to <32 x i1>
  %zeroed = select <32 x i1> %mask.bits, <32 x i8> %diff, <32 x i8> zeroinitializer
  ret <32 x i8> %zeroed
}
; 256-bit unmasked unsigned saturating byte subtract with the subtrahend
; loaded from %ptr_b. The assertions expect the load folded into vpsubusb.
define <32 x i8> @test_mask_subs_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epu8_rm_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubusb (%rdi), %ymm0, %ymm0
; CHECK-NEXT: retq
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %a.above.b = icmp ugt <32 x i8> %a, %b
  %umax = select <32 x i1> %a.above.b, <32 x i8> %a, <32 x i8> %b
  %diff = sub <32 x i8> %umax, %b
  ret <32 x i8> %diff
}
; 256-bit unsigned saturating byte subtract with a memory subtrahend, merged
; into %passThru under %mask. The assertions expect a merge-masked ymm
; vpsubusb with the load folded in.
define <32 x i8> @test_mask_subs_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rmk_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpsubusb (%rdi), %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %a.above.b = icmp ugt <32 x i8> %a, %b
  %umax = select <32 x i1> %a.above.b, <32 x i8> %a, <32 x i8> %b
  %diff = sub <32 x i8> %umax, %b
  %mask.bits = bitcast i32 %mask to <32 x i1>
  %merged = select <32 x i1> %mask.bits, <32 x i8> %diff, <32 x i8> %passThru
  ret <32 x i8> %merged
}
; 256-bit unsigned saturating byte subtract with a memory subtrahend and
; zero-masking. The assertions expect one ymm vpsubusb with {%k1} {z}.
define <32 x i8> @test_mask_subs_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rmkz_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %b = load <32 x i8>, <32 x i8>* %ptr_b
  %a.above.b = icmp ugt <32 x i8> %a, %b
  %umax = select <32 x i1> %a.above.b, <32 x i8> %a, <32 x i8> %b
  %diff = sub <32 x i8> %umax, %b
  %mask.bits = bitcast i32 %mask to <32 x i1>
  %zeroed = select <32 x i1> %mask.bits, <32 x i8> %diff, <32 x i8> zeroinitializer
  ret <32 x i8> %zeroed
}
; Signed saturating byte add: sign-extend to i16, add, clamp to [-128, 127],
; truncate. The assertions expect a single paddsb.
define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_padds_b:
; SSE: ## %bb.0:
; SSE-NEXT: paddsb %xmm1, %xmm0
; SSE-NEXT: retl
  %lhs.wide = sext <16 x i8> %a0 to <16 x i16>
  %rhs.wide = sext <16 x i8> %a1 to <16 x i16>
  %sum = add nsw <16 x i16> %lhs.wide, %rhs.wide
  ; upper clamp, smin(%sum, 127)
  %below.max = icmp slt <16 x i16> %sum, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  %clamp.hi = select <16 x i1> %below.max, <16 x i16> %sum, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  ; lower clamp, smax(%clamp.hi, -128)
  %above.min = icmp sgt <16 x i16> %clamp.hi, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %clamp.lo = select <16 x i1> %above.min, <16 x i16> %clamp.hi, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %sat = trunc <16 x i16> %clamp.lo to <16 x i8>
  ret <16 x i8> %sat
}
; Signed saturating word add: sign-extend to i32, add, clamp to
; [-32768, 32767], truncate. The assertions expect a single paddsw.
define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_padds_w:
; SSE: ## %bb.0:
; SSE-NEXT: paddsw %xmm1, %xmm0
; SSE-NEXT: retl
  %lhs.wide = sext <8 x i16> %a0 to <8 x i32>
  %rhs.wide = sext <8 x i16> %a1 to <8 x i32>
  %sum = add nsw <8 x i32> %lhs.wide, %rhs.wide
  ; upper clamp, smin(%sum, 32767)
  %below.max = icmp slt <8 x i32> %sum, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <8 x i1> %below.max, <8 x i32> %sum, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  ; lower clamp, smax(%clamp.hi, -32768)
  %above.min = icmp sgt <8 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <8 x i1> %above.min, <8 x i32> %clamp.hi, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <8 x i32> %clamp.lo to <8 x i16>
  ret <8 x i16> %sat
}
; Unsigned saturating byte add: zero-extend to i16, add, clamp to 255,
; truncate. The assertions expect a single paddusb.
define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_paddus_b:
; SSE: ## %bb.0:
; SSE-NEXT: paddusb %xmm1, %xmm0
; SSE-NEXT: retl
  %lhs.wide = zext <16 x i8> %a0 to <16 x i16>
  %rhs.wide = zext <16 x i8> %a1 to <16 x i16>
  %sum = add nsw <16 x i16> %lhs.wide, %rhs.wide
  ; umin(%sum, 255) written as compare + select
  %fits = icmp ult <16 x i16> %sum, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %clamped = select <16 x i1> %fits, <16 x i16> %sum, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %sat = trunc <16 x i16> %clamped to <16 x i8>
  ret <16 x i8> %sat
}
; Unsigned saturating word add: zero-extend to i32, add, clamp to 65535,
; truncate. The assertions expect a single paddusw.
define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_paddus_w:
; SSE: ## %bb.0:
; SSE-NEXT: paddusw %xmm1, %xmm0
; SSE-NEXT: retl
  %lhs.wide = zext <8 x i16> %a0 to <8 x i32>
  %rhs.wide = zext <8 x i16> %a1 to <8 x i32>
  %sum = add nsw <8 x i32> %lhs.wide, %rhs.wide
  ; umin(%sum, 65535) written as compare + select
  %fits = icmp ult <8 x i32> %sum, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %clamped = select <8 x i1> %fits, <8 x i32> %sum, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %sat = trunc <8 x i32> %clamped to <8 x i16>
  ret <8 x i16> %sat
}
; Signed saturating byte subtract: sign-extend to i16, subtract, clamp to
; [-128, 127], truncate. The assertions expect a single psubsb.
define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_psubs_b:
; SSE: ## %bb.0:
; SSE-NEXT: psubsb %xmm1, %xmm0
; SSE-NEXT: retl
  %lhs.wide = sext <16 x i8> %a0 to <16 x i16>
  %rhs.wide = sext <16 x i8> %a1 to <16 x i16>
  %diff = sub nsw <16 x i16> %lhs.wide, %rhs.wide
  ; upper clamp, smin(%diff, 127)
  %below.max = icmp slt <16 x i16> %diff, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  %clamp.hi = select <16 x i1> %below.max, <16 x i16> %diff, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  ; lower clamp, smax(%clamp.hi, -128)
  %above.min = icmp sgt <16 x i16> %clamp.hi, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %clamp.lo = select <16 x i1> %above.min, <16 x i16> %clamp.hi, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %sat = trunc <16 x i16> %clamp.lo to <16 x i8>
  ret <16 x i8> %sat
}
; Signed saturating word subtract: sign-extend to i32, subtract, clamp to
; [-32768, 32767], truncate. The assertions expect a single psubsw.
define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psubs_w:
; SSE: ## %bb.0:
; SSE-NEXT: psubsw %xmm1, %xmm0
; SSE-NEXT: retl
  %lhs.wide = sext <8 x i16> %a0 to <8 x i32>
  %rhs.wide = sext <8 x i16> %a1 to <8 x i32>
  %diff = sub nsw <8 x i32> %lhs.wide, %rhs.wide
  ; upper clamp, smin(%diff, 32767)
  %below.max = icmp slt <8 x i32> %diff, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <8 x i1> %below.max, <8 x i32> %diff, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  ; lower clamp, smax(%clamp.hi, -32768)
  %above.min = icmp sgt <8 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <8 x i1> %above.min, <8 x i32> %clamp.hi, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <8 x i32> %clamp.lo to <8 x i16>
  ret <8 x i16> %sat
}
; Unsigned saturating byte subtract written as umax(%a0, %a1) - %a1.
; The assertions expect a single psubusb.
define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_psubus_b:
; SSE: ## %bb.0:
; SSE-NEXT: psubusb %xmm1, %xmm0
; SSE-NEXT: retl
  %lhs.above = icmp ugt <16 x i8> %a0, %a1
  %umax = select <16 x i1> %lhs.above, <16 x i8> %a0, <16 x i8> %a1
  %diff = sub <16 x i8> %umax, %a1
  ret <16 x i8> %diff
}
; Unsigned saturating word subtract written as umax(%a0, %a1) - %a1.
; The assertions expect a single psubusw.
define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psubus_w:
; SSE: ## %bb.0:
; SSE-NEXT: psubusw %xmm1, %xmm0
; SSE-NEXT: retl
  %lhs.above = icmp ugt <8 x i16> %a0, %a1
  %umax = select <8 x i1> %lhs.above, <8 x i16> %a0, <8 x i16> %a1
  %diff = sub <8 x i16> %umax, %a1
  ret <8 x i16> %diff
}
; Signed saturating add on the 64-bit type <8 x i8>. The assertions do not
; expect paddsb here: both inputs are sign-extended in 16-bit lanes (shift
; left then arithmetic shift right by 8), added as words, and clamped with a
; signed word min and max against constant-pool operands.
define <8 x i8> @test_x86_sse2_padds_b_64(<8 x i8> %a0, <8 x i8> %a1) {
; AVX512BW-LABEL: test_x86_sse2_padds_b_64:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsllw $8, %xmm0, %xmm0
; AVX512BW-NEXT: vpsraw $8, %xmm0, %xmm0
; AVX512BW-NEXT: vpsllw $8, %xmm1, %xmm1
; AVX512BW-NEXT: vpsraw $8, %xmm1, %xmm1
; AVX512BW-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT: vpmaxsw {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; SSE-LABEL: test_x86_sse2_padds_b_64:
; SSE: ## %bb.0:
; SSE-NEXT: psllw $8, %xmm0
; SSE-NEXT: psraw $8, %xmm0
; SSE-NEXT: psllw $8, %xmm1
; SSE-NEXT: psraw $8, %xmm1
; SSE-NEXT: paddw %xmm1, %xmm0
; SSE-NEXT: pminsw LCPI144_0, %xmm0
; SSE-NEXT: pmaxsw LCPI144_1, %xmm0
; SSE-NEXT: retl
  %lhs.wide = sext <8 x i8> %a0 to <8 x i16>
  %rhs.wide = sext <8 x i8> %a1 to <8 x i16>
  %sum = add nsw <8 x i16> %lhs.wide, %rhs.wide
  ; upper clamp, smin(%sum, 127)
  %below.max = icmp slt <8 x i16> %sum, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  %clamp.hi = select <8 x i1> %below.max, <8 x i16> %sum, <8 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  ; lower clamp, smax(%clamp.hi, -128)
  %above.min = icmp sgt <8 x i16> %clamp.hi, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %clamp.lo = select <8 x i1> %above.min, <8 x i16> %clamp.hi, <8 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %sat = trunc <8 x i16> %clamp.lo to <8 x i8>
  ret <8 x i8> %sat
}
; Signed saturating add on the 64-bit type <4 x i16>. The assertions do not
; expect paddsw here: inputs are sign-extended in 32-bit lanes, added as
; dwords, and clamped. The AVX512BW output uses vpminsd/vpmaxsd; the SSE
; output builds each clamp from pcmpgtd/pand/pandn/por.
define <4 x i16> @test_x86_sse2_padds_w_64(<4 x i16> %a0, <4 x i16> %a1) {
; AVX512BW-LABEL: test_x86_sse2_padds_w_64:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpslld $16, %xmm0, %xmm0
; AVX512BW-NEXT: vpsrad $16, %xmm0, %xmm0
; AVX512BW-NEXT: vpslld $16, %xmm1, %xmm1
; AVX512BW-NEXT: vpsrad $16, %xmm1, %xmm1
; AVX512BW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
; AVX512BW-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294934528,4294934528,4294934528,4294934528]
; AVX512BW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; SSE-LABEL: test_x86_sse2_padds_w_64:
; SSE: ## %bb.0:
; SSE-NEXT: pslld $16, %xmm0
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: pslld $16, %xmm1
; SSE-NEXT: psrad $16, %xmm1
; SSE-NEXT: paddd %xmm0, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm0 = [32767,32767,32767,32767]
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: pcmpgtd %xmm1, %xmm2
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pandn %xmm0, %xmm2
; SSE-NEXT: por %xmm1, %xmm2
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [4294934528,4294934528,4294934528,4294934528]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: pcmpgtd %xmm1, %xmm0
; SSE-NEXT: pand %xmm0, %xmm2
; SSE-NEXT: pandn %xmm1, %xmm0
; SSE-NEXT: por %xmm2, %xmm0
; SSE-NEXT: retl
  %lhs.wide = sext <4 x i16> %a0 to <4 x i32>
  %rhs.wide = sext <4 x i16> %a1 to <4 x i32>
  %sum = add nsw <4 x i32> %lhs.wide, %rhs.wide
  ; upper clamp, smin(%sum, 32767)
  %below.max = icmp slt <4 x i32> %sum, <i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <4 x i1> %below.max, <4 x i32> %sum, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
  ; lower clamp, smax(%clamp.hi, -32768)
  %above.min = icmp sgt <4 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <4 x i1> %above.min, <4 x i32> %clamp.hi, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <4 x i32> %clamp.lo to <4 x i16>
  ret <4 x i16> %sat
}
; Unsigned saturating add on the 64-bit type <8 x i8>. The assertions do not
; expect paddusb here: both inputs are masked to the low byte of each 16-bit
; lane, added as words, and clamped with a word min against a constant-pool
; operand (vpminuw in the AVX512BW output, pminsw in the SSE output).
define <8 x i8> @test_x86_sse2_paddus_b_64(<8 x i8> %a0, <8 x i8> %a1) {
; AVX512BW-LABEL: test_x86_sse2_paddus_b_64:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX512BW-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; SSE-LABEL: test_x86_sse2_paddus_b_64:
; SSE: ## %bb.0:
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: paddw %xmm1, %xmm0
; SSE-NEXT: pminsw LCPI146_0, %xmm0
; SSE-NEXT: retl
  %lhs.wide = zext <8 x i8> %a0 to <8 x i16>
  %rhs.wide = zext <8 x i8> %a1 to <8 x i16>
  %sum = add nsw <8 x i16> %lhs.wide, %rhs.wide
  ; umin(%sum, 255) written as compare + select
  %fits = icmp ult <8 x i16> %sum, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %clamped = select <8 x i1> %fits, <8 x i16> %sum, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %sat = trunc <8 x i16> %clamped to <8 x i8>
  ret <8 x i8> %sat
}
; Unsigned saturating add on the 64-bit type <4 x i16>. The assertions do not
; expect paddusw here: the upper half of each 32-bit lane is cleared (blend
; with zero or pand), the values are added as dwords, and the sum is clamped
; to 65535 (vpminud in the AVX512BW output, pcmpgtd/pand/pandn/por in SSE).
define <4 x i16> @test_x86_sse2_paddus_w_64(<4 x i16> %a0, <4 x i16> %a1) {
; AVX512BW-LABEL: test_x86_sse2_paddus_w_64:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; AVX512BW-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; AVX512BW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535]
; AVX512BW-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; SSE-LABEL: test_x86_sse2_paddus_w_64:
; SSE: ## %bb.0:
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0]
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: paddd %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: pcmpgtd %xmm1, %xmm0
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pandn %xmm2, %xmm0
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: retl
  %lhs.wide = zext <4 x i16> %a0 to <4 x i32>
  %rhs.wide = zext <4 x i16> %a1 to <4 x i32>
  %sum = add nsw <4 x i32> %lhs.wide, %rhs.wide
  ; umin(%sum, 65535) written as compare + select
  %fits = icmp ult <4 x i32> %sum, <i32 65535, i32 65535, i32 65535, i32 65535>
  %clamped = select <4 x i1> %fits, <4 x i32> %sum, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
  %sat = trunc <4 x i32> %clamped to <4 x i16>
  ret <4 x i16> %sat
}
; Signed saturating subtract on the 64-bit type <8 x i8>. The assertions do
; not expect psubsb here: both inputs are sign-extended in 16-bit lanes,
; subtracted as words, and clamped with a signed word min and max against
; constant-pool operands.
define <8 x i8> @test_x86_sse2_psubs_b_64(<8 x i8> %a0, <8 x i8> %a1) {
; AVX512BW-LABEL: test_x86_sse2_psubs_b_64:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsllw $8, %xmm0, %xmm0
; AVX512BW-NEXT: vpsraw $8, %xmm0, %xmm0
; AVX512BW-NEXT: vpsllw $8, %xmm1, %xmm1
; AVX512BW-NEXT: vpsraw $8, %xmm1, %xmm1
; AVX512BW-NEXT: vpsubw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT: vpmaxsw {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; SSE-LABEL: test_x86_sse2_psubs_b_64:
; SSE: ## %bb.0:
; SSE-NEXT: psllw $8, %xmm0
; SSE-NEXT: psraw $8, %xmm0
; SSE-NEXT: psllw $8, %xmm1
; SSE-NEXT: psraw $8, %xmm1
; SSE-NEXT: psubw %xmm1, %xmm0
; SSE-NEXT: pminsw LCPI148_0, %xmm0
; SSE-NEXT: pmaxsw LCPI148_1, %xmm0
; SSE-NEXT: retl
  %lhs.wide = sext <8 x i8> %a0 to <8 x i16>
  %rhs.wide = sext <8 x i8> %a1 to <8 x i16>
  %diff = sub nsw <8 x i16> %lhs.wide, %rhs.wide
  ; upper clamp, smin(%diff, 127)
  %below.max = icmp slt <8 x i16> %diff, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  %clamp.hi = select <8 x i1> %below.max, <8 x i16> %diff, <8 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  ; lower clamp, smax(%clamp.hi, -128)
  %above.min = icmp sgt <8 x i16> %clamp.hi, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %clamp.lo = select <8 x i1> %above.min, <8 x i16> %clamp.hi, <8 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %sat = trunc <8 x i16> %clamp.lo to <8 x i8>
  ret <8 x i8> %sat
}
; Signed saturating subtract on the 64-bit type <4 x i16>. The assertions do
; not expect psubsw here: inputs are sign-extended in 32-bit lanes,
; subtracted as dwords, and clamped. The AVX512BW output uses
; vpminsd/vpmaxsd; the SSE output builds each clamp from
; pcmpgtd/pand/pandn/por.
define <4 x i16> @test_x86_sse2_psubs_w_64(<4 x i16> %a0, <4 x i16> %a1) {
; AVX512BW-LABEL: test_x86_sse2_psubs_w_64:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpslld $16, %xmm0, %xmm0
; AVX512BW-NEXT: vpsrad $16, %xmm0, %xmm0
; AVX512BW-NEXT: vpslld $16, %xmm1, %xmm1
; AVX512BW-NEXT: vpsrad $16, %xmm1, %xmm1
; AVX512BW-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
; AVX512BW-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294934528,4294934528,4294934528,4294934528]
; AVX512BW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; SSE-LABEL: test_x86_sse2_psubs_w_64:
; SSE: ## %bb.0:
; SSE-NEXT: pslld $16, %xmm0
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: pslld $16, %xmm1
; SSE-NEXT: psrad $16, %xmm1
; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767]
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: pcmpgtd %xmm0, %xmm2
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pandn %xmm1, %xmm2
; SSE-NEXT: por %xmm0, %xmm2
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [4294934528,4294934528,4294934528,4294934528]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: pcmpgtd %xmm1, %xmm0
; SSE-NEXT: pand %xmm0, %xmm2
; SSE-NEXT: pandn %xmm1, %xmm0
; SSE-NEXT: por %xmm2, %xmm0
; SSE-NEXT: retl
  %lhs.wide = sext <4 x i16> %a0 to <4 x i32>
  %rhs.wide = sext <4 x i16> %a1 to <4 x i32>
  %diff = sub nsw <4 x i32> %lhs.wide, %rhs.wide
  ; upper clamp, smin(%diff, 32767)
  %below.max = icmp slt <4 x i32> %diff, <i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <4 x i1> %below.max, <4 x i32> %diff, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
  ; lower clamp, smax(%clamp.hi, -32768)
  %above.min = icmp sgt <4 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <4 x i1> %above.min, <4 x i32> %clamp.hi, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <4 x i32> %clamp.lo to <4 x i16>
  ret <4 x i16> %sat
}
; Unsigned saturating subtract (umax(%a0, %a1) - %a1) on the 64-bit type
; <8 x i8>. The assertions do not expect psubusb here: both inputs are masked
; to the low byte of each 16-bit lane, combined with a word max (vpmaxuw in
; the AVX512BW output, pmaxsw in the SSE output), then subtracted as words.
define <8 x i8> @test_x86_sse2_psubus_b_64(<8 x i8> %a0, <8 x i8> %a1) {
; AVX512BW-LABEL: test_x86_sse2_psubus_b_64:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpmaxuw %xmm3, %xmm0, %xmm0
; AVX512BW-NEXT: vpsubw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; SSE-LABEL: test_x86_sse2_psubus_b_64:
; SSE: ## %bb.0:
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: movdqa %xmm1, %xmm3
; SSE-NEXT: pand %xmm2, %xmm3
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pmaxsw %xmm3, %xmm0
; SSE-NEXT: psubw %xmm1, %xmm0
; SSE-NEXT: retl
  %lhs.above = icmp ugt <8 x i8> %a0, %a1
  %umax = select <8 x i1> %lhs.above, <8 x i8> %a0, <8 x i8> %a1
  %diff = sub <8 x i8> %umax, %a1
  ret <8 x i8> %diff
}
; Unsigned saturating subtract (umax(%a0, %a1) - %a1) on the 64-bit type
; <4 x i16>. The assertions do not expect psubusw here: the upper half of each
; 32-bit lane is cleared, a dword max is formed (vpmaxud in the AVX512BW
; output, pcmpgtd/pand/pandn/por in SSE), then subtracted as dwords.
define <4 x i16> @test_x86_sse2_psubus_w_64(<4 x i16> %a0, <4 x i16> %a1) {
; AVX512BW-LABEL: test_x86_sse2_psubus_w_64:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: vpblendw {{.*#+}} xmm3 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; AVX512BW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; AVX512BW-NEXT: vpmaxud %xmm3, %xmm0, %xmm0
; AVX512BW-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; SSE-LABEL: test_x86_sse2_psubus_w_64:
; SSE: ## %bb.0:
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0]
; SSE-NEXT: movdqa %xmm1, %xmm3
; SSE-NEXT: pand %xmm2, %xmm3
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: pcmpgtd %xmm3, %xmm2
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pandn %xmm3, %xmm2
; SSE-NEXT: por %xmm0, %xmm2
; SSE-NEXT: psubd %xmm1, %xmm2
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retl
  %lhs.above = icmp ugt <4 x i16> %a0, %a1
  %umax = select <4 x i1> %lhs.above, <4 x i16> %a0, <4 x i16> %a1
  %diff = sub <4 x i16> %umax, %a1
  ret <4 x i16> %diff
}