1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[X86] Add test cases for opportunities to use KTEST when checking whether the result of ANDing two mask registers is zero.

The test cases are constructed to avoid folding the AND into a masked compare operation.

Currently we emit a KAND and a KORTEST for these cases.

llvm-svn: 350287
This commit is contained in:
Craig Topper 2019-01-03 07:12:54 +00:00
parent df8395cc95
commit 827ea76953

View File

@ -3479,3 +3479,729 @@ define void @mask_not_cast(i8*, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>) {
ret void
}
declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
; ktest_3: <8 x i32> inputs, 8-lane (i8) mask.
; Computes ((%w == 0) | (%x == 0)) & ((%y == 0) | (%z == 0)) per lane and calls
; @foo only when no lane of the result is set.
; Intended as a KTEST opportunity. The assertions below currently record the
; AND being materialized first: kandb + kortestb under the SKX, AVX512DQ and
; X86 prefixes, and kandw + kmov to a GPR + testb under the KNL and AVX512BW
; prefixes.
define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; KNL-LABEL: ktest_3:
; KNL: ## %bb.0:
; KNL-NEXT: pushq %rax
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
; KNL-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
; KNL-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
; KNL-NEXT: vptestnmd %zmm1, %zmm1, %k1
; KNL-NEXT: vptestnmd %zmm2, %zmm2, %k2
; KNL-NEXT: vptestnmd %zmm3, %zmm3, %k3
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: korw %k3, %k2, %k1
; KNL-NEXT: kandw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb %al, %al
; KNL-NEXT: je LBB71_1
; KNL-NEXT: ## %bb.2: ## %exit
; KNL-NEXT: popq %rax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
; KNL-NEXT: LBB71_1: ## %bar
; KNL-NEXT: vzeroupper
; KNL-NEXT: callq _foo
; KNL-NEXT: popq %rax
; KNL-NEXT: retq
;
; SKX-LABEL: ktest_3:
; SKX: ## %bb.0:
; SKX-NEXT: pushq %rax
; SKX-NEXT: .cfi_def_cfa_offset 16
; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k0
; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1
; SKX-NEXT: korb %k1, %k0, %k0
; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1
; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k2
; SKX-NEXT: korb %k2, %k1, %k1
; SKX-NEXT: kandb %k1, %k0, %k0
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: je LBB71_1
; SKX-NEXT: ## %bb.2: ## %exit
; SKX-NEXT: popq %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: LBB71_1: ## %bar
; SKX-NEXT: vzeroupper
; SKX-NEXT: callq _foo
; SKX-NEXT: popq %rax
; SKX-NEXT: retq
;
; AVX512BW-LABEL: ktest_3:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: pushq %rax
; AVX512BW-NEXT: .cfi_def_cfa_offset 16
; AVX512BW-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512BW-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512BW-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512BW-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512BW-NEXT: vptestnmd %zmm1, %zmm1, %k1
; AVX512BW-NEXT: vptestnmd %zmm2, %zmm2, %k2
; AVX512BW-NEXT: vptestnmd %zmm3, %zmm3, %k3
; AVX512BW-NEXT: korw %k1, %k0, %k0
; AVX512BW-NEXT: korw %k3, %k2, %k1
; AVX512BW-NEXT: kandw %k1, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: testb %al, %al
; AVX512BW-NEXT: je LBB71_1
; AVX512BW-NEXT: ## %bb.2: ## %exit
; AVX512BW-NEXT: popq %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512BW-NEXT: LBB71_1: ## %bar
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: callq _foo
; AVX512BW-NEXT: popq %rax
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: ktest_3:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: pushq %rax
; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
; AVX512DQ-NEXT: ## kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512DQ-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512DQ-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512DQ-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512DQ-NEXT: vptestnmd %zmm1, %zmm1, %k1
; AVX512DQ-NEXT: vptestnmd %zmm2, %zmm2, %k2
; AVX512DQ-NEXT: vptestnmd %zmm3, %zmm3, %k3
; AVX512DQ-NEXT: korb %k1, %k0, %k0
; AVX512DQ-NEXT: korb %k3, %k2, %k1
; AVX512DQ-NEXT: kandb %k1, %k0, %k0
; AVX512DQ-NEXT: kortestb %k0, %k0
; AVX512DQ-NEXT: je LBB71_1
; AVX512DQ-NEXT: ## %bb.2: ## %exit
; AVX512DQ-NEXT: popq %rax
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
; AVX512DQ-NEXT: LBB71_1: ## %bar
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: callq _foo
; AVX512DQ-NEXT: popq %rax
; AVX512DQ-NEXT: retq
;
; X86-LABEL: ktest_3:
; X86: ## %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: vptestnmd %ymm0, %ymm0, %k0
; X86-NEXT: vptestnmd %ymm1, %ymm1, %k1
; X86-NEXT: korb %k1, %k0, %k0
; X86-NEXT: vptestnmd %ymm2, %ymm2, %k1
; X86-NEXT: vptestnmd %ymm3, %ymm3, %k2
; X86-NEXT: korb %k2, %k1, %k1
; X86-NEXT: kandb %k1, %k0, %k0
; X86-NEXT: kortestb %k0, %k0
; X86-NEXT: je LBB71_1
; X86-NEXT: ## %bb.2: ## %exit
; X86-NEXT: addl $12, %esp
; X86-NEXT: vzeroupper
; X86-NEXT: retl
; X86-NEXT: LBB71_1: ## %bar
; X86-NEXT: vzeroupper
; X86-NEXT: calll _foo
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
; Per-lane "element is zero" masks, one per argument.
%a = icmp eq <8 x i32> %w, zeroinitializer
%b = icmp eq <8 x i32> %x, zeroinitializer
%c = icmp eq <8 x i32> %y, zeroinitializer
%d = icmp eq <8 x i32> %z, zeroinitializer
; Each AND operand is an OR of two compares rather than a bare compare;
; presumably this is what keeps the AND from folding into a masked compare.
%e = or <8 x i1> %a, %b
%f = or <8 x i1> %c, %d
%g = and <8 x i1> %e, %f
; View the 8 x i1 mask as a scalar so "no lane set" becomes a compare with 0.
%h = bitcast <8 x i1> %g to i8
%i = icmp eq i8 %h, 0
; bar is taken when the mask is all zeros; both paths end at exit.
br i1 %i, label %bar, label %exit
bar:
call void @foo()
br label %exit
exit:
ret void
}
; ktest_4: <8 x i64> inputs, 8-lane (i8) mask.
; Same shape as the i32 variant: ((%w == 0) | (%x == 0)) & ((%y == 0) | (%z == 0)),
; with @foo called only when the resulting mask is all zeros.
; Intended as a KTEST opportunity. The assertions below currently record
; kandb + kortestb under the SKX, AVX512DQ and X86 prefixes, and
; kandw + kmov to a GPR + testb under the KNL and AVX512BW prefixes.
define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
; KNL-LABEL: ktest_4:
; KNL: ## %bb.0:
; KNL-NEXT: pushq %rax
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1
; KNL-NEXT: vptestnmq %zmm2, %zmm2, %k2
; KNL-NEXT: vptestnmq %zmm3, %zmm3, %k3
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: korw %k3, %k2, %k1
; KNL-NEXT: kandw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb %al, %al
; KNL-NEXT: je LBB72_1
; KNL-NEXT: ## %bb.2: ## %exit
; KNL-NEXT: popq %rax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
; KNL-NEXT: LBB72_1: ## %bar
; KNL-NEXT: vzeroupper
; KNL-NEXT: callq _foo
; KNL-NEXT: popq %rax
; KNL-NEXT: retq
;
; SKX-LABEL: ktest_4:
; SKX: ## %bb.0:
; SKX-NEXT: pushq %rax
; SKX-NEXT: .cfi_def_cfa_offset 16
; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k0
; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1
; SKX-NEXT: korb %k1, %k0, %k0
; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1
; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k2
; SKX-NEXT: korb %k2, %k1, %k1
; SKX-NEXT: kandb %k1, %k0, %k0
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: je LBB72_1
; SKX-NEXT: ## %bb.2: ## %exit
; SKX-NEXT: popq %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: LBB72_1: ## %bar
; SKX-NEXT: vzeroupper
; SKX-NEXT: callq _foo
; SKX-NEXT: popq %rax
; SKX-NEXT: retq
;
; AVX512BW-LABEL: ktest_4:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: pushq %rax
; AVX512BW-NEXT: .cfi_def_cfa_offset 16
; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT: vptestnmq %zmm1, %zmm1, %k1
; AVX512BW-NEXT: vptestnmq %zmm2, %zmm2, %k2
; AVX512BW-NEXT: vptestnmq %zmm3, %zmm3, %k3
; AVX512BW-NEXT: korw %k1, %k0, %k0
; AVX512BW-NEXT: korw %k3, %k2, %k1
; AVX512BW-NEXT: kandw %k1, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: testb %al, %al
; AVX512BW-NEXT: je LBB72_1
; AVX512BW-NEXT: ## %bb.2: ## %exit
; AVX512BW-NEXT: popq %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512BW-NEXT: LBB72_1: ## %bar
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: callq _foo
; AVX512BW-NEXT: popq %rax
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: ktest_4:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: pushq %rax
; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
; AVX512DQ-NEXT: vptestnmq %zmm0, %zmm0, %k0
; AVX512DQ-NEXT: vptestnmq %zmm1, %zmm1, %k1
; AVX512DQ-NEXT: korb %k1, %k0, %k0
; AVX512DQ-NEXT: vptestnmq %zmm2, %zmm2, %k1
; AVX512DQ-NEXT: vptestnmq %zmm3, %zmm3, %k2
; AVX512DQ-NEXT: korb %k2, %k1, %k1
; AVX512DQ-NEXT: kandb %k1, %k0, %k0
; AVX512DQ-NEXT: kortestb %k0, %k0
; AVX512DQ-NEXT: je LBB72_1
; AVX512DQ-NEXT: ## %bb.2: ## %exit
; AVX512DQ-NEXT: popq %rax
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
; AVX512DQ-NEXT: LBB72_1: ## %bar
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: callq _foo
; AVX512DQ-NEXT: popq %rax
; AVX512DQ-NEXT: retq
;
; X86-LABEL: ktest_4:
; X86: ## %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: vptestnmq %zmm0, %zmm0, %k0
; X86-NEXT: vptestnmq %zmm1, %zmm1, %k1
; X86-NEXT: korb %k1, %k0, %k0
; X86-NEXT: vptestnmq %zmm2, %zmm2, %k1
; X86-NEXT: vptestnmq %zmm3, %zmm3, %k2
; X86-NEXT: korb %k2, %k1, %k1
; X86-NEXT: kandb %k1, %k0, %k0
; X86-NEXT: kortestb %k0, %k0
; X86-NEXT: je LBB72_1
; X86-NEXT: ## %bb.2: ## %exit
; X86-NEXT: addl $12, %esp
; X86-NEXT: vzeroupper
; X86-NEXT: retl
; X86-NEXT: LBB72_1: ## %bar
; X86-NEXT: vzeroupper
; X86-NEXT: calll _foo
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
; One "element is zero" mask per 512-bit argument.
%a = icmp eq <8 x i64> %w, zeroinitializer
%b = icmp eq <8 x i64> %x, zeroinitializer
%c = icmp eq <8 x i64> %y, zeroinitializer
%d = icmp eq <8 x i64> %z, zeroinitializer
; The AND is fed by two ORs instead of by compares directly; presumably this
; is what prevents it from being folded into a masked compare.
%e = or <8 x i1> %a, %b
%f = or <8 x i1> %c, %d
%g = and <8 x i1> %e, %f
; Scalarize the mask so the zero test is a plain i8 compare.
%h = bitcast <8 x i1> %g to i8
%i = icmp eq i8 %h, 0
; All-zero mask goes to bar (which calls @foo); otherwise straight to exit.
br i1 %i, label %bar, label %exit
bar:
call void @foo()
br label %exit
exit:
ret void
}
; ktest_5: <16 x i32> inputs, 16-lane (i16) mask.
; Computes ((%w == 0) | (%x == 0)) & ((%y == 0) | (%z == 0)) per lane and calls
; @foo only when the resulting mask is all zeros.
; Intended as a KTEST opportunity. Both assertion groups below (the shared
; prefix and the X86 prefix) currently record kandw followed by kortestw.
define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z) {
; CHECK-LABEL: ktest_5:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
; CHECK-NEXT: vptestnmd %zmm3, %zmm3, %k2
; CHECK-NEXT: korw %k2, %k1, %k1
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: kortestw %k0, %k0
; CHECK-NEXT: je LBB73_1
; CHECK-NEXT: ## %bb.2: ## %exit
; CHECK-NEXT: popq %rax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
; CHECK-NEXT: LBB73_1: ## %bar
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq _foo
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: ktest_5:
; X86: ## %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
; X86-NEXT: vptestnmd %zmm1, %zmm1, %k1
; X86-NEXT: korw %k1, %k0, %k0
; X86-NEXT: vptestnmd %zmm2, %zmm2, %k1
; X86-NEXT: vptestnmd %zmm3, %zmm3, %k2
; X86-NEXT: korw %k2, %k1, %k1
; X86-NEXT: kandw %k1, %k0, %k0
; X86-NEXT: kortestw %k0, %k0
; X86-NEXT: je LBB73_1
; X86-NEXT: ## %bb.2: ## %exit
; X86-NEXT: addl $12, %esp
; X86-NEXT: vzeroupper
; X86-NEXT: retl
; X86-NEXT: LBB73_1: ## %bar
; X86-NEXT: vzeroupper
; X86-NEXT: calll _foo
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
; Per-lane "element is zero" masks for the four inputs.
%a = icmp eq <16 x i32> %w, zeroinitializer
%b = icmp eq <16 x i32> %x, zeroinitializer
%c = icmp eq <16 x i32> %y, zeroinitializer
%d = icmp eq <16 x i32> %z, zeroinitializer
; OR pairs first so the AND's operands are not bare compares; presumably this
; is what keeps the AND from folding into a masked compare.
%e = or <16 x i1> %a, %b
%f = or <16 x i1> %c, %d
%g = and <16 x i1> %e, %f
; Treat the 16 x i1 mask as an i16 and test it against zero.
%h = bitcast <16 x i1> %g to i16
%i = icmp eq i16 %h, 0
; bar (the @foo call) runs only for an all-zero mask.
br i1 %i, label %bar, label %exit
bar:
call void @foo()
br label %exit
exit:
ret void
}
; ktest_6: <32 x i16> inputs, 32-lane (i32) mask.
; Computes ((%w == 0) | (%x == 0)) & ((%y == 0) | (%z == 0)) per lane and calls
; @foo only when the resulting mask is all zeros.
; Intended as a KTEST opportunity. The assertions below currently record
; kandd + kortestd under the SKX, AVX512BW and X86 prefixes. Under the KNL and
; AVX512DQ prefixes the compares, ORs and AND are done on ymm vectors
; (vpcmpeqw / vpor / vpand) and the two 16-bit mask halves are merged in
; general-purpose registers with shll + orl, so no mask-register AND appears.
define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z) {
; KNL-LABEL: ktest_6:
; KNL: ## %bb.0:
; KNL-NEXT: pushq %rax
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: vpxor %xmm8, %xmm8, %xmm8
; KNL-NEXT: vpcmpeqw %ymm8, %ymm0, %ymm0
; KNL-NEXT: vpcmpeqw %ymm8, %ymm1, %ymm1
; KNL-NEXT: vpcmpeqw %ymm8, %ymm2, %ymm2
; KNL-NEXT: vpor %ymm2, %ymm0, %ymm0
; KNL-NEXT: vpcmpeqw %ymm8, %ymm3, %ymm2
; KNL-NEXT: vpor %ymm2, %ymm1, %ymm1
; KNL-NEXT: vpcmpeqw %ymm8, %ymm4, %ymm2
; KNL-NEXT: vpcmpeqw %ymm8, %ymm5, %ymm3
; KNL-NEXT: vpcmpeqw %ymm8, %ymm6, %ymm4
; KNL-NEXT: vpor %ymm4, %ymm2, %ymm2
; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0
; KNL-NEXT: vpcmpeqw %ymm8, %ymm7, %ymm2
; KNL-NEXT: vpor %ymm2, %ymm3, %ymm2
; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpmovsxwd %ymm1, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: shll $16, %ecx
; KNL-NEXT: orl %eax, %ecx
; KNL-NEXT: je LBB74_1
; KNL-NEXT: ## %bb.2: ## %exit
; KNL-NEXT: popq %rax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
; KNL-NEXT: LBB74_1: ## %bar
; KNL-NEXT: vzeroupper
; KNL-NEXT: callq _foo
; KNL-NEXT: popq %rax
; KNL-NEXT: retq
;
; SKX-LABEL: ktest_6:
; SKX: ## %bb.0:
; SKX-NEXT: pushq %rax
; SKX-NEXT: .cfi_def_cfa_offset 16
; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k0
; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1
; SKX-NEXT: kord %k1, %k0, %k0
; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1
; SKX-NEXT: vptestnmw %zmm3, %zmm3, %k2
; SKX-NEXT: kord %k2, %k1, %k1
; SKX-NEXT: kandd %k1, %k0, %k0
; SKX-NEXT: kortestd %k0, %k0
; SKX-NEXT: je LBB74_1
; SKX-NEXT: ## %bb.2: ## %exit
; SKX-NEXT: popq %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: LBB74_1: ## %bar
; SKX-NEXT: vzeroupper
; SKX-NEXT: callq _foo
; SKX-NEXT: popq %rax
; SKX-NEXT: retq
;
; AVX512BW-LABEL: ktest_6:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: pushq %rax
; AVX512BW-NEXT: .cfi_def_cfa_offset 16
; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0
; AVX512BW-NEXT: vptestnmw %zmm1, %zmm1, %k1
; AVX512BW-NEXT: kord %k1, %k0, %k0
; AVX512BW-NEXT: vptestnmw %zmm2, %zmm2, %k1
; AVX512BW-NEXT: vptestnmw %zmm3, %zmm3, %k2
; AVX512BW-NEXT: kord %k2, %k1, %k1
; AVX512BW-NEXT: kandd %k1, %k0, %k0
; AVX512BW-NEXT: kortestd %k0, %k0
; AVX512BW-NEXT: je LBB74_1
; AVX512BW-NEXT: ## %bb.2: ## %exit
; AVX512BW-NEXT: popq %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512BW-NEXT: LBB74_1: ## %bar
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: callq _foo
; AVX512BW-NEXT: popq %rax
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: ktest_6:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: pushq %rax
; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
; AVX512DQ-NEXT: vpxor %xmm8, %xmm8, %xmm8
; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm0, %ymm0
; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm1, %ymm1
; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm2, %ymm2
; AVX512DQ-NEXT: vpor %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm3, %ymm2
; AVX512DQ-NEXT: vpor %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm4, %ymm2
; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm5, %ymm3
; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm6, %ymm4
; AVX512DQ-NEXT: vpor %ymm4, %ymm2, %ymm2
; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vpcmpeqw %ymm8, %ymm7, %ymm2
; AVX512DQ-NEXT: vpor %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm0
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT: kmovw %k0, %ecx
; AVX512DQ-NEXT: shll $16, %ecx
; AVX512DQ-NEXT: orl %eax, %ecx
; AVX512DQ-NEXT: je LBB74_1
; AVX512DQ-NEXT: ## %bb.2: ## %exit
; AVX512DQ-NEXT: popq %rax
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
; AVX512DQ-NEXT: LBB74_1: ## %bar
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: callq _foo
; AVX512DQ-NEXT: popq %rax
; AVX512DQ-NEXT: retq
;
; X86-LABEL: ktest_6:
; X86: ## %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: vptestnmw %zmm0, %zmm0, %k0
; X86-NEXT: vptestnmw %zmm1, %zmm1, %k1
; X86-NEXT: kord %k1, %k0, %k0
; X86-NEXT: vptestnmw %zmm2, %zmm2, %k1
; X86-NEXT: vptestnmw %zmm3, %zmm3, %k2
; X86-NEXT: kord %k2, %k1, %k1
; X86-NEXT: kandd %k1, %k0, %k0
; X86-NEXT: kortestd %k0, %k0
; X86-NEXT: je LBB74_1
; X86-NEXT: ## %bb.2: ## %exit
; X86-NEXT: addl $12, %esp
; X86-NEXT: vzeroupper
; X86-NEXT: retl
; X86-NEXT: LBB74_1: ## %bar
; X86-NEXT: vzeroupper
; X86-NEXT: calll _foo
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
; Per-lane "element is zero" masks for the four inputs.
%a = icmp eq <32 x i16> %w, zeroinitializer
%b = icmp eq <32 x i16> %x, zeroinitializer
%c = icmp eq <32 x i16> %y, zeroinitializer
%d = icmp eq <32 x i16> %z, zeroinitializer
; The AND consumes two ORs rather than compares; presumably this is what keeps
; it from being folded into a masked compare.
%e = or <32 x i1> %a, %b
%f = or <32 x i1> %c, %d
%g = and <32 x i1> %e, %f
; Reinterpret the 32 x i1 mask as an i32 for the zero test.
%h = bitcast <32 x i1> %g to i32
%i = icmp eq i32 %h, 0
; All-zero mask branches to bar, which calls @foo before joining exit.
br i1 %i, label %bar, label %exit
bar:
call void @foo()
br label %exit
exit:
ret void
}
; ktest_7: <64 x i8> inputs, 64-lane (i64) mask.
; Computes ((%w == 0) | (%x == 0)) & ((%y == 0) | (%z == 0)) per lane and calls
; @foo only when the resulting mask is all zeros.
; Intended as a KTEST opportunity. The assertions below currently record
; kandq + kortestq under the SKX and AVX512BW prefixes. Under the X86 prefix
; the kandq result is split with kshiftrq $32 and the two halves are checked
; by a single kortestd. Under the KNL and AVX512DQ prefixes the logic is done
; on xmm/ymm vectors and four 16-bit mask pieces are merged in general-purpose
; registers (shll/orl, then shlq/orq), so no mask-register AND appears.
define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
; KNL-LABEL: ktest_7:
; KNL: ## %bb.0:
; KNL-NEXT: pushq %rax
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: vpxor %xmm8, %xmm8, %xmm8
; KNL-NEXT: vpcmpeqb %ymm8, %ymm0, %ymm9
; KNL-NEXT: vextracti128 $1, %ymm9, %xmm0
; KNL-NEXT: vpcmpeqb %ymm8, %ymm1, %ymm10
; KNL-NEXT: vextracti128 $1, %ymm10, %xmm1
; KNL-NEXT: vpcmpeqb %ymm8, %ymm2, %ymm11
; KNL-NEXT: vextracti128 $1, %ymm11, %xmm2
; KNL-NEXT: vpor %xmm2, %xmm0, %xmm13
; KNL-NEXT: vpcmpeqb %ymm8, %ymm3, %ymm2
; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3
; KNL-NEXT: vpor %xmm3, %xmm1, %xmm12
; KNL-NEXT: vpcmpeqb %ymm8, %ymm4, %ymm3
; KNL-NEXT: vextracti128 $1, %ymm3, %xmm4
; KNL-NEXT: vpcmpeqb %ymm8, %ymm5, %ymm5
; KNL-NEXT: vextracti128 $1, %ymm5, %xmm1
; KNL-NEXT: vpcmpeqb %ymm8, %ymm6, %ymm6
; KNL-NEXT: vextracti128 $1, %ymm6, %xmm0
; KNL-NEXT: vpor %xmm0, %xmm4, %xmm0
; KNL-NEXT: vpand %xmm0, %xmm13, %xmm0
; KNL-NEXT: vpcmpeqb %ymm8, %ymm7, %ymm4
; KNL-NEXT: vextracti128 $1, %ymm4, %xmm7
; KNL-NEXT: vpor %xmm7, %xmm1, %xmm1
; KNL-NEXT: vpand %xmm1, %xmm12, %xmm1
; KNL-NEXT: vpor %xmm2, %xmm10, %xmm2
; KNL-NEXT: vpor %xmm11, %xmm9, %xmm7
; KNL-NEXT: vpor %xmm4, %xmm5, %xmm4
; KNL-NEXT: vpand %xmm4, %xmm2, %xmm2
; KNL-NEXT: vpor %xmm6, %xmm3, %xmm3
; KNL-NEXT: vpand %xmm3, %xmm7, %xmm3
; KNL-NEXT: vpmovsxbd %xmm3, %zmm3
; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: shll $16, %ecx
; KNL-NEXT: orl %eax, %ecx
; KNL-NEXT: vpmovsxbd %xmm2, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: shll $16, %edx
; KNL-NEXT: orl %eax, %edx
; KNL-NEXT: shlq $32, %rdx
; KNL-NEXT: orq %rcx, %rdx
; KNL-NEXT: je LBB75_1
; KNL-NEXT: ## %bb.2: ## %exit
; KNL-NEXT: popq %rax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
; KNL-NEXT: LBB75_1: ## %bar
; KNL-NEXT: vzeroupper
; KNL-NEXT: callq _foo
; KNL-NEXT: popq %rax
; KNL-NEXT: retq
;
; SKX-LABEL: ktest_7:
; SKX: ## %bb.0:
; SKX-NEXT: pushq %rax
; SKX-NEXT: .cfi_def_cfa_offset 16
; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k0
; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1
; SKX-NEXT: korq %k1, %k0, %k0
; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1
; SKX-NEXT: vptestnmb %zmm3, %zmm3, %k2
; SKX-NEXT: korq %k2, %k1, %k1
; SKX-NEXT: kandq %k1, %k0, %k0
; SKX-NEXT: kortestq %k0, %k0
; SKX-NEXT: je LBB75_1
; SKX-NEXT: ## %bb.2: ## %exit
; SKX-NEXT: popq %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: LBB75_1: ## %bar
; SKX-NEXT: vzeroupper
; SKX-NEXT: callq _foo
; SKX-NEXT: popq %rax
; SKX-NEXT: retq
;
; AVX512BW-LABEL: ktest_7:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: pushq %rax
; AVX512BW-NEXT: .cfi_def_cfa_offset 16
; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k1
; AVX512BW-NEXT: korq %k1, %k0, %k0
; AVX512BW-NEXT: vptestnmb %zmm2, %zmm2, %k1
; AVX512BW-NEXT: vptestnmb %zmm3, %zmm3, %k2
; AVX512BW-NEXT: korq %k2, %k1, %k1
; AVX512BW-NEXT: kandq %k1, %k0, %k0
; AVX512BW-NEXT: kortestq %k0, %k0
; AVX512BW-NEXT: je LBB75_1
; AVX512BW-NEXT: ## %bb.2: ## %exit
; AVX512BW-NEXT: popq %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512BW-NEXT: LBB75_1: ## %bar
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: callq _foo
; AVX512BW-NEXT: popq %rax
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: ktest_7:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: pushq %rax
; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
; AVX512DQ-NEXT: vpxor %xmm8, %xmm8, %xmm8
; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm0, %ymm9
; AVX512DQ-NEXT: vextracti128 $1, %ymm9, %xmm0
; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm1, %ymm10
; AVX512DQ-NEXT: vextracti128 $1, %ymm10, %xmm1
; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm2, %ymm11
; AVX512DQ-NEXT: vextracti128 $1, %ymm11, %xmm2
; AVX512DQ-NEXT: vpor %xmm2, %xmm0, %xmm13
; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm3, %ymm2
; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX512DQ-NEXT: vpor %xmm3, %xmm1, %xmm12
; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm4, %ymm3
; AVX512DQ-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm5, %ymm5
; AVX512DQ-NEXT: vextracti128 $1, %ymm5, %xmm1
; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm6, %ymm6
; AVX512DQ-NEXT: vextracti128 $1, %ymm6, %xmm0
; AVX512DQ-NEXT: vpor %xmm0, %xmm4, %xmm0
; AVX512DQ-NEXT: vpand %xmm0, %xmm13, %xmm0
; AVX512DQ-NEXT: vpcmpeqb %ymm8, %ymm7, %ymm4
; AVX512DQ-NEXT: vextracti128 $1, %ymm4, %xmm7
; AVX512DQ-NEXT: vpor %xmm7, %xmm1, %xmm1
; AVX512DQ-NEXT: vpand %xmm1, %xmm12, %xmm1
; AVX512DQ-NEXT: vpor %xmm2, %xmm10, %xmm2
; AVX512DQ-NEXT: vpor %xmm11, %xmm9, %xmm7
; AVX512DQ-NEXT: vpor %xmm4, %xmm5, %xmm4
; AVX512DQ-NEXT: vpand %xmm4, %xmm2, %xmm2
; AVX512DQ-NEXT: vpor %xmm6, %xmm3, %xmm3
; AVX512DQ-NEXT: vpand %xmm3, %xmm7, %xmm3
; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3
; AVX512DQ-NEXT: vpmovd2m %zmm3, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT: kmovw %k0, %ecx
; AVX512DQ-NEXT: shll $16, %ecx
; AVX512DQ-NEXT: orl %eax, %ecx
; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm0
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT: kmovw %k0, %edx
; AVX512DQ-NEXT: shll $16, %edx
; AVX512DQ-NEXT: orl %eax, %edx
; AVX512DQ-NEXT: shlq $32, %rdx
; AVX512DQ-NEXT: orq %rcx, %rdx
; AVX512DQ-NEXT: je LBB75_1
; AVX512DQ-NEXT: ## %bb.2: ## %exit
; AVX512DQ-NEXT: popq %rax
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
; AVX512DQ-NEXT: LBB75_1: ## %bar
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: callq _foo
; AVX512DQ-NEXT: popq %rax
; AVX512DQ-NEXT: retq
;
; X86-LABEL: ktest_7:
; X86: ## %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: vptestnmb %zmm0, %zmm0, %k0
; X86-NEXT: vptestnmb %zmm1, %zmm1, %k1
; X86-NEXT: korq %k1, %k0, %k0
; X86-NEXT: vptestnmb %zmm2, %zmm2, %k1
; X86-NEXT: vptestnmb %zmm3, %zmm3, %k2
; X86-NEXT: korq %k2, %k1, %k1
; X86-NEXT: kandq %k1, %k0, %k0
; X86-NEXT: kshiftrq $32, %k0, %k1
; X86-NEXT: kortestd %k1, %k0
; X86-NEXT: je LBB75_1
; X86-NEXT: ## %bb.2: ## %exit
; X86-NEXT: addl $12, %esp
; X86-NEXT: vzeroupper
; X86-NEXT: retl
; X86-NEXT: LBB75_1: ## %bar
; X86-NEXT: vzeroupper
; X86-NEXT: calll _foo
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
; Per-lane "element is zero" masks for the four inputs.
%a = icmp eq <64 x i8> %w, zeroinitializer
%b = icmp eq <64 x i8> %x, zeroinitializer
%c = icmp eq <64 x i8> %y, zeroinitializer
%d = icmp eq <64 x i8> %z, zeroinitializer
; The AND's operands are ORs of compares, not compares themselves; presumably
; this is what keeps the AND from folding into a masked compare.
%e = or <64 x i1> %a, %b
%f = or <64 x i1> %c, %d
%g = and <64 x i1> %e, %f
; Reinterpret the 64 x i1 mask as an i64 for the zero test.
%h = bitcast <64 x i1> %g to i64
%i = icmp eq i64 %h, 0
; All-zero mask branches to bar, which calls @foo before joining exit.
br i1 %i, label %bar, label %exit
bar:
call void @foo()
br label %exit
exit:
ret void
}