1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 19:52:54 +01:00

[CodeGen] Canonicalise adds/subs of i1 vectors using XOR

When calling SelectionDAG::getNode() to create an ADD or SUB
of two vectors with i1 element types we can canonicalise this
to use XOR instead, where 1+1 is treated as wrapping around
to 0 and 0-1 wraps to 1.

I've added the following tests for SVE targets:

  CodeGen/AArch64/sve-pred-arith.ll

and modified some X86 tests to reflect the much simpler codegen
required.

Differential Revision: https://reviews.llvm.org/D97276
This commit is contained in:
David Sherwood 2021-02-22 16:00:38 +00:00
parent 0f1f22a56b
commit cee8b18db2
4 changed files with 221 additions and 312 deletions

View File

@ -5315,6 +5315,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// it's worth handling here.
if (N2C && N2C->isNullValue())
return N1;
if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && VT.isVector() &&
VT.getVectorElementType() == MVT::i1)
return getNode(ISD::XOR, DL, VT, N1, N2);
break;
case ISD::MUL:
assert(VT.isInteger() && "This operator does not apply to FP types!");

View File

@ -0,0 +1,164 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning
; LEGAL ADDS
define <vscale x 16 x i1> @add_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: add_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = add <vscale x 16 x i1> %a, %b
ret <vscale x 16 x i1> %res;
}
define <vscale x 8 x i1> @add_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: add_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.h
; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = add <vscale x 8 x i1> %a, %b
ret <vscale x 8 x i1> %res;
}
define <vscale x 4 x i1> @add_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: add_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.s
; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = add <vscale x 4 x i1> %a, %b
ret <vscale x 4 x i1> %res;
}
define <vscale x 2 x i1> @add_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: add_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.d
; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = add <vscale x 2 x i1> %a, %b
ret <vscale x 2 x i1> %res;
}
; ILLEGAL ADDS
define aarch64_sve_vector_pcs <vscale x 64 x i1> @add_nxv64i1(<vscale x 64 x i1> %a, <vscale x 64 x i1> %b) {
; CHECK-LABEL: add_nxv64i1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ldr p4, [x3]
; CHECK-NEXT: ldr p5, [x0]
; CHECK-NEXT: ldr p6, [x1]
; CHECK-NEXT: ldr p7, [x2]
; CHECK-NEXT: ptrue p8.b
; CHECK-NEXT: eor p0.b, p8/z, p0.b, p5.b
; CHECK-NEXT: eor p1.b, p8/z, p1.b, p6.b
; CHECK-NEXT: eor p2.b, p8/z, p2.b, p7.b
; CHECK-NEXT: eor p3.b, p8/z, p3.b, p4.b
; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%res = add <vscale x 64 x i1> %a, %b
ret <vscale x 64 x i1> %res;
}
; LEGAL SUBS
define <vscale x 16 x i1> @sub_xv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: sub_xv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = sub <vscale x 16 x i1> %a, %b
ret <vscale x 16 x i1> %res;
}
define <vscale x 8 x i1> @sub_xv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: sub_xv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.h
; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = sub <vscale x 8 x i1> %a, %b
ret <vscale x 8 x i1> %res;
}
define <vscale x 4 x i1> @sub_xv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: sub_xv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.s
; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = sub <vscale x 4 x i1> %a, %b
ret <vscale x 4 x i1> %res;
}
define <vscale x 2 x i1> @sub_xv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: sub_xv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.d
; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = sub <vscale x 2 x i1> %a, %b
ret <vscale x 2 x i1> %res;
}
; ILLEGAL SUBGS
define aarch64_sve_vector_pcs <vscale x 64 x i1> @sub_nxv64i1(<vscale x 64 x i1> %a, <vscale x 64 x i1> %b) {
; CHECK-LABEL: sub_nxv64i1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ldr p4, [x3]
; CHECK-NEXT: ldr p5, [x0]
; CHECK-NEXT: ldr p6, [x1]
; CHECK-NEXT: ldr p7, [x2]
; CHECK-NEXT: ptrue p8.b
; CHECK-NEXT: eor p0.b, p8/z, p0.b, p5.b
; CHECK-NEXT: eor p1.b, p8/z, p1.b, p6.b
; CHECK-NEXT: eor p2.b, p8/z, p2.b, p7.b
; CHECK-NEXT: eor p3.b, p8/z, p3.b, p4.b
; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%res = sub <vscale x 64 x i1> %a, %b
ret <vscale x 64 x i1> %res;
}

View File

@ -3804,49 +3804,17 @@ define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) {
}
define i16 @test_v16i1_add(i16 %x, i16 %y) {
; KNL-LABEL: test_v16i1_add:
; KNL: ## %bb.0:
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kmovw %esi, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
; KNL-NEXT: retq
;
; SKX-LABEL: test_v16i1_add:
; SKX: ## %bb.0:
; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: kmovd %esi, %k1
; SKX-NEXT: kxorw %k1, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test_v16i1_add:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k0
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_v16i1_add:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: kmovw %edi, %k0
; AVX512DQ-NEXT: kmovw %esi, %k1
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
; AVX512DQ-NEXT: retq
; CHECK-LABEL: test_v16i1_add:
; CHECK: ## %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: xorl %esi, %eax
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
;
; X86-LABEL: test_v16i1_add:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: kxorw %k1, %k0, %k0
; X86-NEXT: kmovd %k0, %eax
; X86-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorw {{[0-9]+}}(%esp), %ax
; X86-NEXT: retl
%m0 = bitcast i16 %x to <16 x i1>
%m1 = bitcast i16 %y to <16 x i1>
@ -3856,49 +3824,17 @@ define i16 @test_v16i1_add(i16 %x, i16 %y) {
}
define i16 @test_v16i1_sub(i16 %x, i16 %y) {
; KNL-LABEL: test_v16i1_sub:
; KNL: ## %bb.0:
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kmovw %esi, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
; KNL-NEXT: retq
;
; SKX-LABEL: test_v16i1_sub:
; SKX: ## %bb.0:
; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: kmovd %esi, %k1
; SKX-NEXT: kxorw %k1, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test_v16i1_sub:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k0
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_v16i1_sub:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: kmovw %edi, %k0
; AVX512DQ-NEXT: kmovw %esi, %k1
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
; AVX512DQ-NEXT: retq
; CHECK-LABEL: test_v16i1_sub:
; CHECK: ## %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: xorl %esi, %eax
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
;
; X86-LABEL: test_v16i1_sub:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: kxorw %k1, %k0, %k0
; X86-NEXT: kmovd %k0, %eax
; X86-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorw {{[0-9]+}}(%esp), %ax
; X86-NEXT: retl
%m0 = bitcast i16 %x to <16 x i1>
%m1 = bitcast i16 %y to <16 x i1>
@ -3960,49 +3896,17 @@ define i16 @test_v16i1_mul(i16 %x, i16 %y) {
}
define i8 @test_v8i1_add(i8 %x, i8 %y) {
; KNL-LABEL: test_v8i1_add:
; KNL: ## %bb.0:
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kmovw %esi, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: def $al killed $al killed $eax
; KNL-NEXT: retq
;
; SKX-LABEL: test_v8i1_add:
; SKX: ## %bb.0:
; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: kmovd %esi, %k1
; SKX-NEXT: kxorb %k1, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def $al killed $al killed $eax
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test_v8i1_add:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k0
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_v8i1_add:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: kmovw %edi, %k0
; AVX512DQ-NEXT: kmovw %esi, %k1
; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
; AVX512DQ-NEXT: retq
; CHECK-LABEL: test_v8i1_add:
; CHECK: ## %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: xorl %esi, %eax
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
;
; X86-LABEL: test_v8i1_add:
; X86: ## %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT: kxorb %k1, %k0, %k0
; X86-NEXT: kmovd %k0, %eax
; X86-NEXT: ## kill: def $al killed $al killed $eax
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: xorb {{[0-9]+}}(%esp), %al
; X86-NEXT: retl
%m0 = bitcast i8 %x to <8 x i1>
%m1 = bitcast i8 %y to <8 x i1>
@ -4012,49 +3916,17 @@ define i8 @test_v8i1_add(i8 %x, i8 %y) {
}
define i8 @test_v8i1_sub(i8 %x, i8 %y) {
; KNL-LABEL: test_v8i1_sub:
; KNL: ## %bb.0:
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kmovw %esi, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: def $al killed $al killed $eax
; KNL-NEXT: retq
;
; SKX-LABEL: test_v8i1_sub:
; SKX: ## %bb.0:
; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: kmovd %esi, %k1
; SKX-NEXT: kxorb %k1, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def $al killed $al killed $eax
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test_v8i1_sub:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k0
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_v8i1_sub:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: kmovw %edi, %k0
; AVX512DQ-NEXT: kmovw %esi, %k1
; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
; AVX512DQ-NEXT: retq
; CHECK-LABEL: test_v8i1_sub:
; CHECK: ## %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: xorl %esi, %eax
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
;
; X86-LABEL: test_v8i1_sub:
; X86: ## %bb.0:
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT: kxorb %k1, %k0, %k0
; X86-NEXT: kmovd %k0, %eax
; X86-NEXT: ## kill: def $al killed $al killed $eax
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: xorb {{[0-9]+}}(%esp), %al
; X86-NEXT: retl
%m0 = bitcast i8 %x to <8 x i1>
%m1 = bitcast i8 %y to <8 x i1>
@ -5229,78 +5101,17 @@ define <64 x i1> @mask64_insert(i32 %a) {
}
define i1 @test_v1i1_add(i1 %x, i1 %y) {
; KNL-LABEL: test_v1i1_add:
; KNL: ## %bb.0:
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kmovw %esi, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; KNL-NEXT: movb -{{[0-9]+}}(%rsp), %al
; KNL-NEXT: retq
;
; SKX-LABEL: test_v1i1_add:
; SKX: ## %bb.0:
; SKX-NEXT: andl $1, %edi
; SKX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; SKX-NEXT: andl $1, %esi
; SKX-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; SKX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k0
; SKX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; SKX-NEXT: kxorw %k1, %k0, %k0
; SKX-NEXT: kshiftlb $7, %k0, %k0
; SKX-NEXT: kshiftrb $7, %k0, %k0
; SKX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test_v1i1_add:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k0
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
; AVX512BW-NEXT: kshiftlw $15, %k0, %k0
; AVX512BW-NEXT: kshiftrw $15, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_v1i1_add:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: andl $1, %edi
; AVX512DQ-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512DQ-NEXT: andl $1, %esi
; AVX512DQ-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; AVX512DQ-NEXT: kmovb -{{[0-9]+}}(%rsp), %k0
; AVX512DQ-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
; AVX512DQ-NEXT: kshiftlb $7, %k0, %k0
; AVX512DQ-NEXT: kshiftrb $7, %k0, %k0
; AVX512DQ-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; AVX512DQ-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: retq
; CHECK-LABEL: test_v1i1_add:
; CHECK: ## %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: xorl %esi, %eax
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
;
; X86-LABEL: test_v1i1_add:
; X86: ## %bb.0:
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: andb $1, %al
; X86-NEXT: movb %al, {{[0-9]+}}(%esp)
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: andb $1, %al
; X86-NEXT: movb %al, {{[0-9]+}}(%esp)
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT: kxorw %k1, %k0, %k0
; X86-NEXT: kshiftlb $7, %k0, %k0
; X86-NEXT: kshiftrb $7, %k0, %k0
; X86-NEXT: kmovb %k0, {{[0-9]+}}(%esp)
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: popl %ecx
; X86-NEXT: xorb {{[0-9]+}}(%esp), %al
; X86-NEXT: retl
%m0 = bitcast i1 %x to <1 x i1>
%m1 = bitcast i1 %y to <1 x i1>
@ -5310,78 +5121,17 @@ define i1 @test_v1i1_add(i1 %x, i1 %y) {
}
define i1 @test_v1i1_sub(i1 %x, i1 %y) {
; KNL-LABEL: test_v1i1_sub:
; KNL: ## %bb.0:
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kmovw %esi, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; KNL-NEXT: movb -{{[0-9]+}}(%rsp), %al
; KNL-NEXT: retq
;
; SKX-LABEL: test_v1i1_sub:
; SKX: ## %bb.0:
; SKX-NEXT: andl $1, %edi
; SKX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; SKX-NEXT: andl $1, %esi
; SKX-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; SKX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k0
; SKX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; SKX-NEXT: kxorw %k1, %k0, %k0
; SKX-NEXT: kshiftlb $7, %k0, %k0
; SKX-NEXT: kshiftrb $7, %k0, %k0
; SKX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test_v1i1_sub:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k0
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
; AVX512BW-NEXT: kshiftlw $15, %k0, %k0
; AVX512BW-NEXT: kshiftrw $15, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_v1i1_sub:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: andl $1, %edi
; AVX512DQ-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512DQ-NEXT: andl $1, %esi
; AVX512DQ-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; AVX512DQ-NEXT: kmovb -{{[0-9]+}}(%rsp), %k0
; AVX512DQ-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; AVX512DQ-NEXT: kxorw %k1, %k0, %k0
; AVX512DQ-NEXT: kshiftlb $7, %k0, %k0
; AVX512DQ-NEXT: kshiftrb $7, %k0, %k0
; AVX512DQ-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; AVX512DQ-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: retq
; CHECK-LABEL: test_v1i1_sub:
; CHECK: ## %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: xorl %esi, %eax
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
;
; X86-LABEL: test_v1i1_sub:
; X86: ## %bb.0:
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: andb $1, %al
; X86-NEXT: movb %al, {{[0-9]+}}(%esp)
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: andb $1, %al
; X86-NEXT: movb %al, {{[0-9]+}}(%esp)
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k0
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT: kxorw %k1, %k0, %k0
; X86-NEXT: kshiftlb $7, %k0, %k0
; X86-NEXT: kshiftrb $7, %k0, %k0
; X86-NEXT: kmovb %k0, {{[0-9]+}}(%esp)
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: popl %ecx
; X86-NEXT: xorb {{[0-9]+}}(%esp), %al
; X86-NEXT: retl
%m0 = bitcast i1 %x to <1 x i1>
%m1 = bitcast i1 %y to <1 x i1>

View File

@ -152,10 +152,8 @@ define i64 @mand64_mem(<64 x i1>* %x, <64 x i1>* %y) {
define i32 @test_v32i1_add(i32 %x, i32 %y) {
; CHECK-LABEL: test_v32i1_add:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k0
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: kxord %k1, %k0, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: xorl %esi, %eax
; CHECK-NEXT: retq
%m0 = bitcast i32 %x to <32 x i1>
%m1 = bitcast i32 %y to <32 x i1>
@ -167,10 +165,8 @@ define i32 @test_v32i1_add(i32 %x, i32 %y) {
define i32 @test_v32i1_sub(i32 %x, i32 %y) {
; CHECK-LABEL: test_v32i1_sub:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k0
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: kxord %k1, %k0, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: xorl %esi, %eax
; CHECK-NEXT: retq
%m0 = bitcast i32 %x to <32 x i1>
%m1 = bitcast i32 %y to <32 x i1>
@ -197,10 +193,8 @@ define i32 @test_v32i1_mul(i32 %x, i32 %y) {
define i64 @test_v64i1_add(i64 %x, i64 %y) {
; CHECK-LABEL: test_v64i1_add:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovq %rdi, %k0
; CHECK-NEXT: kmovq %rsi, %k1
; CHECK-NEXT: kxorq %k1, %k0, %k0
; CHECK-NEXT: kmovq %k0, %rax
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: xorq %rsi, %rax
; CHECK-NEXT: retq
%m0 = bitcast i64 %x to <64 x i1>
%m1 = bitcast i64 %y to <64 x i1>
@ -212,10 +206,8 @@ define i64 @test_v64i1_add(i64 %x, i64 %y) {
define i64 @test_v64i1_sub(i64 %x, i64 %y) {
; CHECK-LABEL: test_v64i1_sub:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovq %rdi, %k0
; CHECK-NEXT: kmovq %rsi, %k1
; CHECK-NEXT: kxorq %k1, %k0, %k0
; CHECK-NEXT: kmovq %k0, %rax
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: xorq %rsi, %rax
; CHECK-NEXT: retq
%m0 = bitcast i64 %x to <64 x i1>
%m1 = bitcast i64 %y to <64 x i1>