From acd8515dc0054d68e8cf4f3a58286c245402ad15 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 25 Aug 2020 15:16:50 -0700 Subject: [PATCH] [X86] Remove a redundant COPY_TO_REGCLASS for VK16 after a KMOVWkr in an isel output pattern. KMOVWkr produces VK16, there's no reason to copy it to VK16 again. Test changes are presumably because we were scheduling based on the COPY that is no longer there. --- lib/Target/X86/X86InstrAVX512.td | 7 +- test/CodeGen/X86/avx512-ext.ll | 36 ++++----- test/CodeGen/X86/avx512-insert-extract.ll | 16 ++-- test/CodeGen/X86/avx512-mask-op.ll | 24 +++--- test/CodeGen/X86/avx512-vec-cmp.ll | 24 +++--- test/CodeGen/X86/vec_saddo.ll | 56 +++++++------- test/CodeGen/X86/vec_smulo.ll | 20 ++--- test/CodeGen/X86/vec_ssubo.ll | 54 ++++++------- test/CodeGen/X86/vec_uaddo.ll | 10 +-- test/CodeGen/X86/vec_umulo.ll | 92 +++++++++++------------ test/CodeGen/X86/vec_usubo.ll | 10 +-- 11 files changed, 173 insertions(+), 176 deletions(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index f9582238d30..0514a3a3611 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2967,10 +2967,9 @@ let Predicates = [HasAVX512] in { def : Pat<(insert_subvector (v16i1 immAllZerosV), (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)), - (COPY_TO_REGCLASS - (KMOVWkr (AND32ri8 - (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), - (i32 1))), VK16)>; + (KMOVWkr (AND32ri8 + (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), + (i32 1)))>; } // Mask unary operation diff --git a/test/CodeGen/X86/avx512-ext.ll b/test/CodeGen/X86/avx512-ext.ll index 5ce111806a2..fc1ba049c69 100644 --- a/test/CodeGen/X86/avx512-ext.ll +++ b/test/CodeGen/X86/avx512-ext.ll @@ -1766,39 +1766,39 @@ define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind { define i16 @trunc_i32_to_i1(i32 %a) { ; KNL-LABEL: trunc_i32_to_i1: ; KNL: # %bb.0: -; KNL-NEXT: movw $-4, %ax -; KNL-NEXT: kmovw %eax, %k0 -; KNL-NEXT: kshiftrw $1, %k0, %k0 -; KNL-NEXT: kshiftlw $1, %k0, %k0 ; KNL-NEXT: andl $1, %edi -; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 +; KNL-NEXT: kmovw %edi, %k0 +; KNL-NEXT: movw $-4, %ax +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kshiftrw $1, %k1, %k1 +; KNL-NEXT: kshiftlw $1, %k1, %k1 +; KNL-NEXT: korw %k0, %k1, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: # kill: def $ax killed $ax killed $eax ; KNL-NEXT: retq ; ; SKX-LABEL: trunc_i32_to_i1: ; SKX: # %bb.0: -; SKX-NEXT: movw $-4, %ax -; SKX-NEXT: kmovd %eax, %k0 -; SKX-NEXT: kshiftrw $1, %k0, %k0 -; SKX-NEXT: kshiftlw $1, %k0, %k0 ; SKX-NEXT: andl $1, %edi -; SKX-NEXT: kmovw %edi, %k1 -; SKX-NEXT: korw %k1, %k0, %k0 +; SKX-NEXT: kmovw %edi, %k0 +; SKX-NEXT: movw $-4, %ax +; SKX-NEXT: kmovd %eax, %k1 +; SKX-NEXT: kshiftrw $1, %k1, %k1 +; SKX-NEXT: kshiftlw $1, %k1, %k1 +; SKX-NEXT: korw %k0, %k1, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: # kill: def $ax killed $ax killed $eax ; SKX-NEXT: retq ; ; AVX512DQNOBW-LABEL: trunc_i32_to_i1: ; AVX512DQNOBW: # %bb.0: -; AVX512DQNOBW-NEXT: movw $-4, %ax -; AVX512DQNOBW-NEXT: kmovw %eax, %k0 -; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0 -; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0 ; AVX512DQNOBW-NEXT: andl $1, %edi -; AVX512DQNOBW-NEXT: kmovw %edi, %k1 -; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 +; AVX512DQNOBW-NEXT: kmovw %edi, %k0 +; AVX512DQNOBW-NEXT: movw $-4, %ax +; AVX512DQNOBW-NEXT: kmovw %eax, %k1 +; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1 +; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1 +; AVX512DQNOBW-NEXT: korw %k0, %k1, %k0 ; AVX512DQNOBW-NEXT: kmovw %k0, %eax ; AVX512DQNOBW-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512DQNOBW-NEXT: retq diff --git a/test/CodeGen/X86/avx512-insert-extract.ll b/test/CodeGen/X86/avx512-insert-extract.ll index 41bdaf21baa..fd722e1beb1 100644 --- a/test/CodeGen/X86/avx512-insert-extract.ll +++ b/test/CodeGen/X86/avx512-insert-extract.ll @@ -2181,32 +2181,32 @@ define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index define void @test_concat_v2i1(<2 x half>* %arg, <2 x half>* %arg1, <2 x half>* %arg2) { ; KNL-LABEL: test_concat_v2i1: ; KNL: ## %bb.0: -; KNL-NEXT: movzwl (%rdi), %eax -; KNL-NEXT: movzwl 2(%rdi), %ecx +; KNL-NEXT: movzwl 2(%rdi), %eax +; KNL-NEXT: movzwl (%rdi), %ecx ; KNL-NEXT: vmovd %ecx, %xmm0 ; KNL-NEXT: vcvtph2ps %xmm0, %xmm0 ; KNL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; KNL-NEXT: vucomiss %xmm1, %xmm0 ; KNL-NEXT: setb %cl +; KNL-NEXT: andl $1, %ecx ; KNL-NEXT: kmovw %ecx, %k0 -; KNL-NEXT: kshiftlw $1, %k0, %k0 ; KNL-NEXT: vmovd %eax, %xmm2 ; KNL-NEXT: vcvtph2ps %xmm2, %xmm2 ; KNL-NEXT: vucomiss %xmm1, %xmm2 ; KNL-NEXT: setb %al -; KNL-NEXT: andl $1, %eax ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: korw %k0, %k1, %k0 +; KNL-NEXT: kshiftlw $1, %k1, %k1 +; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; KNL-NEXT: vucomiss %xmm1, %xmm0 ; KNL-NEXT: seta %al +; KNL-NEXT: andl $1, %eax ; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $1, %k1, %k1 ; KNL-NEXT: vucomiss %xmm1, %xmm2 ; KNL-NEXT: seta %al -; KNL-NEXT: andl $1, %eax ; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $1, %k2, %k2 +; KNL-NEXT: korw %k2, %k1, %k1 ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kshiftrw $1, %k0, %k1 ; KNL-NEXT: kmovw %k1, %ecx diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll index e67b8158139..67067e3fff2 100644 --- a/test/CodeGen/X86/avx512-mask-op.ll +++ b/test/CodeGen/X86/avx512-mask-op.ll @@ -5157,13 +5157,13 @@ define <64 x i1> @mask64_insert(i32 %a) { ; KNL-LABEL: mask64_insert: ; KNL: ## %bb.0: ; KNL-NEXT: movq %rdi, %rax -; KNL-NEXT: movw $-4, %cx -; KNL-NEXT: kmovw %ecx, %k0 -; KNL-NEXT: kshiftrw $1, %k0, %k0 -; KNL-NEXT: kshiftlw $1, %k0, %k0 ; KNL-NEXT: andl $1, %esi -; KNL-NEXT: kmovw %esi, %k1 -; KNL-NEXT: korw %k1, %k0, %k0 +; KNL-NEXT: kmovw %esi, %k0 +; KNL-NEXT: movw $-4, %cx +; KNL-NEXT: kmovw %ecx, %k1 +; KNL-NEXT: kshiftrw $1, %k1, %k1 +; KNL-NEXT: kshiftlw $1, %k1, %k1 +; KNL-NEXT: korw %k0, %k1, %k0 ; KNL-NEXT: kmovw %k0, (%rdi) ; KNL-NEXT: movw $-3, 6(%rdi) ; KNL-NEXT: movl $-131075, 2(%rdi) ## imm = 0xFFFDFFFD @@ -5198,13 +5198,13 @@ define <64 x i1> @mask64_insert(i32 %a) { ; AVX512DQ-LABEL: mask64_insert: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: movq %rdi, %rax -; AVX512DQ-NEXT: movw $-4, %cx -; AVX512DQ-NEXT: kmovw %ecx, %k0 -; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0 ; AVX512DQ-NEXT: andl $1, %esi -; AVX512DQ-NEXT: kmovw %esi, %k1 -; AVX512DQ-NEXT: korw %k1, %k0, %k0 +; AVX512DQ-NEXT: kmovw %esi, %k0 +; AVX512DQ-NEXT: movw $-4, %cx +; AVX512DQ-NEXT: kmovw %ecx, %k1 +; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1 +; AVX512DQ-NEXT: kshiftlw $1, %k1, %k1 +; AVX512DQ-NEXT: korw %k0, %k1, %k0 ; AVX512DQ-NEXT: kmovw %k0, (%rdi) ; AVX512DQ-NEXT: movw $-3, 6(%rdi) ; AVX512DQ-NEXT: movl $-131075, 2(%rdi) ## imm = 0xFFFDFFFD diff --git a/test/CodeGen/X86/avx512-vec-cmp.ll b/test/CodeGen/X86/avx512-vec-cmp.ll index 719bd9f9d95..7dcae9a2d24 100644 --- a/test/CodeGen/X86/avx512-vec-cmp.ll +++ b/test/CodeGen/X86/avx512-vec-cmp.ll @@ -1434,8 +1434,8 @@ define <4 x i32> @zext_bool_logic(<4 x i64> %cond1, <4 x i64> %cond2, <4 x i32> define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) { ; KNL-LABEL: half_vec_compare: ; KNL: ## %bb.0: ## %entry -; KNL-NEXT: movzwl (%rdi), %eax ## encoding: [0x0f,0xb7,0x07] -; KNL-NEXT: movzwl 2(%rdi), %ecx ## encoding: [0x0f,0xb7,0x4f,0x02] +; KNL-NEXT: movzwl 2(%rdi), %eax ## encoding: [0x0f,0xb7,0x47,0x02] +; KNL-NEXT: movzwl (%rdi), %ecx ## encoding: [0x0f,0xb7,0x0f] ; KNL-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; KNL-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0] ; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9] @@ -1443,17 +1443,17 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) { ; KNL-NEXT: setp %cl ## encoding: [0x0f,0x9a,0xc1] ; KNL-NEXT: setne %dl ## encoding: [0x0f,0x95,0xc2] ; KNL-NEXT: orb %cl, %dl ## encoding: [0x08,0xca] +; KNL-NEXT: andl $1, %edx ## encoding: [0x83,0xe2,0x01] ; KNL-NEXT: kmovw %edx, %k0 ## encoding: [0xc5,0xf8,0x92,0xc2] -; KNL-NEXT: kshiftlw $1, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x01] ; KNL-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] ; KNL-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0] ; KNL-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] ; KNL-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0] ; KNL-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1] ; KNL-NEXT: orb %al, %cl ## encoding: [0x08,0xc1] -; KNL-NEXT: andl $1, %ecx ## encoding: [0x83,0xe1,0x01] ; KNL-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9] -; KNL-NEXT: korw %k0, %k1, %k1 ## encoding: [0xc5,0xf4,0x45,0xc8] +; KNL-NEXT: kshiftlw $1, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x01] +; KNL-NEXT: korw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x45,0xc9] ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff] ; KNL-NEXT: vpmovdw %zmm0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x33,0xc0] ; KNL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc0] @@ -1465,8 +1465,8 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) { ; ; AVX512BW-LABEL: half_vec_compare: ; AVX512BW: ## %bb.0: ## %entry -; AVX512BW-NEXT: movzwl (%rdi), %eax ## encoding: [0x0f,0xb7,0x07] -; AVX512BW-NEXT: movzwl 2(%rdi), %ecx ## encoding: [0x0f,0xb7,0x4f,0x02] +; AVX512BW-NEXT: movzwl 2(%rdi), %eax ## encoding: [0x0f,0xb7,0x47,0x02] +; AVX512BW-NEXT: movzwl (%rdi), %ecx ## encoding: [0x0f,0xb7,0x0f] ; AVX512BW-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0] ; AVX512BW-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9] @@ -1474,17 +1474,17 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) { ; AVX512BW-NEXT: setp %cl ## encoding: [0x0f,0x9a,0xc1] ; AVX512BW-NEXT: setne %dl ## encoding: [0x0f,0x95,0xc2] ; AVX512BW-NEXT: orb %cl, %dl ## encoding: [0x08,0xca] -; AVX512BW-NEXT: kmovd %edx, %k0 ## encoding: [0xc5,0xfb,0x92,0xc2] -; AVX512BW-NEXT: kshiftlw $1, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x01] +; AVX512BW-NEXT: andl $1, %edx ## encoding: [0x83,0xe2,0x01] +; AVX512BW-NEXT: kmovw %edx, %k0 ## encoding: [0xc5,0xf8,0x92,0xc2] ; AVX512BW-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] ; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0] ; AVX512BW-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] ; AVX512BW-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0] ; AVX512BW-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1] ; AVX512BW-NEXT: orb %al, %cl ## encoding: [0x08,0xc1] -; AVX512BW-NEXT: andl $1, %ecx ## encoding: [0x83,0xe1,0x01] -; AVX512BW-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9] -; AVX512BW-NEXT: korw %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x45,0xc0] +; AVX512BW-NEXT: kmovd %ecx, %k1 ## encoding: [0xc5,0xfb,0x92,0xc9] +; AVX512BW-NEXT: kshiftlw $1, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x01] +; AVX512BW-NEXT: korw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x45,0xc1] ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ## encoding: [0x62,0xf2,0xfe,0x48,0x28,0xc0] ; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc0] ; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A] diff --git a/test/CodeGen/X86/vec_saddo.ll b/test/CodeGen/X86/vec_saddo.ll index e1f780da4fc..6bee501e06a 100644 --- a/test/CodeGen/X86/vec_saddo.ll +++ b/test/CodeGen/X86/vec_saddo.ll @@ -1372,48 +1372,48 @@ define <2 x i32> @saddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) ; ; AVX512-LABEL: saddo_v2i128: ; AVX512: # %bb.0: -; AVX512-NEXT: pushq %rbp +; AVX512-NEXT: pushq %r14 ; AVX512-NEXT: pushq %rbx -; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11 ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; AVX512-NEXT: testq %r9, %r9 -; AVX512-NEXT: setns %al -; AVX512-NEXT: testq %rsi, %rsi -; AVX512-NEXT: setns %bl -; AVX512-NEXT: cmpb %al, %bl -; AVX512-NEXT: sete %bpl -; AVX512-NEXT: addq %r8, %rdi -; AVX512-NEXT: adcq %r9, %rsi -; AVX512-NEXT: setns %al -; AVX512-NEXT: cmpb %al, %bl -; AVX512-NEXT: setne %al -; AVX512-NEXT: andb %bpl, %al +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11 ; AVX512-NEXT: addq {{[0-9]+}}(%rsp), %rdx -; AVX512-NEXT: movq %rcx, %rbp -; AVX512-NEXT: adcq %r10, %rbp +; AVX512-NEXT: movq %rcx, %r14 +; AVX512-NEXT: adcq %r11, %r14 ; AVX512-NEXT: setns %bl ; AVX512-NEXT: testq %rcx, %rcx ; AVX512-NEXT: setns %cl ; AVX512-NEXT: cmpb %bl, %cl -; AVX512-NEXT: setne %r8b -; AVX512-NEXT: testq %r10, %r10 +; AVX512-NEXT: setne %bl +; AVX512-NEXT: testq %r11, %r11 +; AVX512-NEXT: setns %al +; AVX512-NEXT: cmpb %al, %cl +; AVX512-NEXT: sete %al +; AVX512-NEXT: andb %bl, %al +; AVX512-NEXT: kmovd %eax, %k0 +; AVX512-NEXT: testq %r9, %r9 +; AVX512-NEXT: setns %al +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: setns %cl +; AVX512-NEXT: cmpb %al, %cl +; AVX512-NEXT: sete %al +; AVX512-NEXT: addq %r8, %rdi +; AVX512-NEXT: adcq %r9, %rsi ; AVX512-NEXT: setns %bl ; AVX512-NEXT: cmpb %bl, %cl -; AVX512-NEXT: sete %cl -; AVX512-NEXT: andb %r8b, %cl -; AVX512-NEXT: kmovd %ecx, %k0 +; AVX512-NEXT: setne %cl +; AVX512-NEXT: andb %al, %cl +; AVX512-NEXT: andl $1, %ecx +; AVX512-NEXT: kmovw %ecx, %k1 ; AVX512-NEXT: kshiftlw $1, %k0, %k0 -; AVX512-NEXT: andl $1, %eax -; AVX512-NEXT: kmovw %eax, %k1 ; AVX512-NEXT: korw %k0, %k1, %k1 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512-NEXT: movq %rdx, 16(%r11) -; AVX512-NEXT: movq %rdi, (%r11) -; AVX512-NEXT: movq %rbp, 24(%r11) -; AVX512-NEXT: movq %rsi, 8(%r11) +; AVX512-NEXT: movq %rdx, 16(%r10) +; AVX512-NEXT: movq %rdi, (%r10) +; AVX512-NEXT: movq %r14, 24(%r10) +; AVX512-NEXT: movq %rsi, 8(%r10) ; AVX512-NEXT: popq %rbx -; AVX512-NEXT: popq %rbp +; AVX512-NEXT: popq %r14 ; AVX512-NEXT: retq %t = call {<2 x i128>, <2 x i1>} @llvm.sadd.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1) %val = extractvalue {<2 x i128>, <2 x i1>} %t, 0 diff --git a/test/CodeGen/X86/vec_smulo.ll b/test/CodeGen/X86/vec_smulo.ll index ad0a8f8ff12..1b5aef61ebf 100644 --- a/test/CodeGen/X86/vec_smulo.ll +++ b/test/CodeGen/X86/vec_smulo.ll @@ -3942,39 +3942,39 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) ; AVX512-NEXT: pushq %rbx ; AVX512-NEXT: subq $24, %rsp ; AVX512-NEXT: movq %r8, %rax -; AVX512-NEXT: movq %rcx, %r15 +; AVX512-NEXT: movq %rcx, %r14 ; AVX512-NEXT: movq %rdx, %rbx +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r15 ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r12 -; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r13 ; AVX512-NEXT: movq $0, {{[0-9]+}}(%rsp) ; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %r8 ; AVX512-NEXT: movq %rax, %rdx ; AVX512-NEXT: movq %r9, %rcx ; AVX512-NEXT: callq __muloti4 -; AVX512-NEXT: movq %rax, %r14 +; AVX512-NEXT: movq %rax, %r13 ; AVX512-NEXT: movq %rdx, %rbp ; AVX512-NEXT: movq $0, {{[0-9]+}}(%rsp) ; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %r8 ; AVX512-NEXT: movq %rbx, %rdi -; AVX512-NEXT: movq %r15, %rsi +; AVX512-NEXT: movq %r14, %rsi ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx -; AVX512-NEXT: movq %r13, %rcx +; AVX512-NEXT: movq %r12, %rcx ; AVX512-NEXT: callq __muloti4 ; AVX512-NEXT: cmpq $0, {{[0-9]+}}(%rsp) ; AVX512-NEXT: setne %cl ; AVX512-NEXT: kmovd %ecx, %k0 ; AVX512-NEXT: cmpq $0, {{[0-9]+}}(%rsp) ; AVX512-NEXT: setne %cl -; AVX512-NEXT: kshiftlw $1, %k0, %k0 ; AVX512-NEXT: andl $1, %ecx ; AVX512-NEXT: kmovw %ecx, %k1 +; AVX512-NEXT: kshiftlw $1, %k0, %k0 ; AVX512-NEXT: korw %k0, %k1, %k1 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512-NEXT: movq %rdx, 24(%r12) -; AVX512-NEXT: movq %rax, 16(%r12) -; AVX512-NEXT: movq %rbp, 8(%r12) -; AVX512-NEXT: movq %r14, (%r12) +; AVX512-NEXT: movq %rdx, 24(%r15) +; AVX512-NEXT: movq %rax, 16(%r15) +; AVX512-NEXT: movq %rbp, 8(%r15) +; AVX512-NEXT: movq %r13, (%r15) ; AVX512-NEXT: addq $24, %rsp ; AVX512-NEXT: popq %rbx ; AVX512-NEXT: popq %r12 diff --git a/test/CodeGen/X86/vec_ssubo.ll b/test/CodeGen/X86/vec_ssubo.ll index 4e2c3a57831..9981643ba2d 100644 --- a/test/CodeGen/X86/vec_ssubo.ll +++ b/test/CodeGen/X86/vec_ssubo.ll @@ -1381,48 +1381,48 @@ define <2 x i32> @ssubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) ; ; AVX512-LABEL: ssubo_v2i128: ; AVX512: # %bb.0: -; AVX512-NEXT: pushq %rbp +; AVX512-NEXT: pushq %r14 ; AVX512-NEXT: pushq %rbx -; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11 ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; AVX512-NEXT: testq %r9, %r9 -; AVX512-NEXT: setns %al -; AVX512-NEXT: testq %rsi, %rsi -; AVX512-NEXT: setns %bl -; AVX512-NEXT: cmpb %al, %bl -; AVX512-NEXT: setne %bpl -; AVX512-NEXT: subq %r8, %rdi -; AVX512-NEXT: sbbq %r9, %rsi -; AVX512-NEXT: setns %al -; AVX512-NEXT: cmpb %al, %bl -; AVX512-NEXT: setne %al -; AVX512-NEXT: andb %bpl, %al +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11 ; AVX512-NEXT: subq {{[0-9]+}}(%rsp), %rdx -; AVX512-NEXT: movq %rcx, %rbp -; AVX512-NEXT: sbbq %r10, %rbp +; AVX512-NEXT: movq %rcx, %r14 +; AVX512-NEXT: sbbq %r11, %r14 ; AVX512-NEXT: setns %bl ; AVX512-NEXT: testq %rcx, %rcx ; AVX512-NEXT: setns %cl ; AVX512-NEXT: cmpb %bl, %cl -; AVX512-NEXT: setne %r8b -; AVX512-NEXT: testq %r10, %r10 +; AVX512-NEXT: setne %bl +; AVX512-NEXT: testq %r11, %r11 +; AVX512-NEXT: setns %al +; AVX512-NEXT: cmpb %al, %cl +; AVX512-NEXT: setne %al +; AVX512-NEXT: andb %bl, %al +; AVX512-NEXT: kmovd %eax, %k0 +; AVX512-NEXT: testq %r9, %r9 +; AVX512-NEXT: setns %al +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: setns %cl +; AVX512-NEXT: cmpb %al, %cl +; AVX512-NEXT: setne %al +; AVX512-NEXT: subq %r8, %rdi +; AVX512-NEXT: sbbq %r9, %rsi ; AVX512-NEXT: setns %bl ; AVX512-NEXT: cmpb %bl, %cl ; AVX512-NEXT: setne %cl -; AVX512-NEXT: andb %r8b, %cl -; AVX512-NEXT: kmovd %ecx, %k0 +; AVX512-NEXT: andb %al, %cl +; AVX512-NEXT: andl $1, %ecx +; AVX512-NEXT: kmovw %ecx, %k1 ; AVX512-NEXT: kshiftlw $1, %k0, %k0 -; AVX512-NEXT: andl $1, %eax -; AVX512-NEXT: kmovw %eax, %k1 ; AVX512-NEXT: korw %k0, %k1, %k1 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512-NEXT: movq %rdx, 16(%r11) -; AVX512-NEXT: movq %rdi, (%r11) -; AVX512-NEXT: movq %rbp, 24(%r11) -; AVX512-NEXT: movq %rsi, 8(%r11) +; AVX512-NEXT: movq %rdx, 16(%r10) +; AVX512-NEXT: movq %rdi, (%r10) +; AVX512-NEXT: movq %r14, 24(%r10) +; AVX512-NEXT: movq %rsi, 8(%r10) ; AVX512-NEXT: popq %rbx -; AVX512-NEXT: popq %rbp +; AVX512-NEXT: popq %r14 ; AVX512-NEXT: retq %t = call {<2 x i128>, <2 x i1>} @llvm.ssub.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1) %val = extractvalue {<2 x i128>, <2 x i1>} %t, 0 diff --git a/test/CodeGen/X86/vec_uaddo.ll b/test/CodeGen/X86/vec_uaddo.ll index 9a153253a16..c34653be4a0 100644 --- a/test/CodeGen/X86/vec_uaddo.ll +++ b/test/CodeGen/X86/vec_uaddo.ll @@ -1282,16 +1282,16 @@ define <2 x i32> @uaddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) ; AVX512-LABEL: uaddo_v2i128: ; AVX512: # %bb.0: ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; AVX512-NEXT: addq %r8, %rdi -; AVX512-NEXT: adcq %r9, %rsi -; AVX512-NEXT: setb %r8b ; AVX512-NEXT: addq {{[0-9]+}}(%rsp), %rdx ; AVX512-NEXT: adcq {{[0-9]+}}(%rsp), %rcx ; AVX512-NEXT: setb %al ; AVX512-NEXT: kmovd %eax, %k0 +; AVX512-NEXT: addq %r8, %rdi +; AVX512-NEXT: adcq %r9, %rsi +; AVX512-NEXT: setb %al +; AVX512-NEXT: andl $1, %eax +; AVX512-NEXT: kmovw %eax, %k1 ; AVX512-NEXT: kshiftlw $1, %k0, %k0 -; AVX512-NEXT: andl $1, %r8d -; AVX512-NEXT: kmovw %r8d, %k1 ; AVX512-NEXT: korw %k0, %k1, %k1 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} diff --git a/test/CodeGen/X86/vec_umulo.ll b/test/CodeGen/X86/vec_umulo.ll index 54bb86dc5a0..cc25fd5bec7 100644 --- a/test/CodeGen/X86/vec_umulo.ll +++ b/test/CodeGen/X86/vec_umulo.ll @@ -3689,68 +3689,66 @@ define <2 x i32> @umulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) ; AVX512-NEXT: pushq %r13 ; AVX512-NEXT: pushq %r12 ; AVX512-NEXT: pushq %rbx -; AVX512-NEXT: movq %r9, %r10 -; AVX512-NEXT: movq %rcx, %r9 -; AVX512-NEXT: movq %rdx, %r11 -; AVX512-NEXT: movq %rsi, %rax -; AVX512-NEXT: movq %rdi, %rsi +; AVX512-NEXT: movq %rcx, %rax +; AVX512-NEXT: movq %rdx, %r12 +; AVX512-NEXT: movq %rdi, %r11 ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r14 ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r15 -; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r12 +; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10 ; AVX512-NEXT: testq %r10, %r10 ; AVX512-NEXT: setne %dl -; AVX512-NEXT: testq %rax, %rax -; AVX512-NEXT: setne %bl -; AVX512-NEXT: andb %dl, %bl -; AVX512-NEXT: mulq %r8 -; AVX512-NEXT: movq %rax, %r13 +; AVX512-NEXT: testq %rcx, %rcx +; AVX512-NEXT: setne %r13b +; AVX512-NEXT: andb %dl, %r13b +; AVX512-NEXT: mulq %r15 +; AVX512-NEXT: movq %rax, %rdi ; AVX512-NEXT: seto %bpl ; AVX512-NEXT: movq %r10, %rax -; AVX512-NEXT: mulq %rdi -; AVX512-NEXT: movq %rax, %rdi -; AVX512-NEXT: seto %cl -; AVX512-NEXT: orb %bpl, %cl -; AVX512-NEXT: addq %r13, %rdi -; AVX512-NEXT: movq %rsi, %rax -; AVX512-NEXT: mulq %r8 -; AVX512-NEXT: movq %rax, %r8 -; AVX512-NEXT: movq %rdx, %r10 -; AVX512-NEXT: addq %rdi, %r10 -; AVX512-NEXT: setb %sil -; AVX512-NEXT: orb %cl, %sil -; AVX512-NEXT: orb %bl, %sil -; AVX512-NEXT: testq %r12, %r12 -; AVX512-NEXT: setne %al -; AVX512-NEXT: testq %r9, %r9 -; AVX512-NEXT: setne %bpl -; AVX512-NEXT: andb %al, %bpl -; AVX512-NEXT: movq %r9, %rax -; AVX512-NEXT: mulq %r15 -; AVX512-NEXT: movq %rax, %rdi -; AVX512-NEXT: seto %r9b -; AVX512-NEXT: movq %r12, %rax -; AVX512-NEXT: mulq %r11 +; AVX512-NEXT: mulq %r12 ; AVX512-NEXT: movq %rax, %rbx ; AVX512-NEXT: seto %cl -; AVX512-NEXT: orb %r9b, %cl +; AVX512-NEXT: orb %bpl, %cl ; AVX512-NEXT: addq %rdi, %rbx -; AVX512-NEXT: movq %r11, %rax +; AVX512-NEXT: movq %r12, %rax ; AVX512-NEXT: mulq %r15 -; AVX512-NEXT: addq %rbx, %rdx -; AVX512-NEXT: setb %dil -; AVX512-NEXT: orb %cl, %dil -; AVX512-NEXT: orb %bpl, %dil -; AVX512-NEXT: kmovd %edi, %k0 -; AVX512-NEXT: kshiftlw $1, %k0, %k0 +; AVX512-NEXT: movq %rax, %r10 +; AVX512-NEXT: movq %rdx, %r15 +; AVX512-NEXT: addq %rbx, %r15 +; AVX512-NEXT: setb %al +; AVX512-NEXT: orb %cl, %al +; AVX512-NEXT: orb %r13b, %al +; AVX512-NEXT: kmovd %eax, %k0 +; AVX512-NEXT: testq %r9, %r9 +; AVX512-NEXT: setne %al +; AVX512-NEXT: testq %rsi, %rsi +; AVX512-NEXT: setne %cl +; AVX512-NEXT: andb %al, %cl +; AVX512-NEXT: movq %rsi, %rax +; AVX512-NEXT: mulq %r8 +; AVX512-NEXT: movq %rax, %rsi +; AVX512-NEXT: seto %bpl +; AVX512-NEXT: movq %r9, %rax +; AVX512-NEXT: mulq %r11 +; AVX512-NEXT: movq %rax, %rdi +; AVX512-NEXT: seto %bl +; AVX512-NEXT: orb %bpl, %bl +; AVX512-NEXT: addq %rsi, %rdi +; AVX512-NEXT: movq %r11, %rax +; AVX512-NEXT: mulq %r8 +; AVX512-NEXT: addq %rdi, %rdx +; AVX512-NEXT: setb %sil +; AVX512-NEXT: orb %bl, %sil +; AVX512-NEXT: orb %cl, %sil ; AVX512-NEXT: andl $1, %esi ; AVX512-NEXT: kmovw %esi, %k1 +; AVX512-NEXT: kshiftlw $1, %k0, %k0 ; AVX512-NEXT: korw %k0, %k1, %k1 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512-NEXT: movq %rax, 16(%r14) -; AVX512-NEXT: movq %r8, (%r14) -; AVX512-NEXT: movq %rdx, 24(%r14) -; AVX512-NEXT: movq %r10, 8(%r14) +; AVX512-NEXT: movq %r10, 16(%r14) +; AVX512-NEXT: movq %rax, (%r14) +; AVX512-NEXT: movq %r15, 24(%r14) +; AVX512-NEXT: movq %rdx, 8(%r14) ; AVX512-NEXT: popq %rbx ; AVX512-NEXT: popq %r12 ; AVX512-NEXT: popq %r13 diff --git a/test/CodeGen/X86/vec_usubo.ll b/test/CodeGen/X86/vec_usubo.ll index 0381394e741..76c3e5ad329 100644 --- a/test/CodeGen/X86/vec_usubo.ll +++ b/test/CodeGen/X86/vec_usubo.ll @@ -1329,16 +1329,16 @@ define <2 x i32> @usubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) ; AVX512-LABEL: usubo_v2i128: ; AVX512: # %bb.0: ; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; AVX512-NEXT: subq %r8, %rdi -; AVX512-NEXT: sbbq %r9, %rsi -; AVX512-NEXT: setb %r8b ; AVX512-NEXT: subq {{[0-9]+}}(%rsp), %rdx ; AVX512-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx ; AVX512-NEXT: setb %al ; AVX512-NEXT: kmovd %eax, %k0 +; AVX512-NEXT: subq %r8, %rdi +; AVX512-NEXT: sbbq %r9, %rsi +; AVX512-NEXT: setb %al +; AVX512-NEXT: andl $1, %eax +; AVX512-NEXT: kmovw %eax, %k1 ; AVX512-NEXT: kshiftlw $1, %k0, %k0 -; AVX512-NEXT: andl $1, %r8d -; AVX512-NEXT: kmovw %r8d, %k1 ; AVX512-NEXT: korw %k0, %k1, %k1 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}