1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00
llvm-mirror/test/CodeGen/X86/pr48215.ll
Simon Pilgrim 1bd2f183ce [X86][AVX] Only share broadcasts of different widths from the same SDValue of the same SDNode (PR48215)
D57663 allowed us to reuse broadcasts of the same scalar value by extracting low subvectors from the widest type.

Unfortunately, we weren't ensuring that the broadcasts came from the same SDValue, only from the same SDNode — which failed on multiple-value nodes like ISD::SDIVREM.

FYI: I intend to request this be merged into the 11.x release branch.

Differential Revision: https://reviews.llvm.org/D91709
2020-11-19 12:15:18 +00:00

79 lines
3.0 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=AVX1
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512
; FIXME: Ensure when we merge broadcasts to different widths that they come from the same SDValue.

; Regression test for PR48215 (see D91709).
; The sdiv and srem below share both operands, so the backend lowers them to a
; single x86 division (note the lone 'idivl' in each check block) whose node
; carries two distinct result values: quotient in %eax and remainder in %edx.
; The combine that reuses broadcasts of different widths must compare SDValues,
; not just SDNodes; otherwise the narrow remainder splat would be extracted
; from the wide quotient splat and the second compare would test the wrong
; value. The checks verify each result gets its own splat/broadcast.
define i32 @PR48215(i32 %a0, i32 %a1) {
; AVX1-LABEL: PR48215:
; AVX1: ## %bb.0:
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: cltd
; AVX1-NEXT: idivl %esi
; AVX1-NEXT: vmovd %edx, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,5,6,7]
; AVX1-NEXT: vmovd %eax, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3]
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vmovmskps %ymm2, %ecx
; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vmovmskps %xmm0, %eax
; AVX1-NEXT: addl %ecx, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: PR48215:
; AVX2: ## %bb.0:
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: cltd
; AVX2-NEXT: idivl %esi
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT: vmovd %edx, %xmm1
; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7]
; AVX2-NEXT: vpcmpgtd %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,6,7]
; AVX2-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
; AVX2-NEXT: vmovmskps %ymm0, %ecx
; AVX2-NEXT: vmovmskps %xmm1, %eax
; AVX2-NEXT: addl %ecx, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: PR48215:
; AVX512: ## %bb.0:
; AVX512-NEXT: movl %edi, %eax
; AVX512-NEXT: cltd
; AVX512-NEXT: idivl %esi
; AVX512-NEXT: vpbroadcastd %eax, %ymm0
; AVX512-NEXT: vpbroadcastd %edx, %xmm1
; AVX512-NEXT: vpcmpltd {{.*}}(%rip), %ymm0, %k0
; AVX512-NEXT: vpcmpltd {{.*}}(%rip), %xmm1, %k1
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: movzbl %al, %ecx
; AVX512-NEXT: kmovw %k1, %eax
; AVX512-NEXT: andl $15, %eax
; AVX512-NEXT: addl %ecx, %eax
; AVX512-NEXT: retq
%d = sdiv i32 %a0, %a1 ; quotient — first result value of the shared division
%r = srem i32 %a0, %a1 ; remainder — second result value of the SAME division node
%dv0 = insertelement <8 x i32> undef, i32 %d, i32 0 ; quotient into lane 0 of the wide vector
%rv0 = insertelement <4 x i32> undef, i32 %r, i32 0 ; remainder into lane 0 of the narrow vector
%dv1 = shufflevector <8 x i32> %dv0, <8 x i32> undef, <8 x i32> zeroinitializer ; splat quotient across 8 lanes
%rv1 = shufflevector <4 x i32> %rv0, <4 x i32> undef, <4 x i32> zeroinitializer ; splat remainder across 4 lanes
%dc0 = icmp slt <8 x i32> %dv1, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%rc0 = icmp slt <4 x i32> %rv1, <i32 4, i32 5, i32 6, i32 7>
%db0 = bitcast <8 x i1> %dc0 to i8 ; pack the 8-lane compare mask into an i8
%rb0 = bitcast <4 x i1> %rc0 to i4 ; pack the 4-lane compare mask into an i4
%db1 = zext i8 %db0 to i32
%rb1 = zext i4 %rb0 to i32
%res = add i32 %db1, %rb1 ; combine so both splats stay live in the output
ret i32 %res
}