mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
1bd2f183ce
D57663 allowed us to reuse broadcasts of the same scalar value by extracting low subvectors from the widest type. Unfortunately, we weren't ensuring the broadcasts were from the same SDValue, just the same SDNode - which failed on multiple-value nodes like ISD::SDIVREM. FYI: I intend to request this be merged into the 11.x release branch. Differential Revision: https://reviews.llvm.org/D91709
79 lines
3.0 KiB
LLVM
79 lines
3.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=AVX1
|
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
|
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512
|
|
|
|
; Ensure when we merge broadcasts to different widths that they come from the same SDValue.
|
|
; PR48215: the quotient and the remainder of one signed division are each
; splatted, the quotient to <8 x i32> and the remainder to <4 x i32>. The
; assertions below expect two independent broadcasts (one fed from %eax, one
; from %edx); the narrower splat must not be taken from the low half of the
; wider one, because the two scalars are different values.
define i32 @PR48215(i32 %a0, i32 %a1) {
; AVX1-LABEL: PR48215:
; AVX1: ## %bb.0:
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: cltd
; AVX1-NEXT: idivl %esi
; AVX1-NEXT: vmovd %edx, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,5,6,7]
; AVX1-NEXT: vmovd %eax, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3]
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vmovmskps %ymm2, %ecx
; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vmovmskps %xmm0, %eax
; AVX1-NEXT: addl %ecx, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: PR48215:
; AVX2: ## %bb.0:
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: cltd
; AVX2-NEXT: idivl %esi
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT: vmovd %edx, %xmm1
; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7]
; AVX2-NEXT: vpcmpgtd %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,6,7]
; AVX2-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
; AVX2-NEXT: vmovmskps %ymm0, %ecx
; AVX2-NEXT: vmovmskps %xmm1, %eax
; AVX2-NEXT: addl %ecx, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: PR48215:
; AVX512: ## %bb.0:
; AVX512-NEXT: movl %edi, %eax
; AVX512-NEXT: cltd
; AVX512-NEXT: idivl %esi
; AVX512-NEXT: vpbroadcastd %eax, %ymm0
; AVX512-NEXT: vpbroadcastd %edx, %xmm1
; AVX512-NEXT: vpcmpltd {{.*}}(%rip), %ymm0, %k0
; AVX512-NEXT: vpcmpltd {{.*}}(%rip), %xmm1, %k1
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: movzbl %al, %ecx
; AVX512-NEXT: kmovw %k1, %eax
; AVX512-NEXT: andl $15, %eax
; AVX512-NEXT: addl %ecx, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  ; Quotient and remainder of the same operands; every configuration above
  ; computes both with a single idivl.
  %d = sdiv i32 %a0, %a1
  %r = srem i32 %a0, %a1
  ; Place each scalar in lane 0 ...
  %dv0 = insertelement <8 x i32> undef, i32 %d, i32 0
  %rv0 = insertelement <4 x i32> undef, i32 %r, i32 0
  ; ... and splat lane 0 across the vector (an all-zero shuffle mask).
  %dv1 = shufflevector <8 x i32> %dv0, <8 x i32> undef, <8 x i32> zeroinitializer
  %rv1 = shufflevector <4 x i32> %rv0, <4 x i32> undef, <4 x i32> zeroinitializer
  ; Signed less-than against per-lane constants gives one i1 per lane.
  %dc0 = icmp slt <8 x i32> %dv1, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %rc0 = icmp slt <4 x i32> %rv1, <i32 4, i32 5, i32 6, i32 7>
  ; Pack the lane masks into integers of matching bit width.
  %db0 = bitcast <8 x i1> %dc0 to i8
  %rb0 = bitcast <4 x i1> %rc0 to i4
  ; Widen both masks to i32 and return their sum, so both compare results
  ; contribute to the returned value.
  %db1 = zext i8 %db0 to i32
  %rb1 = zext i4 %rb0 to i32
  %res = add i32 %db1, %rb1
  ret i32 %res
}
|