1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-22 20:43:44 +02:00
llvm-mirror/test/CodeGen/X86/v8i1-masks.ll
Craig Topper bc75d9ff7b [DAGCombiners] Don't turn ANDs to shuffles with zero so early. Give some other combines a chance to run.
This moves the combine for turning ANDs into shuffle with zero out of SimplifyVBinOps and places it only in visitAND below the reassociate handling. This fixes the specific case I noticed where we failed to combine two ands with constants.

llvm-svn: 321417
2017-12-24 02:05:18 +00:00

176 lines
6.9 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64-AVX2
; and_masks: AND of two <8 x i1> compare masks, zero-extended to <8 x i32>.
;
; The IR loads three <8 x float> vectors, computes (%v1 olt %v0) and
; (%v2 olt %v0), ANDs the two masks, zero-extends the result and stores it.
; The expected output for the AVX-only runs ANDs the two compare results and
; then applies a constant-pool operand with a second vandps; the AVX2 runs
; instead materialize a splat of 1 with vbroadcastss and apply it to one
; compare result before the final vandps.
; NOTE(review): the result is stored through an undef pointer, presumably only
; as a sink to keep the computation alive -- confirm before regenerating the
; assertions, since the address register in the expected store depends on it.
define void @and_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
; X32-LABEL: and_masks:
; X32: ## %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: vmovups (%edx), %ymm0
; X32-NEXT: vmovups (%ecx), %ymm1
; X32-NEXT: vcmpltps %ymm0, %ymm1, %ymm1
; X32-NEXT: vmovups (%eax), %ymm2
; X32-NEXT: vcmpltps %ymm0, %ymm2, %ymm0
; X32-NEXT: vandps %ymm1, %ymm0, %ymm0
; X32-NEXT: vandps LCPI0_0, %ymm0, %ymm0
; X32-NEXT: vmovaps %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: and_masks:
; X64: ## %bb.0:
; X64-NEXT: vmovups (%rdi), %ymm0
; X64-NEXT: vmovups (%rsi), %ymm1
; X64-NEXT: vcmpltps %ymm0, %ymm1, %ymm1
; X64-NEXT: vmovups (%rdx), %ymm2
; X64-NEXT: vcmpltps %ymm0, %ymm2, %ymm0
; X64-NEXT: vandps %ymm1, %ymm0, %ymm0
; X64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT: vmovaps %ymm0, (%rax)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
;
; X32-AVX2-LABEL: and_masks:
; X32-AVX2: ## %bb.0:
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-AVX2-NEXT: vmovups (%edx), %ymm0
; X32-AVX2-NEXT: vmovups (%ecx), %ymm1
; X32-AVX2-NEXT: vcmpltps %ymm0, %ymm1, %ymm1
; X32-AVX2-NEXT: vmovups (%eax), %ymm2
; X32-AVX2-NEXT: vcmpltps %ymm0, %ymm2, %ymm0
; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
; X32-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1
; X32-AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT: vmovaps %ymm0, (%eax)
; X32-AVX2-NEXT: vzeroupper
; X32-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: and_masks:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vmovups (%rdi), %ymm0
; X64-AVX2-NEXT: vmovups (%rsi), %ymm1
; X64-AVX2-NEXT: vcmpltps %ymm0, %ymm1, %ymm1
; X64-AVX2-NEXT: vmovups (%rdx), %ymm2
; X64-AVX2-NEXT: vcmpltps %ymm0, %ymm2, %ymm0
; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
; X64-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1
; X64-AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vmovaps %ymm0, (%rax)
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
; Loads are only 16-byte aligned although the vectors are 32 bytes wide; the
; expected output uses the unaligned vmovups form for them.
%v0 = load <8 x float>, <8 x float>* %a, align 16
%v1 = load <8 x float>, <8 x float>* %b, align 16
; First mask: lanes where %v1 < %v0 (ordered).
%m0 = fcmp olt <8 x float> %v1, %v0
%v2 = load <8 x float>, <8 x float>* %c, align 16
; Second mask: lanes where %v2 < %v0 (ordered).
%m1 = fcmp olt <8 x float> %v2, %v0
; Combine both masks, then widen each i1 lane to an i32 holding 0 or 1.
%mand = and <8 x i1> %m1, %m0
%r = zext <8 x i1> %mand to <8 x i32>
store <8 x i32> %r, <8 x i32>* undef, align 32
ret void
}
; neg_masks: logical NOT of an <8 x i1> compare mask, zero-extended to <8 x i32>.
;
; The IR computes (%v1 olt %v0), inverts the mask by XOR-ing with all-ones,
; zero-extends the result and stores it. In every expected output below there
; is no separate xor instruction: the compare is emitted as vcmpnltps, followed
; by one vandps that applies the constant (a constant-pool operand for the
; AVX-only runs, a vbroadcastss splat of 1 for the AVX2 runs).
; The %c argument is not referenced by the body.
; NOTE(review): the result is stored through an undef pointer, presumably only
; as a sink to keep the computation alive -- confirm before regenerating the
; assertions.
define void @neg_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
; X32-LABEL: neg_masks:
; X32: ## %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: vmovups (%ecx), %ymm0
; X32-NEXT: vcmpnltps (%eax), %ymm0, %ymm0
; X32-NEXT: vandps LCPI1_0, %ymm0, %ymm0
; X32-NEXT: vmovaps %ymm0, (%eax)
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: neg_masks:
; X64: ## %bb.0:
; X64-NEXT: vmovups (%rsi), %ymm0
; X64-NEXT: vcmpnltps (%rdi), %ymm0, %ymm0
; X64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT: vmovaps %ymm0, (%rax)
; X64-NEXT: vzeroupper
; X64-NEXT: retq
;
; X32-AVX2-LABEL: neg_masks:
; X32-AVX2: ## %bb.0:
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX2-NEXT: vmovups (%ecx), %ymm0
; X32-AVX2-NEXT: vcmpnltps (%eax), %ymm0, %ymm0
; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
; X32-AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT: vmovaps %ymm0, (%eax)
; X32-AVX2-NEXT: vzeroupper
; X32-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: neg_masks:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vmovups (%rsi), %ymm0
; X64-AVX2-NEXT: vcmpnltps (%rdi), %ymm0, %ymm0
; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
; X64-AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vmovaps %ymm0, (%rax)
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
%v0 = load <8 x float>, <8 x float>* %a, align 16
%v1 = load <8 x float>, <8 x float>* %b, align 16
; Mask of lanes where %v1 < %v0 (ordered).
%m0 = fcmp olt <8 x float> %v1, %v0
; XOR with an all-ones vector flips every i1 lane, i.e. negates the mask.
%mand = xor <8 x i1> %m0, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
; Widen each i1 lane to an i32 holding 0 or 1.
%r = zext <8 x i1> %mand to <8 x i32>
store <8 x i32> %r, <8 x i32>* undef, align 32
ret void
}
; and_mask_constant: AND of an <8 x i1> compare mask with a constant mask,
; zero-extended to <8 x i32> and returned.
;
; The IR compares %v0 against zero for equality, ANDs the resulting mask with
; the constant <1,0,0,1,0,1,1,0> and zero-extends. The expected output shows a
; single constant AND (vpand) in every configuration:
;  - AVX-only runs split the 256-bit input into two 128-bit halves, compare
;    each with vpcmpeqd, pack to words, apply one vpand with a constant-pool
;    operand, then widen back with vpmovzxwd and reassemble with vinsertf128.
;  - AVX2 runs do one 256-bit vpcmpeqd followed by one vpand with a
;    constant-pool operand.
; The %v1 argument is not referenced by the body.
define <8 x i32> @and_mask_constant(<8 x i32> %v0, <8 x i32> %v1) {
; X32-LABEL: and_mask_constant:
; X32: ## %bb.0:
; X32-NEXT: vextractf128 $1, %ymm0, %xmm1
; X32-NEXT: vpxor %xmm2, %xmm2, %xmm2
; X32-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
; X32-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
; X32-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; X32-NEXT: vpand LCPI2_0, %xmm0, %xmm0
; X32-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: and_mask_constant:
; X64: ## %bb.0:
; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2
; X64-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
; X64-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
; X64-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT: retq
;
; X32-AVX2-LABEL: and_mask_constant:
; X32-AVX2: ## %bb.0:
; X32-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT: vpand LCPI2_0, %ymm0, %ymm0
; X32-AVX2-NEXT: retl
;
; X64-AVX2-LABEL: and_mask_constant:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT: retq
; Mask of lanes where %v0 is zero.
%m = icmp eq <8 x i32> %v0, zeroinitializer
; Keep only lanes 0, 3, 5 and 6 of the mask.
%mand = and <8 x i1> %m, <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false>
; Widen each i1 lane to an i32 holding 0 or 1.
%r = zext <8 x i1> %mand to <8 x i32>
ret <8 x i32> %r
}