mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 11:42:57 +01:00
[AVX512] Don't create SHRUNKBLEND SDNodes for 512-bit vectors
There are no 512-bit blend instructions that use sign bits, so we shouldn't create SHRUNKBLEND nodes for 512-bit vectors. As a side note, there appears to be a missed opportunity for constant folding TESTM when its LHS and RHS are equal. This fixes PR34139. Differential Revision: https://reviews.llvm.org/D36992 llvm-svn: 311572
This commit is contained in:
parent
e6fa8f8485
commit
0c98193785
@ -30628,6 +30628,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
|
||||
// Byte blends are only available in AVX2
|
||||
if (VT == MVT::v32i8 && !Subtarget.hasAVX2())
|
||||
return SDValue();
|
||||
// There are no 512-bit blend instructions that use sign bits.
|
||||
if (VT.is512BitVector())
|
||||
return SDValue();
|
||||
|
||||
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
|
||||
APInt DemandedMask(APInt::getSignMask(BitWidth));
|
||||
|
24
test/CodeGen/X86/pr34139.ll
Normal file
24
test/CodeGen/X86/pr34139.ll
Normal file
@ -0,0 +1,24 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl | FileCheck %s
|
||||
|
||||
; Regression test for PR34139, run with -mcpu=knl: a select between two
; <16 x double> values whose <16 x i1> condition comes from an
; `icmp ne <16 x i8> ..., zeroinitializer`. The CHECK lines expect the select
; to be emitted as k-register masked moves (vptestmq into %k1, then
; `vmovapd ... {%k1}`) on the two zmm halves at (%rdi) and 64(%rdi).
define void @f_f(<16 x double>* %ptr) {
|
||||
; CHECK-LABEL: f_f:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vmovdqa %xmm0, (%rax)
|
||||
; CHECK-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vmovapd (%rdi), %zmm1
|
||||
; CHECK-NEXT: vmovapd 64(%rdi), %zmm2
|
||||
; CHECK-NEXT: vptestmq %zmm0, %zmm0, %k1
|
||||
; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovapd %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vmovapd %zmm2, 64(%rdi)
|
||||
; CHECK-NEXT: vmovapd %zmm1, (%rdi)
|
||||
store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8>* undef
|
||||
%load_mask8.i.i.i = load <16 x i8>, <16 x i8>* undef
|
||||
%v.i.i.i.i = load <16 x double>, <16 x double>* %ptr
|
||||
%mask_vec_i1.i.i.i51.i.i = icmp ne <16 x i8> %load_mask8.i.i.i, zeroinitializer
|
||||
%v1.i.i.i.i = select <16 x i1> %mask_vec_i1.i.i.i51.i.i, <16 x double> undef, <16 x double> %v.i.i.i.i
|
||||
store <16 x double> %v1.i.i.i.i, <16 x double>* %ptr
|
||||
unreachable
|
||||
}
|
Loading…
Reference in New Issue
Block a user