
[X86][SSE] Accept any shuffle mask that is all zeroes

Until we have a better way to extract constants through bitcasted build vectors (and to handle undefs of partial lanes etc.), at least accept build vectors that are all zeroes.

llvm-svn: 274833
Simon Pilgrim 2016-07-08 10:39:12 +00:00
parent 640cb98b90
commit 228451dbc1
3 changed files with 9 additions and 4 deletions

@@ -4788,6 +4788,13 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
   if (MaskNode.getOpcode() != ISD::BUILD_VECTOR)
     return false;
 
+  // We can always decode if the buildvector is all zero constants,
+  // but can't use isBuildVectorAllZeros as it might contain UNDEFs.
+  if (llvm::all_of(MaskNode->ops(), X86::isZeroNode)) {
+    RawMask.append(VT.getSizeInBits() / MaskEltSizeInBits, 0);
+    return true;
+  }
+
   // TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0
   if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) != 0)
     return false;
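
A note on the check itself: predicates in the isBuildVectorAllZeros family treat UNDEF lanes as zero, which is fine for matching but not for materialising a concrete mask, so the patch insists that every operand is a literal zero constant. Below is a minimal standalone C++ sketch of that distinction, with a hypothetical Lane enum standing in for the SDNode operands (illustration only, not LLVM's actual API):

#include <algorithm>
#include <cstddef>
#include <vector>

// Hypothetical stand-in for a build-vector operand: a mask lane is either a
// known zero constant, an UNDEF, or some other constant.
enum class Lane { Zero, Undef, Other };

// Mirrors the strict check in the patch: every lane must be a literal zero.
// An isBuildVectorAllZeros-style predicate would also accept Undef lanes,
// which cannot safely be decoded into concrete shuffle mask indices.
static bool allLanesAreZero(const std::vector<Lane> &Ops) {
  return std::all_of(Ops.begin(), Ops.end(),
                     [](Lane L) { return L == Lane::Zero; });
}

// Once the mask is known to be all zeros, decoding is trivial: the raw mask
// is simply NumElts zeros, regardless of the mask element type.
static std::vector<int> decodeAllZeroMask(std::size_t NumElts) {
  return std::vector<int>(NumElts, 0);
}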

@@ -139,8 +139,7 @@ define <2 x double> @combine_vpermilvar_2f64_identity(<2 x double> %a0) {
 define <2 x double> @combine_vpermilvar_2f64_movddup(<2 x double> %a0) {
 ; ALL-LABEL: combine_vpermilvar_2f64_movddup:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; ALL-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0
+; ALL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
 ; ALL-NEXT:    retq
   %1 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> <i64 0, i64 0>)
   ret <2 x double> %1
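
This test shows the new decode in action: the mask operand <i64 0, i64 0> is an all-zero build vector, so the shuffle combiner can now see through it. With a zero control vector, vpermilvar.pd selects the low double for both output lanes, which is exactly the movddup pattern, so the xor plus variable-permute pair collapses into one instruction. A simplified scalar model of the vpermilvar.pd lane selection (vpermilvarPD is a hypothetical helper written for this note, assuming the documented bit-1 selector behaviour):

#include <array>
#include <cstdint>

// Simplified model of vpermilvar.pd on <2 x double>: bit 1 of each 64-bit
// mask element selects which of the two source doubles feeds that lane.
static std::array<double, 2> vpermilvarPD(std::array<double, 2> Src,
                                          std::array<std::int64_t, 2> Mask) {
  std::array<double, 2> Out;
  for (int I = 0; I != 2; ++I)
    Out[I] = Src[(Mask[I] >> 1) & 1];
  return Out;
}

// With Mask = {0, 0} both lanes read Src[0], so Out = {Src[0], Src[0]},
// which is what vmovddup xmm0 = xmm0[0,0] produces directly.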

@@ -393,8 +393,7 @@ define <16 x i32> @combine_permvar_as_vpbroadcastd512(<16 x i32> %x0) {
 define <8 x i64> @combine_permvar_as_vpbroadcastq512(<8 x i64> %x0) {
 ; CHECK-LABEL: combine_permvar_as_vpbroadcastq512:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1
-; CHECK-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    vpbroadcastq %xmm0, %zmm0
 ; CHECK-NEXT:    retq
   %1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> zeroinitializer, <8 x i64> undef, i8 -1)
   ret <8 x i64> %1
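
The AVX-512 case follows the same reasoning: a zeroinitializer index vector makes the variable permute read element 0 into every output lane, which is a broadcast of the low qword, so vpbroadcastq replaces the vpxord plus vpermq pair. A scalar model of the permvar.di.512 lane selection (permvarQ is a hypothetical helper for this note; the real intrinsic also carries a pass-through operand and a write mask, omitted here):

#include <array>
#include <cstdint>

// Simplified model of vpermq with a full vector of indices: each output
// element is the source element addressed by the low 3 bits of its index.
static std::array<std::uint64_t, 8> permvarQ(std::array<std::uint64_t, 8> Src,
                                             std::array<std::uint64_t, 8> Idx) {
  std::array<std::uint64_t, 8> Out;
  for (int I = 0; I != 8; ++I)
    Out[I] = Src[Idx[I] & 7];
  return Out;
}

// With Idx = all zeros, every Out[I] is Src[0]: a broadcast of the low
// qword, matching vpbroadcastq %xmm0, %zmm0.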