diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 556bb584688..8d95c066f5a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5625,6 +5625,11 @@ static bool scaleShuffleElements(ArrayRef<int> Mask, unsigned NumDstElts,
   return false;
 }
 
+static bool canScaleShuffleElements(ArrayRef<int> Mask, unsigned NumDstElts) {
+  SmallVector<int, 32> WidenedMask;
+  return scaleShuffleElements(Mask, NumDstElts, WidenedMask);
+}
+
 /// Returns true if Elt is a constant zero or a floating point constant +0.0.
 bool X86::isZeroNode(SDValue Elt) {
   return isNullConstant(Elt) || isNullFPConstant(Elt);
@@ -44486,12 +44491,6 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, SelectionDAG &DAG,
       RMask.push_back(i);
   }
 
-  // Avoid 128-bit lane crossing if pre-AVX2 and FP (integer will split).
-  if (!Subtarget.hasAVX2() && VT.isFloatingPoint() &&
-      (isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), LMask) ||
-       isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), RMask)))
-    return false;
-
   // If A and B occur in reverse order in RHS, then canonicalize by commuting
   // RHS operands and shuffle mask.
   if (A != C) {
@@ -44554,6 +44553,14 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, SelectionDAG &DAG,
   if (IsIdentityPostShuffle)
     PostShuffleMask.clear();
 
+  // Avoid 128-bit lane crossing if pre-AVX2 and FP (integer will split), unless
+  // the shuffle can widen to shuffle entire lanes, which should still be quick.
+  if (!IsIdentityPostShuffle && !Subtarget.hasAVX2() && VT.isFloatingPoint() &&
+      isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(),
+                                PostShuffleMask) &&
+      !canScaleShuffleElements(PostShuffleMask, 2))
+    return false;
+
   // Assume a SingleSource HOP if we only shuffle one input and don't need to
   // shuffle the result.
   if (!shouldUseHorizontalOp(LHS == RHS &&
diff --git a/test/CodeGen/X86/haddsub-4.ll b/test/CodeGen/X86/haddsub-4.ll
index 4c1dc71982a..31b6617b45a 100644
--- a/test/CodeGen/X86/haddsub-4.ll
+++ b/test/CodeGen/X86/haddsub-4.ll
@@ -151,13 +151,11 @@ define <8 x double> @hadd_reverse_v8f64(<8 x double> %a0, <8 x double> %a1) noun
 ;
 ; AVX1-LABEL: hadd_reverse_v8f64:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm2[2,3,0,1]
-; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX1-NEXT:    vhaddpd %ymm2, %ymm0, %ymm2
-; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm3[2,3,0,1]
-; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
-; AVX1-NEXT:    vhaddpd %ymm0, %ymm1, %ymm0
-; AVX1-NEXT:    vmovapd %ymm2, %ymm1
+; AVX1-NEXT:    vhaddpd %ymm3, %ymm1, %ymm1
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm3 = ymm1[2,3,0,1]
+; AVX1-NEXT:    vhaddpd %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vmovapd %ymm3, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: hadd_reverse_v8f64: