mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[X86][AVX] isHorizontalBinOp - relax no-lane-crossing limit for AVX1-only targets.
Instead of always rejecting v8f32/v4f64 FHADD/FHSUB when the input shuffle masks cross 128-bit lanes, perform the matching and then determine whether the post-shuffle mask simplifies to a 'whole lane shuffle' mask - in which case we are guaranteed to be able to perform it cheaply as a VPERM2F128 shuffle.
This commit is contained in:
parent
33a681d4a6
commit
8c1ad1e305
@ -5625,6 +5625,11 @@ static bool scaleShuffleElements(ArrayRef<int> Mask, unsigned NumDstElts,
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Forwards to scaleShuffleElements with a scratch output mask and returns its
/// boolean result, so callers can query whether \p Mask can be scaled to
/// \p NumDstElts elements without keeping the scaled mask.
static bool canScaleShuffleElements(ArrayRef<int> Mask, unsigned NumDstElts) {
|
||||
SmallVector<int, 32> WidenedMask;
|
||||
return scaleShuffleElements(Mask, NumDstElts, WidenedMask);
|
||||
}
|
||||
|
||||
/// Returns true if Elt is a constant zero or a floating point constant +0.0.
|
||||
bool X86::isZeroNode(SDValue Elt) {
|
||||
return isNullConstant(Elt) || isNullFPConstant(Elt);
|
||||
@ -44486,12 +44491,6 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, SelectionDAG &DAG,
|
||||
RMask.push_back(i);
|
||||
}
|
||||
|
||||
// Avoid 128-bit lane crossing if pre-AVX2 and FP (integer will split).
|
||||
if (!Subtarget.hasAVX2() && VT.isFloatingPoint() &&
|
||||
(isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), LMask) ||
|
||||
isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), RMask)))
|
||||
return false;
|
||||
|
||||
// If A and B occur in reverse order in RHS, then canonicalize by commuting
|
||||
// RHS operands and shuffle mask.
|
||||
if (A != C) {
|
||||
@ -44554,6 +44553,14 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, SelectionDAG &DAG,
|
||||
if (IsIdentityPostShuffle)
|
||||
PostShuffleMask.clear();
|
||||
|
||||
// Avoid 128-bit lane crossing if pre-AVX2 and FP (integer will split), unless
|
||||
// the shuffle can widen to shuffle entire lanes, which should still be quick.
|
||||
if (!IsIdentityPostShuffle && !Subtarget.hasAVX2() && VT.isFloatingPoint() &&
|
||||
isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(),
|
||||
PostShuffleMask) &&
|
||||
!canScaleShuffleElements(PostShuffleMask, 2))
|
||||
return false;
|
||||
|
||||
// Assume a SingleSource HOP if we only shuffle one input and don't need to
|
||||
// shuffle the result.
|
||||
if (!shouldUseHorizontalOp(LHS == RHS &&
|
||||
|
@ -151,13 +151,11 @@ define <8 x double> @hadd_reverse_v8f64(<8 x double> %a0, <8 x double> %a1) noun
|
||||
;
|
||||
; AVX1-LABEL: hadd_reverse_v8f64:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm2[2,3,0,1]
|
||||
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
|
||||
; AVX1-NEXT: vhaddpd %ymm2, %ymm0, %ymm2
|
||||
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm3[2,3,0,1]
|
||||
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
|
||||
; AVX1-NEXT: vhaddpd %ymm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: vmovapd %ymm2, %ymm1
|
||||
; AVX1-NEXT: vhaddpd %ymm3, %ymm1, %ymm1
|
||||
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm1[2,3,0,1]
|
||||
; AVX1-NEXT: vhaddpd %ymm2, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
||||
; AVX1-NEXT: vmovapd %ymm3, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: hadd_reverse_v8f64:
|
||||
|
Loading…
Reference in New Issue
Block a user