mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
[SelectionDAG] allow undefs when matching splat constants
Use that to transform fsub with zero constant operands. The integer part isn't used yet, but it is proposed for use in D44548, so adding both enhancements here makes that patch simpler.

llvm-svn: 343865
This commit is contained in:
parent
cc5583af00
commit
01f181a636
@ -1602,10 +1602,10 @@ SDValue peekThroughOneUseBitcasts(SDValue V);
|
||||
bool isBitwiseNot(SDValue V);
|
||||
|
||||
/// Returns the SDNode if it is a constant splat BuildVector or constant int.
|
||||
ConstantSDNode *isConstOrConstSplat(SDValue N);
|
||||
ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false);
|
||||
|
||||
/// Returns the SDNode if it is a constant splat BuildVector or constant float.
|
||||
ConstantFPSDNode *isConstOrConstSplatFP(SDValue N);
|
||||
ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false);
|
||||
|
||||
class GlobalAddressSDNode : public SDNode {
|
||||
friend class SelectionDAG;
|
||||
|
@ -11011,8 +11011,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
|
||||
SDValue DAGCombiner::visitFSUB(SDNode *N) {
|
||||
SDValue N0 = N->getOperand(0);
|
||||
SDValue N1 = N->getOperand(1);
|
||||
ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
|
||||
ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
|
||||
ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
|
||||
ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
|
||||
EVT VT = N->getValueType(0);
|
||||
SDLoc DL(N);
|
||||
const TargetOptions &Options = DAG.getTarget().Options;
|
||||
@ -11044,9 +11044,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
|
||||
return DAG.getConstantFP(0.0f, DL, VT);
|
||||
}
|
||||
|
||||
// (fsub 0, B) -> -B
|
||||
// (fsub -0.0, N1) -> -N1
|
||||
if (N0CFP && N0CFP->isZero()) {
|
||||
if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) {
|
||||
if (N0CFP->isNegative() ||
|
||||
(Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
|
||||
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
|
||||
return GetNegatedExpression(N1, DAG, LegalOperations);
|
||||
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
|
||||
|
@ -8211,7 +8211,7 @@ bool llvm::isBitwiseNot(SDValue V) {
|
||||
return C && C->isAllOnesValue();
|
||||
}
|
||||
|
||||
ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
|
||||
ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) {
|
||||
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
|
||||
return CN;
|
||||
|
||||
@ -8220,9 +8220,7 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
|
||||
ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
|
||||
|
||||
// BuildVectors can truncate their operands. Ignore that case here.
|
||||
// FIXME: We blindly ignore splats which include undef which is overly
|
||||
// pessimistic.
|
||||
if (CN && UndefElements.none() &&
|
||||
if (CN && (UndefElements.none() || AllowUndefs) &&
|
||||
CN->getValueType(0) == N.getValueType().getScalarType())
|
||||
return CN;
|
||||
}
|
||||
@ -8230,15 +8228,14 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N) {
|
||||
ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) {
|
||||
if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
|
||||
return CN;
|
||||
|
||||
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
|
||||
BitVector UndefElements;
|
||||
ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
|
||||
|
||||
if (CN && UndefElements.none())
|
||||
if (CN && (UndefElements.none() || AllowUndefs))
|
||||
return CN;
|
||||
}
|
||||
|
||||
|
@ -102,16 +102,12 @@ define <2 x float> @fneg_bitcast(i64 %i) nounwind {
|
||||
define <4 x float> @fneg_undef_elts_v4f32(<4 x float> %x) {
|
||||
; X32-SSE-LABEL: fneg_undef_elts_v4f32:
|
||||
; X32-SSE: # %bb.0:
|
||||
; X32-SSE-NEXT: movaps {{.*#+}} xmm1 = <-0,u,u,-0>
|
||||
; X32-SSE-NEXT: subps %xmm0, %xmm1
|
||||
; X32-SSE-NEXT: movaps %xmm1, %xmm0
|
||||
; X32-SSE-NEXT: xorps {{\.LCPI.*}}, %xmm0
|
||||
; X32-SSE-NEXT: retl
|
||||
;
|
||||
; X64-SSE-LABEL: fneg_undef_elts_v4f32:
|
||||
; X64-SSE: # %bb.0:
|
||||
; X64-SSE-NEXT: movaps {{.*#+}} xmm1 = <-0,u,u,-0>
|
||||
; X64-SSE-NEXT: subps %xmm0, %xmm1
|
||||
; X64-SSE-NEXT: movaps %xmm1, %xmm0
|
||||
; X64-SSE-NEXT: xorps {{.*}}(%rip), %xmm0
|
||||
; X64-SSE-NEXT: retq
|
||||
%r = fsub <4 x float> <float -0.0, float undef, float undef, float -0.0>, %x
|
||||
ret <4 x float> %r
|
||||
@ -120,25 +116,13 @@ define <4 x float> @fneg_undef_elts_v4f32(<4 x float> %x) {
|
||||
; This isn't fneg, but similarly check that (X - 0.0) is simplified.
|
||||
|
||||
define <4 x float> @fsub0_undef_elts_v4f32(<4 x float> %x) {
|
||||
; X32-SSE1-LABEL: fsub0_undef_elts_v4f32:
|
||||
; X32-SSE1: # %bb.0:
|
||||
; X32-SSE1-NEXT: retl
|
||||
; X32-SSE-LABEL: fsub0_undef_elts_v4f32:
|
||||
; X32-SSE: # %bb.0:
|
||||
; X32-SSE-NEXT: retl
|
||||
;
|
||||
; X32-SSE2-LABEL: fsub0_undef_elts_v4f32:
|
||||
; X32-SSE2: # %bb.0:
|
||||
; X32-SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; X32-SSE2-NEXT: subps %xmm1, %xmm0
|
||||
; X32-SSE2-NEXT: retl
|
||||
;
|
||||
; X64-SSE1-LABEL: fsub0_undef_elts_v4f32:
|
||||
; X64-SSE1: # %bb.0:
|
||||
; X64-SSE1-NEXT: retq
|
||||
;
|
||||
; X64-SSE2-LABEL: fsub0_undef_elts_v4f32:
|
||||
; X64-SSE2: # %bb.0:
|
||||
; X64-SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; X64-SSE2-NEXT: subps %xmm1, %xmm0
|
||||
; X64-SSE2-NEXT: retq
|
||||
; X64-SSE-LABEL: fsub0_undef_elts_v4f32:
|
||||
; X64-SSE: # %bb.0:
|
||||
; X64-SSE-NEXT: retq
|
||||
%r = fsub <4 x float> %x, <float 0.0, float undef, float 0.0, float undef>
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user