From 7897525197fd9346b91a61d70745d82d716ad966 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Mon, 31 Aug 2020 13:26:36 +0200 Subject: [PATCH] [SelectionDAG] Always intersect SDNode flags during getNode() node memoization. Previously SDNodeFlags::instersectWith(Flags) would do nothing if Flags was in an undefined state, which is very bad given that this is the default when getNode() is called without passing an explicit SDNodeFlags argument. This meant that if an already existing and reused node had a flag which the second caller to getNode() did not set, that flag would remain uncleared. This was exposed by https://bugs.llvm.org/show_bug.cgi?id=47092, where an NSW flag was incorrectly set on an add instruction (which did in fact overflow in one of the two original contexts), so when SystemZElimCompare removed the compare with 0 trusting that flag, wrong-code resulted. There is more that needs to be done in this area as discussed here: Differential Revision: https://reviews.llvm.org/D86871 Review: Ulrich Weigand, Sanjay Patel --- include/llvm/CodeGen/SelectionDAGNodes.h | 11 +++---- .../SelectionDAG/SelectionDAGBuilder.cpp | 2 ++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 4 +-- lib/Target/AMDGPU/AMDGPUISelLowering.h | 4 +-- test/CodeGen/SystemZ/fp-mul-14.ll | 20 +++++++++++++ test/CodeGen/SystemZ/int-cmp-60.ll | 29 +++++++++++++++++++ 6 files changed, 59 insertions(+), 11 deletions(-) create mode 100644 test/CodeGen/SystemZ/fp-mul-14.ll create mode 100644 test/CodeGen/SystemZ/int-cmp-60.ll diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index cde075f41f7..6eef79162f8 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -357,9 +357,8 @@ template<> struct simplify_type { /// the backend. struct SDNodeFlags { private: - // This bit is used to determine if the flags are in a defined state. - // Flag bits can only be masked out during intersection if the masking flags - // are defined. + // This bit is used to determine if the flags are in a defined state. It is + // only used by SelectionDAGBuilder. bool AnyDefined : 1; bool NoUnsignedWrap : 1; @@ -464,11 +463,9 @@ public: bool hasAllowReassociation() const { return AllowReassociation; } bool hasNoFPExcept() const { return NoFPExcept; } - /// Clear any flags in this flag set that aren't also set in Flags. - /// If the given Flags are undefined then don't do anything. + /// Clear any flags in this flag set that aren't also set in Flags. All + /// flags will be cleared if Flags are undefined. void intersectWith(const SDNodeFlags Flags) { - if (!Flags.isDefined()) - return; NoUnsignedWrap &= Flags.NoUnsignedWrap; NoSignedWrap &= Flags.NoSignedWrap; Exact &= Flags.Exact; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 1a2c77974c2..5e6cb03f383 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1128,6 +1128,8 @@ void SelectionDAGBuilder::visit(const Instruction &I) { // TODO: We could handle all flags (nsw, etc) here. // TODO: If an IR instruction maps to >1 node, only the final node will have // flags set. + // TODO: The handling of flags should be improved, see + // https://reviews.llvm.org/D86871 if (SDNode *Node = getNodeForIRValue(&I)) { SDNodeFlags IncomingFlags; IncomingFlags.copyFMF(*FPMO); diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 151b1bdd553..5dd42d1f4a6 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -521,8 +521,8 @@ bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const { return true; // TODO: Move into isKnownNeverNaN - if (N->getFlags().isDefined()) - return N->getFlags().hasNoNaNs(); + if (N->getFlags().hasNoNaNs()) + return true; return CurDAG->isKnownNeverNaN(N); } diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h index c7fdc79c3b1..932a05a4ba8 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -150,8 +150,8 @@ public: return true; const auto Flags = Op.getNode()->getFlags(); - if (Flags.isDefined()) - return Flags.hasNoSignedZeros(); + if (Flags.hasNoSignedZeros()) + return true; return false; } diff --git a/test/CodeGen/SystemZ/fp-mul-14.ll b/test/CodeGen/SystemZ/fp-mul-14.ll new file mode 100644 index 00000000000..8bab2135739 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-mul-14.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; +; Check that a multiply-and-add results. + +; FIXME: This test is xfailed temporarily +; XFAIL: * + +define void @f1(float %arg, float* %Dst) { +; CHECK-LABEL: f1: +; CHECK: maeb +bb: + %i = fmul contract float %arg, 0xBE6777A5C0000000 + %i4 = fadd contract float %i, 1.000000e+00 + %i5 = fmul contract float %arg, 0xBE6777A5C0000000 + %i6 = fadd contract float %i5, 1.000000e+00 + %i7 = fmul contract float %i4, 2.000000e+00 + store float %i7, float* %Dst + ret void +} + diff --git a/test/CodeGen/SystemZ/int-cmp-60.ll b/test/CodeGen/SystemZ/int-cmp-60.ll new file mode 100644 index 00000000000..faae4f9bced --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-60.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s +; +; Test that DAGCombiner properly clears the NUW/NSW flags on the memoized add +; node. + +define void @fun(i64* %Src, i32* %Dst) { +; CHECK-LABEL: fun: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: iilf %r0, 1303940520 +; CHECK-NEXT: n %r0, 4(%r2) +; CHECK-NEXT: lr %r1, %r0 +; CHECK-NEXT: afi %r1, 1628135358 +; CHECK-NEXT: locrnhe %r1, %r0 +; CHECK-NEXT: st %r1, 0(%r3) +; CHECK-NEXT: br %r14 +entry: + %0 = load i64, i64* %Src, align 8 + %1 = trunc i64 %0 to i32 + %conv = and i32 %1, 1303940520 + %xor11.i = or i32 %conv, -2147483648 + %xor2.i = add i32 %xor11.i, -519348290 + %cmp.i = icmp slt i32 %xor2.i, 0 + %sub3.i = add nuw nsw i32 %conv, 1628135358 + %cond.i = select i1 %cmp.i, i32 %conv, i32 %sub3.i + store i32 %cond.i, i32* %Dst + ret void +} +