From bd07f66851b89d5d4ed13cdd683173b0299c3f5c Mon Sep 17 00:00:00 2001
From: Marcin Koscielnicki <koriakin@0x04.net>
Date: Sun, 10 Jul 2016 14:41:22 +0000
Subject: [PATCH] [SystemZ] Utilize Test Data Class instructions.

This adds a new SystemZ-specific intrinsic, llvm.s390.tdc.f(32|64|128),
which maps straight to the test data class instructions.  A new IR pass
is added to recognize instructions that can be converted to TDC and
perform the necessary replacements.

Differential Revision: http://reviews.llvm.org/D21949

llvm-svn: 275016
---
 include/llvm/IR/IntrinsicsSystemZ.td        |  11 +
 lib/Target/SystemZ/CMakeLists.txt           |   1 +
 lib/Target/SystemZ/README.txt               |   4 -
 lib/Target/SystemZ/SystemZ.h                |  41 +++
 lib/Target/SystemZ/SystemZISelLowering.cpp  |   5 +
 lib/Target/SystemZ/SystemZTDC.cpp           | 382 ++++++++++++++++++++
 lib/Target/SystemZ/SystemZTargetMachine.cpp |   3 +
 test/CodeGen/SystemZ/tdc-01.ll              |  95 +++++
 test/CodeGen/SystemZ/tdc-02.ll              |  96 +++++
 test/CodeGen/SystemZ/tdc-03.ll              | 139 +++++++
 test/CodeGen/SystemZ/tdc-04.ll              |  85 +++++
 test/CodeGen/SystemZ/tdc-05.ll              |  97 +++++
 test/CodeGen/SystemZ/tdc-06.ll              |  48 +++
 13 files changed, 1003 insertions(+), 4 deletions(-)
 create mode 100644 lib/Target/SystemZ/SystemZTDC.cpp
 create mode 100644 test/CodeGen/SystemZ/tdc-01.ll
 create mode 100644 test/CodeGen/SystemZ/tdc-02.ll
 create mode 100644 test/CodeGen/SystemZ/tdc-03.ll
 create mode 100644 test/CodeGen/SystemZ/tdc-04.ll
 create mode 100644 test/CodeGen/SystemZ/tdc-05.ll
 create mode 100644 test/CodeGen/SystemZ/tdc-06.ll

diff --git a/include/llvm/IR/IntrinsicsSystemZ.td b/include/llvm/IR/IntrinsicsSystemZ.td
index 49de4f9f906..bfc15b9bc09 100644
--- a/include/llvm/IR/IntrinsicsSystemZ.td
+++ b/include/llvm/IR/IntrinsicsSystemZ.td
@@ -374,3 +374,14 @@ let TargetPrefix = "s390" in {
                                  [llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty],
                                  [IntrNoMem]>;
 }
+
+//===----------------------------------------------------------------------===//
+//
+// Misc intrinsics
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "s390" in {
+  def int_s390_tdc : Intrinsic<[llvm_i32_ty], [llvm_anyfloat_ty, llvm_i64_ty],
+                               [IntrNoMem]>;
+}
diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt
index 336f037bb73..4b849ad6491 100644
--- a/lib/Target/SystemZ/CMakeLists.txt
+++ b/lib/Target/SystemZ/CMakeLists.txt
@@ -30,6 +30,7 @@ add_llvm_target(SystemZCodeGen
   SystemZSubtarget.cpp
   SystemZTargetMachine.cpp
   SystemZTargetTransformInfo.cpp
+  SystemZTDC.cpp
   )
 
 add_subdirectory(AsmParser)
diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt
index 69b72d26020..86a1322c9e2 100644
--- a/lib/Target/SystemZ/README.txt
+++ b/lib/Target/SystemZ/README.txt
@@ -36,10 +36,6 @@ We don't use the BRANCH ON INDEX instructions.
 
 --
 
-We don't use the TEST DATA CLASS instructions.
-
---
-
 We only use MVC, XC and CLC for constant-length block operations.
 We could extend them to variable-length operations too,
 using EXECUTE RELATIVE LONG.
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
index cafe2c5948c..c8ea9641fb6 100644
--- a/lib/Target/SystemZ/SystemZ.h
+++ b/lib/Target/SystemZ/SystemZ.h
@@ -87,6 +87,11 @@ const unsigned CCMASK_VCMP_MIXED     = CCMASK_1;
 const unsigned CCMASK_VCMP_NONE      = CCMASK_3;
 const unsigned CCMASK_VCMP           = CCMASK_0 | CCMASK_1 | CCMASK_3;
 
+// Condition-code mask assignments for Test Data Class.
+const unsigned CCMASK_TDC_NOMATCH   = CCMASK_0;
+const unsigned CCMASK_TDC_MATCH     = CCMASK_1;
+const unsigned CCMASK_TDC           = CCMASK_TDC_NOMATCH | CCMASK_TDC_MATCH;
+
 // The position of the low CC bit in an IPM result.
 const unsigned IPM_CC = 28;
 
@@ -94,6 +99,41 @@ const unsigned IPM_CC = 28;
 const unsigned PFD_READ  = 1;
 const unsigned PFD_WRITE = 2;
 
+// Data-class mask bits for Test Data Class (one bit per class and sign).
+const unsigned TDCMASK_ZERO_PLUS       = 0x800;
+const unsigned TDCMASK_ZERO_MINUS      = 0x400;
+const unsigned TDCMASK_NORMAL_PLUS     = 0x200;
+const unsigned TDCMASK_NORMAL_MINUS    = 0x100;
+const unsigned TDCMASK_SUBNORMAL_PLUS  = 0x080;
+const unsigned TDCMASK_SUBNORMAL_MINUS = 0x040;
+const unsigned TDCMASK_INFINITY_PLUS   = 0x020;
+const unsigned TDCMASK_INFINITY_MINUS  = 0x010;
+const unsigned TDCMASK_QNAN_PLUS       = 0x008;
+const unsigned TDCMASK_QNAN_MINUS      = 0x004;
+const unsigned TDCMASK_SNAN_PLUS       = 0x002;
+const unsigned TDCMASK_SNAN_MINUS      = 0x001;
+
+const unsigned TDCMASK_ZERO            = TDCMASK_ZERO_PLUS | TDCMASK_ZERO_MINUS;
+const unsigned TDCMASK_POSITIVE        = TDCMASK_NORMAL_PLUS |
+                                         TDCMASK_SUBNORMAL_PLUS |
+                                         TDCMASK_INFINITY_PLUS;
+const unsigned TDCMASK_NEGATIVE        = TDCMASK_NORMAL_MINUS |
+                                         TDCMASK_SUBNORMAL_MINUS |
+                                         TDCMASK_INFINITY_MINUS;
+const unsigned TDCMASK_NAN             = TDCMASK_QNAN_PLUS |
+                                         TDCMASK_QNAN_MINUS |
+                                         TDCMASK_SNAN_PLUS |
+                                         TDCMASK_SNAN_MINUS;
+const unsigned TDCMASK_PLUS            = TDCMASK_POSITIVE |
+                                         TDCMASK_ZERO_PLUS |
+                                         TDCMASK_QNAN_PLUS |
+                                         TDCMASK_SNAN_PLUS;
+const unsigned TDCMASK_MINUS           = TDCMASK_NEGATIVE |
+                                         TDCMASK_ZERO_MINUS |
+                                         TDCMASK_QNAN_MINUS |
+                                         TDCMASK_SNAN_MINUS;
+const unsigned TDCMASK_ALL             = TDCMASK_PLUS | TDCMASK_MINUS;
+
 // Number of bits in a vector register.
 const unsigned VectorBits = 128;
 
@@ -138,6 +178,7 @@ FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM);
 FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
 FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
 FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
+FunctionPass *createSystemZTDCPass();
 } // end namespace llvm
 
 #endif
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 2259840d2d1..5e1552f586f 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1444,6 +1444,11 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
     CCValid = SystemZ::CCMASK_VCMP;
     return true;
 
+  case Intrinsic::s390_tdc:
+    Opcode = SystemZISD::TDC;
+    CCValid = SystemZ::CCMASK_TDC;
+    return true;
+
   default:
     return false;
   }
diff --git a/lib/Target/SystemZ/SystemZTDC.cpp b/lib/Target/SystemZ/SystemZTDC.cpp
new file mode 100644
index 00000000000..96a9ef82c12
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZTDC.cpp
@@ -0,0 +1,382 @@
+//===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass looks for instructions that can be replaced by a Test Data Class
+// instruction, and replaces them when profitable.
+//
+// Roughly, the following rules are recognized:
+//
+// 1: fcmp pred X, 0 -> tdc X, mask
+// 2: fcmp pred X, +-inf -> tdc X, mask
+// 3: fcmp pred X, +-minnorm -> tdc X, mask
+// 4: tdc (fabs X), mask -> tdc X, newmask
+// 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [i.e. signbit]
+// 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask
+// 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask
+// 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2)
+// 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2)
+// 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2)
+//
+// The pass works in 4 steps:
+//
+// 1. All fcmp and icmp instructions in a function are checked for a match
+//    with rules 1-3 and 5-7.  Their TDC equivalents are stored in
+//    the ConvertedInsts mapping.  If the operand of a fcmp instruction is
+//    a fabs, it's also folded according to rule 4.
+// 2. All and/or/xor i1 instructions both of whose operands have already been
+//    mapped are mapped according to rules 8-10.  LogicOpsWorklist is used
+//    as a worklist of instructions to check.
+// 3. All mapped instructions that are considered worthy of conversion (i.e.
+//    replacing them will actually simplify the final code) are replaced
+//    with a call to the s390.tdc intrinsic.
+// 4. All intermediate results of replaced instructions are removed if unused.
+//
+// Instructions that match rules 1-3 are considered unworthy of conversion
+// on their own (since a comparison instruction is superior), but are mapped
+// in the hopes of folding the result using rules 4 and 8-10 (likely removing
+// the original comparison in the process).
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include <deque>
+#include <set>
+
+using namespace llvm;
+
+namespace llvm {
+  void initializeSystemZTDCPassPass(PassRegistry&);
+}
+
+namespace {
+
+class SystemZTDCPass : public FunctionPass {
+public:
+  static char ID;
+  SystemZTDCPass() : FunctionPass(ID) {
+    initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override;
+private:
+  // Maps each seen instruction that can be expressed as a TDC to its
+  // (TDC operand, TDC mask, worthy flag) triple, in discovery order.
+  MapVector<Instruction *, std::tuple<Value *, int, bool>> ConvertedInsts;
+  // Worklist (popped from the back) of and/or/xor i1 instructions to fold.
+  std::vector<BinaryOperator *> LogicOpsWorklist;
+  // Instructions (fabs, bitcast, tdc calls) to be removed at the end if unused.
+  std::set<Instruction *> PossibleJunk;
+
+  // Tries to map an fcmp against a recognized constant (rules 1-4).
+  void convertFCmp(CmpInst &I);
+
+  // Tries to map an icmp: a sign-bit test or a tdc result check (rules 5-7).
+  void convertICmp(CmpInst &I);
+
+  // Maps an i1 and/or/xor instruction, both of whose operands have already
+  // been converted (rules 8-10).
+  void convertLogicOp(BinaryOperator &I);
+
+  // Marks an instruction as converted: records it in ConvertedInsts and
+  // pushes any and/or/xor i1 users onto LogicOpsWorklist.
+  void converted(Instruction *I, Value *V, int Mask, bool Worthy) {
+    ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy);
+    auto &M = *I->getFunction()->getParent();
+    auto &Ctx = M.getContext();
+    for (auto *U : I->users()) {
+      auto *LI = dyn_cast<BinaryOperator>(U);
+      if (LI && LI->getType() == Type::getInt1Ty(Ctx) &&
+          (LI->getOpcode() == Instruction::And ||
+           LI->getOpcode() == Instruction::Or ||
+           LI->getOpcode() == Instruction::Xor)) {
+        LogicOpsWorklist.push_back(LI);
+      }
+    }
+  }
+};
+
+} // end anonymous namespace
+
+char SystemZTDCPass::ID = 0;
+INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc",
+                "SystemZ Test Data Class optimization", false, false)
+
+FunctionPass *llvm::createSystemZTDCPass() { // Used by addIRPasses().
+  return new SystemZTDCPass();
+}
+
+void SystemZTDCPass::convertFCmp(CmpInst &I) {
+  Value *Op0 = I.getOperand(0);
+  auto *Const = dyn_cast<ConstantFP>(I.getOperand(1));
+  auto Pred = I.getPredicate();
+  // Only comparisons against a constant second operand are interesting.
+  if (!Const)
+    return;
+  // Compute the smallest normal number (and its negation) for Op0's type.
+  auto &Sem = Op0->getType()->getFltSemantics();
+  APFloat Smallest = APFloat::getSmallestNormalized(Sem);
+  APFloat NegSmallest = Smallest;
+  NegSmallest.changeSign();
+  // Classify Const; WhichConst becomes the row index into Masks below.
+  int WhichConst;
+  if (Const->isZero()) {
+    // All comparisons with 0 can be converted.
+    WhichConst = 0;
+  } else if (Const->isInfinity()) {
+    // Likewise for infinities.
+    WhichConst = Const->isNegative() ? 2 : 1;
+  } else if (Const->isExactlyValue(Smallest)) {
+    // For Smallest, EQ is inseparable from GT: need both bits set or neither.
+    if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE &&
+        (Pred & CmpInst::FCMP_OGE) != 0)
+      return;
+    WhichConst = 3;
+  } else if (Const->isExactlyValue(NegSmallest)) {
+    // Likewise for NegSmallest: EQ is inseparable from LT (both bits or none).
+    if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE &&
+        (Pred & CmpInst::FCMP_OLE) != 0)
+      return;
+    WhichConst = 4;
+  } else {
+    // Not one of our special constants.
+    return;
+  }
+  // Partial masks for the EQ, GT, LT, UN predicate bits, indexed by WhichConst.
+  static const int Masks[][4] = {
+    { // 0
+      SystemZ::TDCMASK_ZERO,              // eq
+      SystemZ::TDCMASK_POSITIVE,          // gt
+      SystemZ::TDCMASK_NEGATIVE,          // lt
+      SystemZ::TDCMASK_NAN,               // un
+    },
+    { // inf
+      SystemZ::TDCMASK_INFINITY_PLUS,     // eq
+      0,                                  // gt
+      (SystemZ::TDCMASK_ZERO |
+       SystemZ::TDCMASK_NEGATIVE |
+       SystemZ::TDCMASK_NORMAL_PLUS |
+       SystemZ::TDCMASK_SUBNORMAL_PLUS),  // lt
+      SystemZ::TDCMASK_NAN,               // un
+    },
+    { // -inf
+      SystemZ::TDCMASK_INFINITY_MINUS,    // eq
+      (SystemZ::TDCMASK_ZERO |
+       SystemZ::TDCMASK_POSITIVE |
+       SystemZ::TDCMASK_NORMAL_MINUS |
+       SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
+      0,                                  // lt
+      SystemZ::TDCMASK_NAN,               // un
+    },
+    { // minnorm
+      0,                                  // eq (unsupported)
+      (SystemZ::TDCMASK_NORMAL_PLUS |
+       SystemZ::TDCMASK_INFINITY_PLUS),   // gt (actually ge)
+      (SystemZ::TDCMASK_ZERO |
+       SystemZ::TDCMASK_NEGATIVE |
+       SystemZ::TDCMASK_SUBNORMAL_PLUS),  // lt
+      SystemZ::TDCMASK_NAN,               // un
+    },
+    { // -minnorm
+      0,                                  // eq (unsupported)
+      (SystemZ::TDCMASK_ZERO |
+       SystemZ::TDCMASK_POSITIVE |
+       SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
+      (SystemZ::TDCMASK_NORMAL_MINUS |
+       SystemZ::TDCMASK_INFINITY_MINUS),  // lt (actually le)
+      SystemZ::TDCMASK_NAN,               // un
+    }
+  };
+  // Build the mask by OR-ing the partial mask of every predicate bit set.
+  int Mask = 0;
+  if (Pred & CmpInst::FCMP_OEQ)
+    Mask |= Masks[WhichConst][0];
+  if (Pred & CmpInst::FCMP_OGT)
+    Mask |= Masks[WhichConst][1];
+  if (Pred & CmpInst::FCMP_OLT)
+    Mask |= Masks[WhichConst][2];
+  if (Pred & CmpInst::FCMP_UNO)
+    Mask |= Masks[WhichConst][3];
+  // A lone fcmp is unworthy of tdc conversion on its own, but may become
+  // worthy if combined with fabs.
+  bool Worthy = false;
+  if (CallInst *CI = dyn_cast<CallInst>(Op0)) {
+    Function *F = CI->getCalledFunction();
+    if (F && F->getIntrinsicID() == Intrinsic::fabs) {
+      // Fold with fabs: keep the plus classes, then mirror them onto minus.
+      Mask &= SystemZ::TDCMASK_PLUS;
+      Mask |= Mask >> 1;
+      Op0 = CI->getArgOperand(0);
+      // A combination of fcmp with fabs is a win, unless the constant
+      // involved is 0 (which is handled by later passes).
+      Worthy = WhichConst != 0;
+      PossibleJunk.insert(CI);
+    }
+  }
+  converted(&I, Op0, Mask, Worthy);
+}
+
+void SystemZTDCPass::convertICmp(CmpInst &I) {
+  Value *Op0 = I.getOperand(0);
+  auto *Const = dyn_cast<ConstantInt>(I.getOperand(1));
+  auto Pred = I.getPredicate();
+  // All our icmp rules involve comparisons with consts.
+  if (!Const)
+    return;
+  if (auto *Cast = dyn_cast<BitCastInst>(Op0)) {
+    // Rules 5-6: icmp of a bitcast float/double/fp128, used as a sign-bit test.
+    if (!Cast->getSrcTy()->isFloatTy() &&
+        !Cast->getSrcTy()->isDoubleTy() &&
+        !Cast->getSrcTy()->isFP128Ty())
+      return;
+    Value *V = Cast->getOperand(0);
+    int Mask;
+    if (Pred == CmpInst::ICMP_SLT && Const->isZero()) {
+      // icmp slt (bitcast X), 0 - true iff the sign bit is set
+      Mask = SystemZ::TDCMASK_MINUS;
+    } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) {
+      // icmp sgt (bitcast X), -1 - true iff the sign bit is clear
+      Mask = SystemZ::TDCMASK_PLUS;
+    } else {
+      // Not a sign bit check.
+      return;
+    }
+    PossibleJunk.insert(Cast);
+    converted(&I, V, Mask, true);
+  } else if (auto *CI = dyn_cast<CallInst>(Op0)) {
+    // Rule 7: check if this is a pre-existing call of our tdc intrinsic.
+    Function *F = CI->getCalledFunction();
+    if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc)
+      return;
+    if (!Const->isZero())
+      return;
+    Value *V = CI->getArgOperand(0);
+    auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+    // Bail if the mask is not a constant.
+    if (!MaskC)
+      return;
+    int Mask = MaskC->getZExtValue();
+    Mask &= SystemZ::TDCMASK_ALL; // Drop bits outside the defined classes.
+    if (Pred == CmpInst::ICMP_NE) {
+      // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC
+    } else if (Pred == CmpInst::ICMP_EQ) {
+      // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask
+      Mask ^= SystemZ::TDCMASK_ALL;
+    } else {
+      // An unknown comparison - ignore.
+      return;
+    }
+    PossibleJunk.insert(CI);
+    converted(&I, V, Mask, false);
+  }
+}
+
+void SystemZTDCPass::convertLogicOp(BinaryOperator &I) {
+  Value *Op0, *Op1; // TDC operands of I's two (already converted) inputs.
+  int Mask0, Mask1;
+  bool Worthy0, Worthy1;
+  std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))];
+  std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))];
+  if (Op0 != Op1) // Masks can only be merged when both test the same value.
+    return;
+  int Mask;
+  switch (I.getOpcode()) {
+    case Instruction::And:
+      Mask = Mask0 & Mask1;
+      break;
+    case Instruction::Or:
+      Mask = Mask0 | Mask1;
+      break;
+    case Instruction::Xor:
+      Mask = Mask0 ^ Mask1;
+      break;
+    default:
+      llvm_unreachable("Unknown op in convertLogicOp");
+  }
+  converted(&I, Op0, Mask, true); // A folded logic op is always worthy.
+}
+
+bool SystemZTDCPass::runOnFunction(Function &F) {
+  ConvertedInsts.clear();
+  LogicOpsWorklist.clear();
+  PossibleJunk.clear();
+
+  // Step 1: map all convertible fcmp and icmp instructions.
+  for (auto &I : instructions(F)) {
+    if (I.getOpcode() == Instruction::FCmp)
+      convertFCmp(cast<CmpInst>(I));
+    else if (I.getOpcode() == Instruction::ICmp)
+      convertICmp(cast<CmpInst>(I));
+  }
+
+  // If none found, bail already.
+  if (ConvertedInsts.empty())
+    return false;
+
+  // Step 2: drain the worklist of logic instructions.
+  while (!LogicOpsWorklist.empty()) {
+    BinaryOperator *Op = LogicOpsWorklist.back();
+    LogicOpsWorklist.pop_back();
+    // If both operands mapped, and the instruction itself not yet mapped,
+    // convert it.
+    if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) &&
+        ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) &&
+        !ConvertedInsts.count(Op))
+      convertLogicOp(*Op);
+  }
+
+  // Step 3: replace the instructions.  Do it in the reverse order of
+  // finding them, since there's a good chance the earlier ones will be
+  // unused (due to being folded into later ones).
+  Module &M = *F.getParent();
+  auto &Ctx = M.getContext();
+  Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
+  bool MadeChange = false;
+  for (auto &It : reverse(ConvertedInsts)) {
+    Instruction *I = It.first;
+    Value *V;
+    int Mask;
+    bool Worthy;
+    std::tie(V, Mask, Worthy) = It.second;
+    if (!I->user_empty()) {
+      // If used and unworthy of conversion, skip it.
+      if (!Worthy)
+        continue;
+      // Call the intrinsic, compare result with 0.
+      Value *TDCFunc = Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc,
+                                                 V->getType());
+      IRBuilder<> IRB(I);
+      Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask);
+      Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal});
+      Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32);
+      I->replaceAllUsesWith(ICmp);
+    }
+    // If unused, or used and converted, remove it.
+    I->eraseFromParent();
+    MadeChange = true;
+  }
+
+  if (!MadeChange)
+    return false;
+
+  // Step 4: we've actually done something - now clear misc accumulated junk
+  // (fabs, bitcast, pre-existing tdc calls) that has become unused.
+  for (auto *I : PossibleJunk)
+    if (I->user_empty())
+      I->eraseFromParent();
+
+  return true;
+}
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 66a6e85df37..85a3f6f4a8b 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -122,6 +122,9 @@ public:
 } // end anonymous namespace
 
 void SystemZPassConfig::addIRPasses() {
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(createSystemZTDCPass());
+
   TargetPassConfig::addIRPasses();
 }
 
diff --git a/test/CodeGen/SystemZ/tdc-01.ll b/test/CodeGen/SystemZ/tdc-01.ll
new file mode 100644
index 00000000000..052d895b798
--- /dev/null
+++ b/test/CodeGen/SystemZ/tdc-01.ll
@@ -0,0 +1,95 @@
+; Test the Test Data Class instruction, selected manually via the intrinsic.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare i32 @llvm.s390.tdc.f32(float, i64)
+declare i32 @llvm.s390.tdc.f64(double, i64)
+declare i32 @llvm.s390.tdc.f128(fp128, i64)
+
+; Check using as i32 - f32
+define i32 @f1(float %x) {
+; CHECK-LABEL: f1
+; CHECK: tceb %f0, 123
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+  %res = call i32 @llvm.s390.tdc.f32(float %x, i64 123)
+  ret i32 %res
+}
+
+; Check using as i32 - f64
+define i32 @f2(double %x) {
+; CHECK-LABEL: f2
+; CHECK: tcdb %f0, 123
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+  %res = call i32 @llvm.s390.tdc.f64(double %x, i64 123)
+  ret i32 %res
+}
+
+; Check using as i32 - f128
+define i32 @f3(fp128 %x) {
+; CHECK-LABEL: f3
+; CHECK: ld %f0, 0(%r2)
+; CHECK: ld %f2, 8(%r2)
+; CHECK: tcxb %f0, 123
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+  %res = call i32 @llvm.s390.tdc.f128(fp128 %x, i64 123)
+  ret i32 %res
+}
+
+declare void @g()
+
+; Check branch
+define void @f4(float %x) {
+; CHECK-LABEL: f4
+; CHECK: tceb %f0, 123
+; CHECK: jgl g
+; CHECK: br %r14
+  %res = call i32 @llvm.s390.tdc.f32(float %x, i64 123)
+  %cond = icmp ne i32 %res, 0
+  br i1 %cond, label %call, label %exit
+
+call:
+  tail call void @g()
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check branch negated
+define void @f5(float %x) {
+; CHECK-LABEL: f5
+; CHECK: tceb %f0, 123
+; CHECK: jge g
+; CHECK: br %r14
+  %res = call i32 @llvm.s390.tdc.f32(float %x, i64 123)
+  %cond = icmp eq i32 %res, 0
+  br i1 %cond, label %call, label %exit
+
+call:
+  tail call void @g()
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check non-const mask
+define void @f6(float %x, i64 %y) {
+; CHECK-LABEL: f6
+; CHECK: tceb %f0, 0(%r2)
+; CHECK: jge g
+; CHECK: br %r14
+  %res = call i32 @llvm.s390.tdc.f32(float %x, i64 %y)
+  %cond = icmp eq i32 %res, 0
+  br i1 %cond, label %call, label %exit
+
+call:
+  tail call void @g()
+  br label %exit
+
+exit:
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/tdc-02.ll b/test/CodeGen/SystemZ/tdc-02.ll
new file mode 100644
index 00000000000..c0c4ac84349
--- /dev/null
+++ b/test/CodeGen/SystemZ/tdc-02.ll
@@ -0,0 +1,96 @@
+; Test the Test Data Class instruction logic operation folding.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare i32 @llvm.s390.tdc.f32(float, i64)
+declare i32 @llvm.s390.tdc.f64(double, i64)
+declare i32 @llvm.s390.tdc.f128(fp128, i64)
+
+; Check using or i1
+define i32 @f1(float %x) {
+; CHECK-LABEL: f1
+; CHECK: tceb %f0, 7
+; CHECK-NEXT: ipm [[REG1:%r[0-9]+]]
+; CHECK-NEXT: risbg %r2, [[REG1]], 63, 191, 36
+  %a = call i32 @llvm.s390.tdc.f32(float %x, i64 3)
+  %b = call i32 @llvm.s390.tdc.f32(float %x, i64 6)
+  %a1 = icmp ne i32 %a, 0
+  %b1 = icmp ne i32 %b, 0
+  %res = or i1 %a1, %b1
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Check using and i1
+define i32 @f2(double %x) {
+; CHECK-LABEL: f2
+; CHECK: tcdb %f0, 2
+; CHECK-NEXT: ipm [[REG1:%r[0-9]+]]
+; CHECK-NEXT: risbg %r2, [[REG1]], 63, 191, 36
+  %a = call i32 @llvm.s390.tdc.f64(double %x, i64 3)
+  %b = call i32 @llvm.s390.tdc.f64(double %x, i64 6)
+  %a1 = icmp ne i32 %a, 0
+  %b1 = icmp ne i32 %b, 0
+  %res = and i1 %a1, %b1
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Check using xor i1
+define i32 @f3(fp128 %x) {
+; CHECK-LABEL: f3
+; CHECK: tcxb %f0, 5
+; CHECK-NEXT: ipm [[REG1:%r[0-9]+]]
+; CHECK-NEXT: risbg %r2, [[REG1]], 63, 191, 36
+  %a = call i32 @llvm.s390.tdc.f128(fp128 %x, i64 3)
+  %b = call i32 @llvm.s390.tdc.f128(fp128 %x, i64 6)
+  %a1 = icmp ne i32 %a, 0
+  %b1 = icmp ne i32 %b, 0
+  %res = xor i1 %a1, %b1
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Check using xor i1 - negated test
+define i32 @f4(fp128 %x) {
+; CHECK-LABEL: f4
+; CHECK: tcxb %f0, 4090
+; CHECK-NEXT: ipm [[REG1:%r[0-9]+]]
+; CHECK-NEXT: risbg %r2, [[REG1]], 63, 191, 36
+  %a = call i32 @llvm.s390.tdc.f128(fp128 %x, i64 3)
+  %b = call i32 @llvm.s390.tdc.f128(fp128 %x, i64 6)
+  %a1 = icmp ne i32 %a, 0
+  %b1 = icmp eq i32 %b, 0
+  %res = xor i1 %a1, %b1
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Check different first args
+define i32 @f5(float %x, float %y) {
+; CHECK-LABEL: f5
+; CHECK-NOT: tceb {{%f[0-9]+}}, 5
+; CHECK-DAG: tceb %f0, 3
+; CHECK-DAG: tceb %f2, 6
+  %a = call i32 @llvm.s390.tdc.f32(float %x, i64 3)
+  %b = call i32 @llvm.s390.tdc.f32(float %y, i64 6)
+  %a1 = icmp ne i32 %a, 0
+  %b1 = icmp ne i32 %b, 0
+  %res = xor i1 %a1, %b1
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Non-const mask (not supported)
+define i32 @f6(float %x, i64 %y) {
+; CHECK-LABEL: f6
+; CHECK-DAG: tceb %f0, 0(%r2)
+; CHECK-DAG: tceb %f0, 6
+  %a = call i32 @llvm.s390.tdc.f32(float %x, i64 %y)
+  %b = call i32 @llvm.s390.tdc.f32(float %x, i64 6)
+  %a1 = icmp ne i32 %a, 0
+  %b1 = icmp ne i32 %b, 0
+  %res = xor i1 %a1, %b1
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
diff --git a/test/CodeGen/SystemZ/tdc-03.ll b/test/CodeGen/SystemZ/tdc-03.ll
new file mode 100644
index 00000000000..95708f1effc
--- /dev/null
+++ b/test/CodeGen/SystemZ/tdc-03.ll
@@ -0,0 +1,139 @@
+; Test the Test Data Class instruction logic operation conversion from
+; compares.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare float @llvm.fabs.f32(float)
+declare double @llvm.fabs.f64(double)
+declare fp128 @llvm.fabs.f128(fp128)
+
+; Compare with 0 (unworthy)
+define i32 @f1(float %x) {
+; CHECK-LABEL: f1
+; CHECK-NOT: tceb
+; CHECK: ltebr {{%f[0-9]+}}, %f0
+; CHECK-NOT: tceb
+  %res = fcmp ugt float %x, 0.0
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Compare fabs with 0 (unworthy)
+define i32 @f2(float %x) {
+; CHECK-LABEL: f2
+; CHECK-NOT: tceb
+; CHECK: lpebr {{%f[0-9]+}}, %f0
+; CHECK-NOT: tceb
+  %y = call float @llvm.fabs.f32(float %x)
+  %res = fcmp ugt float %y, 0.0
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Compare with inf (unworthy)
+define i32 @f3(float %x) {
+; CHECK-LABEL: f3
+; CHECK-NOT: tceb
+; CHECK: ceb %f0, 0(%r{{[0-9]+}})
+; CHECK-NOT: tceb
+  %res = fcmp ult float %x, 0x7ff0000000000000
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Compare fabs with inf
+define i32 @f4(float %x) {
+; CHECK-LABEL: f4
+; CHECK: tceb %f0, 4047
+  %y = call float @llvm.fabs.f32(float %x)
+  %res = fcmp ult float %y, 0x7ff0000000000000
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Compare with minnorm (unworthy)
+define i32 @f5(float %x) {
+; CHECK-LABEL: f5
+; CHECK-NOT: tceb
+; CHECK: ceb %f0, 0(%r{{[0-9]+}})
+; CHECK-NOT: tceb
+  %res = fcmp ult float %x, 0x3810000000000000
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Compare fabs with minnorm
+define i32 @f6(float %x) {
+; CHECK-LABEL: f6
+; CHECK: tceb %f0, 3279
+  %y = call float @llvm.fabs.f32(float %x)
+  %res = fcmp ult float %y, 0x3810000000000000
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Compare fabs with minnorm, unsupported condition
+define i32 @f7(float %x) {
+; CHECK-LABEL: f7
+; CHECK-NOT: tceb
+; CHECK: lpdfr [[REG:%f[0-9]+]], %f0
+; CHECK: ceb [[REG]], 0(%r{{[0-9]+}})
+; CHECK-NOT: tceb
+  %y = call float @llvm.fabs.f32(float %x)
+  %res = fcmp ugt float %y, 0x3810000000000000
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Compare fabs with unsupported constant
+define i32 @f8(float %x) {
+; CHECK-LABEL: f8
+; CHECK-NOT: tceb
+; CHECK: lpdfr [[REG:%f[0-9]+]], %f0
+; CHECK: ceb [[REG]], 0(%r{{[0-9]+}})
+; CHECK-NOT: tceb
+  %y = call float @llvm.fabs.f32(float %x)
+  %res = fcmp ult float %y, 0x3ff0000000000000
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Compare fabs with minnorm - double
+define i32 @f9(double %x) {
+; CHECK-LABEL: f9
+; CHECK: tcdb %f0, 3279
+  %y = call double @llvm.fabs.f64(double %x)
+  %res = fcmp ult double %y, 0x0010000000000000
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Compare fabs with minnorm - long double
+define i32 @f10(fp128 %x) {
+; CHECK-LABEL: f10
+; CHECK: tcxb %f0, 3279
+  %y = call fp128 @llvm.fabs.f128(fp128 %x)
+  %res = fcmp ult fp128 %y, 0xL00000000000000000001000000000000
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Compare fabs for one with inf - clang's isfinite
+define i32 @f11(double %x) {
+; CHECK-LABEL: f11
+; CHECK: tcdb %f0, 4032
+  %y = call double @llvm.fabs.f64(double %x)
+  %res = fcmp one double %y, 0x7ff0000000000000
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Compare fabs for oeq with inf - clang's isinf
+define i32 @f12(double %x) {
+; CHECK-LABEL: f12
+; CHECK: tcdb %f0, 48
+  %y = call double @llvm.fabs.f64(double %x)
+  %res = fcmp oeq double %y, 0x7ff0000000000000
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
diff --git a/test/CodeGen/SystemZ/tdc-04.ll b/test/CodeGen/SystemZ/tdc-04.ll
new file mode 100644
index 00000000000..929285b0ba8
--- /dev/null
+++ b/test/CodeGen/SystemZ/tdc-04.ll
@@ -0,0 +1,85 @@
+; Test the Test Data Class instruction logic operation conversion from
+; signbit extraction.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+;
+
+; Extract sign bit.
+define i32 @f1(float %x) {
+; CHECK-LABEL: f1
+; CHECK: tceb %f0, 1365
+  %cast = bitcast float %x to i32
+  %res = icmp slt i32 %cast, 0
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Extract negated sign bit.
+define i32 @f2(float %x) {
+; CHECK-LABEL: f2
+; CHECK: tceb %f0, 2730
+  %cast = bitcast float %x to i32
+  %res = icmp sgt i32 %cast, -1
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Extract sign bit.
+define i32 @f3(double %x) {
+; CHECK-LABEL: f3
+; CHECK: tcdb %f0, 1365
+  %cast = bitcast double %x to i64
+  %res = icmp slt i64 %cast, 0
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Extract negated sign bit.
+define i32 @f4(double %x) {
+; CHECK-LABEL: f4
+; CHECK: tcdb %f0, 2730
+  %cast = bitcast double %x to i64
+  %res = icmp sgt i64 %cast, -1
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Extract sign bit.
+define i32 @f5(fp128 %x) {
+; CHECK-LABEL: f5
+; CHECK: tcxb %f0, 1365
+  %cast = bitcast fp128 %x to i128
+  %res = icmp slt i128 %cast, 0
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Extract negated sign bit.
+define i32 @f6(fp128 %x) {
+; CHECK-LABEL: f6
+; CHECK: tcxb %f0, 2730
+  %cast = bitcast fp128 %x to i128
+  %res = icmp sgt i128 %cast, -1
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Wrong const.
+define i32 @f7(float %x) {
+; CHECK-LABEL: f7
+; CHECK-NOT: tceb
+  %cast = bitcast float %x to i32
+  %res = icmp slt i32 %cast, -1
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Wrong pred.
+define i32 @f8(float %x) {
+; CHECK-LABEL: f8
+; CHECK-NOT: tceb
+  %cast = bitcast float %x to i32
+  %res = icmp eq i32 %cast, 0
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
diff --git a/test/CodeGen/SystemZ/tdc-05.ll b/test/CodeGen/SystemZ/tdc-05.ll
new file mode 100644
index 00000000000..c639a9b7b47
--- /dev/null
+++ b/test/CodeGen/SystemZ/tdc-05.ll
@@ -0,0 +1,97 @@
+; Test that FP compares are converted to a Test Data Class instruction when
+; combined with a signbit test or other compares, making it worthwhile.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+;
+
+declare float @llvm.fabs.f32(float)
+declare double @llvm.fabs.f64(double)
+declare fp128 @llvm.fabs.f128(fp128)
+
+; OR of the sign bit (bitcast, slt 0) with an fcmp ugt against 0.0.
+define i32 @f1(float %x) {
+; CHECK-LABEL: f1
+; CHECK: tceb %f0, 2047
+  %cast = bitcast float %x to i32
+  %sign = icmp slt i32 %cast, 0
+  %fcmp = fcmp ugt float %x, 0.0
+  %res = or i1 %sign, %fcmp
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; AND of the negated sign bit (sgt -1) with an fcmp ult against +inf.
+define i32 @f2(float %x) {
+; CHECK-LABEL: f2
+; CHECK: tceb %f0, 2698
+  %cast = bitcast float %x to i32
+  %sign = icmp sgt i32 %cast, -1
+  %fcmp = fcmp ult float %x, 0x7ff0000000000000
+  %res = and i1 %sign, %fcmp
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; AND of the negated sign bit (sgt -1) with an fcmp olt against minnorm.
+define i32 @f3(float %x) {
+; CHECK-LABEL: f3
+; CHECK: tceb %f0, 2176
+  %cast = bitcast float %x to i32
+  %sign = icmp sgt i32 %cast, -1
+  %fcmp = fcmp olt float %x, 0x3810000000000000
+  %res = and i1 %sign, %fcmp
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Test float isnormal, from clang: ord(x) & (|x| ult inf & |x| uge minnorm).
+define i32 @f4(float %x) {
+; CHECK-LABEL: f4
+; CHECK: tceb %f0, 768
+  %y = call float @llvm.fabs.f32(float %x)
+  %ord = fcmp ord float %x, 0.0
+  %a = fcmp ult float %y, 0x7ff0000000000000
+  %b = fcmp uge float %y, 0x3810000000000000
+  %c = and i1 %a, %b
+  %res = and i1 %ord, %c
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Check for negative zero: sign bit set (slt 0) AND fcmp oeq against 0.0.
+define i32 @f5(float %x) {
+; CHECK-LABEL: f5
+; CHECK: tceb %f0, 1024
+  %cast = bitcast float %x to i32
+  %sign = icmp slt i32 %cast, 0
+  %fcmp = fcmp oeq float %x, 0.0
+  %res = and i1 %sign, %fcmp
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Test double isnormal, from clang; ands associate as %b & (%ord & %a).
+define i32 @f6(double %x) {
+; CHECK-LABEL: f6
+; CHECK: tcdb %f0, 768
+  %y = call double @llvm.fabs.f64(double %x)
+  %ord = fcmp ord double %x, 0.0
+  %a = fcmp ult double %y, 0x7ff0000000000000
+  %b = fcmp uge double %y, 0x0010000000000000
+  %c = and i1 %ord, %a
+  %res = and i1 %b, %c
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
+
+; Test isinf || isnan, from clang: (fabs(x) oeq inf) | (x uno 0.0).
+define i32 @f7(double %x) {
+; CHECK-LABEL: f7
+; CHECK: tcdb %f0, 63
+  %y = call double @llvm.fabs.f64(double %x)
+  %a = fcmp oeq double %y, 0x7ff0000000000000
+  %b = fcmp uno double %x, 0.0
+  %res = or i1 %a, %b
+  %xres = zext i1 %res to i32
+  ret i32 %xres
+}
diff --git a/test/CodeGen/SystemZ/tdc-06.ll b/test/CodeGen/SystemZ/tdc-06.ll
new file mode 100644
index 00000000000..11fb1e2916e
--- /dev/null
+++ b/test/CodeGen/SystemZ/tdc-06.ll
@@ -0,0 +1,48 @@
+; Test the Test Data Class instruction, as used by fpclassify.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+;
+
+declare float @llvm.fabs.f32(float)
+declare double @llvm.fabs.f64(double)
+declare fp128 @llvm.fabs.f128(fp128)
+
+define i32 @fpc(double %x) {      ; classifies %x, returning 5, 1, 2, 3 or 4
+entry:                            ; %x oeq 0.0 -> 5
+; CHECK-LABEL: fpc
+; CHECK: lhi %r2, 5
+; CHECK: ltdbr %f0, %f0
+; CHECK: je [[RET:.L.*]]
+  %testeq = fcmp oeq double %x, 0.000000e+00
+  br i1 %testeq, label %ret, label %nonzero
+
+nonzero:                          ; %x unordered (NaN) -> 1
+; CHECK: lhi %r2, 1
+; CHECK: cdbr %f0, %f0
+; CHECK: jo [[RET]]
+  %testnan = fcmp uno double %x, 0.000000e+00
+  br i1 %testnan, label %ret, label %nonzeroord
+
+nonzeroord:                       ; fabs(%x) oeq inf -> 2
+; CHECK: lhi %r2, 2
+; CHECK: tcdb %f0, 48
+; CHECK: jl [[RET]]
+  %abs = tail call double @llvm.fabs.f64(double %x)
+  %testinf = fcmp oeq double %abs, 0x7FF0000000000000
+  br i1 %testinf, label %ret, label %finite
+
+finite:                           ; fabs(%x) uge min normal -> 3, else 4
+; CHECK: lhi %r2, 3
+; CHECK: tcdb %f0, 831
+; CHECK: blr %r14
+; CHECK: lhi %r2, 4
+  %testnormal = fcmp uge double %abs, 0x10000000000000
+  %finres = select i1 %testnormal, i32 3, i32 4
+  br label %ret
+
+ret:                              ; merge the per-path result and return it
+; CHECK: [[RET]]:
+; CHECK: br %r14
+  %res = phi i32 [ 5, %entry ], [ 1, %nonzero ], [ 2, %nonzeroord ], [ %finres, %finite ]
+  ret i32 %res
+}