From bd07f66851b89d5d4ed13cdd683173b0299c3f5c Mon Sep 17 00:00:00 2001 From: Marcin Koscielnicki Date: Sun, 10 Jul 2016 14:41:22 +0000 Subject: [PATCH] [SystemZ] Utilize Test Data Class instructions. This adds a new SystemZ-specific intrinsic, llvm.s390.tdc.f(32|64|128), which maps straight to the test data class instructions. A new IR pass is added to recognize instructions that can be converted to TDC and perform the necessary replacements. Differential Revision: http://reviews.llvm.org/D21949 llvm-svn: 275016 --- include/llvm/IR/IntrinsicsSystemZ.td | 11 + lib/Target/SystemZ/CMakeLists.txt | 1 + lib/Target/SystemZ/README.txt | 4 - lib/Target/SystemZ/SystemZ.h | 41 +++ lib/Target/SystemZ/SystemZISelLowering.cpp | 5 + lib/Target/SystemZ/SystemZTDC.cpp | 382 ++++++++++++++++++++ lib/Target/SystemZ/SystemZTargetMachine.cpp | 3 + test/CodeGen/SystemZ/tdc-01.ll | 95 +++++ test/CodeGen/SystemZ/tdc-02.ll | 96 +++++ test/CodeGen/SystemZ/tdc-03.ll | 139 +++++++ test/CodeGen/SystemZ/tdc-04.ll | 85 +++++ test/CodeGen/SystemZ/tdc-05.ll | 97 +++++ test/CodeGen/SystemZ/tdc-06.ll | 48 +++ 13 files changed, 1003 insertions(+), 4 deletions(-) create mode 100644 lib/Target/SystemZ/SystemZTDC.cpp create mode 100644 test/CodeGen/SystemZ/tdc-01.ll create mode 100644 test/CodeGen/SystemZ/tdc-02.ll create mode 100644 test/CodeGen/SystemZ/tdc-03.ll create mode 100644 test/CodeGen/SystemZ/tdc-04.ll create mode 100644 test/CodeGen/SystemZ/tdc-05.ll create mode 100644 test/CodeGen/SystemZ/tdc-06.ll diff --git a/include/llvm/IR/IntrinsicsSystemZ.td b/include/llvm/IR/IntrinsicsSystemZ.td index 49de4f9f906..bfc15b9bc09 100644 --- a/include/llvm/IR/IntrinsicsSystemZ.td +++ b/include/llvm/IR/IntrinsicsSystemZ.td @@ -374,3 +374,14 @@ let TargetPrefix = "s390" in { [llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; } + +//===----------------------------------------------------------------------===// +// +// Misc intrinsics +// 
+//===----------------------------------------------------------------------===// + +let TargetPrefix = "s390" in { + def int_s390_tdc : Intrinsic<[llvm_i32_ty], [llvm_anyfloat_ty, llvm_i64_ty], + [IntrNoMem]>; +} diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt index 336f037bb73..4b849ad6491 100644 --- a/lib/Target/SystemZ/CMakeLists.txt +++ b/lib/Target/SystemZ/CMakeLists.txt @@ -30,6 +30,7 @@ add_llvm_target(SystemZCodeGen SystemZSubtarget.cpp SystemZTargetMachine.cpp SystemZTargetTransformInfo.cpp + SystemZTDC.cpp ) add_subdirectory(AsmParser) diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt index 69b72d26020..86a1322c9e2 100644 --- a/lib/Target/SystemZ/README.txt +++ b/lib/Target/SystemZ/README.txt @@ -36,10 +36,6 @@ We don't use the BRANCH ON INDEX instructions. -- -We don't use the TEST DATA CLASS instructions. - --- - We only use MVC, XC and CLC for constant-length block operations. We could extend them to variable-length operations too, using EXECUTE RELATIVE LONG. diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h index cafe2c5948c..c8ea9641fb6 100644 --- a/lib/Target/SystemZ/SystemZ.h +++ b/lib/Target/SystemZ/SystemZ.h @@ -87,6 +87,11 @@ const unsigned CCMASK_VCMP_MIXED = CCMASK_1; const unsigned CCMASK_VCMP_NONE = CCMASK_3; const unsigned CCMASK_VCMP = CCMASK_0 | CCMASK_1 | CCMASK_3; +// Condition-code mask assignments for Test Data Class. +const unsigned CCMASK_TDC_NOMATCH = CCMASK_0; +const unsigned CCMASK_TDC_MATCH = CCMASK_1; +const unsigned CCMASK_TDC = CCMASK_TDC_NOMATCH | CCMASK_TDC_MATCH; + // The position of the low CC bit in an IPM result. 
const unsigned IPM_CC = 28; @@ -94,6 +99,41 @@ const unsigned IPM_CC = 28; const unsigned PFD_READ = 1; const unsigned PFD_WRITE = 2; +// Mask assignments for TDC +const unsigned TDCMASK_ZERO_PLUS = 0x800; +const unsigned TDCMASK_ZERO_MINUS = 0x400; +const unsigned TDCMASK_NORMAL_PLUS = 0x200; +const unsigned TDCMASK_NORMAL_MINUS = 0x100; +const unsigned TDCMASK_SUBNORMAL_PLUS = 0x080; +const unsigned TDCMASK_SUBNORMAL_MINUS = 0x040; +const unsigned TDCMASK_INFINITY_PLUS = 0x020; +const unsigned TDCMASK_INFINITY_MINUS = 0x010; +const unsigned TDCMASK_QNAN_PLUS = 0x008; +const unsigned TDCMASK_QNAN_MINUS = 0x004; +const unsigned TDCMASK_SNAN_PLUS = 0x002; +const unsigned TDCMASK_SNAN_MINUS = 0x001; + +const unsigned TDCMASK_ZERO = TDCMASK_ZERO_PLUS | TDCMASK_ZERO_MINUS; +const unsigned TDCMASK_POSITIVE = TDCMASK_NORMAL_PLUS | + TDCMASK_SUBNORMAL_PLUS | + TDCMASK_INFINITY_PLUS; +const unsigned TDCMASK_NEGATIVE = TDCMASK_NORMAL_MINUS | + TDCMASK_SUBNORMAL_MINUS | + TDCMASK_INFINITY_MINUS; +const unsigned TDCMASK_NAN = TDCMASK_QNAN_PLUS | + TDCMASK_QNAN_MINUS | + TDCMASK_SNAN_PLUS | + TDCMASK_SNAN_MINUS; +const unsigned TDCMASK_PLUS = TDCMASK_POSITIVE | + TDCMASK_ZERO_PLUS | + TDCMASK_QNAN_PLUS | + TDCMASK_SNAN_PLUS; +const unsigned TDCMASK_MINUS = TDCMASK_NEGATIVE | + TDCMASK_ZERO_MINUS | + TDCMASK_QNAN_MINUS | + TDCMASK_SNAN_MINUS; +const unsigned TDCMASK_ALL = TDCMASK_PLUS | TDCMASK_MINUS; + // Number of bits in a vector register. 
const unsigned VectorBits = 128; @@ -138,6 +178,7 @@ FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM); FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM); +FunctionPass *createSystemZTDCPass(); } // end namespace llvm #endif diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 2259840d2d1..5e1552f586f 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1444,6 +1444,11 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { CCValid = SystemZ::CCMASK_VCMP; return true; + case Intrinsic::s390_tdc: + Opcode = SystemZISD::TDC; + CCValid = SystemZ::CCMASK_TDC; + return true; + default: return false; } diff --git a/lib/Target/SystemZ/SystemZTDC.cpp b/lib/Target/SystemZ/SystemZTDC.cpp new file mode 100644 index 00000000000..96a9ef82c12 --- /dev/null +++ b/lib/Target/SystemZ/SystemZTDC.cpp @@ -0,0 +1,382 @@ +//===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass looks for instructions that can be replaced by a Test Data Class +// instruction, and replaces them when profitable. +// +// Roughly, the following rules are recognized: +// +// 1: fcmp pred X, 0 -> tdc X, mask +// 2: fcmp pred X, +-inf -> tdc X, mask +// 3: fcmp pred X, +-minnorm -> tdc X, mask +// 4: tdc (fabs X), mask -> tdc X, newmask +// 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. 
signbit] +// 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask +// 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask +// 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2) +// 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2) +// 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2) +// +// The pass works in 4 steps: +// +// 1. All fcmp and icmp instructions in a function are checked for a match +// with rules 1-3 and 5-7. Their TDC equivalents are stored in +// the ConvertedInsts mapping. If the operand of a fcmp instruction is +// a fabs, it's also folded according to rule 4. +// 2. All and/or/xor i1 instructions whose operands have both already been +// mapped are mapped according to rules 8-10. LogicOpsWorklist is used +// as a queue of instructions to check. +// 3. All mapped instructions that are considered worthy of conversion (i.e. +// replacing them will actually simplify the final code) are replaced +// with a call to the s390.tdc intrinsic. +// 4. All intermediate results of replaced instructions are removed if unused. +// +// Instructions that match rules 1-3 are considered unworthy of conversion +// on their own (since a comparison instruction is superior), but are mapped +// in the hopes of folding the result using rules 4 and 8-10 (likely removing +// the original comparison in the process). 
+// +//===----------------------------------------------------------------------===// + +#include "SystemZ.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include +#include + +using namespace llvm; + +namespace llvm { + void initializeSystemZTDCPassPass(PassRegistry&); +} + +namespace { + +class SystemZTDCPass : public FunctionPass { +public: + static char ID; + SystemZTDCPass() : FunctionPass(ID) { + initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; +private: + // Maps seen instructions that can be mapped to a TDC, values are + // (TDC operand, TDC mask, worthy flag) triples. + MapVector> ConvertedInsts; + // The queue of and/or/xor i1 instructions to be potentially folded. + std::vector LogicOpsWorklist; + // Instructions matched while folding, to be removed at the end if unused. + std::set PossibleJunk; + + // Tries to convert a fcmp instruction. + void convertFCmp(CmpInst &I); + + // Tries to convert an icmp instruction. + void convertICmp(CmpInst &I); + + // Tries to convert an i1 and/or/xor instruction, whose both operands + // have been already converted. + void convertLogicOp(BinaryOperator &I); + + // Marks an instruction as converted - adds it to ConvertedInsts and adds + // any and/or/xor i1 users to the queue. 
+ void converted(Instruction *I, Value *V, int Mask, bool Worthy) { + ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy); + auto &M = *I->getFunction()->getParent(); + auto &Ctx = M.getContext(); + for (auto *U : I->users()) { + auto *LI = dyn_cast(U); + if (LI && LI->getType() == Type::getInt1Ty(Ctx) && + (LI->getOpcode() == Instruction::And || + LI->getOpcode() == Instruction::Or || + LI->getOpcode() == Instruction::Xor)) { + LogicOpsWorklist.push_back(LI); + } + } + } +}; + +} // end anonymous namespace + +char SystemZTDCPass::ID = 0; +INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc", + "SystemZ Test Data Class optimization", false, false) + +FunctionPass *llvm::createSystemZTDCPass() { + return new SystemZTDCPass(); +} + +void SystemZTDCPass::convertFCmp(CmpInst &I) { + Value *Op0 = I.getOperand(0); + auto *Const = dyn_cast(I.getOperand(1)); + auto Pred = I.getPredicate(); + // Only comparisons with consts are interesting. + if (!Const) + return; + // Compute the smallest normal number (and its negation). + auto &Sem = Op0->getType()->getFltSemantics(); + APFloat Smallest = APFloat::getSmallestNormalized(Sem); + APFloat NegSmallest = Smallest; + NegSmallest.changeSign(); + // Check if Const is one of our recognized consts. + int WhichConst; + if (Const->isZero()) { + // All comparisons with 0 can be converted. + WhichConst = 0; + } else if (Const->isInfinity()) { + // Likewise for infinities. + WhichConst = Const->isNegative() ? 2 : 1; + } else if (Const->isExactlyValue(Smallest)) { + // For Smallest, we cannot do EQ separately from GT. + if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE && + (Pred & CmpInst::FCMP_OGE) != 0) + return; + WhichConst = 3; + } else if (Const->isExactlyValue(NegSmallest)) { + // Likewise for NegSmallest, we cannot do EQ separately from LT. + if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE && + (Pred & CmpInst::FCMP_OLE) != 0) + return; + WhichConst = 4; + } else { + // Not one of our special constants. 
+ return; + } + // Partial masks to use for EQ, GT, LT, UN comparisons, respectively. + static const int Masks[][4] = { + { // 0 + SystemZ::TDCMASK_ZERO, // eq + SystemZ::TDCMASK_POSITIVE, // gt + SystemZ::TDCMASK_NEGATIVE, // lt + SystemZ::TDCMASK_NAN, // un + }, + { // inf + SystemZ::TDCMASK_INFINITY_PLUS, // eq + 0, // gt + (SystemZ::TDCMASK_ZERO | + SystemZ::TDCMASK_NEGATIVE | + SystemZ::TDCMASK_NORMAL_PLUS | + SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt + SystemZ::TDCMASK_NAN, // un + }, + { // -inf + SystemZ::TDCMASK_INFINITY_MINUS, // eq + (SystemZ::TDCMASK_ZERO | + SystemZ::TDCMASK_POSITIVE | + SystemZ::TDCMASK_NORMAL_MINUS | + SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt + 0, // lt + SystemZ::TDCMASK_NAN, // un + }, + { // minnorm + 0, // eq (unsupported) + (SystemZ::TDCMASK_NORMAL_PLUS | + SystemZ::TDCMASK_INFINITY_PLUS), // gt (actually ge) + (SystemZ::TDCMASK_ZERO | + SystemZ::TDCMASK_NEGATIVE | + SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt + SystemZ::TDCMASK_NAN, // un + }, + { // -minnorm + 0, // eq (unsupported) + (SystemZ::TDCMASK_ZERO | + SystemZ::TDCMASK_POSITIVE | + SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt + (SystemZ::TDCMASK_NORMAL_MINUS | + SystemZ::TDCMASK_INFINITY_MINUS), // lt (actually le) + SystemZ::TDCMASK_NAN, // un + } + }; + // Construct the mask as a combination of the partial masks. + int Mask = 0; + if (Pred & CmpInst::FCMP_OEQ) + Mask |= Masks[WhichConst][0]; + if (Pred & CmpInst::FCMP_OGT) + Mask |= Masks[WhichConst][1]; + if (Pred & CmpInst::FCMP_OLT) + Mask |= Masks[WhichConst][2]; + if (Pred & CmpInst::FCMP_UNO) + Mask |= Masks[WhichConst][3]; + // A lone fcmp is unworthy of tdc conversion on its own, but may become + // worthy if combined with fabs. + bool Worthy = false; + if (CallInst *CI = dyn_cast(Op0)) { + Function *F = CI->getCalledFunction(); + if (F && F->getIntrinsicID() == Intrinsic::fabs) { + // Fold with fabs - adjust the mask appropriately. 
+ Mask &= SystemZ::TDCMASK_PLUS; + Mask |= Mask >> 1; + Op0 = CI->getArgOperand(0); + // A combination of fcmp with fabs is a win, unless the constant + // involved is 0 (which is handled by later passes). + Worthy = WhichConst != 0; + PossibleJunk.insert(CI); + } + } + converted(&I, Op0, Mask, Worthy); +} + +void SystemZTDCPass::convertICmp(CmpInst &I) { + Value *Op0 = I.getOperand(0); + auto *Const = dyn_cast(I.getOperand(1)); + auto Pred = I.getPredicate(); + // All our icmp rules involve comparisons with consts. + if (!Const) + return; + if (auto *Cast = dyn_cast(Op0)) { + // Check for icmp+bitcast used for signbit. + if (!Cast->getSrcTy()->isFloatTy() && + !Cast->getSrcTy()->isDoubleTy() && + !Cast->getSrcTy()->isFP128Ty()) + return; + Value *V = Cast->getOperand(0); + int Mask; + if (Pred == CmpInst::ICMP_SLT && Const->isZero()) { + // icmp slt (bitcast X), 0 - set if sign bit true + Mask = SystemZ::TDCMASK_MINUS; + } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) { + // icmp sgt (bitcast X), -1 - set if sign bit false + Mask = SystemZ::TDCMASK_PLUS; + } else { + // Not a sign bit check. + return; + } + PossibleJunk.insert(Cast); + converted(&I, V, Mask, true); + } else if (auto *CI = dyn_cast(Op0)) { + // Check if this is a pre-existing call of our tdc intrinsic. + Function *F = CI->getCalledFunction(); + if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc) + return; + if (!Const->isZero()) + return; + Value *V = CI->getArgOperand(0); + auto *MaskC = dyn_cast(CI->getArgOperand(1)); + // Bail if the mask is not a constant. + if (!MaskC) + return; + int Mask = MaskC->getZExtValue(); + Mask &= SystemZ::TDCMASK_ALL; + if (Pred == CmpInst::ICMP_NE) { + // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC + } else if (Pred == CmpInst::ICMP_EQ) { + // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask + Mask ^= SystemZ::TDCMASK_ALL; + } else { + // An unknown comparison - ignore. 
+ return; + } + PossibleJunk.insert(CI); + converted(&I, V, Mask, false); + } +} + +void SystemZTDCPass::convertLogicOp(BinaryOperator &I) { + Value *Op0, *Op1; + int Mask0, Mask1; + bool Worthy0, Worthy1; + std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast(I.getOperand(0))]; + std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast(I.getOperand(1))]; + if (Op0 != Op1) + return; + int Mask; + switch (I.getOpcode()) { + case Instruction::And: + Mask = Mask0 & Mask1; + break; + case Instruction::Or: + Mask = Mask0 | Mask1; + break; + case Instruction::Xor: + Mask = Mask0 ^ Mask1; + break; + default: + llvm_unreachable("Unknown op in convertLogicOp"); + } + converted(&I, Op0, Mask, true); +} + +bool SystemZTDCPass::runOnFunction(Function &F) { + ConvertedInsts.clear(); + LogicOpsWorklist.clear(); + PossibleJunk.clear(); + + // Look for icmp+fcmp instructions. + for (auto &I : instructions(F)) { + if (I.getOpcode() == Instruction::FCmp) + convertFCmp(cast(I)); + else if (I.getOpcode() == Instruction::ICmp) + convertICmp(cast(I)); + } + + // If none found, bail already. + if (ConvertedInsts.empty()) + return false; + + // Process the queue of logic instructions. + while (!LogicOpsWorklist.empty()) { + BinaryOperator *Op = LogicOpsWorklist.back(); + LogicOpsWorklist.pop_back(); + // If both operands mapped, and the instruction itself not yet mapped, + // convert it. + if (ConvertedInsts.count(dyn_cast(Op->getOperand(0))) && + ConvertedInsts.count(dyn_cast(Op->getOperand(1))) && + !ConvertedInsts.count(Op)) + convertLogicOp(*Op); + } + + // Time to actually replace the instructions. Do it in the reverse order + // of finding them, since there's a good chance the earlier ones will be + // unused (due to being folded into later ones). 
+ Module &M = *F.getParent(); + auto &Ctx = M.getContext(); + Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0); + bool MadeChange = false; + for (auto &It : reverse(ConvertedInsts)) { + Instruction *I = It.first; + Value *V; + int Mask; + bool Worthy; + std::tie(V, Mask, Worthy) = It.second; + if (!I->user_empty()) { + // If used and unworthy of conversion, skip it. + if (!Worthy) + continue; + // Call the intrinsic, compare result with 0. + Value *TDCFunc = Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc, + V->getType()); + IRBuilder<> IRB(I); + Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask); + Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal}); + Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32); + I->replaceAllUsesWith(ICmp); + } + // If unused, or used and converted, remove it. + I->eraseFromParent(); + MadeChange = true; + } + + if (!MadeChange) + return false; + + // We've actually done something - now clear misc accumulated junk (fabs, + // bitcast). + for (auto *I : PossibleJunk) + if (I->user_empty()) + I->eraseFromParent(); + + return true; +} diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index 66a6e85df37..85a3f6f4a8b 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -122,6 +122,9 @@ public: } // end anonymous namespace void SystemZPassConfig::addIRPasses() { + if (getOptLevel() != CodeGenOpt::None) + addPass(createSystemZTDCPass()); + TargetPassConfig::addIRPasses(); } diff --git a/test/CodeGen/SystemZ/tdc-01.ll b/test/CodeGen/SystemZ/tdc-01.ll new file mode 100644 index 00000000000..052d895b798 --- /dev/null +++ b/test/CodeGen/SystemZ/tdc-01.ll @@ -0,0 +1,95 @@ +; Test the Test Data Class instruction, selected manually via the intrinsic. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @llvm.s390.tdc.f32(float, i64) +declare i32 @llvm.s390.tdc.f64(double, i64) +declare i32 @llvm.s390.tdc.f128(fp128, i64) + +; Check using as i32 - f32 +define i32 @f1(float %x) { +; CHECK-LABEL: f1 +; CHECK: tceb %f0, 123 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 + %res = call i32 @llvm.s390.tdc.f32(float %x, i64 123) + ret i32 %res +} + +; Check using as i32 - f64 +define i32 @f2(double %x) { +; CHECK-LABEL: f2 +; CHECK: tcdb %f0, 123 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 + %res = call i32 @llvm.s390.tdc.f64(double %x, i64 123) + ret i32 %res +} + +; Check using as i32 - f128 +define i32 @f3(fp128 %x) { +; CHECK-LABEL: f3 +; CHECK: ld %f0, 0(%r2) +; CHECK: ld %f2, 8(%r2) +; CHECK: tcxb %f0, 123 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 + %res = call i32 @llvm.s390.tdc.f128(fp128 %x, i64 123) + ret i32 %res +} + +declare void @g() + +; Check branch +define void @f4(float %x) { +; CHECK-LABEL: f4 +; CHECK: tceb %f0, 123 +; CHECK: jgl g +; CHECK: br %r14 + %res = call i32 @llvm.s390.tdc.f32(float %x, i64 123) + %cond = icmp ne i32 %res, 0 + br i1 %cond, label %call, label %exit + +call: + tail call void @g() + br label %exit + +exit: + ret void +} + +; Check branch negated +define void @f5(float %x) { +; CHECK-LABEL: f5 +; CHECK: tceb %f0, 123 +; CHECK: jge g +; CHECK: br %r14 + %res = call i32 @llvm.s390.tdc.f32(float %x, i64 123) + %cond = icmp eq i32 %res, 0 + br i1 %cond, label %call, label %exit + +call: + tail call void @g() + br label %exit + +exit: + ret void +} + +; Check non-const mask +define void @f6(float %x, i64 %y) { +; CHECK-LABEL: f6 +; CHECK: tceb %f0, 0(%r2) +; CHECK: jge g +; CHECK: br %r14 + %res = call i32 @llvm.s390.tdc.f32(float %x, i64 %y) + %cond = icmp eq i32 %res, 0 + br i1 %cond, label %call, label %exit + +call: + tail call void @g() + br label %exit + +exit: + ret void +} diff --git a/test/CodeGen/SystemZ/tdc-02.ll b/test/CodeGen/SystemZ/tdc-02.ll new file mode 
100644 index 00000000000..c0c4ac84349 --- /dev/null +++ b/test/CodeGen/SystemZ/tdc-02.ll @@ -0,0 +1,96 @@ +; Test the Test Data Class instruction logic operation folding. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @llvm.s390.tdc.f32(float, i64) +declare i32 @llvm.s390.tdc.f64(double, i64) +declare i32 @llvm.s390.tdc.f128(fp128, i64) + +; Check using or i1 +define i32 @f1(float %x) { +; CHECK-LABEL: f1 +; CHECK: tceb %f0, 7 +; CHECK-NEXT: ipm [[REG1:%r[0-9]+]] +; CHECK-NEXT: risbg %r2, [[REG1]], 63, 191, 36 + %a = call i32 @llvm.s390.tdc.f32(float %x, i64 3) + %b = call i32 @llvm.s390.tdc.f32(float %x, i64 6) + %a1 = icmp ne i32 %a, 0 + %b1 = icmp ne i32 %b, 0 + %res = or i1 %a1, %b1 + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Check using and i1 +define i32 @f2(double %x) { +; CHECK-LABEL: f2 +; CHECK: tcdb %f0, 2 +; CHECK-NEXT: ipm [[REG1:%r[0-9]+]] +; CHECK-NEXT: risbg %r2, [[REG1]], 63, 191, 36 + %a = call i32 @llvm.s390.tdc.f64(double %x, i64 3) + %b = call i32 @llvm.s390.tdc.f64(double %x, i64 6) + %a1 = icmp ne i32 %a, 0 + %b1 = icmp ne i32 %b, 0 + %res = and i1 %a1, %b1 + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Check using xor i1 +define i32 @f3(fp128 %x) { +; CHECK-LABEL: f3 +; CHECK: tcxb %f0, 5 +; CHECK-NEXT: ipm [[REG1:%r[0-9]+]] +; CHECK-NEXT: risbg %r2, [[REG1]], 63, 191, 36 + %a = call i32 @llvm.s390.tdc.f128(fp128 %x, i64 3) + %b = call i32 @llvm.s390.tdc.f128(fp128 %x, i64 6) + %a1 = icmp ne i32 %a, 0 + %b1 = icmp ne i32 %b, 0 + %res = xor i1 %a1, %b1 + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Check using xor i1 - negated test +define i32 @f4(fp128 %x) { +; CHECK-LABEL: f4 +; CHECK: tcxb %f0, 4090 +; CHECK-NEXT: ipm [[REG1:%r[0-9]+]] +; CHECK-NEXT: risbg %r2, [[REG1]], 63, 191, 36 + %a = call i32 @llvm.s390.tdc.f128(fp128 %x, i64 3) + %b = call i32 @llvm.s390.tdc.f128(fp128 %x, i64 6) + %a1 = icmp ne i32 %a, 0 + %b1 = icmp eq i32 %b, 0 + %res = xor i1 %a1, %b1 + %xres = zext i1 %res to 
i32 + ret i32 %xres +} + +; Check different first args +define i32 @f5(float %x, float %y) { +; CHECK-LABEL: f5 +; CHECK-NOT: tceb {{%f[0-9]+}}, 5 +; CHECK-DAG: tceb %f0, 3 +; CHECK-DAG: tceb %f2, 6 + %a = call i32 @llvm.s390.tdc.f32(float %x, i64 3) + %b = call i32 @llvm.s390.tdc.f32(float %y, i64 6) + %a1 = icmp ne i32 %a, 0 + %b1 = icmp ne i32 %b, 0 + %res = xor i1 %a1, %b1 + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Non-const mask (not supported) +define i32 @f6(float %x, i64 %y) { +; CHECK-LABEL: f6 +; CHECK-DAG: tceb %f0, 0(%r2) +; CHECK-DAG: tceb %f0, 6 + %a = call i32 @llvm.s390.tdc.f32(float %x, i64 %y) + %b = call i32 @llvm.s390.tdc.f32(float %x, i64 6) + %a1 = icmp ne i32 %a, 0 + %b1 = icmp ne i32 %b, 0 + %res = xor i1 %a1, %b1 + %xres = zext i1 %res to i32 + ret i32 %xres +} diff --git a/test/CodeGen/SystemZ/tdc-03.ll b/test/CodeGen/SystemZ/tdc-03.ll new file mode 100644 index 00000000000..95708f1effc --- /dev/null +++ b/test/CodeGen/SystemZ/tdc-03.ll @@ -0,0 +1,139 @@ +; Test the Test Data Class instruction logic operation conversion from +; compares. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare float @llvm.fabs.f32(float) +declare double @llvm.fabs.f64(double) +declare fp128 @llvm.fabs.f128(fp128) + +; Compare with 0 (unworthy) +define i32 @f1(float %x) { +; CHECK-LABEL: f1 +; CHECK-NOT: tceb +; CHECK: ltebr {{%f[0-9]+}}, %f0 +; CHECK-NOT: tceb + %res = fcmp ugt float %x, 0.0 + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Compare fabs with 0 (unworthy) +define i32 @f2(float %x) { +; CHECK-LABEL: f2 +; CHECK-NOT: tceb +; CHECK: lpebr {{%f[0-9]+}}, %f0 +; CHECK-NOT: tceb + %y = call float @llvm.fabs.f32(float %x) + %res = fcmp ugt float %y, 0.0 + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Compare with inf (unworthy) +define i32 @f3(float %x) { +; CHECK-LABEL: f3 +; CHECK-NOT: tceb +; CHECK: ceb %f0, 0(%r{{[0-9]+}}) +; CHECK-NOT: tceb + %res = fcmp ult float %x, 0x7ff0000000000000 + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Compare fabs with inf +define i32 @f4(float %x) { +; CHECK-LABEL: f4 +; CHECK: tceb %f0, 4047 + %y = call float @llvm.fabs.f32(float %x) + %res = fcmp ult float %y, 0x7ff0000000000000 + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Compare with minnorm (unworthy) +define i32 @f5(float %x) { +; CHECK-LABEL: f5 +; CHECK-NOT: tceb +; CHECK: ceb %f0, 0(%r{{[0-9]+}}) +; CHECK-NOT: tceb + %res = fcmp ult float %x, 0x3810000000000000 + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Compare fabs with minnorm +define i32 @f6(float %x) { +; CHECK-LABEL: f6 +; CHECK: tceb %f0, 3279 + %y = call float @llvm.fabs.f32(float %x) + %res = fcmp ult float %y, 0x3810000000000000 + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Compare fabs with minnorm, unsupported condition +define i32 @f7(float %x) { +; CHECK-LABEL: f7 +; CHECK-NOT: tceb +; CHECK: lpdfr [[REG:%f[0-9]+]], %f0 +; CHECK: ceb [[REG]], 0(%r{{[0-9]+}}) +; CHECK-NOT: tceb + %y = call float @llvm.fabs.f32(float %x) + %res = fcmp ugt float %y, 0x3810000000000000 + %xres = zext i1 %res to 
i32 + ret i32 %xres +} + +; Compare fabs with unsupported constant +define i32 @f8(float %x) { +; CHECK-LABEL: f8 +; CHECK-NOT: tceb +; CHECK: lpdfr [[REG:%f[0-9]+]], %f0 +; CHECK: ceb [[REG]], 0(%r{{[0-9]+}}) +; CHECK-NOT: tceb + %y = call float @llvm.fabs.f32(float %x) + %res = fcmp ult float %y, 0x3ff0000000000000 + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Compare fabs with minnorm - double +define i32 @f9(double %x) { +; CHECK-LABEL: f9 +; CHECK: tcdb %f0, 3279 + %y = call double @llvm.fabs.f64(double %x) + %res = fcmp ult double %y, 0x0010000000000000 + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Compare fabs with minnorm - long double +define i32 @f10(fp128 %x) { +; CHECK-LABEL: f10 +; CHECK: tcxb %f0, 3279 + %y = call fp128 @llvm.fabs.f128(fp128 %x) + %res = fcmp ult fp128 %y, 0xL00000000000000000001000000000000 + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Compare fabs for one with inf - clang's isfinite +define i32 @f11(double %x) { +; CHECK-LABEL: f11 +; CHECK: tcdb %f0, 4032 + %y = call double @llvm.fabs.f64(double %x) + %res = fcmp one double %y, 0x7ff0000000000000 + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Compare fabs for oeq with inf - clang's isinf +define i32 @f12(double %x) { +; CHECK-LABEL: f12 +; CHECK: tcdb %f0, 48 + %y = call double @llvm.fabs.f64(double %x) + %res = fcmp oeq double %y, 0x7ff0000000000000 + %xres = zext i1 %res to i32 + ret i32 %xres +} diff --git a/test/CodeGen/SystemZ/tdc-04.ll b/test/CodeGen/SystemZ/tdc-04.ll new file mode 100644 index 00000000000..929285b0ba8 --- /dev/null +++ b/test/CodeGen/SystemZ/tdc-04.ll @@ -0,0 +1,85 @@ +; Test the Test Data Class instruction logic operation conversion from +; signbit extraction. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; + +; Extract sign bit. 
+define i32 @f1(float %x) { +; CHECK-LABEL: f1 +; CHECK: tceb %f0, 1365 + %cast = bitcast float %x to i32 + %res = icmp slt i32 %cast, 0 + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Extract negated sign bit. +define i32 @f2(float %x) { +; CHECK-LABEL: f2 +; CHECK: tceb %f0, 2730 + %cast = bitcast float %x to i32 + %res = icmp sgt i32 %cast, -1 + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Extract sign bit. +define i32 @f3(double %x) { +; CHECK-LABEL: f3 +; CHECK: tcdb %f0, 1365 + %cast = bitcast double %x to i64 + %res = icmp slt i64 %cast, 0 + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Extract negated sign bit. +define i32 @f4(double %x) { +; CHECK-LABEL: f4 +; CHECK: tcdb %f0, 2730 + %cast = bitcast double %x to i64 + %res = icmp sgt i64 %cast, -1 + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Extract sign bit. +define i32 @f5(fp128 %x) { +; CHECK-LABEL: f5 +; CHECK: tcxb %f0, 1365 + %cast = bitcast fp128 %x to i128 + %res = icmp slt i128 %cast, 0 + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Extract negated sign bit. +define i32 @f6(fp128 %x) { +; CHECK-LABEL: f6 +; CHECK: tcxb %f0, 2730 + %cast = bitcast fp128 %x to i128 + %res = icmp sgt i128 %cast, -1 + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Wrong const. +define i32 @f7(float %x) { +; CHECK-LABEL: f7 +; CHECK-NOT: tceb + %cast = bitcast float %x to i32 + %res = icmp slt i32 %cast, -1 + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Wrong pred. 
+define i32 @f8(float %x) { +; CHECK-LABEL: f8 +; CHECK-NOT: tceb + %cast = bitcast float %x to i32 + %res = icmp eq i32 %cast, 0 + %xres = zext i1 %res to i32 + ret i32 %xres +} diff --git a/test/CodeGen/SystemZ/tdc-05.ll b/test/CodeGen/SystemZ/tdc-05.ll new file mode 100644 index 00000000000..c639a9b7b47 --- /dev/null +++ b/test/CodeGen/SystemZ/tdc-05.ll @@ -0,0 +1,97 @@ +; Test the Test Data Class instruction logic operation conversion from +; compares, combined with signbit or other compares to ensure worthiness. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; + +declare float @llvm.fabs.f32(float) +declare double @llvm.fabs.f64(double) +declare fp128 @llvm.fabs.f128(fp128) + +; Compare with 0, extract sign bit +define i32 @f1(float %x) { +; CHECK-LABEL: f1 +; CHECK: tceb %f0, 2047 + %cast = bitcast float %x to i32 + %sign = icmp slt i32 %cast, 0 + %fcmp = fcmp ugt float %x, 0.0 + %res = or i1 %sign, %fcmp + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Compare with inf, extract negated sign bit +define i32 @f2(float %x) { +; CHECK-LABEL: f2 +; CHECK: tceb %f0, 2698 + %cast = bitcast float %x to i32 + %sign = icmp sgt i32 %cast, -1 + %fcmp = fcmp ult float %x, 0x7ff0000000000000 + %res = and i1 %sign, %fcmp + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Compare with minnorm, extract negated sign bit +define i32 @f3(float %x) { +; CHECK-LABEL: f3 +; CHECK: tceb %f0, 2176 + %cast = bitcast float %x to i32 + %sign = icmp sgt i32 %cast, -1 + %fcmp = fcmp olt float %x, 0x3810000000000000 + %res = and i1 %sign, %fcmp + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Test float isnormal, from clang. 
+define i32 @f4(float %x) { +; CHECK-LABEL: f4 +; CHECK: tceb %f0, 768 + %y = call float @llvm.fabs.f32(float %x) + %ord = fcmp ord float %x, 0.0 + %a = fcmp ult float %y, 0x7ff0000000000000 + %b = fcmp uge float %y, 0x3810000000000000 + %c = and i1 %a, %b + %res = and i1 %ord, %c + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Check for negative 0. +define i32 @f5(float %x) { +; CHECK-LABEL: f5 +; CHECK: tceb %f0, 1024 + %cast = bitcast float %x to i32 + %sign = icmp slt i32 %cast, 0 + %fcmp = fcmp oeq float %x, 0.0 + %res = and i1 %sign, %fcmp + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Test isnormal, from clang. +define i32 @f6(double %x) { +; CHECK-LABEL: f6 +; CHECK: tcdb %f0, 768 + %y = call double @llvm.fabs.f64(double %x) + %ord = fcmp ord double %x, 0.0 + %a = fcmp ult double %y, 0x7ff0000000000000 + %b = fcmp uge double %y, 0x0010000000000000 + %c = and i1 %ord, %a + %res = and i1 %b, %c + %xres = zext i1 %res to i32 + ret i32 %xres +} + +; Test isinf || isnan, from clang. +define i32 @f7(double %x) { +; CHECK-LABEL: f7 +; CHECK: tcdb %f0, 63 + %y = call double @llvm.fabs.f64(double %x) + %a = fcmp oeq double %y, 0x7ff0000000000000 + %b = fcmp uno double %x, 0.0 + %res = or i1 %a, %b + %xres = zext i1 %res to i32 + ret i32 %xres +} diff --git a/test/CodeGen/SystemZ/tdc-06.ll b/test/CodeGen/SystemZ/tdc-06.ll new file mode 100644 index 00000000000..11fb1e2916e --- /dev/null +++ b/test/CodeGen/SystemZ/tdc-06.ll @@ -0,0 +1,48 @@ +; Test the Test Data Class instruction, as used by fpclassify. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; + +declare float @llvm.fabs.f32(float) +declare double @llvm.fabs.f64(double) +declare fp128 @llvm.fabs.f128(fp128) + +define i32 @fpc(double %x) { +entry: +; CHECK-LABEL: fpc +; CHECK: lhi %r2, 5 +; CHECK: ltdbr %f0, %f0 +; CHECK: je [[RET:.L.*]] + %testeq = fcmp oeq double %x, 0.000000e+00 + br i1 %testeq, label %ret, label %nonzero + +nonzero: +; CHECK: lhi %r2, 1 +; CHECK: cdbr %f0, %f0 +; CHECK: jo [[RET]] + %testnan = fcmp uno double %x, 0.000000e+00 + br i1 %testnan, label %ret, label %nonzeroord + +nonzeroord: +; CHECK: lhi %r2, 2 +; CHECK: tcdb %f0, 48 +; CHECK: jl [[RET]] + %abs = tail call double @llvm.fabs.f64(double %x) + %testinf = fcmp oeq double %abs, 0x7FF0000000000000 + br i1 %testinf, label %ret, label %finite + +finite: +; CHECK: lhi %r2, 3 +; CHECK: tcdb %f0, 831 +; CHECK: blr %r14 +; CHECK: lhi %r2, 4 + %testnormal = fcmp uge double %abs, 0x10000000000000 + %finres = select i1 %testnormal, i32 3, i32 4 + br label %ret + +ret: +; CHECK: [[RET]]: +; CHECK: br %r14 + %res = phi i32 [ 5, %entry ], [ 1, %nonzero ], [ 2, %nonzeroord ], [ %finres, %finite ] + ret i32 %res +}