From fe0580f6c09b3f82ce58907b95c477eb7ff44a94 Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Sun, 7 Mar 2021 16:30:54 -0800 Subject: [PATCH] [M68k][TableGen](1/8) TableGen related changes - Add a new TableGen backend: CodeBeads - Add support to generate logical operand information The first item is currently a workaround for M68k's (complex) instruction encoding. A typical architecture, especially a CISC one like X86, normally uses `MCInstrDesc::TSFlags` to carry instruction encoding info. However, in the early days of M68k backend development, we found it difficult to fit every possible encoding into the 64-bit `MCInstrDesc::TSFlags`. Therefore CodeBeads was invented to provide an alternative, arbitrary-length container for instruction encoding info. In the long term, however, we are inclined not to use a new TG backend for less common patterns like the ones we encountered in M68k. A bug has been created to host the discussion on migrating from CodeBeads to a more concise solution: https://bugs.llvm.org/show_bug.cgi?id=48792 The second item serves a similar purpose. It creates utility functions that tell you the index of a `MachineOperand` in a `MachineInst` given a logical operand index. In normal cases a logical operand is the same as a `MachineOperand`, but for operands using a complex addressing mode a logical operand might consist of multiple `MachineOperand`s. The TableGen-ed `getLogicalOperandIdx`, for instance, gives you the mapping between these two concepts. Nevertheless, we hope to remove this feature in the future if possible, since it is not really useful for the targets LLVM supports now either. 
Authors: myhsu, m4yers, glaubitz Differential Revision: https://reviews.llvm.org/D88385 --- include/llvm/Target/Target.td | 8 ++ utils/TableGen/CMakeLists.txt | 1 + utils/TableGen/CodeBeadsGen.cpp | 137 ++++++++++++++++++++ utils/TableGen/InstrInfoEmitter.cpp | 191 ++++++++++++++++++++++++++++ utils/TableGen/TableGen.cpp | 6 + utils/TableGen/TableGenBackends.h | 1 + 6 files changed, 344 insertions(+) create mode 100644 utils/TableGen/CodeBeadsGen.cpp diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index 1f0f8975231..c16035f0618 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -639,6 +639,14 @@ class Instruction : InstructionEncoding { /// by TableGen. bit UseNamedOperandTable = false; + /// Should generate helper functions that help you to map a logical operand's + /// index to the underlying MIOperand's index. + /// In most architectures logical operand indices are equal to + /// MIOperand indices, but for some CISC architectures, a logical operand + /// might consist of multiple MIOperands (e.g. a logical operand that + /// uses a complex addressing mode). + bit UseLogicalOperandMappings = false; + /// Should FastISel ignore this instruction. For certain ISAs, they have /// instructions which map to the same ISD Opcode, value type operands and /// instruction selection predicates. 
FastISel cannot handle such cases, but diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt index 8673a257246..9e918852b1c 100644 --- a/utils/TableGen/CMakeLists.txt +++ b/utils/TableGen/CMakeLists.txt @@ -8,6 +8,7 @@ add_tablegen(llvm-tblgen LLVM AsmWriterInst.cpp Attributes.cpp CallingConvEmitter.cpp + CodeBeadsGen.cpp CodeEmitterGen.cpp CodeGenDAGPatterns.cpp CodeGenHwModes.cpp diff --git a/utils/TableGen/CodeBeadsGen.cpp b/utils/TableGen/CodeBeadsGen.cpp new file mode 100644 index 00000000000..18a6d6d19eb --- /dev/null +++ b/utils/TableGen/CodeBeadsGen.cpp @@ -0,0 +1,137 @@ +//===---------- CodeBeadsGen.cpp - Code Beads Generator -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// CodeBeads are data fields carrying auxiliary information for instructions. +// +// Under the hood it's simply implemented by a `bits` field (with arbitrary +// length) in each TG instruction description, where this TG backend will +// generate a helper function to access it. +// +// This is especially useful for expressing variable length encoding +// instructions and complex addressing modes. Since in those cases each +// instruction is usually associated with large amount of information like +// addressing mode details used on a specific operand. Instead of retreating to +// ad-hoc methods to figure out these information when encoding an instruction, +// CodeBeads provide a clean table for the instruction encoder to lookup. 
+//===----------------------------------------------------------------------===// + +#include "CodeGenTarget.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/TableGen/Error.h" +#include "llvm/TableGen/Record.h" +#include "llvm/TableGen/TableGenBackend.h" +#include +#include +#include +using namespace llvm; + +namespace { + +class CodeBeadsGen { + RecordKeeper &Records; + +public: + CodeBeadsGen(RecordKeeper &R) : Records(R) {} + void run(raw_ostream &OS); +}; + +void CodeBeadsGen::run(raw_ostream &OS) { + CodeGenTarget Target(Records); + std::vector Insts = Records.getAllDerivedDefinitions("Instruction"); + + // For little-endian instruction bit encodings, reverse the bit order + Target.reverseBitsForLittleEndianEncoding(); + + ArrayRef NumberedInstructions = + Target.getInstructionsByEnumValue(); + + // Emit function declaration + OS << "const uint8_t *llvm::" << Target.getInstNamespace(); + OS << "::getMCInstrBeads(unsigned Opcode) {\n"; + + // First, get the maximum bit length among all beads. 
And do some + // simple validation + unsigned MaxBitLength = 0; + + for (const CodeGenInstruction *CGI : NumberedInstructions) { + Record *R = CGI->TheDef; + if (!R->getValue("Beads")) + continue; + + BitsInit *BI = R->getValueAsBitsInit("Beads"); + if (!BI->isComplete()) { + PrintFatalError(R->getLoc(), "Record `" + R->getName() + + "', bit field 'Beads' is not complete"); + } + + MaxBitLength = std::max(MaxBitLength, BI->getNumBits()); + } + + // Number of bytes + unsigned Parts = MaxBitLength / 8; + + // Emit instruction base values + OS << " static const uint8_t InstBits[][" << Parts << "] = {\n"; + for (const CodeGenInstruction *CGI : NumberedInstructions) { + Record *R = CGI->TheDef; + + if (R->getValueAsString("Namespace") == "TargetOpcode" || + !R->getValue("Beads")) { + OS << "\t{ 0x0 },\t// "; + if (R->getValueAsBit("isPseudo")) + OS << "(Pseudo) "; + OS << R->getName() << "\n"; + continue; + } + + BitsInit *BI = R->getValueAsBitsInit("Beads"); + + // Convert to byte array: + // [dcba] -> [a][b][c][d] + OS << "\t{"; + for (unsigned p = 0; p < Parts; ++p) { + unsigned Right = 8 * p; + unsigned Left = Right + 8; + + uint8_t Value = 0; + for (unsigned i = Right; i != Left; ++i) { + unsigned Shift = i % 8; + if (auto *B = dyn_cast(BI->getBit(i))) { + Value |= (static_cast(B->getValue()) << Shift); + } else { + PrintFatalError(R->getLoc(), "Record `" + R->getName() + + "', bit 'Beads[" + Twine(i) + + "]' is not defined"); + } + } + + if (p) + OS << ','; + OS << " 0x"; + OS.write_hex(Value); + OS << ""; + } + OS << " }," << '\t' << "// " << R->getName() << "\n"; + } + OS << "\t{ 0x0 }\n };\n"; + + // Emit initial function code + OS << " return InstBits[Opcode];\n" + << "}\n\n"; +} + +} // End anonymous namespace + +namespace llvm { + +void EmitCodeBeads(RecordKeeper &RK, raw_ostream &OS) { + emitSourceFileHeader("Machine Code Beads", OS); + CodeBeadsGen(RK).run(OS); +} + +} // namespace llvm diff --git a/utils/TableGen/InstrInfoEmitter.cpp 
b/utils/TableGen/InstrInfoEmitter.cpp index 9ff385faec5..13914c0f879 100644 --- a/utils/TableGen/InstrInfoEmitter.cpp +++ b/utils/TableGen/InstrInfoEmitter.cpp @@ -19,6 +19,7 @@ #include "SequenceToOffsetTable.h" #include "TableGenBackends.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Casting.h" #include "llvm/Support/raw_ostream.h" @@ -27,6 +28,7 @@ #include "llvm/TableGen/TableGenBackend.h" #include #include +#include #include #include #include @@ -87,6 +89,13 @@ private: void emitOperandNameMappings(raw_ostream &OS, const CodeGenTarget &Target, ArrayRef NumberedInstructions); + void emitLogicalOperandSizeMappings( + raw_ostream &OS, StringRef Namespace, + ArrayRef NumberedInstructions); + void emitLogicalOperandTypeMappings( + raw_ostream &OS, StringRef Namespace, + ArrayRef NumberedInstructions); + // Operand information. void EmitOperandInfo(raw_ostream &OS, OperandInfoMapTy &OperandInfoIDs); std::vector GetOperandInfo(const CodeGenInstruction &Inst); @@ -442,6 +451,182 @@ void InstrInfoEmitter::emitOperandTypeMappings( OS << "#endif // GET_INSTRINFO_OPERAND_TYPE\n\n"; } +void InstrInfoEmitter::emitLogicalOperandSizeMappings( + raw_ostream &OS, StringRef Namespace, + ArrayRef NumberedInstructions) { + std::map, unsigned> LogicalOpSizeMap; + + std::map> InstMap; + + size_t LogicalOpListSize = 0U; + std::vector LogicalOpList; + for (const auto *Inst : NumberedInstructions) { + if (!Inst->TheDef->getValueAsBit("UseLogicalOperandMappings")) + continue; + + LogicalOpList.clear(); + llvm::transform(Inst->Operands, std::back_inserter(LogicalOpList), + [](const CGIOperandList::OperandInfo &Op) -> unsigned { + auto *MIOI = Op.MIOperandInfo; + if (!MIOI || MIOI->getNumArgs() == 0) + return 1; + return MIOI->getNumArgs(); + }); + LogicalOpListSize = std::max(LogicalOpList.size(), LogicalOpListSize); + + auto I = + LogicalOpSizeMap.insert({LogicalOpList, LogicalOpSizeMap.size()}).first; + 
InstMap[I->second].push_back( + (Namespace + "::" + Inst->TheDef->getName()).str()); + } + + OS << "#ifdef GET_INSTRINFO_LOGICAL_OPERAND_SIZE_MAP\n"; + OS << "#undef GET_INSTRINFO_LOGICAL_OPERAND_SIZE_MAP\n"; + OS << "namespace llvm {\n"; + OS << "namespace " << Namespace << " {\n"; + OS << "LLVM_READONLY static unsigned\n"; + OS << "getLogicalOperandSize(uint16_t Opcode, uint16_t LogicalOpIdx) {\n"; + if (!InstMap.empty()) { + std::vector *> LogicalOpSizeList( + LogicalOpSizeMap.size()); + for (auto &P : LogicalOpSizeMap) { + LogicalOpSizeList[P.second] = &P.first; + } + OS << " static const unsigned SizeMap[][" << LogicalOpListSize + << "] = {\n"; + for (int r = 0, rs = LogicalOpSizeList.size(); r < rs; ++r) { + const auto &Row = *LogicalOpSizeList[r]; + OS << " {"; + int i; + for (i = 0; i < static_cast(Row.size()); ++i) { + OS << Row[i] << ", "; + } + for (; i < static_cast(LogicalOpListSize); ++i) { + OS << "0, "; + } + OS << "}, "; + OS << "\n"; + } + OS << " };\n"; + + OS << " switch (Opcode) {\n"; + OS << " default: return LogicalOpIdx;\n"; + for (auto &P : InstMap) { + auto OpMapIdx = P.first; + const auto &Insts = P.second; + for (const auto &Inst : Insts) { + OS << " case " << Inst << ":\n"; + } + OS << " return SizeMap[" << OpMapIdx << "][LogicalOpIdx];\n"; + } + OS << " }\n"; + } else { + OS << " return LogicalOpIdx;\n"; + } + OS << "}\n"; + + OS << "LLVM_READONLY static inline unsigned\n"; + OS << "getLogicalOperandIdx(uint16_t Opcode, uint16_t LogicalOpIdx) {\n"; + OS << " auto S = 0U;\n"; + OS << " for (auto i = 0U; i < LogicalOpIdx; ++i)\n"; + OS << " S += getLogicalOperandSize(Opcode, i);\n"; + OS << " return S;\n"; + OS << "}\n"; + + OS << "} // end namespace " << Namespace << "\n"; + OS << "} // end namespace llvm\n"; + OS << "#endif // GET_INSTRINFO_LOGICAL_OPERAND_SIZE_MAP\n\n"; +} + +void InstrInfoEmitter::emitLogicalOperandTypeMappings( + raw_ostream &OS, StringRef Namespace, + ArrayRef NumberedInstructions) { + std::map, unsigned> 
LogicalOpTypeMap; + + std::map> InstMap; + + size_t OpTypeListSize = 0U; + std::vector LogicalOpTypeList; + for (const auto *Inst : NumberedInstructions) { + if (!Inst->TheDef->getValueAsBit("UseLogicalOperandMappings")) + continue; + + LogicalOpTypeList.clear(); + for (const auto &Op : Inst->Operands) { + auto *OpR = Op.Rec; + if ((OpR->isSubClassOf("Operand") || + OpR->isSubClassOf("RegisterOperand") || + OpR->isSubClassOf("RegisterClass")) && + !OpR->isAnonymous()) { + LogicalOpTypeList.push_back( + (Namespace + "::OpTypes::" + Op.Rec->getName()).str()); + } else { + LogicalOpTypeList.push_back("-1"); + } + } + OpTypeListSize = std::max(LogicalOpTypeList.size(), OpTypeListSize); + + auto I = + LogicalOpTypeMap.insert({LogicalOpTypeList, LogicalOpTypeMap.size()}) + .first; + InstMap[I->second].push_back( + (Namespace + "::" + Inst->TheDef->getName()).str()); + } + + OS << "#ifdef GET_INSTRINFO_LOGICAL_OPERAND_TYPE_MAP\n"; + OS << "#undef GET_INSTRINFO_LOGICAL_OPERAND_TYPE_MAP\n"; + OS << "namespace llvm {\n"; + OS << "namespace " << Namespace << " {\n"; + OS << "LLVM_READONLY static int\n"; + OS << "getLogicalOperandType(uint16_t Opcode, uint16_t LogicalOpIdx) {\n"; + if (!InstMap.empty()) { + std::vector *> LogicalOpTypeList( + LogicalOpTypeMap.size()); + for (auto &P : LogicalOpTypeMap) { + LogicalOpTypeList[P.second] = &P.first; + } + OS << " static const int TypeMap[][" << OpTypeListSize << "] = {\n"; + for (int r = 0, rs = LogicalOpTypeList.size(); r < rs; ++r) { + const auto &Row = *LogicalOpTypeList[r]; + OS << " {"; + int i, s = Row.size(); + for (i = 0; i < s; ++i) { + if (i > 0) + OS << ", "; + OS << Row[i]; + } + for (; i < static_cast(OpTypeListSize); ++i) { + if (i > 0) + OS << ", "; + OS << "-1"; + } + OS << "}"; + if (r != rs - 1) + OS << ","; + OS << "\n"; + } + OS << " };\n"; + + OS << " switch (Opcode) {\n"; + OS << " default: return -1;\n"; + for (auto &P : InstMap) { + auto OpMapIdx = P.first; + const auto &Insts = P.second; + for (const auto 
&Inst : Insts) { + OS << " case " << Inst << ":\n"; + } + OS << " return TypeMap[" << OpMapIdx << "][LogicalOpIdx];\n"; + } + OS << " }\n"; + } else { + OS << " return -1;\n"; + } + OS << "}\n"; + OS << "} // end namespace " << Namespace << "\n"; + OS << "} // end namespace llvm\n"; + OS << "#endif // GET_INSTRINFO_LOGICAL_OPERAND_TYPE_MAP\n\n"; +} + void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS, StringRef TargetName) { RecVec TIIPredicates = Records.getAllDerivedDefinitions("TIIPredicate"); @@ -726,6 +911,12 @@ void InstrInfoEmitter::run(raw_ostream &OS) { Records.startTimer("Emit operand type mappings"); emitOperandTypeMappings(OS, Target, NumberedInstructions); + Records.startTimer("Emit logical operand size mappings"); + emitLogicalOperandSizeMappings(OS, TargetName, NumberedInstructions); + + Records.startTimer("Emit logical operand type mappings"); + emitLogicalOperandTypeMappings(OS, TargetName, NumberedInstructions); + Records.startTimer("Emit helper methods"); emitMCIIHelperMethods(OS, TargetName); } diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp index 6d851da3473..a4b16179cdf 100644 --- a/utils/TableGen/TableGen.cpp +++ b/utils/TableGen/TableGen.cpp @@ -25,6 +25,7 @@ enum ActionType { NullBackend, DumpJSON, GenEmitter, + GenCodeBeads, GenRegisterInfo, GenInstrInfo, GenInstrDocs, @@ -81,6 +82,8 @@ cl::opt Action( clEnumValN(DumpJSON, "dump-json", "Dump all records as machine-readable JSON"), clEnumValN(GenEmitter, "gen-emitter", "Generate machine code emitter"), + clEnumValN(GenCodeBeads, "gen-code-beads", + "Generate machine code beads"), clEnumValN(GenRegisterInfo, "gen-register-info", "Generate registers and register classes info"), clEnumValN(GenInstrInfo, "gen-instr-info", @@ -161,6 +164,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenEmitter: EmitCodeEmitter(Records, OS); break; + case GenCodeBeads: + EmitCodeBeads(Records, OS); + break; case GenRegisterInfo: 
EmitRegisterInfo(Records, OS); break; diff --git a/utils/TableGen/TableGenBackends.h b/utils/TableGen/TableGenBackends.h index 92204f39f8f..ed0057c697c 100644 --- a/utils/TableGen/TableGenBackends.h +++ b/utils/TableGen/TableGenBackends.h @@ -67,6 +67,7 @@ void EmitAsmMatcher(RecordKeeper &RK, raw_ostream &OS); void EmitAsmWriter(RecordKeeper &RK, raw_ostream &OS); void EmitCallingConv(RecordKeeper &RK, raw_ostream &OS); void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS); +void EmitCodeBeads(RecordKeeper &RK, raw_ostream &OS); void EmitDAGISel(RecordKeeper &RK, raw_ostream &OS); void EmitDFAPacketizer(RecordKeeper &RK, raw_ostream &OS); void EmitDisassembler(RecordKeeper &RK, raw_ostream &OS);