From efcb84688f6747417810f53cbc33b580a0c4108d Mon Sep 17 00:00:00 2001 From: Che-Liang Chiou Date: Tue, 4 Oct 2011 15:14:51 +0000 Subject: [PATCH] tblgen: add preprocessor as a separate mode This patch adds a preprocessor that can expand nested for-loops for saving some copy-n-paste in *.td files. The preprocessor is not yet integrated with TGParser, and so it has no direct effect on *.td inputs. However, you may preprocess a td input (and only preprocess it). To test the preprocessor, type: tblgen -E -o $@ $< llvm-svn: 141079 --- lib/TableGen/CMakeLists.txt | 1 + lib/TableGen/Main.cpp | 23 ++ lib/TableGen/TGPreprocessor.cpp | 603 ++++++++++++++++++++++++++++++++ lib/TableGen/TGPreprocessor.h | 52 +++ 4 files changed, 679 insertions(+) create mode 100644 lib/TableGen/TGPreprocessor.cpp create mode 100644 lib/TableGen/TGPreprocessor.h diff --git a/lib/TableGen/CMakeLists.txt b/lib/TableGen/CMakeLists.txt index 0db41346911..507c915ae61 100644 --- a/lib/TableGen/CMakeLists.txt +++ b/lib/TableGen/CMakeLists.txt @@ -9,6 +9,7 @@ add_llvm_library(LLVMTableGen TableGenBackend.cpp TGLexer.cpp TGParser.cpp + TGPreprocessor.cpp ) add_llvm_library_dependencies(LLVMTableGen diff --git a/lib/TableGen/Main.cpp b/lib/TableGen/Main.cpp index 01bc55e9898..e15c09fca06 100644 --- a/lib/TableGen/Main.cpp +++ b/lib/TableGen/Main.cpp @@ -16,6 +16,7 @@ //===----------------------------------------------------------------------===// #include "TGParser.h" +#include "TGPreprocessor.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/MemoryBuffer.h" @@ -43,6 +44,12 @@ namespace { cl::list IncludeDirs("I", cl::desc("Directory of include files"), cl::value_desc("directory"), cl::Prefix); + + cl::opt + PreprocessOnly("E", + cl::desc("Stop after the preprocessing stage; " + "This is work in progress and has no effect yet"), + cl::init(false)); } namespace llvm { @@ -67,6 +74,22 @@ int TableGenMain(char *argv0, TableGenAction &Action) { // it later. 
SrcMgr.setIncludeDirs(IncludeDirs); + // TODO(clchiou): Integrate preprocessor into TGParser + if (PreprocessOnly) { + std::string Error; + tool_output_file Out(OutputFilename.c_str(), Error); + if (!Error.empty()) { + errs() << argv0 << ": error opening " << OutputFilename + << ":" << Error << "\n"; + return 1; + } + TGPreprocessor Preprocessor(SrcMgr, Out); + if (Preprocessor.PreprocessFile()) + return 1; + Out.keep(); + return 0; + } + TGParser Parser(SrcMgr, Records); if (Parser.ParseFile()) diff --git a/lib/TableGen/TGPreprocessor.cpp b/lib/TableGen/TGPreprocessor.cpp new file mode 100644 index 00000000000..ac89e880086 --- /dev/null +++ b/lib/TableGen/TGPreprocessor.cpp @@ -0,0 +1,603 @@ +//===- TGPreprocessor.cpp - Preprocessor for TableGen ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implement the Preprocessor for TableGen. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "TGPreprocessor.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/TableGen/Error.h"
+#include <map>
+#include <string>
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+namespace llvm {
+/// Binds each loop-index variable name to the text substituted for it.
+typedef std::map<std::string, std::string> TGPPEnvironment;
+
+/// Kinds of token produced by TGPPLexer::NextToken.
+enum TGPPTokenKind {
+  tgpptoken_symbol,   // macro-statement token, or the name inside #name#
+  tgpptoken_literal,  // text that is copied to the output unchanged
+  tgpptoken_newline,  // end of a macro statement line
+  tgpptoken_error,    // lexing failed; a diagnostic was already printed
+  tgpptoken_end       // end of the input buffer
+};
+
+/// Kinds of node stored in a TGPPRecord.
+enum TGPPRecordKind {
+  tgpprecord_for,
+  tgpprecord_variable,
+  tgpprecord_literal
+};
+
+/// Kinds of value range a loop index can iterate over.
+enum TGPPRangeKind {
+  tgpprange_list,
+  tgpprange_sequence
+};
+
+// The <cctype> classifiers are undefined for negative arguments other than
+// EOF, so a plain char has to be widened through unsigned char first.
+
+/// True if C may start an identifier: [_A-Za-z].
+static bool TGPPIsIdStart(char C) {
+  return C == '_' || isalpha(static_cast<unsigned char>(C));
+}
+
+/// True if C may continue an identifier: [_A-Za-z0-9].
+static bool TGPPIsIdChar(char C) {
+  return C == '_' || isalnum(static_cast<unsigned char>(C));
+}
+
+/// True if C is a decimal digit.
+static bool TGPPIsDigit(char C) {
+  return isdigit(static_cast<unsigned char>(C)) != 0;
+}
+
+/// True if the token is a symbol consisting of exactly the character Symbol.
+bool MatchSymbol(TGPPTokenKind Kind,
+                 const char *BeginOfToken, const char *EndOfToken,
+                 char Symbol);
+
+/// True if the token is a symbol whose text equals the C string Symbol.
+bool MatchSymbol(TGPPTokenKind Kind,
+                 const char *BeginOfToken, const char *EndOfToken,
+                 const char *Symbol);
+
+/// True if the token is a symbol made only of [_A-Za-z0-9] characters.
+bool MatchIdNum(TGPPTokenKind Kind,
+                const char *BeginOfToken, const char *EndOfToken);
+
+/// True if the token is a symbol of the form [_A-Za-z][_A-Za-z0-9]*.
+bool MatchIdentifier(TGPPTokenKind Kind,
+                     const char *BeginOfToken, const char *EndOfToken);
+
+/// True if the whole token is a decimal integer; its value is stored in *Val.
+bool MatchNumber(TGPPTokenKind Kind,
+                 const char *BeginOfToken, const char *EndOfToken,
+                 long int *Val);
+
+/// Splits buffer 0 of a SourceMgr into preprocessor tokens.
+class TGPPLexer {
+  const MemoryBuffer *CurBuf;
+  const char *CurPtr;
+  // IsInsideMacroStatement is set while lexing the rest of a #for / #end
+  // line; WasEndOfLine records that the previous token ended a line.
+  bool IsInsideMacroStatement, WasEndOfLine;
+
+  /// True if Ptr is the NUL that sits at the end of the buffer.
+  bool IsEndOfBuffer(const char *Ptr) const {
+    return (!*Ptr && Ptr == CurBuf->getBufferEnd());
+  }
+
+  /// True if CurPtr is at a line terminator. For a terminator made of two
+  /// different characters ("\r\n" or "\n\r") CurPtr is moved to the second
+  /// one, so a single ++CurPtr by the caller steps past the whole thing.
+  bool IsNewLine() {
+    if (*CurPtr == '\r' || *CurPtr == '\n') {
+      if ((CurPtr[1] == '\r' || CurPtr[1] == '\n') && CurPtr[0] != CurPtr[1])
+        ++CurPtr;
+      return true;
+    }
+    return false;
+  }
+
+  /// True if Ptr, after skipping blanks and tabs, starts with Prefix.
+  bool MatchPrefix(const char *Prefix, const char *Ptr) const {
+    while (*Ptr == ' ' || *Ptr == '\t')
+      ++Ptr;
+    return !strncmp(Prefix, Ptr, strlen(Prefix));
+  }
+public:
+  TGPPLexer(const SourceMgr &SM)
+    : CurBuf(SM.getMemoryBuffer(0)),
+      CurPtr(CurBuf->getBufferStart()),
+      IsInsideMacroStatement(false),
+      WasEndOfLine(true) {
+  }
+
+  /// Lexes the next token and returns its kind. [*BeginOfToken, *EndOfToken)
+  /// is the token text; for tokens without text it is an empty span at the
+  /// current position, so it can always be used as a diagnostic location.
+  TGPPTokenKind NextToken(const char **BeginOfToken, const char **EndOfToken);
+};
+
+/// The values one loop index takes: either an explicit list of strings or
+/// the integer sequence From..To (both ends included).
+///
+/// Defined ahead of TGPPRecord because TGPPRecord stores TGPPRange objects
+/// by value in a std::vector.
+class TGPPRange {
+  TGPPRangeKind Kind;
+
+  // tgpprange_list
+  std::vector<std::string> Vals;
+
+  // tgpprange_sequence
+  long int From, To;
+
+public:
+  TGPPRange() : Kind(tgpprange_list), From(0), To(0) {}
+  TGPPRange(long int F, long int T)
+    : Kind(tgpprange_sequence), From(F), To(T) {}
+
+  /// Number of values in the range.
+  size_t size() const {
+    if (Kind == tgpprange_list)
+      return Vals.size();
+    // An inverted sequence is empty. Without this check the negative
+    // length would wrap around to a huge size_t.
+    if (To < From)
+      return 0;
+    // Unsigned arithmetic: To - From may not fit in a long.
+    return static_cast<size_t>(To) - static_cast<size_t>(From) + 1;
+  }
+
+  /// The i-th value as text; sequence values are printed in decimal.
+  std::string at(size_t i) const {
+    if (Kind == tgpprange_list)
+      return Vals.at(i);
+    else {
+      char buf[32];
+      snprintf(buf, sizeof(buf), "%ld", From + (long int)i);
+      return std::string(buf);
+    }
+  }
+
+  /// Appends a value to a list range; ignored for a sequence range.
+  void push_back(const std::string &S) {
+    if (Kind == tgpprange_list)
+      Vals.push_back(S);
+  }
+};
+
+/// One node of the parsed input: a for-loop with its body, a variable
+/// reference, or a run of literal text.
+class TGPPRecord {
+  TGPPRecordKind Kind;
+
+  // tgpprecord_for
+  std::vector<std::string> IndexVars;
+  std::vector<TGPPRange> IndexRanges;
+  TGPPRecords LoopBody;
+
+  // tgpprecord_variable, tgpprecord_literal
+  std::string Str;
+
+  bool EvaluateFor(const TGPPEnvironment &Env, raw_fd_ostream &OS) const;
+
+  /// Writes the value bound to the variable Str, or reports an error and
+  /// returns true if Env has no binding for it.
+  bool EvaluateVariable(const TGPPEnvironment &Env, raw_fd_ostream &OS) const {
+    TGPPEnvironment::const_iterator it_val = Env.find(Str);
+    if (it_val == Env.end()) {
+      PrintError("Var is not bound to any value: " + Str);
+      return true;
+    }
+    OS << it_val->second;
+    return false;
+  }
+
+  /// Writes the literal text unchanged; never fails.
+  bool EvaluateLiteral(const TGPPEnvironment &Env, raw_fd_ostream &OS) const {
+    OS << Str;
+    return false;
+  }
+
+public:
+  TGPPRecord(TGPPRecordKind K) : Kind(K) {}
+  TGPPRecord(TGPPRecordKind K, const std::string &S) : Kind(K), Str(S) {}
+
+  TGPPRecords *GetLoopBody() { return &LoopBody; }
+
+  /// Adds the index variable V, iterating over R, to a for-loop record.
+  void AppendIndex(const std::string &V, const TGPPRange &R) {
+    IndexVars.push_back(V);
+    IndexRanges.push_back(R);
+  }
+
+  /// Writes the expansion of this record to OS. Returns true on error.
+  bool Evaluate(const TGPPEnvironment &Env, raw_fd_ostream &OS) const;
+};
+} // namespace llvm
+
+using namespace llvm;
+
+bool llvm::MatchSymbol(TGPPTokenKind Kind,
+                       const char *BeginOfToken, const char *EndOfToken,
+                       char Symbol) {
+  return Kind == tgpptoken_symbol &&
+         BeginOfToken + 1 == EndOfToken &&
+         Symbol == *BeginOfToken;
+}
+
+bool llvm::MatchSymbol(TGPPTokenKind Kind,
+                       const char *BeginOfToken, const char *EndOfToken,
+                       const char *Symbol) {
+  return Kind == tgpptoken_symbol &&
+         BeginOfToken + strlen(Symbol) == EndOfToken &&
+         !strncmp(Symbol, BeginOfToken, EndOfToken - BeginOfToken);
+}
+
+bool llvm::MatchIdNum(TGPPTokenKind Kind,
+                      const char *BeginOfToken, const char *EndOfToken) {
+  if (Kind != tgpptoken_symbol)
+    return false;
+  for (const char *i = BeginOfToken; i != EndOfToken; ++i)
+    if (!TGPPIsIdChar(*i))
+      return false;
+  return true;
+}
+
+bool llvm::MatchIdentifier(TGPPTokenKind Kind,
+                           const char *BeginOfToken, const char *EndOfToken) {
+  if (Kind != tgpptoken_symbol)
+    return false;
+
+  // An empty token is not an identifier. Checking here also keeps the loop
+  // below from starting one past EndOfToken.
+  if (BeginOfToken == EndOfToken)
+    return false;
+
+  const char *i = BeginOfToken;
+  if (!TGPPIsIdStart(*i))
+    return false;
+  for (++i; i != EndOfToken; ++i)
+    if (!TGPPIsIdChar(*i))
+      return false;
+
+  return true;
+}
+
+bool llvm::MatchNumber(TGPPTokenKind Kind,
+                       const char *BeginOfToken, const char *EndOfToken,
+                       long int *Val) {
+  if (Kind != tgpptoken_symbol)
+    return false;
+  // strtol would otherwise be free to parse text that follows an empty token.
+  if (BeginOfToken == EndOfToken)
+    return false;
+  char *e;
+  *Val = strtol(BeginOfToken, &e, 10);
+  // Only a match if the digits cover the token exactly.
+  return e == EndOfToken;
+}
+
+TGPPTokenKind TGPPLexer::
+NextToken(const char **BeginOfToken, const char **EndOfToken) {
+  bool IsBeginOfLine = WasEndOfLine;
+  WasEndOfLine = false;
+
+  // Tokens without text (newline, error, end) still get a valid empty span:
+  // callers hand *BeginOfToken to PrintError as the diagnostic location.
+  *BeginOfToken = *EndOfToken = CurPtr;
+
+  if (IsEndOfBuffer(CurPtr))
+    return tgpptoken_end;
+
+  else if (IsInsideMacroStatement) {
+    while (*CurPtr == ' ' || *CurPtr == '\t') // trim space, if any
+      ++CurPtr;
+
+    const char *BeginOfSymbol = CurPtr;
+    *BeginOfToken = *EndOfToken = BeginOfSymbol;
+
+    // Trailing blanks on a last line that has no line terminator.
+    if (IsEndOfBuffer(CurPtr))
+      return tgpptoken_end;
+
+    if (IsNewLine()) {
+      ++CurPtr;
+      IsInsideMacroStatement = false;
+      WasEndOfLine = true;
+      return tgpptoken_newline;
+    }
+
+    else if (*CurPtr == '[' || *CurPtr == ']' ||
+             *CurPtr == '(' || *CurPtr == ')' ||
+             *CurPtr == ',' || *CurPtr == '=') {
+      *BeginOfToken = BeginOfSymbol;
+      *EndOfToken = ++CurPtr;
+      return tgpptoken_symbol;
+    }
+
+    else if (TGPPIsIdStart(*CurPtr)) {
+      ++CurPtr;
+      while (TGPPIsIdChar(*CurPtr))
+        ++CurPtr;
+      *BeginOfToken = BeginOfSymbol;
+      *EndOfToken = CurPtr;
+      return tgpptoken_symbol;
+    }
+
+    else if (*CurPtr == '+' || *CurPtr == '-' || TGPPIsDigit(*CurPtr)) {
+      ++CurPtr;
+      while (TGPPIsDigit(*CurPtr))
+        ++CurPtr;
+      *BeginOfToken = BeginOfSymbol;
+      *EndOfToken = CurPtr;
+      return tgpptoken_symbol;
+    }
+
+    else {
+      PrintError(BeginOfSymbol, "Unrecognizable token");
+      return tgpptoken_error;
+    }
+  }
+
+  else if (*CurPtr == '#') {
+    // "#for" / "#end" only open a macro statement at the start of a line.
+    if (IsBeginOfLine &&
+        (MatchPrefix("for", CurPtr + 1) ||
+         MatchPrefix("end", CurPtr + 1))) {
+      ++CurPtr;
+      IsInsideMacroStatement = true;
+      return NextToken(BeginOfToken, EndOfToken);
+    }
+
+    // the special token #"# is translated to a literal "
+    else if (CurPtr[1] == '"' && CurPtr[2] == '#') {
+      *BeginOfToken = ++CurPtr;
+      *EndOfToken = ++CurPtr;
+      ++CurPtr;
+      return tgpptoken_literal;
+    }
+
+    // otherwise this must be a variable reference of the form #name#
+    else {
+      const char *BeginOfVar = ++CurPtr; // trim '#'
+      if (!TGPPIsIdStart(*CurPtr)) {
+        PrintError(BeginOfVar, "Variable must start with [_A-Za-z]: ");
+        return tgpptoken_error;
+      }
+      while (TGPPIsIdChar(*CurPtr))
+        ++CurPtr;
+      if (*CurPtr != '#') {
+        PrintError(BeginOfVar, "Variable must end with #");
+        return tgpptoken_error;
+      }
+      *BeginOfToken = BeginOfVar;
+      *EndOfToken = CurPtr++; // trim '#'
+      return tgpptoken_symbol;
+    }
+  }
+
+  // Literal text: runs to the end of the line or to the next '#' that is
+  // outside a comment and outside a string literal.
+  const char *BeginOfLiteral = CurPtr;
+  int CCommentLevel = 0;
+  bool BCPLComment = false;
+  bool StringLiteral = false;
+  for (; !IsEndOfBuffer(CurPtr); ++CurPtr) {
+    if (CCommentLevel > 0) {
+      // C-style comments nest here, hence a level counter.
+      if (CurPtr[0] == '/' && CurPtr[1] == '*') {
+        ++CurPtr;
+        ++CCommentLevel;
+      } else if (CurPtr[0] == '*' && CurPtr[1] == '/') {
+        ++CurPtr;
+        --CCommentLevel;
+      } else if (IsNewLine())
+        WasEndOfLine = true;
+    }
+
+    else if (BCPLComment) {
+      if (IsNewLine()) {
+        WasEndOfLine = true;
+        BCPLComment = false;
+      }
+    }
+
+    else if (StringLiteral) {
+      // no string escape sequence in TableGen?
+      if (*CurPtr == '"')
+        StringLiteral = false;
+    }
+
+    else if (CurPtr[0] == '/' && CurPtr[1] == '*') {
+      ++CurPtr;
+      ++CCommentLevel;
+    }
+
+    else if (CurPtr[0] == '/' && CurPtr[1] == '/') {
+      ++CurPtr;
+      BCPLComment = true;
+    }
+
+    else if (*CurPtr == '"')
+      StringLiteral = true;
+
+    else if (IsNewLine()) {
+      ++CurPtr;
+      WasEndOfLine = true;
+      break;
+    }
+
+    else if (*CurPtr == '#')
+      break;
+  }
+
+  *BeginOfToken = BeginOfLiteral;
+  *EndOfToken = CurPtr;
+  return tgpptoken_literal;
+}
+
+/// Expands a for-loop record. All index variables advance together, so the
+/// body is evaluated once per position up to the length of the shortest
+/// range. Returns true as soon as evaluating the body fails.
+bool TGPPRecord::
+EvaluateFor(const TGPPEnvironment &Env, raw_fd_ostream &OS) const {
+  // Without any index there is nothing to iterate over.
+  if (IndexRanges.empty())
+    return false;
+
+  std::vector<TGPPRange>::const_iterator ri, re;
+
+  // calculate the min size
+  ri = IndexRanges.begin();
+  re = IndexRanges.end();
+  size_t n = ri->size();
+  for (++ri; ri != re; ++ri) {
+    size_t m = ri->size();
+    if (m < n)
+      n = m;
+  }
+
+  for (size_t which_val = 0; which_val < n; ++which_val) {
+    // construct nested environment
+    TGPPEnvironment NestedEnv(Env);
+    std::vector<std::string>::const_iterator vi = IndexVars.begin();
+    for (ri = IndexRanges.begin(); ri != re; ++vi, ++ri) {
+      // Assign rather than insert: an index of this loop must shadow a
+      // variable of the same name bound by an enclosing loop.
+      NestedEnv[*vi] = ri->at(which_val);
+    }
+    // evaluate loop body
+    for (TGPPRecords::const_iterator i = LoopBody.begin(), e = LoopBody.end();
+         i != e; ++i)
+      if (i->Evaluate(NestedEnv, OS))
+        return true;
+  }
+
+  return false;
+}
+
+bool TGPPRecord::
+Evaluate(const TGPPEnvironment &Env, raw_fd_ostream &OS) const {
+  switch (Kind) {
+  case tgpprecord_for:
+    return EvaluateFor(Env, OS);
+  case tgpprecord_variable:
+    return EvaluateVariable(Env, OS);
+  case tgpprecord_literal:
+    return EvaluateLiteral(Env, OS);
+  default:
+    // Format the number explicitly; adding an enum to a string literal is
+    // pointer arithmetic, not concatenation.
+    PrintError("Unknown kind of record: " + Twine(static_cast<int>(Kind)));
+    return true;
+  }
+  return false;
+}
+
+/// Parses records into *CurRecords until the input ends (top level) or the
+/// matching "#end" line has been consumed (inside a #for). Returns true on
+/// error.
+bool TGPreprocessor::ParseBlock(bool TopLevel) {
+  TGPPTokenKind Kind;
+  const char *BeginOfToken, *EndOfToken;
+  while ((Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken)) !=
+         tgpptoken_end) {
+    switch (Kind) {
+    case tgpptoken_symbol: {
+      std::string Symbol(BeginOfToken, EndOfToken);
+      if (Symbol == "for") {
+        if (ParseForLoop())
+          return true;
+      } else if (Symbol == "end") {
+        if (TopLevel) {
+          PrintError(BeginOfToken, "No block to end here");
+          return true;
+        }
+        // "#end" must be alone on its line; the line may also be the last
+        // one of a file that lacks a trailing newline.
+        Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
+        if (Kind != tgpptoken_newline && Kind != tgpptoken_end) {
+          PrintError(BeginOfToken, "Tokens after #end");
+          return true;
+        }
+        return false;
+      } else if (Symbol == "NAME") {
+        // treat '#NAME#' as a literal
+        CurRecords->push_back(
+            TGPPRecord(tgpprecord_literal,
+                       std::string("#NAME#")));
+      } else {
+        CurRecords->push_back(
+            TGPPRecord(tgpprecord_variable, Symbol));
+      }
+      break;
+    }
+    case tgpptoken_literal:
+      CurRecords->push_back(
+          TGPPRecord(tgpprecord_literal,
+                     std::string(BeginOfToken, EndOfToken)));
+      break;
+    default:
+      // tgpptoken_error (already reported by the lexer) or a stray newline.
+      return true;
+    }
+  }
+
+  // Running out of input inside a loop body means its #end is missing.
+  if (!TopLevel) {
+    PrintError(BeginOfToken, "Missing #end for #for block");
+    return true;
+  }
+  return false;
+}
+
+/// Parses the rest of a "#for" line (comma-separated "var = range" items),
+/// then the loop body up to its "#end", and appends the resulting for-loop
+/// record to *CurRecords. Returns true on error.
+bool TGPreprocessor::ParseForLoop() {
+  TGPPRecord ForLoopRecord(tgpprecord_for);
+
+  for (;;) {
+    TGPPTokenKind Kind;
+    const char *BeginOfToken, *EndOfToken;
+
+    Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
+    if (!MatchIdentifier(Kind, BeginOfToken, EndOfToken)) {
+      PrintError(BeginOfToken, "Not an identifier");
+      return true;
+    }
+    std::string IndexVar(BeginOfToken, EndOfToken);
+
+    Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
+    if (!MatchSymbol(Kind, BeginOfToken, EndOfToken, '=')) {
+      PrintError(BeginOfToken, "Need a '=' here");
+      return true;
+    }
+
+    TGPPRange Range;
+    if (ParseRange(&Range))
+      return true;
+    ForLoopRecord.AppendIndex(IndexVar, Range);
+
+    Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
+    if (Kind == tgpptoken_newline)
+      break;
+    if (!MatchSymbol(Kind, BeginOfToken, EndOfToken, ',')) {
+      PrintError(BeginOfToken, "Need a ',' here");
+      return true;
+    }
+  }
+
+  // open a new level
+  TGPPRecords *LastCurRecords = CurRecords;
+  CurRecords = ForLoopRecord.GetLoopBody();
+
+  bool Failed = ParseBlock(false);
+
+  // Close the level on failure too: ForLoopRecord is a local, so CurRecords
+  // must not keep pointing into it.
+  CurRecords = LastCurRecords;
+  if (Failed)
+    return true;
+
+  CurRecords->push_back(ForLoopRecord);
+  return false;
+}
+
+/// Parses a loop-index range into *Range. Two forms are accepted:
+///   [ item, item, ... ]     each item made of [_A-Za-z0-9] characters
+///   sequence(from, to)      two decimal integers
+/// Returns true on error, after printing a diagnostic.
+bool TGPreprocessor::ParseRange(TGPPRange *Range) {
+  TGPPTokenKind Kind;
+  const char *BeginOfToken, *EndOfToken;
+
+  Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
+
+  if (MatchSymbol(Kind, BeginOfToken, EndOfToken, '[')) {
+    // At least one item is required; items are separated by commas.
+    for (;;) {
+      Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
+      if (!MatchIdNum(Kind, BeginOfToken, EndOfToken)) {
+        PrintError(BeginOfToken, "Need a identifier or a number here");
+        return true;
+      }
+      Range->push_back(std::string(BeginOfToken, EndOfToken));
+
+      Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
+      if (MatchSymbol(Kind, BeginOfToken, EndOfToken, ']'))
+        break;
+      if (!MatchSymbol(Kind, BeginOfToken, EndOfToken, ',')) {
+        PrintError(BeginOfToken, "Need a comma here");
+        return true;
+      }
+    }
+    return false;
+  }
+
+  else if (MatchSymbol(Kind, BeginOfToken, EndOfToken, "sequence")) {
+    long int from, to;
+
+    Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
+    if (!MatchSymbol(Kind, BeginOfToken, EndOfToken, '(')) {
+      PrintError(BeginOfToken, "Need a left parentheses here");
+      return true;
+    }
+
+    Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
+    if (!MatchNumber(Kind, BeginOfToken, EndOfToken, &from)) {
+      PrintError(BeginOfToken, "Not a number");
+      return true;
+    }
+
+    Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
+    if (!MatchSymbol(Kind, BeginOfToken, EndOfToken, ',')) {
+      PrintError(BeginOfToken, "Need a comma here");
+      return true;
+    }
+
+    Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
+    if (!MatchNumber(Kind, BeginOfToken, EndOfToken, &to)) {
+      PrintError(BeginOfToken, "Not a number");
+      return true;
+    }
+
+    Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
+    if (!MatchSymbol(Kind, BeginOfToken, EndOfToken, ')')) {
+      PrintError(BeginOfToken, "Need a right parentheses here");
+      return true;
+    }
+
+    // Replaces the default-constructed list range with a sequence range.
+    *Range = TGPPRange(from, to);
+    return false;
+  }
+
+  PrintError(BeginOfToken, "illegal range of loop index");
+  return true;
+}
+
+/// Parses the whole input into records, then evaluates them in order with
+/// an empty environment, writing the expansion to Out. Returns true on
+/// error.
+bool TGPreprocessor::PreprocessFile() {
+  TGPPLexer TheLexer(SrcMgr);
+  TGPPRecords TopLevelRecords;
+
+  Lexer = &TheLexer;
+  CurRecords = &TopLevelRecords;
+  bool Failed = ParseBlock(true);
+
+  // TheLexer and TopLevelRecords are locals of this function; do not leave
+  // the members pointing at them once parsing is over.
+  Lexer = NULL;
+  CurRecords = NULL;
+  if (Failed)
+    return true;
+
+  TGPPEnvironment Env;
+  for (TGPPRecords::const_iterator i = TopLevelRecords.begin(),
+                                   e = TopLevelRecords.end();
+       i != e; ++i)
+    if (i->Evaluate(Env, Out.os()))
+      return true;
+
+  return false;
+}
diff --git a/lib/TableGen/TGPreprocessor.h b/lib/TableGen/TGPreprocessor.h
new file mode 100644
index 00000000000..2b9ed9c1566
--- /dev/null
+++ b/lib/TableGen/TGPreprocessor.h
@@ -0,0 +1,52 @@
+//===- TGPreprocessor.h - Preprocessor for TableGen Files -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class represents the Preprocessor for tablegen files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TGPREPROCESSOR_H
+#define TGPREPROCESSOR_H
+
+#include <vector>
+
+namespace llvm {
+class MemoryBuffer;
+class SourceMgr;
+class tool_output_file;
+
+class TGPPLexer;
+class TGPPRange;
+class TGPPRecord;
+
+typedef std::vector<TGPPRecord> TGPPRecords;
+
+class TGPreprocessor {
+  SourceMgr &SrcMgr;
+  tool_output_file &Out;
+
+  TGPPLexer *Lexer;
+  TGPPRecords *CurRecords;
+
+  bool ParseBlock(bool TopLevel);
+  bool ParseForLoop();
+  bool ParseRange(TGPPRange *Range);
+
+public:
+  TGPreprocessor(SourceMgr &SM, tool_output_file &O)
+    : SrcMgr(SM), Out(O), Lexer(NULL), CurRecords(NULL) {
+  }
+
+  /// PreprocessFile - Main entry point for preprocessing a tblgen file. These
+  /// preprocess routines return true on error, or false on success.
+  bool PreprocessFile();
+};
+} // namespace llvm
+
+#endif /* TGPREPROCESSOR_H */