Reimplement the tblgen lexer with a simple hand-written lexer. This eliminates one dependency on flex and gets rid of two ".cvs" files.

llvm-svn: 44210
2024-11-23 03:02:36 +01:00 · 2007-11-18 02:57:27 +00:00 · 2007-11-18 02:57:27 +00:00 · 07a4b4d5fe
commit 07a4b4d5fe
parent 2a8ef3f29a
6 changed files with 516 additions and 2494 deletions
--- a/utils/TableGen/FileLexer.cpp.cvs
+++ b/utils/TableGen/FileLexer.cpp.cvs
--- a/utils/TableGen/FileLexer.l
+++ b/utils/TableGen/FileLexer.l
@ -1,240 +0,0 @@
-/*===-- FileLexer.l - Scanner for TableGen Files ----------------*- C++ -*-===//
-// 
-//                     The LLVM Compiler Infrastructure
-//
-// This file was developed by the LLVM research group and is distributed under
-// the University of Illinois Open Source License. See LICENSE.TXT for details.
-// 
-//===----------------------------------------------------------------------===//
-//
-// This file defines a simple flex scanner for TableGen files.  This is pretty
-// straight-forward, except for the magic to handle file inclusion.
-//
-//===----------------------------------------------------------------------===*/
-
-%option prefix="File"
-%option yylineno
-%option nostdinit
-%option never-interactive
-%option batch
-%option nodefault
-%option 8bit
-%option outfile="Lexer.cpp"
-%option ecs
-%option noreject
-%option noyymore
-
-%x in_comment
-
-%{
-#include "llvm/Config/config.h"
-#include "llvm/Support/Streams.h"
-#include "Record.h"
-typedef std::pair<llvm::Record*, std::vector<llvm::Init*>*> SubClassRefTy;
-#include "FileParser.h"
-
-int Fileparse();
-
-namespace llvm {
-
-// Global variable recording the location of the include directory
-std::vector<std::string> IncludeDirectories;
-
-/// ParseInt - This has to handle the special case of binary numbers 0b0101
-///
-static int ParseInt(const char *Str) {
-  if (Str[0] == '0' && Str[1] == 'b')
-    return strtoll(Str+2, 0, 2);
-  return strtoll(Str, 0, 0); 
-}
-
-static int CommentDepth = 0;
-
-struct IncludeRec {
-  std::string Filename;
-  FILE *File;
-  unsigned LineNo;
-  YY_BUFFER_STATE Buffer;
-
-  IncludeRec(const std::string &FN, FILE *F)
-    : Filename(FN), File(F), LineNo(0){
-  }
-};
-
-static std::vector<IncludeRec> IncludeStack;
-
-std::ostream &err() {
-  if (IncludeStack.empty()) {
-    cerr << "At end of input: ";
-    return *cerr.stream();
-  }
-
-  for (unsigned i = 0, e = IncludeStack.size()-1; i != e; ++i)
-    cerr << "Included from " << IncludeStack[i].Filename << ":"
-         << IncludeStack[i].LineNo << ":\n";
-  cerr << "Parsing " << IncludeStack.back().Filename << ":"
-       << Filelineno << ": ";
-  return *cerr.stream();
-}
-
-/// ParseFile - this function begins the parsing of the specified tablegen file.
-///
-void ParseFile(const std::string &Filename, 
-               const std::vector<std::string> &IncludeDirs) {
-  FILE *F = stdin;
-  if (Filename != "-") {
-    F = fopen(Filename.c_str(), "r");
-
-    if (F == 0) {
-      cerr << "Could not open input file '" + Filename + "'!\n";
-      exit (1);
-    }
-    IncludeStack.push_back(IncludeRec(Filename, F));
-  } else {
-    IncludeStack.push_back(IncludeRec("<stdin>", stdin));
-  }
-
-  // Record the location of the include directory so that the lexer can find
-  // it later.
-  IncludeDirectories = IncludeDirs;
- 
-  Filein = F;
-  Filelineno = 1;
-  Fileparse();
-  Filein = stdin;
-}
-
-/// HandleInclude - This function is called when an include directive is
-/// encountered in the input stream...
-///
-static void HandleInclude(const char *Buffer) {
-  unsigned Length = yyleng;
-  assert(Buffer[Length-1] == '"');
-  Buffer += strlen("include ");
-  Length -= strlen("include ");
-  while (*Buffer != '"') {
-    ++Buffer;
-    --Length;
-  }
-  assert(Length >= 2 && "Double quotes not found?");
-  std::string Filename(Buffer+1, Buffer+Length-1);
-  //cerr << "Filename = '" << Filename << "'\n";
-
-  // Save the line number and lex buffer of the includer...
-  IncludeStack.back().LineNo = Filelineno;
-  IncludeStack.back().Buffer = YY_CURRENT_BUFFER;
-
-  // Open the new input file...
-  yyin = fopen(Filename.c_str(), "r");
-  if (yyin == 0) {
-    // If we couldn't find the file in the current directory, look for it in
-    // the include directories.
-    //
-    std::string NextFilename;
-    for (unsigned i = 0, e = IncludeDirectories.size(); i != e; ++i) {
-      NextFilename = IncludeDirectories[i] + "/" + Filename;
-      if ((yyin = fopen(NextFilename.c_str(), "r")))
-        break;
-    }
-    
-    if (yyin == 0) {
-      err() << "Could not find include file '" << Filename << "'!\n";
-      exit(1);
-    }
-    Filename = NextFilename;
-  }
-
-  // Add the file to our include stack...
-  IncludeStack.push_back(IncludeRec(Filename, yyin));
-  Filelineno = 1;  // Reset line numbering...
-  //yyrestart(yyin);    // Start lexing the new file...
-
-  yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
-}
-
-/// yywrap - This is called when the lexer runs out of input in one of the
-/// files. Switch back to an includer if an includee has run out of input.
-///
-extern "C"
-int yywrap(void) {
-  if (IncludeStack.back().File != stdin)
-    fclose(IncludeStack.back().File);
-  IncludeStack.pop_back();
-  if (IncludeStack.empty()) return 1;  // Top-level file is done.
-
-  // Otherwise, we need to switch back to a file which included the current one.
-  Filelineno = IncludeStack.back().LineNo;  // Restore current line number
-  yy_switch_to_buffer(IncludeStack.back().Buffer);
-  return 0;
-}
-
-} // End llvm namespace
-
-using namespace llvm;
-
-%}
-
-Comment      \/\/.*
-
-Identifier   [a-zA-Z_][0-9a-zA-Z_]*
-Integer      [-+]?[0-9]+|0x[0-9a-fA-F]+|0b[01]+
-CodeFragment \[\{([^}]+|\}[^\]])*\}\]
-StringVal    \"[^"]*\"
-IncludeStr   include[ \t\n]+\"[^"]*\"
-
-%%
-
-{Comment}      { /* Ignore comments */ }
-
-{IncludeStr}   { HandleInclude(yytext); }
-{CodeFragment} { Filelval.StrVal = new std::string(yytext+2, yytext+yyleng-2);
-                 return CODEFRAGMENT; }
-
-int            { return INT; }
-bit            { return BIT; }
-bits           { return BITS; }
-string         { return STRING; }
-list           { return LIST; }
-code           { return CODE; }
-dag            { return DAG; }
-
-class          { return CLASS; }
-def            { return DEF; }
-defm           { return DEFM; }
-multiclass     { return MULTICLASS; }
-field          { return FIELD; }
-let            { return LET; }
-in             { return IN; }
-
-!con           { return CONCATTOK; }
-!sra           { return SRATOK; }
-!srl           { return SRLTOK; }
-!shl           { return SHLTOK; }
-!strconcat     { return STRCONCATTOK; }
-
-
-{Identifier}   { Filelval.StrVal = new std::string(yytext, yytext+yyleng);
-                 return ID; }
-${Identifier}  { Filelval.StrVal = new std::string(yytext+1, yytext+yyleng);
-                 return VARNAME; } 
-
-{StringVal}    { Filelval.StrVal = new std::string(yytext+1, yytext+yyleng-1);
-                 return STRVAL; }
-
-{Integer}      { Filelval.IntVal = ParseInt(Filetext); return INTVAL; }
-
-[ \t\n\r]+     { /* Ignore whitespace */ }
-
-
-"/*"                       { BEGIN(in_comment); CommentDepth++; }
-<in_comment>[^*/]*         {} /* eat anything that's not a '*' or '/' */
-<in_comment>"*"+[^*/]*     {} /* eat up '*'s not followed by '/'s */
-<in_comment>"/*"           { ++CommentDepth; }
-<in_comment>"/"+[^*/]*     {} /* eat up /'s not followed by *'s */
-<in_comment>"*"+"/"        { if (!--CommentDepth) { BEGIN(INITIAL); } }
-<in_comment><<EOF>>        { err() << "Unterminated comment!\n"; exit(1); }
-
-.              { return Filetext[0]; }
-
-%%
-
--- a/utils/TableGen/FileLexer.l.cvs
+++ b/utils/TableGen/FileLexer.l.cvs
@ -1,240 +0,0 @@
-/*===-- FileLexer.l - Scanner for TableGen Files ----------------*- C++ -*-===//
-// 
-//                     The LLVM Compiler Infrastructure
-//
-// This file was developed by the LLVM research group and is distributed under
-// the University of Illinois Open Source License. See LICENSE.TXT for details.
-// 
-//===----------------------------------------------------------------------===//
-//
-// This file defines a simple flex scanner for TableGen files.  This is pretty
-// straight-forward, except for the magic to handle file inclusion.
-//
-//===----------------------------------------------------------------------===*/
-
-%option prefix="File"
-%option yylineno
-%option nostdinit
-%option never-interactive
-%option batch
-%option nodefault
-%option 8bit
-%option outfile="Lexer.cpp"
-%option ecs
-%option noreject
-%option noyymore
-
-%x in_comment
-
-%{
-#include "llvm/Config/config.h"
-#include "llvm/Support/Streams.h"
-#include "Record.h"
-typedef std::pair<llvm::Record*, std::vector<llvm::Init*>*> SubClassRefTy;
-#include "FileParser.h"
-
-int Fileparse();
-
-namespace llvm {
-
-// Global variable recording the location of the include directory
-std::vector<std::string> IncludeDirectories;
-
-/// ParseInt - This has to handle the special case of binary numbers 0b0101
-///
-static int ParseInt(const char *Str) {
-  if (Str[0] == '0' && Str[1] == 'b')
-    return strtoll(Str+2, 0, 2);
-  return strtoll(Str, 0, 0); 
-}
-
-static int CommentDepth = 0;
-
-struct IncludeRec {
-  std::string Filename;
-  FILE *File;
-  unsigned LineNo;
-  YY_BUFFER_STATE Buffer;
-
-  IncludeRec(const std::string &FN, FILE *F)
-    : Filename(FN), File(F), LineNo(0){
-  }
-};
-
-static std::vector<IncludeRec> IncludeStack;
-
-std::ostream &err() {
-  if (IncludeStack.empty()) {
-    cerr << "At end of input: ";
-    return *cerr.stream();
-  }
-
-  for (unsigned i = 0, e = IncludeStack.size()-1; i != e; ++i)
-    cerr << "Included from " << IncludeStack[i].Filename << ":"
-         << IncludeStack[i].LineNo << ":\n";
-  cerr << "Parsing " << IncludeStack.back().Filename << ":"
-       << Filelineno << ": ";
-  return *cerr.stream();
-}
-
-/// ParseFile - this function begins the parsing of the specified tablegen file.
-///
-void ParseFile(const std::string &Filename, 
-               const std::vector<std::string> &IncludeDirs) {
-  FILE *F = stdin;
-  if (Filename != "-") {
-    F = fopen(Filename.c_str(), "r");
-
-    if (F == 0) {
-      cerr << "Could not open input file '" + Filename + "'!\n";
-      exit (1);
-    }
-    IncludeStack.push_back(IncludeRec(Filename, F));
-  } else {
-    IncludeStack.push_back(IncludeRec("<stdin>", stdin));
-  }
-
-  // Record the location of the include directory so that the lexer can find
-  // it later.
-  IncludeDirectories = IncludeDirs;
- 
-  Filein = F;
-  Filelineno = 1;
-  Fileparse();
-  Filein = stdin;
-}
-
-/// HandleInclude - This function is called when an include directive is
-/// encountered in the input stream...
-///
-static void HandleInclude(const char *Buffer) {
-  unsigned Length = yyleng;
-  assert(Buffer[Length-1] == '"');
-  Buffer += strlen("include ");
-  Length -= strlen("include ");
-  while (*Buffer != '"') {
-    ++Buffer;
-    --Length;
-  }
-  assert(Length >= 2 && "Double quotes not found?");
-  std::string Filename(Buffer+1, Buffer+Length-1);
-  //cerr << "Filename = '" << Filename << "'\n";
-
-  // Save the line number and lex buffer of the includer...
-  IncludeStack.back().LineNo = Filelineno;
-  IncludeStack.back().Buffer = YY_CURRENT_BUFFER;
-
-  // Open the new input file...
-  yyin = fopen(Filename.c_str(), "r");
-  if (yyin == 0) {
-    // If we couldn't find the file in the current directory, look for it in
-    // the include directories.
-    //
-    std::string NextFilename;
-    for (unsigned i = 0, e = IncludeDirectories.size(); i != e; ++i) {
-      NextFilename = IncludeDirectories[i] + "/" + Filename;
-      if ((yyin = fopen(NextFilename.c_str(), "r")))
-        break;
-    }
-    
-    if (yyin == 0) {
-      err() << "Could not find include file '" << Filename << "'!\n";
-      exit(1);
-    }
-    Filename = NextFilename;
-  }
-
-  // Add the file to our include stack...
-  IncludeStack.push_back(IncludeRec(Filename, yyin));
-  Filelineno = 1;  // Reset line numbering...
-  //yyrestart(yyin);    // Start lexing the new file...
-
-  yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
-}
-
-/// yywrap - This is called when the lexer runs out of input in one of the
-/// files. Switch back to an includer if an includee has run out of input.
-///
-extern "C"
-int yywrap(void) {
-  if (IncludeStack.back().File != stdin)
-    fclose(IncludeStack.back().File);
-  IncludeStack.pop_back();
-  if (IncludeStack.empty()) return 1;  // Top-level file is done.
-
-  // Otherwise, we need to switch back to a file which included the current one.
-  Filelineno = IncludeStack.back().LineNo;  // Restore current line number
-  yy_switch_to_buffer(IncludeStack.back().Buffer);
-  return 0;
-}
-
-} // End llvm namespace
-
-using namespace llvm;
-
-%}
-
-Comment      \/\/.*
-
-Identifier   [a-zA-Z_][0-9a-zA-Z_]*
-Integer      [-+]?[0-9]+|0x[0-9a-fA-F]+|0b[01]+
-CodeFragment \[\{([^}]+|\}[^\]])*\}\]
-StringVal    \"[^"]*\"
-IncludeStr   include[ \t\n]+\"[^"]*\"
-
-%%
-
-{Comment}      { /* Ignore comments */ }
-
-{IncludeStr}   { HandleInclude(yytext); }
-{CodeFragment} { Filelval.StrVal = new std::string(yytext+2, yytext+yyleng-2);
-                 return CODEFRAGMENT; }
-
-int            { return INT; }
-bit            { return BIT; }
-bits           { return BITS; }
-string         { return STRING; }
-list           { return LIST; }
-code           { return CODE; }
-dag            { return DAG; }
-
-class          { return CLASS; }
-def            { return DEF; }
-defm           { return DEFM; }
-multiclass     { return MULTICLASS; }
-field          { return FIELD; }
-let            { return LET; }
-in             { return IN; }
-
-!con           { return CONCATTOK; }
-!sra           { return SRATOK; }
-!srl           { return SRLTOK; }
-!shl           { return SHLTOK; }
-!strconcat     { return STRCONCATTOK; }
-
-
-{Identifier}   { Filelval.StrVal = new std::string(yytext, yytext+yyleng);
-                 return ID; }
-${Identifier}  { Filelval.StrVal = new std::string(yytext+1, yytext+yyleng);
-                 return VARNAME; } 
-
-{StringVal}    { Filelval.StrVal = new std::string(yytext+1, yytext+yyleng-1);
-                 return STRVAL; }
-
-{Integer}      { Filelval.IntVal = ParseInt(Filetext); return INTVAL; }
-
-[ \t\n\r]+     { /* Ignore whitespace */ }
-
-
-"/*"                       { BEGIN(in_comment); CommentDepth++; }
-<in_comment>[^*/]*         {} /* eat anything that's not a '*' or '/' */
-<in_comment>"*"+[^*/]*     {} /* eat up '*'s not followed by '/'s */
-<in_comment>"/*"           { ++CommentDepth; }
-<in_comment>"/"+[^*/]*     {} /* eat up /'s not followed by *'s */
-<in_comment>"*"+"/"        { if (!--CommentDepth) { BEGIN(INITIAL); } }
-<in_comment><<EOF>>        { err() << "Unterminated comment!\n"; exit(1); }
-
-.              { return Filetext[0]; }
-
-%%
-
--- a/utils/TableGen/Makefile
+++ b/utils/TableGen/Makefile
@ -11,8 +11,7 @@ LEVEL = ../..
 TOOLNAME = tblgen
 NO_INSTALL = 1;
 USEDLIBS = LLVMSupport.a LLVMSystem.a
-EXTRA_DIST = FileLexer.cpp.cvs FileLexer.l.cvs \
-	     FileParser.cpp.cvs FileParser.h.cvs FileParser.y.cvs
+EXTRA_DIST = FileParser.cpp.cvs FileParser.h.cvs FileParser.y.cvs
 REQUIRES_EH := 1
 REQUIRES_RTTI := 1

@ -27,4 +26,5 @@ CompileCommonOpts := $(filter-out -Wno-long-long,$(CompileCommonOpts))
 # (which depend on the source file) won't get generated until bison is done
 # generating the C source and header files for the parser.
 #
-$(ObjDir)/FileLexer.o : $(PROJ_SRC_DIR)/FileParser.h 
+$(ObjDir)/TGLexer.o : $(PROJ_SRC_DIR)/FileParser.h 
+
--- a/utils/TableGen/TGLexer.cpp
+++ b/utils/TableGen/TGLexer.cpp
@ -0,0 +1,439 @@
+//===- TGLexer.cpp - Lexer for TableGen -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implement the Lexer for TableGen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Record.h"
+#include "llvm/Support/Streams.h"
+#include "Record.h"
+#include "TGLexer.h"
+#include "llvm/Support/MemoryBuffer.h"
+typedef std::pair<llvm::Record*, std::vector<llvm::Init*>*> SubClassRefTy;
+#include "FileParser.h"
+#include <cctype>
+using namespace llvm;
+
+// FIXME: REMOVE THIS.
+#define YYEOF 0      // Returned by LexToken when the input is exhausted.
+#define YYERROR -2   // Returned by the Lex* routines on error; Filelex reports it.
+
+/// TGLexer - Start lexing at the beginning of StartBuf, on line 1.  The lexer
+/// keeps StartBuf as its current buffer and deletes it when it is destroyed or
+/// when the buffer is popped.
+TGLexer::TGLexer(MemoryBuffer *StartBuf)
+  : CurPtr(StartBuf->getBufferStart()), CurLineNo(1), CurBuf(StartBuf) {
+}
+
+/// ~TGLexer - Release every buffer still held: the suspended includers on the
+/// include stack (innermost first) and then the buffer currently being lexed.
+TGLexer::~TGLexer() {
+  for (unsigned Remaining = IncludeStack.size(); Remaining != 0; --Remaining)
+    delete IncludeStack[Remaining-1].Buffer;
+  IncludeStack.clear();
+
+  delete CurBuf;
+}
+
+
+/// err - Write the "Included from ... / Parsing file:line: " prefix to the
+/// error stream and return that stream so the caller can append its message.
+std::ostream &TGLexer::err() {
+  std::ostream &ErrStream = *cerr.stream();
+  PrintIncludeStack(ErrStream);
+  return ErrStream;
+}
+
+
+/// PrintIncludeStack - Print one "Included from <buffer>:<line>:" line per
+/// suspended includer, outermost first, followed by the buffer and line that
+/// is currently being lexed.
+void TGLexer::PrintIncludeStack(std::ostream &OS) {
+  typedef std::vector<IncludeRec>::const_iterator RecIter;
+  for (RecIter Rec = IncludeStack.begin(), End = IncludeStack.end();
+       Rec != End; ++Rec) {
+    OS << "Included from " << Rec->Buffer->getBufferIdentifier()
+       << ":" << Rec->LineNo << ":\n";
+  }
+
+  OS << "Parsing " << CurBuf->getBufferIdentifier() << ":"
+     << CurLineNo << ": ";
+}
+
+/// getNextChar - Consume one character from the current buffer and return it.
+///
+/// Any newline flavor ("\n", "\r", "\r\n" or "\n\r") is returned as a single
+/// '\n' and increments CurLineNo.  When the end of an included buffer is hit,
+/// that buffer is deleted, the includer is popped off IncludeStack and reading
+/// resumes where the includer left off.  At the end of the outermost buffer
+/// EOF is returned, and will be returned again on every further call.  All
+/// other characters are returned as non-negative values.
+int TGLexer::getNextChar() {
+  char CurChar = *CurPtr++;
+  switch (CurChar) {
+  default:
+    // Widen through unsigned char.  Where plain char is signed, a byte with
+    // the high bit set would otherwise come back negative, and 0xFF could be
+    // mistaken for EOF by the callers.
+    return (unsigned char)CurChar;
+  case 0:
+    // A nul character in the stream is either the end of the current buffer or
+    // a random nul in the file.  Disambiguate that here.
+    if (CurPtr-1 != CurBuf->getBufferEnd())
+      return 0;  // Just whitespace.
+
+    // If this is the end of an included file, pop the parent file off the
+    // include stack.
+    if (!IncludeStack.empty()) {
+      delete CurBuf;
+      CurBuf = IncludeStack.back().Buffer;
+      CurLineNo = IncludeStack.back().LineNo;
+      CurPtr = IncludeStack.back().CurPtr;
+      IncludeStack.pop_back();
+      return getNextChar();
+    }
+
+    // Otherwise, return end of file.
+    --CurPtr;  // Another call to lex will return EOF again.
+    return EOF;
+  case '\n':
+  case '\r':
+    // Handle the newline character by ignoring it and incrementing the line
+    // count.  However, be careful about 'dos style' files with \n\r in them.
+    // Only treat a \n\r or \r\n as a single line.
+    if ((*CurPtr == '\n' || (*CurPtr == '\r')) &&
+        *CurPtr != CurChar)
+      ++CurPtr;  // Eat the two char newline sequence.
+
+    ++CurLineNo;
+    return '\n';
+  }
+}
+
+/// LexToken - Return the next token from the input.
+///
+/// Whitespace, embedded nul characters and comments are skipped.  Returns
+/// YYEOF at end of input, YYERROR if a sub-lexer failed (the message is left
+/// in TheError), a token code for keywords/identifiers/literals/operators, or
+/// the character itself for any other single character.
+int TGLexer::LexToken() {
+  // Loop rather than recurse so that long runs of whitespace or comments do
+  // not consume one stack frame per skipped character.
+  while (1) {
+    // This always consumes at least one character.
+    int CurChar = getNextChar();
+
+    switch (CurChar) {
+    default:
+      // Handle letters: [a-zA-Z_].  The <cctype> predicates are only defined
+      // for EOF and values representable as unsigned char.
+      if (isalpha((unsigned char)CurChar) || CurChar == '_')
+        return LexIdentifier();
+
+      // Unknown character, return the char itself.
+      return (unsigned char)CurChar;
+    case EOF: return YYEOF;
+    case 0:
+    case ' ':
+    case '\t':
+    case '\n':
+    case '\r':
+      // Ignore whitespace.
+      continue;
+    case '/':
+      // If this is the start of a // comment, skip until the end of the line
+      // or the end of the buffer.
+      if (*CurPtr == '/')
+        SkipBCPLComment();
+      else if (*CurPtr == '*') {
+        if (SkipCComment())
+          return YYERROR;
+      } else // Otherwise, return this / as a token.
+        return CurChar;
+      continue;  // The comment is gone, lex whatever follows it.
+    case '-': case '+':
+    case '0': case '1': case '2': case '3': case '4': case '5': case '6':
+    case '7': case '8': case '9':
+      return LexNumber();
+    case '"': return LexString();
+    case '$': return LexVarName();
+    case '[': return LexBracket();
+    case '!': return LexExclaim();
+    }
+  }
+}
+
+/// LexString - We just read the opening quote.  Lex "[^"]*" and return STRVAL
+/// with the text between the quotes stored in Filelval.StrVal.  Hitting the
+/// end of the buffer or a newline before the closing quote is an error.
+int TGLexer::LexString() {
+  const char *const TextBegin = CurPtr;
+
+  for (; *CurPtr != '"'; ++CurPtr) {
+    const char Ch = *CurPtr;
+
+    // A nul sitting exactly at the buffer end means we ran out of input.
+    if (Ch == 0 && CurPtr == CurBuf->getBufferEnd()) {
+      TheError = "End of file in string literal";
+      return YYERROR;
+    }
+
+    if (Ch == '\n' || Ch == '\r') {
+      TheError = "End of line in string literal";
+      return YYERROR;
+    }
+  }
+
+  Filelval.StrVal = new std::string(TextBegin, CurPtr);
+  ++CurPtr;  // Step over the closing quote.
+  return STRVAL;
+}
+
+/// LexVarName - We just read '$'.  Lex $[a-zA-Z_][0-9a-zA-Z_]* and return
+/// VARNAME with the name (without the '$') in Filelval.StrVal.  If no valid
+/// name follows, nothing more is consumed and '$' itself is returned.
+int TGLexer::LexVarName() {
+  // Note: <cctype> predicates are undefined for negative arguments other than
+  // EOF, so plain char values are converted to unsigned char first.
+  if (!isalpha((unsigned char)CurPtr[0]) && CurPtr[0] != '_')
+    return '$'; // Invalid varname.
+
+  // Otherwise, we're ok, consume the rest of the characters.
+  const char *VarNameStart = CurPtr++;
+
+  while (isalnum((unsigned char)*CurPtr) || *CurPtr == '_')
+    ++CurPtr;
+
+  Filelval.StrVal = new std::string(VarNameStart, CurPtr);
+  return VARNAME;
+}
+
+
+/// LexIdentifier - The first character of [a-zA-Z_][0-9a-zA-Z_]* has already
+/// been consumed.  Lex the rest and return the keyword token if the text is a
+/// keyword, otherwise ID with the text in Filelval.StrVal.  The word "include"
+/// is handled here: the named file is entered and the first token lexed after
+/// that is returned instead.
+int TGLexer::LexIdentifier() {
+  // The first letter is [a-zA-Z_].
+  const char *IdentStart = CurPtr-1;
+
+  // Match the rest of the identifier regex: [0-9a-zA-Z_]*
+  // <cctype> predicates are undefined for negative arguments other than EOF,
+  // so convert the plain char to unsigned char before classifying it.
+  while (isalnum((unsigned char)*CurPtr) || *CurPtr == '_')
+    ++CurPtr;
+
+  // Check to see if this identifier is a keyword.
+  unsigned Len = CurPtr-IdentStart;
+
+  if (Len == 3 && !memcmp(IdentStart, "int", 3)) return INT;
+  if (Len == 3 && !memcmp(IdentStart, "bit", 3)) return BIT;
+  if (Len == 4 && !memcmp(IdentStart, "bits", 4)) return BITS;
+  if (Len == 6 && !memcmp(IdentStart, "string", 6)) return STRING;
+  if (Len == 4 && !memcmp(IdentStart, "list", 4)) return LIST;
+  if (Len == 4 && !memcmp(IdentStart, "code", 4)) return CODE;
+  if (Len == 3 && !memcmp(IdentStart, "dag", 3)) return DAG;
+
+  if (Len == 5 && !memcmp(IdentStart, "class", 5)) return CLASS;
+  if (Len == 3 && !memcmp(IdentStart, "def", 3)) return DEF;
+  if (Len == 4 && !memcmp(IdentStart, "defm", 4)) return DEFM;
+  if (Len == 10 && !memcmp(IdentStart, "multiclass", 10)) return MULTICLASS;
+  if (Len == 5 && !memcmp(IdentStart, "field", 5)) return FIELD;
+  if (Len == 3 && !memcmp(IdentStart, "let", 3)) return LET;
+  if (Len == 2 && !memcmp(IdentStart, "in", 2)) return IN;
+
+  // "include" never reaches the parser: switch buffers and keep lexing.
+  if (Len == 7 && !memcmp(IdentStart, "include", 7)) {
+    if (LexInclude()) return YYERROR;
+    return LexToken();
+  }
+
+  Filelval.StrVal = new std::string(IdentStart, CurPtr);
+  return ID;
+}
+
+/// LexInclude - We just read the "include" token.  Get the string token that
+/// comes next and enter the include.
+///
+/// The file is looked up first under the name as written and then under each
+/// entry of IncludeDirectories in order.  On success the current buffer,
+/// position and line number are pushed onto IncludeStack and lexing continues
+/// at line 1 of the new buffer.  Returns true on error (with the message in
+/// TheError), false on success.
+bool TGLexer::LexInclude() {
+  // The token after the include must be a string.
+  int Tok = LexToken();
+  if (Tok == YYERROR) return true;
+  if (Tok != STRVAL) {
+    TheError = "Expected filename after include";
+    return true;
+  }
+
+  // Get the string.
+  std::string Filename = *Filelval.StrVal;
+  delete Filelval.StrVal;
+
+  // Try to find the file.  Use c_str() rather than &Filename[0]: the filename
+  // may be empty (include ""), and indexing a non-const empty string at
+  // position 0 is not guaranteed to be valid before C++11.
+  MemoryBuffer *NewBuf = MemoryBuffer::getFile(Filename.c_str(),
+                                               Filename.size());
+
+  // If the file didn't exist directly, see if it's in an include path.
+  for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBuf; ++i) {
+    std::string IncFile = IncludeDirectories[i] + "/" + Filename;
+    NewBuf = MemoryBuffer::getFile(IncFile.c_str(), IncFile.size());
+  }
+
+  if (NewBuf == 0) {
+    TheError = "Could not find include file '" + Filename + "'";
+    return true;
+  }
+
+  // Save the line number and lex buffer of the includer.
+  IncludeStack.push_back(IncludeRec(CurBuf, CurPtr, CurLineNo));
+
+  CurLineNo = 1;  // Reset line numbering.
+  CurBuf = NewBuf;
+  CurPtr = CurBuf->getBufferStart();
+  return false;
+}
+
+/// SkipBCPLComment - We just read the first '/' of a "//" comment.  Advance
+/// CurPtr up to, but not past, the newline or end of buffer that ends it.
+void TGLexer::SkipBCPLComment() {
+  ++CurPtr;  // skip the second slash.
+
+  for (;; ++CurPtr) {
+    const char Ch = *CurPtr;
+
+    // Newline is end of comment.
+    if (Ch == '\n' || Ch == '\r')
+      return;
+
+    // A nul only ends the comment when it is the end of the buffer; a nul
+    // embedded in the file is skipped like any other character.
+    if (Ch == 0 && CurPtr == CurBuf->getBufferEnd())
+      return;
+  }
+}
+
+/// SkipCComment - We just read the '/' of a "/*" comment.  Skip to the
+/// matching "*/", honoring nested comments (the one difference from C).
+/// Returns true, with TheError set, if the input ends first; false otherwise.
+bool TGLexer::SkipCComment() {
+  ++CurPtr;  // skip the star.
+  unsigned Depth = 1;
+
+  for (;;) {
+    const int Ch = getNextChar();
+
+    if (Ch == EOF) {
+      TheError = "Unterminated comment!";
+      return true;
+    }
+
+    if (Ch == '*' && CurPtr[0] == '/') {
+      // "*/" closes one nesting level.
+      ++CurPtr;
+      if (--Depth == 0)
+        return false;
+    } else if (Ch == '/' && CurPtr[0] == '*') {
+      // "/*" opens a nested comment.
+      ++CurPtr;
+      ++Depth;
+    }
+  }
+}
+
+/// LexNumber - The first character ([-+0-9]) has already been consumed.  Lex:
+///    [-+]?[0-9]+
+///    0x[0-9a-fA-F]+
+///    0b[01]+
+/// and return INTVAL with the value in Filelval.IntVal.  A sign that is not
+/// followed by a digit is returned as the sign character itself.  "0x" or "0b"
+/// with no digits after it is an error.
+int TGLexer::LexNumber() {
+  const char *NumStart = CurPtr-1;
+
+  // Note: <cctype> predicates are undefined for negative arguments other than
+  // EOF, so plain char values are converted to unsigned char before each call.
+  if (CurPtr[-1] == '0') {
+    if (CurPtr[0] == 'x') {
+      ++CurPtr;
+      NumStart = CurPtr;
+      while (isxdigit((unsigned char)CurPtr[0]))
+        ++CurPtr;
+
+      if (CurPtr == NumStart) {
+        TheError = "Invalid hexadecimal number";
+        return YYERROR;
+      }
+      Filelval.IntVal = strtoll(NumStart, 0, 16);
+      return INTVAL;
+    } else if (CurPtr[0] == 'b') {
+      ++CurPtr;
+      NumStart = CurPtr;
+      while (CurPtr[0] == '0' || CurPtr[0] == '1')
+        ++CurPtr;
+
+      if (CurPtr == NumStart) {
+        TheError = "Invalid binary number";
+        return YYERROR;
+      }
+      Filelval.IntVal = strtoll(NumStart, 0, 2);
+      return INTVAL;
+    }
+  }
+
+  // Check for a sign without a digit.
+  if (CurPtr[-1] == '-' || CurPtr[-1] == '+') {
+    if (!isdigit((unsigned char)CurPtr[0]))
+      return CurPtr[-1];
+  }
+
+  while (isdigit((unsigned char)CurPtr[0]))
+    ++CurPtr;
+  Filelval.IntVal = strtoll(NumStart, 0, 10);
+  return INTVAL;
+}
+
+/// LexBracket - We just read '['.  If this is a code block, return it,
+/// otherwise return the bracket.  Match: '[' and '[{ ( [^}]+ | }[^]] )* }]'
+/// For a code block, CODEFRAGMENT is returned with the text between "[{" and
+/// "}]" in Filelval.StrVal; running out of input first is an error.
+int TGLexer::LexBracket() {
+  if (CurPtr[0] != '{')
+    return '[';
+
+  const char *const BodyBegin = ++CurPtr;
+
+  for (int Ch = getNextChar(); Ch != EOF; Ch = getNextChar()) {
+    // Only a '}' can start the terminator.
+    if (Ch != '}')
+      continue;
+
+    // Look at the character after the '}'.
+    Ch = getNextChar();
+    if (Ch == EOF)
+      break;
+
+    if (Ch == ']') {
+      // CurPtr is just past "}]"; the body ends two characters earlier.
+      Filelval.StrVal = new std::string(BodyBegin, CurPtr-2);
+      return CODEFRAGMENT;
+    }
+  }
+
+  TheError = "Invalid Code Block";
+  return YYERROR;
+}
+
+/// LexExclaim - We just read '!'.  Lex '!' and '![a-zA-Z]+'.  A bare '!' is
+/// returned as itself; a recognized operator name yields its token; any other
+/// name is an error.
+int TGLexer::LexExclaim() {
+  // <cctype> predicates are undefined for negative arguments other than EOF,
+  // so convert the plain char to unsigned char before classifying it.
+  if (!isalpha((unsigned char)*CurPtr))
+    return '!';
+
+  const char *Start = CurPtr++;
+  while (isalpha((unsigned char)*CurPtr))
+    ++CurPtr;
+
+  // Check to see which operator this is.
+  unsigned Len = CurPtr-Start;
+
+  if (Len == 3 && !memcmp(Start, "con", 3)) return CONCATTOK;
+  if (Len == 3 && !memcmp(Start, "sra", 3)) return SRATOK;
+  if (Len == 3 && !memcmp(Start, "srl", 3)) return SRLTOK;
+  if (Len == 3 && !memcmp(Start, "shl", 3)) return SHLTOK;
+  if (Len == 9 && !memcmp(Start, "strconcat", 9)) return STRCONCATTOK;
+
+  TheError = "Unknown operator";
+  return YYERROR;
+}
+
+//===----------------------------------------------------------------------===//
+//  Interfaces used by the Bison parser.
+//===----------------------------------------------------------------------===//
+
+int Fileparse();           // Parser entry point called by ParseFile; not defined in this file.
+static TGLexer *TheLexer;  // Lexer for the parse in progress; set and cleared by ParseFile.
+
+namespace llvm {
+  
+std::ostream &err() {
+  return TheLexer->err();
+}
+
+/// ParseFile - Load the TableGen input named by Filename (through
+/// MemoryBuffer::getFileOrSTDIN), install a lexer over it that searches
+/// IncludeDirs for include files, and run the parser.  Prints a message and
+/// exits with status 1 if the input cannot be opened.
+///
+void ParseFile(const std::string &Filename, 
+               const std::vector<std::string> &IncludeDirs) {
+  std::string OpenError;
+  MemoryBuffer *Input =
+    MemoryBuffer::getFileOrSTDIN(&Filename[0], Filename.size(), &OpenError);
+  if (!Input) {
+    cerr << "Could not open input file '" + Filename + "': " << OpenError <<"\n";
+    exit(1);
+  }
+
+  assert(!TheLexer && "Lexer isn't reentrant yet!");
+
+  // The lexer takes ownership of Input.  Hand it the include search path
+  // before publishing it for Filelex/err to use.
+  TGLexer *Lexer = new TGLexer(Input);
+  Lexer->setIncludeDirs(IncludeDirs);
+  TheLexer = Lexer;
+
+  Fileparse();
+
+  // Cleanup
+  TheLexer = 0;
+  delete Lexer;
+}
+} // End llvm namespace
+
+
+/// Filelex - Token source for the parser: return the next token from the
+/// active lexer.  A lexing error is reported through err() and terminates the
+/// program with exit status 1.
+int Filelex() {
+  assert(TheLexer && "No lexer setup yet!");
+
+  const int Token = TheLexer->LexToken();
+  if (Token != YYERROR)
+    return Token;
+
+  err() << TheLexer->getError() << "\n";
+  exit(1);
+}
--- a/utils/TableGen/TGLexer.h
+++ b/utils/TableGen/TGLexer.h
@ -0,0 +1,74 @@
+//===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class represents the Lexer for tablegen files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TGLEXER_H
+#define TGLEXER_H
+
+#include <vector>
+#include <string>
+#include <iosfwd>
+
+namespace llvm {
+class MemoryBuffer;
+
+/// TGLexer - Hand-written lexer for TableGen files.  It reads from an
+/// in-memory buffer, follows include directives by switching buffers, and
+/// hands out tokens one at a time through LexToken.
+class TGLexer {
+  const char *CurPtr;     // Next character to read in CurBuf.
+  unsigned CurLineNo;     // Line number being lexed in CurBuf.
+  MemoryBuffer *CurBuf;   // Buffer being lexed; deleted by this class.
+
+  /// IncludeRec / IncludeStack - This captures the current set of include
+  /// directives we are nested within.  Each record is an includer suspended
+  /// at CurPtr/LineNo; the buffers are deleted by this class.
+  struct IncludeRec {
+    MemoryBuffer *Buffer;
+    const char *CurPtr;
+    unsigned LineNo;
+    IncludeRec(MemoryBuffer *buffer, const char *curPtr, unsigned lineNo)
+      : Buffer(buffer), CurPtr(curPtr), LineNo(lineNo) {}
+  };
+  std::vector<IncludeRec> IncludeStack;
+
+  // IncludeDirectories - This is the list of directories we should search for
+  // include files in.
+  std::vector<std::string> IncludeDirectories;
+
+  // TheError - Message describing the most recent lexing error.
+  std::string TheError;
+
+  // The destructor deletes the raw buffer pointers held above, so copying a
+  // lexer would free them twice.  Declared private and left undefined to
+  // forbid it.
+  TGLexer(const TGLexer &);             // DO NOT IMPLEMENT
+  TGLexer &operator=(const TGLexer &);  // DO NOT IMPLEMENT
+public:
+  /// Start lexing StartBuf; the lexer deletes the buffer when done with it.
+  explicit TGLexer(MemoryBuffer *StartBuf);
+  ~TGLexer();
+
+  /// setIncludeDirs - Set the directories searched for include files.
+  void setIncludeDirs(const std::vector<std::string> &Dirs) {
+    IncludeDirectories = Dirs;
+  }
+
+  /// LexToken - Return the next token, or an error/end-of-input code.
+  int LexToken();
+
+  /// getError - The message for the most recent lexing error.  The reference
+  /// stays valid until the lexer records another error or is destroyed.
+  const std::string &getError() const { return TheError; }
+
+  /// err - Print the include stack and current location to the error stream
+  /// and return that stream.
+  std::ostream &err();
+  void PrintIncludeStack(std::ostream &OS);
+private:
+  int getNextChar();
+  void SkipBCPLComment();
+  bool SkipCComment();
+  int LexIdentifier();
+  bool LexInclude();
+  int LexString();
+  int LexVarName();
+  int LexNumber();
+  int LexBracket();
+  int LexExclaim();
+};
+  
+} // end namespace llvm
+
+#endif