1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

[llvm-rc] Add basic RC scripts parsing ability.

As for now, the parser supports a limited set of statements and
resources. This will be extended in the following patches.

Thanks to Nico Weber (thakis) for his original work in this area.

Differential Revision: https://reviews.llvm.org/D36340

llvm-svn: 311175
This commit is contained in:
Marek Sokolowski 2017-08-18 17:05:47 +00:00
parent 6940aac2e2
commit 0823f275d4
18 changed files with 693 additions and 1 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,67 @@
; RUN: llvm-rc /V %p/Inputs/parser-correct-everything.rc | FileCheck %s --check-prefix PGOOD
; PGOOD: Icon (meh): "hello.bmp"
; PGOOD-NEXT: Icon (Icon): "Icon"
; PGOOD-NEXT: Language: 5, Sublanguage: 12
; PGOOD-NEXT: StringTable:
; PGOOD-NEXT: Option: Language: 1, Sublanguage: 1
; PGOOD-NEXT: Option: Characteristics: 500
; PGOOD-NEXT: Option: Language: 3, Sublanguage: 4
; PGOOD-NEXT: Option: Version: 14
; PGOOD-NEXT: 1 => "hello"
; PGOOD-NEXT: 2 => "world"
; PGOOD-NEXT: StringTable:
; RUN: not llvm-rc /V %p/Inputs/parser-stringtable-no-string.rc 2> %t2
; RUN: FileCheck %s --check-prefix PSTRINGTABLE1 --input-file %t2
; PSTRINGTABLE1: llvm-rc: Error parsing file: expected string, got }
; RUN: not llvm-rc /V %p/Inputs/parser-stringtable-weird-option.rc 2> %t3
; RUN: FileCheck %s --check-prefix PSTRINGTABLE2 --input-file %t3
; PSTRINGTABLE2: llvm-rc: Error parsing file: expected optional statement type, BEGIN or '{', got NONSENSETYPE
; RUN: not llvm-rc /V %p/Inputs/parser-eof.rc 2> %t4
; RUN: FileCheck %s --check-prefix PEOF --input-file %t4
; PEOF: llvm-rc: Error parsing file: expected integer, got <EOF>
; RUN: not llvm-rc /V %p/Inputs/parser-no-characteristics-arg.rc 2> %t5
; RUN: FileCheck %s --check-prefix PCHARACTERISTICS1 --input-file %t5
; PCHARACTERISTICS1: llvm-rc: Error parsing file: expected integer, got BEGIN
; RUN: not llvm-rc /V %p/Inputs/parser-nonsense-token.rc 2> %t6
; RUN: FileCheck %s --check-prefix PNONSENSE1 --input-file %t6
; PNONSENSE1: llvm-rc: Error parsing file: expected int or identifier, got &
; RUN: not llvm-rc /V %p/Inputs/parser-nonsense-type.rc 2> %t7
; RUN: FileCheck %s --check-prefix PNONSENSE2 --input-file %t7
; PNONSENSE2: llvm-rc: Error parsing file: expected resource type, got WORLD
; RUN: not llvm-rc /V %p/Inputs/parser-nonsense-type-eof.rc 2> %t8
; RUN: FileCheck %s --check-prefix PNONSENSE3 --input-file %t8
; PNONSENSE3: llvm-rc: Error parsing file: expected int or identifier, got <EOF>
; RUN: not llvm-rc /V %p/Inputs/parser-language-no-comma.rc 2> %t9
; RUN: FileCheck %s --check-prefix PLANGUAGE1 --input-file %t9
; PLANGUAGE1: llvm-rc: Error parsing file: expected ',', got 7
; RUN: not llvm-rc /V %p/Inputs/parser-language-too-many-commas.rc 2> %t10
; RUN: FileCheck %s --check-prefix PLANGUAGE2 --input-file %t10
; PLANGUAGE2: llvm-rc: Error parsing file: expected integer, got ,

View File

@ -1,4 +1,6 @@
; RUN: llvm-rc /V %p/Inputs/tokens.rc | FileCheck %s
; RUN: not llvm-rc /V %p/Inputs/tokens.rc | FileCheck %s
; llvm-rc fails now on this sample because it is an invalid resource file
; script. We silence the error message and just analyze the output.
; CHECK: Int: 1; int value = 1
; CHECK-NEXT: Plus: +

View File

@ -10,5 +10,7 @@ add_public_tablegen_target(RcTableGen)
add_llvm_tool(llvm-rc
llvm-rc.cpp
ResourceScriptParser.cpp
ResourceScriptStmt.cpp
ResourceScriptToken.cpp
)

View File

@ -0,0 +1,265 @@
//===-- ResourceScriptParser.cpp --------------------------------*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
//
// This implements the parser defined in ResourceScriptParser.h.
//
//===---------------------------------------------------------------------===//
#include "ResourceScriptParser.h"
// Take an expression returning llvm::Error and forward the error if it exists.
#define RETURN_IF_ERROR(Expr) \
if (auto Err = (Expr)) \
return std::move(Err);
// Take an expression returning llvm::Expected<T> and assign it to Var or
// forward the error out of the function.
#define ASSIGN_OR_RETURN(Var, Expr) \
auto Var = (Expr); \
if (!Var) \
return Var.takeError();
namespace llvm {
namespace rc {
RCParser::ParserError::ParserError(const Twine Expected, const LocIter CurLoc,
const LocIter End)
: ErrorLoc(CurLoc), FileEnd(End) {
CurMessage = "Error parsing file: expected " + Expected.str() + ", got " +
(CurLoc == End ? "<EOF>" : CurLoc->value()).str();
}
char RCParser::ParserError::ID = 0;
RCParser::RCParser(const std::vector<RCToken> &TokenList)
: Tokens(TokenList), CurLoc(Tokens.begin()), End(Tokens.end()) {}
RCParser::RCParser(std::vector<RCToken> &&TokenList)
: Tokens(std::move(TokenList)), CurLoc(Tokens.begin()), End(Tokens.end()) {}
bool RCParser::isEof() const { return CurLoc == End; }
RCParser::ParseType RCParser::parseSingleResource() {
// The first thing we read is usually a resource's name. However, in some
// cases (LANGUAGE and STRINGTABLE) the resources don't have their names
// and the first token to be read is the type.
ASSIGN_OR_RETURN(NameToken, readTypeOrName());
if (NameToken->equalsLower("LANGUAGE"))
return parseLanguageResource();
else if (NameToken->equalsLower("STRINGTABLE"))
return parseStringTableResource();
// If it's not an unnamed resource, what we've just read is a name. Now,
// read resource type;
ASSIGN_OR_RETURN(TypeToken, readTypeOrName());
ParseType Result = std::unique_ptr<RCResource>();
(void)!Result;
if (TypeToken->equalsLower("ICON"))
Result = parseIconResource();
else
return getExpectedError("resource type", /* IsAlreadyRead = */ true);
if (Result)
(*Result)->setName(*NameToken);
return Result;
}
bool RCParser::isNextTokenKind(Kind TokenKind) const {
return !isEof() && look().kind() == TokenKind;
}
const RCToken &RCParser::look() const {
assert(!isEof());
return *CurLoc;
}
const RCToken &RCParser::read() {
assert(!isEof());
return *CurLoc++;
}
void RCParser::consume() {
assert(!isEof());
CurLoc++;
}
Expected<uint32_t> RCParser::readInt() {
if (!isNextTokenKind(Kind::Int))
return getExpectedError("integer");
return read().intValue();
}
Expected<StringRef> RCParser::readString() {
if (!isNextTokenKind(Kind::String))
return getExpectedError("string");
return read().value();
}
Expected<StringRef> RCParser::readIdentifier() {
if (!isNextTokenKind(Kind::Identifier))
return getExpectedError("identifier");
return read().value();
}
Expected<IntOrString> RCParser::readTypeOrName() {
// We suggest that the correct resource name or type should be either an
// identifier or an integer. The original RC tool is much more liberal.
if (!isNextTokenKind(Kind::Identifier) && !isNextTokenKind(Kind::Int))
return getExpectedError("int or identifier");
const RCToken &Tok = read();
if (Tok.kind() == Kind::Int)
return IntOrString(Tok.intValue());
else
return IntOrString(Tok.value());
}
Error RCParser::consumeType(Kind TokenKind) {
if (isNextTokenKind(TokenKind)) {
consume();
return Error::success();
}
switch (TokenKind) {
#define TOKEN(TokenName) \
case Kind::TokenName: \
return getExpectedError(#TokenName);
#define SHORT_TOKEN(TokenName, TokenCh) \
case Kind::TokenName: \
return getExpectedError(#TokenCh);
#include "ResourceScriptTokenList.h"
#undef SHORT_TOKEN
#undef TOKEN
}
}
bool RCParser::consumeOptionalType(Kind TokenKind) {
if (isNextTokenKind(TokenKind)) {
consume();
return true;
}
return false;
}
Expected<SmallVector<uint32_t, 8>>
RCParser::readIntsWithCommas(size_t MinCount, size_t MaxCount) {
assert(MinCount <= MaxCount);
SmallVector<uint32_t, 8> Result;
auto FailureHandler =
[&](llvm::Error Err) -> Expected<SmallVector<uint32_t, 8>> {
if (Result.size() < MinCount)
return std::move(Err);
consumeError(std::move(Err));
return Result;
};
for (size_t i = 0; i < MaxCount; ++i) {
// Try to read a comma unless we read the first token.
// Sometimes RC tool requires them and sometimes not. We decide to
// always require them.
if (i >= 1) {
if (auto CommaError = consumeType(Kind::Comma))
return FailureHandler(std::move(CommaError));
}
if (auto IntResult = readInt())
Result.push_back(*IntResult);
else
return FailureHandler(IntResult.takeError());
}
return Result;
}
// As for now, we ignore the extended set of statements.
Expected<OptionalStmtList> RCParser::parseOptionalStatements(bool IsExtended) {
OptionalStmtList Result;
// The last statement is always followed by the start of the block.
while (!isNextTokenKind(Kind::BlockBegin)) {
ASSIGN_OR_RETURN(SingleParse, parseSingleOptionalStatement(IsExtended));
Result.addStmt(std::move(*SingleParse));
}
return Result;
}
Expected<std::unique_ptr<OptionalStmt>>
RCParser::parseSingleOptionalStatement(bool) {
ASSIGN_OR_RETURN(TypeToken, readIdentifier());
if (TypeToken->equals_lower("CHARACTERISTICS"))
return parseCharacteristicsStmt();
else if (TypeToken->equals_lower("LANGUAGE"))
return parseLanguageStmt();
else if (TypeToken->equals_lower("VERSION"))
return parseVersionStmt();
else
return getExpectedError("optional statement type, BEGIN or '{'",
/* IsAlreadyRead = */ true);
}
RCParser::ParseType RCParser::parseLanguageResource() {
// Read LANGUAGE as an optional statement. If it's read correctly, we can
// upcast it to RCResource.
return parseLanguageStmt();
}
RCParser::ParseType RCParser::parseIconResource() {
ASSIGN_OR_RETURN(Arg, readString());
return make_unique<IconResource>(*Arg);
}
RCParser::ParseType RCParser::parseStringTableResource() {
ASSIGN_OR_RETURN(OptStatements, parseOptionalStatements());
RETURN_IF_ERROR(consumeType(Kind::BlockBegin));
auto Table = make_unique<StringTableResource>(std::move(*OptStatements));
// Read strings until we reach the end of the block.
while (!consumeOptionalType(Kind::BlockEnd)) {
// Each definition consists of string's ID (an integer) and a string.
// Some examples in documentation suggest that there might be a comma in
// between, however we strictly adhere to the single statement definition.
ASSIGN_OR_RETURN(IDResult, readInt());
ASSIGN_OR_RETURN(StrResult, readString());
Table->addString(*IDResult, *StrResult);
}
return Table;
}
RCParser::ParseOptionType RCParser::parseLanguageStmt() {
ASSIGN_OR_RETURN(Args, readIntsWithCommas(/* min = */ 2, /* max = */ 2));
return make_unique<LanguageResource>((*Args)[0], (*Args)[1]);
}
RCParser::ParseOptionType RCParser::parseCharacteristicsStmt() {
ASSIGN_OR_RETURN(Arg, readInt());
return make_unique<CharacteristicsStmt>(*Arg);
}
RCParser::ParseOptionType RCParser::parseVersionStmt() {
ASSIGN_OR_RETURN(Arg, readInt());
return make_unique<VersionStmt>(*Arg);
}
Error RCParser::getExpectedError(const Twine Message, bool IsAlreadyRead) {
return make_error<ParserError>(
Message, IsAlreadyRead ? std::prev(CurLoc) : CurLoc, End);
}
} // namespace rc
} // namespace llvm

View File

@ -0,0 +1,143 @@
//===-- ResourceScriptParser.h ----------------------------------*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
//
// This defines the RC scripts parser. It takes a sequence of RC tokens
// and then provides the method to parse the resources one by one.
//
//===---------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVMRC_RESOURCESCRIPTPARSER_H
#define LLVM_TOOLS_LLVMRC_RESOURCESCRIPTPARSER_H
#include "ResourceScriptStmt.h"
#include "ResourceScriptToken.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/raw_ostream.h"
#include <system_error>
#include <vector>
namespace llvm {
namespace rc {
class RCParser {
public:
using LocIter = std::vector<RCToken>::iterator;
using ParseType = Expected<std::unique_ptr<RCResource>>;
using ParseOptionType = Expected<std::unique_ptr<OptionalStmt>>;
// Class describing a single failure of parser.
class ParserError : public ErrorInfo<ParserError> {
public:
ParserError(Twine Expected, const LocIter CurLoc, const LocIter End);
void log(raw_ostream &OS) const override { OS << CurMessage; }
std::error_code convertToErrorCode() const override {
return std::make_error_code(std::errc::invalid_argument);
}
const std::string &getMessage() const { return CurMessage; }
static char ID; // Keep llvm::Error happy.
private:
std::string CurMessage;
LocIter ErrorLoc, FileEnd;
};
RCParser(const std::vector<RCToken> &TokenList);
RCParser(std::vector<RCToken> &&TokenList);
// Reads and returns a single resource definition, or error message if any
// occurred.
ParseType parseSingleResource();
bool isEof() const;
private:
using Kind = RCToken::Kind;
// Checks if the current parser state points to the token of type TokenKind.
bool isNextTokenKind(Kind TokenKind) const;
// These methods assume that the parser is not in EOF state.
// Take a look at the current token. Do not fetch it.
const RCToken &look() const;
// Read the current token and advance the state by one token.
const RCToken &read();
// Advance the state by one token, discarding the current token.
void consume();
// The following methods try to read a single token, check if it has the
// correct type and then parse it.
Expected<uint32_t> readInt(); // Parse an integer.
Expected<StringRef> readString(); // Parse a string.
Expected<StringRef> readIdentifier(); // Parse an identifier.
Expected<IntOrString> readTypeOrName(); // Parse an integer or an identifier.
// Advance the state by one, discarding the current token.
// If the discarded token had an incorrect type, fail.
Error consumeType(Kind TokenKind);
// Check the current token type. If it's TokenKind, discard it.
// Return true if the parser consumed this token successfully.
bool consumeOptionalType(Kind TokenKind);
// Read at least MinCount, and at most MaxCount integers separated by
// commas. The parser stops reading after fetching MaxCount integers
// or after an error occurs. Whenever the parser reads a comma, it
// expects an integer to follow.
Expected<SmallVector<uint32_t, 8>> readIntsWithCommas(size_t MinCount,
size_t MaxCount);
// Reads a set of optional statements. These can change the behavior of
// a number of resource types (e.g. STRINGTABLE, MENU or DIALOG) if provided
// before the main block with the contents of the resource.
// Usually, resources use a basic set of optional statements:
// CHARACTERISTICS, LANGUAGE, VERSION
// However, DIALOG and DIALOGEX extend this list by the following items:
// CAPTION, CLASS, EXSTYLE, FONT, MENU, STYLE
// UseExtendedStatements flag (off by default) allows the parser to read
// the additional types of statements.
//
// Ref (to the list of all optional statements):
// msdn.microsoft.com/en-us/library/windows/desktop/aa381002(v=vs.85).aspx
Expected<OptionalStmtList>
parseOptionalStatements(bool UseExtendedStatements = false);
// Read a single optional statement.
Expected<std::unique_ptr<OptionalStmt>>
parseSingleOptionalStatement(bool UseExtendedStatements = false);
// Top-level resource parsers.
ParseType parseLanguageResource();
ParseType parseIconResource();
ParseType parseStringTableResource();
// Optional statement parsers.
ParseOptionType parseLanguageStmt();
ParseOptionType parseCharacteristicsStmt();
ParseOptionType parseVersionStmt();
// Raises an error. If IsAlreadyRead = false (default), this complains about
// the token that couldn't be parsed. If the flag is on, this complains about
// the correctly read token that makes no sense (that is, the current parser
// state is beyond the erroneous token.)
Error getExpectedError(const Twine Message, bool IsAlreadyRead = false);
std::vector<RCToken> Tokens;
LocIter CurLoc;
const LocIter End;
};
} // namespace rc
} // namespace llvm
#endif

View File

@ -0,0 +1,60 @@
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
//
// This implements methods defined in ResourceScriptStmt.h.
//
// Ref: msdn.microsoft.com/en-us/library/windows/desktop/aa380599(v=vs.85).aspx
//
//===---------------------------------------------------------------------===//
#include "ResourceScriptStmt.h"
namespace llvm {
namespace rc {
raw_ostream &operator<<(raw_ostream &OS, const IntOrString &Item) {
if (Item.IsInt)
return OS << Item.Data.Int;
else
return OS << Item.Data.String;
}
raw_ostream &OptionalStmtList::log(raw_ostream &OS) const {
for (const auto &Stmt : Statements) {
OS << " Option: ";
Stmt->log(OS);
}
return OS;
}
raw_ostream &LanguageResource::log(raw_ostream &OS) const {
return OS << "Language: " << Lang << ", Sublanguage: " << SubLang << "\n";
}
raw_ostream &IconResource::log(raw_ostream &OS) const {
return OS << "Icon (" << ResName << "): " << IconLoc << "\n";
}
raw_ostream &StringTableResource::log(raw_ostream &OS) const {
OS << "StringTable:\n";
OptStatements.log(OS);
for (const auto &String : Table)
OS << " " << String.first << " => " << String.second << "\n";
return OS;
}
raw_ostream &CharacteristicsStmt::log(raw_ostream &OS) const {
return OS << "Characteristics: " << Value << "\n";
}
raw_ostream &VersionStmt::log(raw_ostream &OS) const {
return OS << "Version: " << Value << "\n";
}
} // namespace rc
} // namespace llvm

View File

@ -0,0 +1,145 @@
//===-- ResourceScriptStmt.h ------------------------------------*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
//
// This lists all the resource and statement types occurring in RC scripts.
//
//===---------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVMRC_RESOURCESCRIPTSTMT_H
#define LLVM_TOOLS_LLVMRC_RESOURCESCRIPTSTMT_H
#include "ResourceScriptToken.h"
namespace llvm {
namespace rc {
// A class holding a name - either an integer or a reference to the string.
class IntOrString {
private:
union Data {
uint32_t Int;
StringRef String;
Data(uint32_t Value) : Int(Value) {}
Data(const StringRef Value) : String(Value) {}
Data(const RCToken &Token);
} Data;
bool IsInt;
public:
IntOrString() : IntOrString(0) {}
IntOrString(uint32_t Value) : Data(Value), IsInt(1) {}
IntOrString(StringRef Value) : Data(Value), IsInt(0) {}
IntOrString(const RCToken &Token)
: Data(Token), IsInt(Token.kind() == RCToken::Kind::Int) {}
bool equalsLower(const char *Str) {
return !IsInt && Data.String.equals_lower(Str);
}
friend raw_ostream &operator<<(raw_ostream &, const IntOrString &);
};
// Base resource. All the resources should derive from this base.
class RCResource {
protected:
IntOrString ResName;
public:
RCResource() = default;
RCResource(RCResource &&) = default;
void setName(const IntOrString &Name) { ResName = Name; }
virtual raw_ostream &log(raw_ostream &OS) const {
return OS << "Base statement\n";
};
virtual ~RCResource() {}
};
// Optional statement base. All such statements should derive from this base.
class OptionalStmt : public RCResource {};
class OptionalStmtList : public OptionalStmt {
std::vector<std::unique_ptr<OptionalStmt>> Statements;
public:
OptionalStmtList() {}
virtual raw_ostream &log(raw_ostream &OS) const;
void addStmt(std::unique_ptr<OptionalStmt> Stmt) {
Statements.push_back(std::move(Stmt));
}
};
// LANGUAGE statement. It can occur both as a top-level statement (in such
// a situation, it changes the default language until the end of the file)
// and as an optional resource statement (then it changes the language
// of a single resource).
//
// Ref: msdn.microsoft.com/en-us/library/windows/desktop/aa381019(v=vs.85).aspx
class LanguageResource : public OptionalStmt {
uint32_t Lang, SubLang;
public:
LanguageResource(uint32_t LangId, uint32_t SubLangId)
: Lang(LangId), SubLang(SubLangId) {}
raw_ostream &log(raw_ostream &) const override;
};
// ICON resource. Represents a single ".ico" file containing a group of icons.
//
// Ref: msdn.microsoft.com/en-us/library/windows/desktop/aa381018(v=vs.85).aspx
class IconResource : public RCResource {
StringRef IconLoc;
public:
IconResource(StringRef Location) : IconLoc(Location) {}
raw_ostream &log(raw_ostream &) const override;
};
// STRINGTABLE resource. Contains a list of strings, each having its unique ID.
//
// Ref: msdn.microsoft.com/en-us/library/windows/desktop/aa381050(v=vs.85).aspx
class StringTableResource : public RCResource {
OptionalStmtList OptStatements;
std::vector<std::pair<uint32_t, StringRef>> Table;
public:
StringTableResource(OptionalStmtList &&OptStmts)
: OptStatements(std::move(OptStmts)) {}
void addString(uint32_t ID, StringRef String) {
Table.emplace_back(ID, String);
}
raw_ostream &log(raw_ostream &) const override;
};
// CHARACTERISTICS optional statement.
//
// Ref: msdn.microsoft.com/en-us/library/windows/desktop/aa380872(v=vs.85).aspx
class CharacteristicsStmt : public OptionalStmt {
uint32_t Value;
public:
CharacteristicsStmt(uint32_t Characteristic) : Value(Characteristic) {}
raw_ostream &log(raw_ostream &) const override;
};
// VERSION optional statement.
//
// Ref: msdn.microsoft.com/en-us/library/windows/desktop/aa381059(v=vs.85).aspx
class VersionStmt : public OptionalStmt {
uint32_t Value;
public:
VersionStmt(uint32_t Version) : Value(Version) {}
raw_ostream &log(raw_ostream &) const override;
};
} // namespace rc
} // namespace llvm
#endif

View File

@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "ResourceScriptToken.h"
#include "ResourceScriptParser.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
@ -133,5 +134,12 @@ int main(int argc_, const char *argv_[]) {
}
}
rc::RCParser Parser{std::move(Tokens)};
while (!Parser.isEof()) {
auto Resource = ExitOnErr(Parser.parseSingleResource());
if (BeVerbose)
Resource->log(outs());
}
return 0;
}