Microsoft-3D-Movie-Maker/kauai/SRC/LEX.CPP
2022-05-03 16:31:19 -07:00

838 lines
19 KiB
C++

/* Copyright (c) Microsoft Corporation.
Licensed under the MIT License. */
/***************************************************************************
Author: ShonK
Project: Kauai
Reviewed:
Copyright (c) Microsoft Corporation
Basic lexer class
***************************************************************************/
#include <stdio.h>
#include "util.h"
ASSERTNAME
RTCLASS(LEXB)
// #line handling
// Text that introduces a #line directive; _FSkipWhiteSpace compares the
// start of a line against it.
achar _szPoundLine[] = PszLit("#line");
// Number of characters in _szPoundLine. The - 1 presumably drops the
// terminating zero character counted by CvFromRgv - TODO confirm.
#define kcchPoundLine (CvFromRgv(_szPoundLine) - 1)
// Character classification table: one group of fct flags for each of the
// 128 seven-bit character values. As used by the lexer code in this file:
//   fctSpc          - white space (tab, line-feed, return, Ctrl-Z, space)
//   fctQuo          - a quote character (single or double)
//   fctOpr          - an operator or punctuation character
//   fctOp1          - may be the first character of a two character operator
//   fctOp2          - may be the second character of a two character operator
//   fctDec, fctHex  - decimal digit, hexadecimal digit
//   fctUpp, fctLow  - upper case letter, lower case letter; the underscore
//                     carries both flags
// kgrfctDigit is used for 0-7; presumably it also includes fctOct, which
// _FReadControlCh tests for octal escapes - TODO confirm against the header.
ushort LEXB::_mpchgrfct[128] =
{
// 0x00 - 0x07
fctNil, fctNil, fctNil, fctNil, fctNil, fctNil, fctNil, fctNil,
// 0x08; 0x09=tab; 0x0A=line-feed; 0x0B; 0x0C; 0x0D=return; 0x0E; 0x0F
fctNil, fctSpc, fctSpc, fctNil, fctNil, fctSpc, fctNil, fctNil,
// 0x10 - 0x17
fctNil, fctNil, fctNil, fctNil, fctNil, fctNil, fctNil, fctNil,
// 0x18; 0x19; 0x1A=Ctrl-Z; 0x1B - 0x1F
fctNil, fctNil, fctSpc, fctNil, fctNil, fctNil, fctNil, fctNil,
// space ! " #
fctSpc, fctOpr|fctOp1, fctQuo, fctOpr,
// $ % & '
fctOpr, fctOpr|fctOp1, fctOpr|fctOp1|fctOp2, fctQuo,
// ( ) * +
fctOpr, fctOpr, fctOpr|fctOp1, fctOpr|fctOp1|fctOp2,
// , - . /
fctOpr, fctOpr|fctOp1|fctOp2, fctOpr, fctOpr|fctOp1,
// 0 1 2 3
kgrfctDigit, kgrfctDigit, kgrfctDigit, kgrfctDigit,
// 4 5 6 7
kgrfctDigit, kgrfctDigit, kgrfctDigit, kgrfctDigit,
// 8 9 : ;
fctDec|fctHex, fctDec|fctHex, fctOpr|fctOp1|fctOp2, fctOpr,
// < = > ?
fctOpr|fctOp1|fctOp2, fctOpr|fctOp1|fctOp2, fctOpr|fctOp1|fctOp2, fctOpr,
// @ A B C
fctOpr, fctUpp|fctHex, fctUpp|fctHex, fctUpp|fctHex,
// D E F G
fctUpp|fctHex, fctUpp|fctHex, fctUpp|fctHex, fctUpp,
// H I J K L M N O
fctUpp, fctUpp, fctUpp, fctUpp, fctUpp, fctUpp, fctUpp, fctUpp,
// P Q R S T U V W
fctUpp, fctUpp, fctUpp, fctUpp, fctUpp, fctUpp, fctUpp, fctUpp,
// X Y Z [ \ ] ^ _
fctUpp, fctUpp, fctUpp, fctOpr, fctOpr, fctOpr, fctOpr|fctOp1|fctOp2, fctUpp|fctLow,
// ` a b c
fctOpr, fctLow|fctHex, fctLow|fctHex, fctLow|fctHex,
// d e f g
fctLow|fctHex, fctLow|fctHex, fctLow|fctHex, fctLow,
// h i j k l m n o
fctLow, fctLow, fctLow, fctLow, fctLow, fctLow, fctLow, fctLow,
// p q r s t u v w
fctLow, fctLow, fctLow, fctLow, fctLow, fctLow, fctLow, fctLow,
// x y z {
fctLow, fctLow, fctLow, fctOpr,
// | } ~ 0x7F=del
fctOpr|fctOp1|fctOp2, fctOpr, fctOpr|fctOp1, fctNil,
};
// token values for single characters
// The table is indexed by (character - kchMinTok) and has one entry for
// each character from '!' through '~'. ttNil marks a character that is
// not a single character token.
#define kchMinTok ChLit('!')
short _rgtt[] =
{
// ! " # $ % & '
ttLNot, ttNil, ttPound, ttDollar, ttMod, ttBAnd, ttNil,
// ( ) * + , - . /
ttOpenParen, ttCloseParen, ttMul, ttAdd, ttComma, ttSub, ttDot, ttDiv,
// 0-7
ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil,
// 8 9 : ; < = > ?
ttNil, ttNil, ttColon, ttSemi, ttLt, ttAssign, ttGt, ttQuery,
// @ A-G
ttAt, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil,
// H-O
ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil,
// P-W
ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil,
// X Y Z [ \ ] ^ _
ttNil, ttNil, ttNil, ttOpenRef, ttBackSlash, ttCloseRef, ttBXor, ttNil,
// ` a-g
ttAccent, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil,
// h-o
ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil,
// p-w
ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil,
// x y z { | } ~
ttNil, ttNil, ttNil, ttOpenBrace, ttBOr, ttCloseBrace, ttBNot
};
long _TtFromCh(achar ch);


/***************************************************************************
    Look up the token type for a one character operator.

    ch must lie within the range of characters covered by _rgtt (this is
    asserted). The result is the _rgtt entry for ch, which is ttNil when
    ch is not a single character token.
***************************************************************************/
long _TtFromCh(achar ch)
{
    long itt;

    AssertIn(ch, kchMinTok, kchMinTok + size(_rgtt) / size(_rgtt[0]));

    // the table starts at kchMinTok, so bias the character down to an index
    itt = (byte)ch - kchMinTok;
    return _rgtt[itt];
}
// Token values for a character that is immediately repeated (looked up by
// _TtFromChCh when both characters are equal). The table is indexed by
// (character - kchMinDouble) and has one entry for each character from
// kchMinDouble through kchLastDouble. ttNil means the doubled character is
// not a token.
#define kchMinDouble ChLit('&')
#define kchLastDouble ChLit('|')
short _rgttDouble[] =
{
// & '
ttLAnd, ttNil,
// ( ) * + , - . /
ttNil, ttNil, ttNil, ttInc, ttNil, ttDec, ttNil, ttNil,
// 0-7
ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil,
// 8 9 : ; < = > ?
ttNil, ttNil, ttScope, ttNil, ttShl, ttEq, ttShr, ttNil,
// @ A-G
ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil,
// H-O
ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil,
// P-W
ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil,
// X Y Z [ \ ] ^ _
ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttLXor, ttNil,
// ` a-g
ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil,
// h-o
ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil,
// p-w
ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil,
// x y z { |
ttNil, ttNil, ttNil, ttNil, ttLOr
};
// Token values for a character followed by an equal sign (looked up by
// _TtFromChCh when the second character is '='). The table is indexed by
// (character - kchMinEqual) and has one entry for each character from
// kchMinEqual through kchLastEqual. ttNil means the pair is not a token.
#define kchMinEqual ChLit('!')
#define kchLastEqual ChLit('|')
short _rgttEqual[] =
{
// ! " # $ % & '
ttNe, ttNil, ttNil, ttNil, ttAMod, ttABAnd, ttNil,
// ( ) * + , - . /
ttNil, ttNil, ttAMul, ttAAdd, ttNil, ttASub, ttNil, ttADiv,
// 0-7
ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil,
// 8 9 : ; < = > ?
ttNil, ttNil, ttNil, ttNil, ttLe, ttEq, ttGe, ttNil,
// @ A-G
ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil,
// H-O
ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil,
// P-W
ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil,
// X Y Z [ \ ] ^ _
ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttABXor, ttNil,
// ` a-g
ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil,
// h-o
ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil,
// p-w
ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil, ttNil,
// x y z { |
ttNil, ttNil, ttNil, ttNil, ttABOr
};
long _TtFromChCh(achar ch1, achar ch2);


/***************************************************************************
    Look up the token type for a two character token made of ch1 followed
    by ch2. Three forms are recognized, checked in this order:
        - a doubled character, via _rgttDouble;
        - a character followed by '=', via _rgttEqual;
        - the arrow, '-' followed by '>'.
    Returns ttNil if the pair is not a token.
***************************************************************************/
long _TtFromChCh(achar ch1, achar ch2)
{
    // doubled character
    if (ch1 == ch2)
    {
        if (!FIn(ch1, kchMinDouble, kchLastDouble + 1))
            return ttNil;
        return _rgttDouble[(byte)ch1 - kchMinDouble];
    }

    // character followed by an equal sign
    if (ChLit('=') == ch2)
    {
        if (!FIn(ch1, kchMinEqual, kchLastEqual + 1))
            return ttNil;
        return _rgttEqual[ch1 - kchMinEqual];
    }

    // the only other two character token is the arrow
    if (ChLit('-') == ch1 && ChLit('>') == ch2)
        return ttArrow;

    return ttNil;
}
/***************************************************************************
    Construct a lexer that reads its text from a file. AddRef is called on
    pfil (the destructor releases it) and the file's path becomes the
    current file name. fUnionStrings is stored in _fUnionStrings, which
    FGetTok consults when a string token ends.
***************************************************************************/
LEXB::LEXB(PFIL pfil, bool fUnionStrings)
{
    AssertPo(pfil, 0);

    // the text source is the file, not a BSF
    _pbsf = pvNil;
    _pfil = pfil;
    _pfil->AddRef();
    _pfil->GetStnPath(&_stnFile);

    // nothing has been read yet
    _fpMac = _pfil->FpMac();
    _fpCur = 0;
    _ichCur = 0;
    _ichLim = 0;

    // we start at the beginning of line 1
    _lwLine = 1;
    _ichLine = 0;
    _fLineStart = fTrue;

    _fUnionStrings = fUnionStrings;
    _fSkipToNextLine = fFalse;

    AssertThis(0);
}
/***************************************************************************
    Construct a lexer that reads its text from a BSF. AddRef is called on
    pbsf (the destructor releases it). *pstnFile is copied and used as the
    current file name. fUnionStrings is stored in _fUnionStrings, which
    FGetTok consults when a string token ends.
***************************************************************************/
LEXB::LEXB(PBSF pbsf, PSTN pstnFile, bool fUnionStrings)
{
    AssertPo(pbsf, 0);
    AssertPo(pstnFile, 0);

    // the text source is the BSF, not a file
    _pfil = pvNil;
    _pbsf = pbsf;
    _pbsf->AddRef();
    _stnFile = *pstnFile;

    // nothing has been read yet
    _fpMac = _pbsf->IbMac();
    _fpCur = 0;
    _ichCur = 0;
    _ichLim = 0;

    // we start at the beginning of line 1
    _lwLine = 1;
    _ichLine = 0;
    _fLineStart = fTrue;

    _fUnionStrings = fUnionStrings;
    _fSkipToNextLine = fFalse;

    AssertThis(0);
}
/***************************************************************************
    Destroy the lexer, releasing the text source. The constructors set
    exactly one of _pfil and _pbsf; both are passed to ReleasePpo here.
***************************************************************************/
LEXB::~LEXB(void)
{
    ReleasePpo(&_pbsf);
    ReleasePpo(&_pfil);
}
#ifdef DEBUG
/***************************************************************************
    Assert the validity of a LEXB. grf is not used; the base class and the
    member objects are always checked with 0.

    The checks are:
        - exactly one of _pfil and _pbsf is non-nil;
        - the line number and the column are non-negative;
        - 0 <= _fpCur <= _fpMac;
        - 0 <= _ichCur <= _ichLim <= number of characters in _rgch.
***************************************************************************/
void LEXB::AssertValid(ulong grf)
{
    LEXB_PAR::AssertValid(0);

    // the text source
    AssertNilOrPo(_pfil, 0);
    AssertNilOrPo(_pbsf, 0);
    Assert((_pfil == pvNil) != (_pbsf == pvNil),
        "exactly one of _pfil, _pbsf should be non-nil");
    AssertPo(&_stnFile, 0);

    // the position for error reporting
    AssertIn(_lwLine, 0, kcbMax);
    AssertIn(_ichLine, 0, kcbMax);

    // the position in the source; check each limit before using it as a
    // bound for something else
    AssertIn(_fpMac, 0, kcbMax);
    AssertIn(_fpCur, 0, _fpMac + 1);

    // the buffer indices count characters, so they are bounded by the
    // number of elements of _rgch, not by its size in bytes (the two
    // differ when achar is wider than one byte)
    AssertIn(_ichLim, 0, CvFromRgv(_rgch) + 1);
    AssertIn(_ichCur, 0, _ichLim + 1);
}
/***************************************************************************
    Mark the memory used by the LEXB: the base class marks its own, then
    the file and the BSF are passed to MarkMemObj.
***************************************************************************/
void LEXB::MarkMem(void)
{
    AssertValid(0);

    LEXB_PAR::MarkMem();
    MarkMemObj(_pbsf);
    MarkMemObj(_pfil);
}
#endif //DEBUG
/***************************************************************************
    Copy the name of the file that tokens are currently being read from
    into *pstn. This is the name given at construction time unless a
    #line directive with a file name has replaced it since.
***************************************************************************/
void LEXB::GetStnFile(PSTN pstn)
{
    AssertThis(0);
    AssertPo(pstn, 0);

    *pstn = _stnFile;
}
/***************************************************************************
    Peek at the next cch characters of the text, copying them to prgch.
    The current position is not moved; the caller advances separately.

    If the internal buffer does not hold cch unread characters, the unread
    ones are moved to the front of the buffer and the rest of the buffer is
    filled from the file or BSF.

    Returns fFalse if fewer than cch characters remain, or if reading the
    file fails (in which case the logical end of the text is truncated to
    what has been read so far).

    NOTE(review): the assert requires cch to be at least 1 and strictly
    less than kcchLexbBuf, although the code itself appears to cope with
    cch equal to kcchLexbBuf.
***************************************************************************/
bool LEXB::_FFetchRgch(achar *prgch, long cch)
{
    AssertThis(0);
    AssertIn(cch, 1, kcchLexbBuf);
    AssertPvCb(prgch, cch * size(achar));

    if (_ichCur + cch > _ichLim)
    {
        // the buffer is short of characters, so refill it
        long cchKeep;
        long cchRead;

        if (_fpCur + (_ichCur + cch - _ichLim) * size(achar) > _fpMac)
        {
            // the source doesn't have that many characters left
            return fFalse;
        }

        // slide the unread characters down to the start of the buffer
        cchKeep = 0;
        if (_ichCur < _ichLim)
        {
            cchKeep = _ichLim - _ichCur;
            BltPb(_rgch + _ichCur, _rgch, cchKeep * size(achar));
        }
        _ichLim = cchKeep;
        _ichCur = 0;

        // fill as much of the remaining buffer space as the source allows
        cchRead = LwMin((_fpMac - _fpCur) / size(achar), kcchLexbBuf - _ichLim);
        AssertIn(cchRead, cch - _ichLim, kcchLexbBuf + 1);

        if (pvNil == _pfil)
        {
            AssertPo(_pbsf, 0);
            _pbsf->FetchRgb(_fpCur, cchRead * size(achar), _rgch + _ichLim);
        }
        else
        {
            AssertPo(_pfil, 0);
            if (!_pfil->FReadRgb(_rgch + _ichLim, cchRead * size(achar), _fpCur))
            {
                Warn("Error reading file, truncating logical file");
                _fpMac = _fpCur;
                return fFalse;
            }
        }

        _fpCur += cchRead * size(achar);
        _ichLim += cchRead;
        AssertIn(_ichLim, _ichCur + cch, kcchLexbBuf + 1);
    }

    // hand back the characters
    CopyPb(_rgch + _ichCur, prgch, cch * size(achar));

    AssertThis(0);
    return fTrue;
}
/***************************************************************************
Skip any white space at the current location in the buffer. This
handles #line directives and comments. Comments are not allowed on
the same line as a #line directive.

Line comments and block comments are skipped. A return character bumps
_lwLine and marks the start of a new line. A well formed #line directive
at the start of a line sets _lwLine and, if a quoted file name is given,
_stnFile.

Returns fFalse if a malformed #line directive is found (the line number
is restored first) or if the end of the text is reached inside a block
comment. Returns fTrue otherwise, including when the end of the text is
reached.
***************************************************************************/
bool LEXB::_FSkipWhiteSpace(void)
{
AssertThis(0);
achar ch;
bool fStar, fSkipComment, fSlash;
long lwLineSav;
achar rgch[kcchPoundLine + 1];
STN stn;
// fSkipComment is true while we are inside a block comment. fStar is only
// read while fSkipComment is true, and is set whenever fSkipComment is.
fSkipComment = fFalse;
// each pass peeks at the next character; the loop ends at the end of the
// text or at the first character that starts a token
while (_FFetchRgch(&ch))
{
// consume the character if it is white space, if we are discarding the
// rest of a line (line comment or error recovery), or if we are inside a
// block comment
if ((_GrfctCh(ch) & fctSpc) || _fSkipToNextLine || fSkipComment)
{
_Advance();
if (kchReturn == ch)
{
// a return starts a new line and ends any skip-to-end-of-line
_lwLine++;
_ichLine = 0;
_fLineStart = fTrue;
_fSkipToNextLine = fFalse;
}
// a line-feed directly after a return is not counted as a column;
// presumably _Advance bumped _ichLine to 1 - TODO confirm
else if (ChLit('\xA') == ch && 1 == _ichLine)
_ichLine = 0;
if (fSkipComment)
{
// the block comment ends on a slash that directly follows a star
if (fStar && ch == ChLit('/'))
fSkipComment = fFalse;
fStar = (ch == ChLit('*'));
}
continue;
}
//not a white space character
//check for a comment
if (ChLit('/') == ch && _FFetchRgch(rgch, 2))
{
switch (rgch[1])
{
case ChLit('/'):
//line comment - skip characters until we hit a return
_Advance(2);
_fSkipToNextLine = fTrue;
continue;
case ChLit('*'):
//normal comment
_Advance(2);
fSkipComment = fTrue;
// the star that opened the comment can't also close it
fStar = fFalse;
continue;
}
}
//if this is at the beginning of a line, check for a #line directive
// (the directive text must be followed by a white space character);
// anything else is the start of a token, so we're done
if (!_fLineStart || (ch != _szPoundLine[0]) ||
!_FFetchRgch(rgch, kcchPoundLine + 1) ||
!FEqualRgb(rgch, _szPoundLine, kcchPoundLine) ||
!(_GrfctCh(rgch[kcchPoundLine]) & fctSpc))
{
_fLineStart = fFalse;
break;
}
//a #line directive - skip it and white space
_Advance(kcchPoundLine);
// the return is left in place so the main loop handles the line ending
while (_FFetchRgch(&ch) && (_GrfctCh(ch) & fctSpc) && ch != kchReturn)
_Advance();
//read the line number
// remember the old line number so a bad directive can be undone
lwLineSav = _lwLine;
if (!_FFetchRgch(&ch) || !(_GrfctCh(ch) & fctDec))
goto LBadDirective;
_Advance();
_ReadNumber(&_lwLine, ch, 10, klwMax);
// the return that ends the directive's line will increment _lwLine, so
// back off by one here
_lwLine--;
//skip white space (and make sure there is some)
if (!_FFetchRgch(&ch))
break; //eof
if (!(_GrfctCh(ch) & fctSpc))
goto LBadDirective;
while (_FFetchRgch(&ch) && (_GrfctCh(ch) & fctSpc) && ch != kchReturn)
_Advance();
if (!_FFetchRgch(&ch))
break; //eof
if (ch == kchReturn)
continue; //end of #line
// (no file name was given, so _stnFile is left unchanged)
// read file name
if (ch != ChLit('"'))
goto LBadDirective;
_Advance();
stn.SetNil();
// fSlash is true when the previous character was an unpaired backslash
for (fSlash = fFalse; ; )
{
// the name must be closed by a quote on the same line
if (!_FFetchRgch(&ch) || ch == kchReturn)
goto LBadDirective;
_Advance();
if (ch == ChLit('"'))
break;
if (ch == ChLit('\\'))
{
// if this is the second of a pair of slashes, skip it
fSlash = !fSlash;
if (!fSlash)
continue;
}
else
fSlash = fFalse;
// NOTE(review): the return value of FAppendCh is ignored
stn.FAppendCh(ch);
}
//skip white space to end of line
if (!_FFetchRgch(&ch))
goto LSetFileName; //eof
if (!(_GrfctCh(ch) & fctSpc))
goto LBadDirective;
while (_FFetchRgch(&ch) && (_GrfctCh(ch) & fctSpc) && ch != kchReturn)
_Advance();
if (!_FFetchRgch(&ch))
goto LSetFileName; //eof
// the two labels below are jumped to from the code above; anything other
// than a return after the file name falls into the error case
if (ch != kchReturn)
{
LBadDirective:
//Bad #line directive - restore the line number
_lwLine = lwLineSav;
return fFalse;
}
else
{
LSetFileName:
// the directive was good - adopt the new file name
_stnFile = stn;
}
}
//if fSkipComment is true, we hit the eof in a comment
return !fSkipComment;
}
/***************************************************************************
Get the next token from the file.

Leading white space, comments and #line directives are skipped first.
The token type is put in ptok->tt. For numbers and character constants
(ttLong) the value is put in ptok->lw; for strings (ttString) and names
(ttName) the text is put in ptok->stn.

Returns fFalse, with ptok->tt set to ttNil, only when the end of the
text is reached. Otherwise returns fTrue; malformed input produces a
token of type ttError with an empty ptok->stn.
***************************************************************************/
bool LEXB::FGetTok(PTOK ptok)
{
AssertThis(0);
AssertVarMem(ptok);
achar ch, ch2;
ulong grfct;
long cch;
ptok->stn.SetNil();
if (!_FSkipWhiteSpace())
{
// bad #line directive or unterminated comment: discard the rest of the
// line when we're next called
_fSkipToNextLine = fTrue;
goto LError;
}
if (!_FFetchRgch(&ch))
{
// end of the text
ptok->tt = ttNil;
return fFalse;
}
// consume the first character of the token and classify it
_Advance();
grfct = _GrfctCh(ch);
if (grfct & fctDec)
{
//numeric value
ptok->tt = ttLong;
if (ch == ChLit('0'))
{
//hex or octal
if (!_FFetchRgch(&ch))
{
// a lone zero at the very end of the text
ptok->lw = 0;
return fTrue;
}
if (ch == ChLit('x') || ch == ChLit('X'))
{
//hex
_Advance();
// at least one hex digit must follow
if (!_FReadHex(&ptok->lw))
goto LError;
}
else
{
//octal
// the leading zero is passed as the first digit
_ReadNumTok(ptok, ChLit('0'), 8, klwMax);
}
}
else
{
//decimal
_ReadNumTok(ptok, ch, 10, klwMax);
}
//check for bad termination
// a number may not be directly followed by a digit, a letter, an
// underscore or a quote
if (_FFetchRgch(&ch) &&
(_GrfctCh(ch) & (fctDec | fctUpp | fctLow | fctQuo)))
{
goto LError;
}
return fTrue;
}
if (grfct & fctQuo)
{
//single or double quote
if (ch == ChLit('"'))
{
//string
ptok->tt = ttString;
// accumulate characters until the closing quote; every exit from this
// loop is a return or a goto
for (;;)
{
if (!_FFetchRgch(&ch))
goto LError;
_Advance();
switch (ch)
{
case kchReturn:
// a string can't span lines without a backslash before the return
goto LError;
case ChLit('"'):
//check for another string immediately following this one
if (!_fUnionStrings)
return fTrue;
if (!_FSkipWhiteSpace())
{
_fSkipToNextLine = fTrue;
goto LError;
}
// if the next token isn't a string, this one is complete
if (!_FFetchRgch(&ch) || ch != ChLit('"'))
return fTrue;
// consume the opening quote and keep appending to the same string
_Advance();
break;
case ChLit('\\'):
//control sequence
if (!_FReadControlCh(&ch))
{
_fSkipToNextLine = fTrue;
goto LError;
}
// chNil is returned for a line continuation; nothing is appended for it
if (chNil != ch)
ptok->stn.FAppendCh(ch);
break;
default:
// NOTE(review): the return value of FAppendCh is ignored
ptok->stn.FAppendCh(ch);
break;
}
}
Assert(fFalse, "how'd we get here?");
}
Assert(ch == ChLit('\''), "bad grfct");
// a character constant in single quotes: the characters are packed into
// a long, one byte per character, first character most significant
ptok->tt = ttLong;
ptok->lw = 0;
//ctg type long
// a closing quote is accepted after zero to four characters; a fifth
// character ends the loop and gives an error
for (cch = 0; cch < 5; )
{
if (!_FFetchRgch(&ch))
goto LError;
_Advance();
switch (ch)
{
case kchReturn:
goto LError;
case ChLit('\''):
return fTrue;
case ChLit('\\'):
// replace ch with the character that the escape sequence stands for
if (!_FReadControlCh(&ch))
{
_fSkipToNextLine = fTrue;
goto LError;
}
break;
}
ptok->lw = (ptok->lw << 8) + (byte)ch;
cch++;
}
//constant too long
goto LError;
}
if (grfct & fctOp1)
{
//check for multi character token
// if the pair isn't a token we fall through to the single character case
// without consuming ch2
if (_FFetchRgch(&ch2) && (_GrfctCh(ch2) & fctOp2) &&
ttNil != (ptok->tt = _TtFromChCh(ch, ch2)))
{
_Advance();
//special case <<= and >>=
if ((ptok->tt == ttShr || ptok->tt == ttShl) &&
_FFetchRgch(&ch2) && ch2 == ChLit('='))
{
ptok->tt = (ptok->tt == ttShr) ? ttAShr : ttAShl;
_Advance();
}
return fTrue;
}
}
if (grfct & fctOpr)
{
/* single character token */
ptok->tt = _TtFromCh(ch);
Assert(ttNil != ptok->tt, "bad table entry");
return fTrue;
}
if (grfct & (fctLow | fctUpp))
{
//identifier
// starts with a letter or underscore and continues with letters,
// underscores and decimal digits
ptok->tt = ttName;
ptok->stn.FAppendCh(ch);
while (_FFetchRgch(&ch) &&
(_GrfctCh(ch) & (fctUpp | fctLow | fctDec)))
{
ptok->stn.FAppendCh(ch);
_Advance();
}
return fTrue;
}
// the character doesn't start any kind of token, so fall into the error
// case; this is also the target of every goto LError above
LError:
ptok->tt = ttError;
ptok->stn.SetNil();
return fTrue;
}
/***************************************************************************
    Return the number of bytes of extra data that go with the last token
    returned. This implementation never has extra data, so the answer is
    always zero.
***************************************************************************/
long LEXB::CbExtra(void)
{
    AssertThis(0);

    return 0;
}
/***************************************************************************
    Fetch the extra data that goes with the last token returned. This
    implementation never has extra data (see CbExtra), so calling this is
    reported through Bug and pv is not touched.
***************************************************************************/
void LEXB::GetExtra(void *pv)
{
    AssertThis(0);

    Bug("no extra data");
}
/***************************************************************************
    Read an unsigned number in base lwBase (from 2 through 10) and put its
    value in *plw. The first digit has already been consumed by the caller
    and is passed in ch. Following digits are consumed for as long as they
    are valid digits in the base, up to a total of cchMax digits including
    the first. The character that stops the number is left unread.
***************************************************************************/
void LEXB::_ReadNumber(long *plw, achar ch, long lwBase, long cchMax)
{
    AssertThis(0);
    AssertVarMem(plw);
    AssertIn(ch - ChLit('0'), 0, lwBase);
    AssertIn(lwBase, 2, 11);

    long lwDigit;

    // start with the digit we were handed
    *plw = ch - ChLit('0');

    for (;;)
    {
        // stop once the digit limit is used up
        if (--cchMax <= 0)
            break;

        // stop at the end of the text or at a non-digit
        if (!_FFetchRgch(&ch))
            break;
        if (!(_GrfctCh(ch) & fctDec))
            break;

        // stop at a digit that is too big for the base
        lwDigit = ch - ChLit('0');
        if (lwDigit >= lwBase)
            break;

        *plw = *plw * lwBase + lwDigit;
        _Advance();
    }
}
/***************************************************************************
    Read a hexadecimal number (the 0x prefix has already been consumed)
    and put its value in *plw. All consecutive hex digits are consumed;
    the character that stops the number is left unread.

    Returns fFalse, with *plw set to zero, if the next character is not a
    hex digit or the end of the text has been reached.
***************************************************************************/
bool LEXB::_FReadHex(long *plw)
{
    AssertThis(0);
    AssertVarMem(plw);

    achar ch;
    ulong grfct;
    long lwDigit;

    *plw = 0;

    // there has to be at least one digit
    if (!_FFetchRgch(&ch))
        return fFalse;
    grfct = _GrfctCh(ch);
    if (!(grfct & fctHex))
        return fFalse;

    do
    {
        // work out the value of this digit
        if (grfct & fctDec)
            lwDigit = ch - ChLit('0');
        else if (grfct & fctLow)
            lwDigit = 10 + ch - ChLit('a');
        else
        {
            Assert(grfct & fctUpp, "bad grfct");
            lwDigit = 10 + ch - ChLit('A');
        }

        *plw = *plw * 16 + lwDigit;
        _Advance();
    }
    while (_FFetchRgch(&ch) && ((grfct = _GrfctCh(ch)) & fctHex));

    return fTrue;
}
/***************************************************************************
    Read a control character (eg, \x3F). This code assumes the \ has
    already been read.

    The character after the backslash selects the result put in *pch:
        return      line continuation: the return and any line-feeds
                    after it are consumed and *pch is set to chNil
        t           kchTab
        n           kchReturn
        x or X      the value of the hex digits that follow
        octal digit the value of up to three octal digits
        other       the character itself

    Hex and octal values are truncated to an achar.

    Returns fFalse if the end of the text is reached right after the
    backslash or if no hex digit follows an x.
***************************************************************************/
bool LEXB::_FReadControlCh(achar *pch)
{
    AssertThis(0);
    AssertVarMem(pch);

    //control sequence
    achar ch;
    long lw;

    if (!_FFetchRgch(&ch))
        return fFalse;
    _Advance();

    switch (ch)
    {
    case kchReturn:
        // Line continuation. The return ends a line of text just as it
        // does in _FSkipWhiteSpace, so count it; otherwise every line
        // number after a continued string would be one too small.
        _lwLine++;
        while (_FFetchRgch(&ch) && ch == ChLit('\xA'))
            _Advance();
        // line-feeds following the return don't count as columns
        _ichLine = 0;
        *pch = chNil;
        break;

    case ChLit('t'):
        *pch = kchTab;
        break;

    case ChLit('n'):
        *pch = kchReturn;
        break;

    case ChLit('x'):
    case ChLit('X'):
        if (!_FReadHex(&lw))
            return fFalse;
        *pch = (achar)lw;
        break;

    default:
        if (_GrfctCh(ch) & fctOct)
        {
            // octal value: at most three digits, counting this one
            _ReadNumber(&lw, ch, 8, 3);
            *pch = (achar)lw;
        }
        else
        {
            // any other character stands for itself
            *pch = ch;
        }
        break;
    }

    return fTrue;
}