Index: tools/idl_parser/idl_lexer.py
diff --git a/tools/idl_parser/idl_lexer.py b/tools/idl_parser/idl_lexer.py
index 9a07ac6adc14f32760621c3e9c01669584cae80e..aa0da06f5789ac2b9f6d64d6594430384fd6c2c9 100755
--- a/tools/idl_parser/idl_lexer.py
+++ b/tools/idl_parser/idl_lexer.py
@@ -100,31 +100,24 @@ class IDLLexer(object):
     'void' : 'VOID'
   }
 
-  # Add keywords
-  for key in keywords:
-    tokens.append(keywords[key])
-
-  # 'literals' is a value expected by lex which specifies a list of valid
-  # literal tokens, meaning the token type and token value are identical.
-  literals = '"*.(){}[],;:=+-/~|&^?<>'
-
   # Token definitions
   #
   # Lex assumes any value or function in the form of 't_<TYPE>' represents a
   # regular expression where a match will emit a token of type <TYPE>. In the
   # case of a function, the function is called when a match is made. These
   # definitions come from WebIDL.
+  def t_ELLIPSIS(self, t):
+    r'\.\.\.'
+    return t
 
-  # 't_ignore' is a special match of items to ignore
-  t_ignore = ' \t'
+  def t_float(self, t):
+    r'-?(([0-9]+\.[0-9]*|[0-9]*\.[0-9]+)([Ee][+-]?[0-9]+)?|[0-9]+[Ee][+-]?[0-9]+)'
+    return t
 
-  # Ellipsis operator
-  t_ELLIPSIS = r'\.\.\.'
+  def t_integer(self, t):
+    r'-?(0([0-7]*|[Xx][0-9A-Fa-f]+)|[1-9][0-9]*)'
+    return t
 
-  # Constant values
-  t_integer = r'-?(0([0-7]*|[Xx][0-9A-Fa-f]+)|[1-9][0-9]*)'
-  t_float = r'-?(([0-9]+\.[0-9]*|[0-9]*\.[0-9]+)'
-  t_float += r'([Ee][+-]?[0-9]+)?|[0-9]+[Ee][+-]?[0-9]+)'
 
   # A line ending '\n', we use this to increment the line number
   def t_LINE_END(self, t):
@@ -160,7 +153,7 @@ class IDLLexer(object):
 
   def t_ANY_error(self, t):
     msg = 'Unrecognized input'
-    line = self.lexobj.lineno
+    line = self.Lexer().lineno
 
     # If that line has not been accounted for, then we must have hit
     # EoF, so compute the beginning of the line that caused the problem.
@@ -169,10 +162,10 @@ class IDLLexer(object):
       word = t.value.split()[0]
       offs = self.lines[line - 1].find(word)
       # Add the computed line's starting position
-      self.index.append(self.lexobj.lexpos - offs)
+      self.index.append(self.Lexer().lexpos - offs)
       msg = 'Unexpected EoF reached after'
 
-    pos = self.lexobj.lexpos - self.index[line]
+    pos = self.Lexer().lexpos - self.index[line]
     out = self.ErrorMessage(line, pos, msg)
     sys.stderr.write(out + '\n')
     self._lex_errors += 1
@@ -183,13 +176,13 @@ class IDLLexer(object):
     # of multiple lines, tokens can not exist on any of the lines except the
     # last one, so the recorded value for previous lines are unused. We still
     # fill the array however, to make sure the line count is correct.
-    self.lexobj.lineno += count
+    self.Lexer().lineno += count
     for _ in range(count):
-      self.index.append(self.lexobj.lexpos)
+      self.index.append(self.Lexer().lexpos)
 
   def FileLineMsg(self, line, msg):
     # Generate a message containing the file and line number of a token.
-    filename = self.lexobj.filename
+    filename = self.Lexer().filename
     if filename:
       return "%s(%d) : %s" % (filename, line + 1, msg)
     return "<BuiltIn> : %s" % msg
@@ -213,7 +206,7 @@ class IDLLexer(object):
   # against the leaf paterns.
   #
   def token(self):
-    tok = self.lexobj.token()
+    tok = self.Lexer().token()
     if tok:
       self.last = tok
     return tok
@@ -222,21 +215,60 @@ class IDLLexer(object):
   def GetTokens(self):
     outlist = []
     while True:
-      t = self.lexobj.token()
+      t = self.Lexer().token()
       if not t:
         break
       outlist.append(t)
     return outlist
 
   def Tokenize(self, data, filename='__no_file__'):
-    self.lexobj.filename = filename
-    self.lexobj.input(data)
+    lexer = self.Lexer()
+    lexer.lineno = 1
+    lexer.filename = filename
+    lexer.input(data)
     self.lines = data.split('\n')
 
+  def KnownTokens(self):
+    return self.tokens
+
+  def Lexer(self):
+    if not self._lexobj:
+      self._lexobj = lex.lex(object=self, lextab=None, optimize=0)
+    return self._lexobj
+
+  def _AddConstDefs(self):
+    # 'literals' is a value expected by lex which specifies a list of valid
+    # literal tokens, meaning the token type and token value are identical.
+    self.literals = r'"*.(){}[],;:=+-/~|&^?<>'
+    self.t_ignore = ' \t'
+
+  def _AddToken(self, token):
+    if token in self.tokens:
+      raise RuntimeError('Same token: ' + token)
+    self.tokens.append(token)
+
+  def _AddTokens(self, tokens):
+    for token in tokens:
+      self._AddToken(token)
+
+  def _AddKeywords(self, keywords):
+    for key in keywords:
+      value = key.upper()
+      self._AddToken(value)
+      self.keywords[key] = value
+
   def __init__(self):
     self.index = [0]
     self._lex_errors = 0
     self.linex = []
     self.filename = None
-    self.lexobj = lex.lex(object=self, lextab=None, optimize=0)
-
+    self.keywords = {}
+    self.tokens = []
+    self._AddConstDefs()
+    self._AddTokens(IDLLexer.tokens)
+    self._AddKeywords(IDLLexer.keywords)
+    self._lexobj = None
+
+# If run by itself, attempt to build the lexer
+if __name__ == '__main__':
+  lexer = IDLLexer()
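
A minimal usage sketch of the refactored lexer API, assuming PLY is installed and the module above is importable as idl_lexer; the IDL snippet and file name below are illustrative only, not taken from the change:

# Hypothetical example. IDLLexer now builds its token and keyword tables in
# __init__ and creates the underlying lex object lazily on the first Lexer() call.
from idl_lexer import IDLLexer

lexer = IDLLexer()
lexer.Tokenize('interface Foo { attribute long bar; };', filename='example.idl')

# GetTokens() drains the lexer and returns every remaining token.
for tok in lexer.GetTokens():
  print('%s %s' % (tok.type, tok.value))

# KnownTokens() returns the list of token names registered with lex.
print('%d token types' % len(lexer.KnownTokens()))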