| Index: sdk/lib/html/scripts/pegparser.py
|
| diff --git a/sdk/lib/html/scripts/pegparser.py b/sdk/lib/html/scripts/pegparser.py
|
| deleted file mode 100755
|
| index ffc12cc7c99326c44cd27d3b0f186c163b08dfd6..0000000000000000000000000000000000000000
|
| --- a/sdk/lib/html/scripts/pegparser.py
|
| +++ /dev/null
|
| @@ -1,527 +0,0 @@
|
| -#!/usr/bin/python
|
| -# Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
|
| -# for details. All rights reserved. Use of this source code is governed by a
|
| -# BSD-style license that can be found in the LICENSE file.
|
| -
|
| -import logging
|
| -import re
|
| -import weakref
|
| -
|
| -_logger = logging.getLogger('pegparser')
|
| -
|
| -# functions can refer to each other, hence creating infinite loops. The
|
| -# following hashmap is used to memoize functions that were already compiled.
|
| -_compiled_functions_memory = weakref.WeakKeyDictionary()
|
| -
|
| -_regex_type = type(re.compile(r''))
|
| -_list_type = type([])
|
| -_function_type = type(lambda func: 0)
|
| -
|
| -
|
| -class _PegParserState(object):
|
| - """Object for storing parsing state variables and options"""
|
| -
|
| - def __init__(self, text, whitespace_rule, strings_are_tokens):
|
| - # Parsing state:
|
| - self.text = text
|
| - self.is_whitespace_mode = False
|
| -
|
| - # Error message helpers:
|
| - self.max_pos = None
|
| - self.max_rule = None
|
| -
|
| - # Parsing options:
|
| - self.whitespace_rule = whitespace_rule
|
| - self.strings_are_tokens = strings_are_tokens
|
| -
|
| -
|
| -class _PegParserRule(object):
|
| - """Base class for all rules"""
|
| -
|
| - def __init__(self):
|
| - return
|
| -
|
| - def __str__(self):
|
| - return self.__class__.__name__
|
| -
|
| - def _match_impl(self, state, pos):
|
| - """Default implementation of the matching algorithm.
|
| - Should be overwritten by sub-classes.
|
| - """
|
| - raise RuntimeError('_match_impl not implemented')
|
| -
|
| - def match(self, state, pos):
|
| - """Matches the rule against the text in the given position.
|
| -
|
| - The actual rule evaluation is delegated to _match_impl,
|
| - while this function deals mostly with support tasks such as
|
| - skipping whitespace, debug information and data for exception.
|
| -
|
| - Args:
|
| - state -- the current parsing state and options.
|
| - pos -- the current offset in the text.
|
| -
|
| - Returns:
|
| - (next position, value) if the rule matches, or
|
| - (None, None) if it doesn't.
|
| - """
|
| - if not state.is_whitespace_mode:
|
| - # Skip whitespace
|
| - pos = _skip_whitespace(state, pos)
|
| -
|
| - # Track position for possible error messaging
|
| - if pos > state.max_pos:
|
| - # Store position and the rule.
|
| - state.max_pos = pos
|
| - if isinstance(self, _StringRule):
|
| - state.max_rule = [self]
|
| - else:
|
| - state.max_rule = []
|
| - elif pos == state.max_pos:
|
| - if isinstance(self, _StringRule):
|
| - state.max_rule.append(self)
|
| -
|
| - if _logger.isEnabledFor(logging.DEBUG):
|
| - # Used for debugging
|
| - _logger.debug('Try: pos=%s char=%s rule=%s' % \
|
| - (pos, state.text[pos:pos + 1], self))
|
| -
|
| - # Delegate the matching logic to the the specialized function.
|
| - res = self._match_impl(state, pos)
|
| -
|
| - if not state.is_whitespace_mode \
|
| - and _logger.isEnabledFor(logging.DEBUG):
|
| - # More debugging information
|
| - (nextPos, ast) = res
|
| - if nextPos is not None:
|
| - _logger.debug('Match! pos=%s char=%s rule=%s' % \
|
| - (pos, state.text[pos:pos + 1], self))
|
| - else:
|
| - _logger.debug('Fail. pos=%s char=%s rule=%s' % \
|
| - (pos, state.text[pos:pos + 1], self))
|
| -
|
| - return res
|
| -
|
| -
|
| -def _compile(rule):
|
| - """Recursively compiles user-defined rules into parser rules.
|
| - Compilation is performed by converting strings, regular expressions, lists
|
| - and functions into _StringRule, _RegExpRule, SEQUENCE and _FunctionRule
|
| - (respectively). Memoization is used to avoid infinite recursion as rules
|
| - may refer to each other."""
|
| - if rule is None:
|
| - raise RuntimeError('None is not a valid rule')
|
| - elif isinstance(rule, str):
|
| - return _StringRule(rule)
|
| - elif isinstance(rule, _regex_type):
|
| - return _RegExpRule(rule)
|
| - elif isinstance(rule, _list_type):
|
| - return SEQUENCE(*rule)
|
| - elif isinstance(rule, _function_type):
|
| - # Memoize compiled functions to avoid infinite compliation loops.
|
| - if rule in _compiled_functions_memory:
|
| - return _compiled_functions_memory[rule]
|
| - else:
|
| - compiled_function = _FunctionRule(rule)
|
| - _compiled_functions_memory[rule] = compiled_function
|
| - compiled_function._sub_rule = _compile(rule())
|
| - return compiled_function
|
| - elif isinstance(rule, _PegParserRule):
|
| - return rule
|
| - else:
|
| - raise RuntimeError('Invalid rule type %s: %s', (type(rule), rule))
|
| -
|
| -
|
| -def _skip_whitespace(state, pos):
|
| - """Returns the next non-whitespace position.
|
| - This is done by matching the optional whitespace_rule with the current
|
| - text."""
|
| - if not state.whitespace_rule:
|
| - return pos
|
| - state.is_whitespace_mode = True
|
| - nextPos = pos
|
| - while nextPos is not None:
|
| - pos = nextPos
|
| - (nextPos, ast) = state.whitespace_rule.match(state, pos)
|
| - state.is_whitespace_mode = False
|
| - return pos
|
| -
|
| -
|
| -class _StringRule(_PegParserRule):
|
| - """This rule tries to match a whole string."""
|
| -
|
| - def __init__(self, string):
|
| - """Constructor.
|
| - Args:
|
| - string -- string to match.
|
| - """
|
| - _PegParserRule.__init__(self)
|
| - self._string = string
|
| -
|
| - def __str__(self):
|
| - return '"%s"' % self._string
|
| -
|
| - def _match_impl(self, state, pos):
|
| - """Tries to match the string at the current position"""
|
| - if state.text.startswith(self._string, pos):
|
| - nextPos = pos + len(self._string)
|
| - if state.strings_are_tokens:
|
| - return (nextPos, None)
|
| - else:
|
| - return (nextPos, self._string)
|
| - return (None, None)
|
| -
|
| -
|
| -class _RegExpRule(_PegParserRule):
|
| - """This rule tries to matches a regular expression."""
|
| -
|
| - def __init__(self, reg_exp):
|
| - """Constructor.
|
| - Args:
|
| - reg_exp -- a regular expression used in matching.
|
| - """
|
| - _PegParserRule.__init__(self)
|
| - self.reg_exp = reg_exp
|
| -
|
| - def __str__(self):
|
| - return 'regexp'
|
| -
|
| - def _match_impl(self, state, pos):
|
| - """Tries to match the regular expression with current text"""
|
| - matchObj = self.reg_exp.match(state.text, pos)
|
| - if matchObj:
|
| - matchStr = matchObj.group()
|
| - return (pos + len(matchStr), matchStr)
|
| - return (None, None)
|
| -
|
| -
|
| -class _FunctionRule(_PegParserRule):
|
| - """Function rule wraps a rule defined via a Python function.
|
| -
|
| - Defining rules via functions helps break the grammar into parts, labeling
|
| - the ast, and supporting recursive definitions in the grammar
|
| -
|
| - Usage Example:
|
| - def Func(): return ['function', TOKEN('('), TOKEN(')')]
|
| - def Var(): return OR('x', 'y')
|
| - def Program(): return OR(Func, Var)
|
| -
|
| - When matched with 'function()', will return the tuple:
|
| - ('Program', ('Func', 'function'))
|
| - When matched with 'x', will return the tuple:
|
| - ('Program', ('Var', 'x'))
|
| -
|
| - Functions who's name begins with '_' will not be labelled. This is useful
|
| - for creating utility rules. Extending the example above:
|
| -
|
| - def _Program(): return OR(Func, Var)
|
| -
|
| - When matched with 'function()', will return the tuple:
|
| - ('Func', 'function')
|
| - """
|
| -
|
| - def __init__(self, func):
|
| - """Constructor.
|
| - Args:
|
| - func -- the original function will be used for labeling output.
|
| - """
|
| - _PegParserRule.__init__(self)
|
| - self._func = func
|
| - # Sub-rule is compiled by _compile to avoid infinite recursion.
|
| - self._sub_rule = None
|
| -
|
| - def __str__(self):
|
| - return self._func.__name__
|
| -
|
| - def _match_impl(self, state, pos):
|
| - """Simply invokes the sub rule"""
|
| - (nextPos, ast) = self._sub_rule.match(state, pos)
|
| - if nextPos is not None:
|
| - if not self._func.__name__.startswith('_'):
|
| - ast = (self._func.__name__, ast)
|
| - return (nextPos, ast)
|
| - return (None, None)
|
| -
|
| -
|
| -class SEQUENCE(_PegParserRule):
|
| - """This rule expects all given rules to match in sequence.
|
| - Note that SEQUENCE is equivalent to a rule composed of a Python list of
|
| - rules.
|
| - Usage example: SEQUENCE('A', 'B', 'C')
|
| - or: ['A', 'B', 'C']
|
| - Will match 'ABC' but not 'A', 'B' or ''.
|
| - """
|
| - def __init__(self, *rules):
|
| - """Constructor.
|
| - Args:
|
| - rules -- one or more rules to match.
|
| - """
|
| - _PegParserRule.__init__(self)
|
| - self._sub_rules = []
|
| - for rule in rules:
|
| - self._sub_rules.append(_compile(rule))
|
| -
|
| - def _match_impl(self, state, pos):
|
| - """Tries to match all the sub rules"""
|
| - sequence = []
|
| - for rule in self._sub_rules:
|
| - (nextPos, ast) = rule.match(state, pos)
|
| - if nextPos is not None:
|
| - if ast:
|
| - if isinstance(ast, _list_type):
|
| - sequence.extend(ast)
|
| - else:
|
| - sequence.append(ast)
|
| - pos = nextPos
|
| - else:
|
| - return (None, None)
|
| - return (pos, sequence)
|
| -
|
| -
|
| -class OR(_PegParserRule):
|
| - """This rule matches one and only one of multiple sub-rules.
|
| - Usage example: OR('A', 'B', 'C')
|
| - Will match 'A', 'B' or 'C'.
|
| - """
|
| - def __init__(self, *rules):
|
| - """Constructor.
|
| - Args:
|
| - rules -- rules to choose from.
|
| - """
|
| - _PegParserRule.__init__(self)
|
| - self._sub_rules = []
|
| - for rule in rules:
|
| - self._sub_rules.append(_compile(rule))
|
| -
|
| - def _match_impl(self, state, pos):
|
| - """Tries to match at leat one of the sub rules"""
|
| - for rule in self._sub_rules:
|
| - (nextPos, ast) = rule.match(state, pos)
|
| - if nextPos is not None:
|
| - return (nextPos, ast)
|
| - return (None, None)
|
| -
|
| -
|
| -class MAYBE(_PegParserRule):
|
| - """Will try to match the given rule, tolerating absence.
|
| - Usage example: MAYBE('A')
|
| - Will match 'A' but also ''.
|
| - """
|
| - def __init__(self, rule):
|
| - """Constructor.
|
| - Args:
|
| - rule -- the rule that may be absent.
|
| - """
|
| - _PegParserRule.__init__(self)
|
| - self._sub_rule = _compile(rule)
|
| -
|
| - def _match_impl(self, state, pos):
|
| - """Tries to match at leat one of the sub rules"""
|
| - (nextPos, ast) = self._sub_rule.match(state, pos)
|
| - if nextPos is not None:
|
| - return (nextPos, ast)
|
| - return (pos, None)
|
| -
|
| -
|
| -class MANY(_PegParserRule):
|
| - """Will try to match the given rule one or more times.
|
| - Usage example 1: MANY('A')
|
| - Will match 'A', 'AAAAA' but not ''.
|
| - Usage example 2: MANY('A', separator=',')
|
| - Will match 'A', 'A,A' but not 'AA'.
|
| - """
|
| -
|
| - def __init__(self, rule, separator=None):
|
| - """Constructor.
|
| - Args:
|
| - rule -- the rule to match multiple times.
|
| - separator -- this optional rule is used to match separators.
|
| - """
|
| - _PegParserRule.__init__(self)
|
| - self._sub_rule = _compile(rule)
|
| - self._separator = _compile(separator) if separator else None
|
| -
|
| - def _match_impl(self, state, pos):
|
| - res = []
|
| - count = 0
|
| - while True:
|
| - if count > 0 and self._separator:
|
| - (nextPos, ast) = self._separator.match(state, pos)
|
| - if nextPos is not None:
|
| - pos = nextPos
|
| - if ast:
|
| - res.append(ast)
|
| - else:
|
| - break
|
| - (nextPos, ast) = self._sub_rule.match(state, pos)
|
| - if nextPos is None:
|
| - break
|
| - count += 1
|
| - pos = nextPos
|
| - res.append(ast)
|
| - if count > 0:
|
| - return (pos, res)
|
| - return (None, None)
|
| -
|
| -
|
| -class TOKEN(_PegParserRule):
|
| - """The matched rule will not appear in the the output.
|
| - Usage example: ['A', TOKEN('.'), 'B']
|
| - When matching 'A.B', will return the sequence ['A', 'B'].
|
| - """
|
| -
|
| - def __init__(self, rule):
|
| - """Constructor.
|
| - Args:
|
| - rule -- the rule to match.
|
| - """
|
| - _PegParserRule.__init__(self)
|
| - self._sub_rule = _compile(rule)
|
| -
|
| - def _match_impl(self, state, pos):
|
| - (nextPos, ast) = self._sub_rule.match(state, pos)
|
| - if nextPos is not None:
|
| - return (nextPos, None)
|
| - return (None, None)
|
| -
|
| -
|
| -class LABEL(_PegParserRule):
|
| - """The matched rule will appear in the output with the given label.
|
| - Usage example: LABEL('number', re.compile(r'[0-9]+'))
|
| - When matched with '1234', will return ('number', '1234').
|
| -
|
| - Keyword arguments:
|
| - label -- a string.
|
| - rule -- the rule to match.
|
| - """
|
| -
|
| - def __init__(self, label, rule):
|
| - """Constructor.
|
| - Args:
|
| - rule -- the rule to match.
|
| - """
|
| - _PegParserRule.__init__(self)
|
| - self._label = label
|
| - self._sub_rule = _compile(rule)
|
| -
|
| - def _match_impl(self, state, pos):
|
| - (nextPos, ast) = self._sub_rule.match(state, pos)
|
| - if nextPos is not None:
|
| - return (nextPos, (self._label, ast))
|
| - return (None, None)
|
| -
|
| -
|
| -class RAISE(_PegParserRule):
|
| - """Raises a SyntaxError with a user-provided message.
|
| - Usage example: ['A','B', RAISE('should have not gotten here')]
|
| - Will not match 'A' but will raise an exception for 'AB'.
|
| - This rule is useful mostly for debugging grammars.
|
| - """
|
| - def __init__(self, message):
|
| - """Constructor.
|
| - Args:
|
| - message -- the message for the raised exception.
|
| - """
|
| - _PegParserRule.__init__(self)
|
| - self._message = message
|
| -
|
| - def _match_impl(self, state, pos):
|
| - raise RuntimeError(self._message)
|
| -
|
| -
|
| -class PegParser(object):
|
| - """PegParser class.
|
| - This generic parser can be configured with rules to parse a wide
|
| - range of inputs.
|
| - """
|
| -
|
| - def __init__(self, root_rule, whitespace_rule=None,
|
| - strings_are_tokens=False):
|
| - """Initializes a PegParser with rules and parsing options.
|
| -
|
| - Args:
|
| - root_rule -- the top level rule to start matching at. Rule can be
|
| - a regular expression, a string, or one of the special rules
|
| - such as SEQUENCE, MANY, OR, etc.
|
| - whitespace_rule -- used to identify and strip whitespace. Default
|
| - isNone, configuring the parser to not tolerate whitespace.
|
| - strings_are_tokens -- by default string rules are not treated as
|
| - tokens. In many programming languages, strings are tokens,
|
| - so this should be set to True.
|
| - """
|
| - self._strings_are_tokens = strings_are_tokens
|
| - self._root_rule = _compile(root_rule)
|
| - if whitespace_rule is None:
|
| - self._whitespace_rule = None
|
| - else:
|
| - self._whitespace_rule = _compile(whitespace_rule)
|
| -
|
| - def parse(self, text, start_pos=0):
|
| - """Parses the given text input
|
| - Args:
|
| - text -- data to parse.
|
| - start_pos -- the offset to start parsing at.
|
| -
|
| - Returns:
|
| - An abstract syntax tree, with nodes being pairs of the format
|
| - (label, value), where label is a string or a function, and value
|
| - is a string, a pair or a list of pairs.
|
| - """
|
| -
|
| - def calculate_line_number_and_offset(globalOffset):
|
| - """Calculates the line number and in-line offset"""
|
| - i = 0
|
| - lineNumber = 1
|
| - lineOffset = 0
|
| - lineData = []
|
| - while i < globalOffset and i < len(text):
|
| - if text[i] == '\n':
|
| - lineNumber += 1
|
| - lineOffset = 0
|
| - lineData = []
|
| - else:
|
| - lineData.append(text[i])
|
| - lineOffset += 1
|
| - i += 1
|
| - while i < len(text) and text[i] != '\n':
|
| - lineData.append(text[i])
|
| - i += 1
|
| - return (lineNumber, lineOffset, ''.join(lineData))
|
| -
|
| - def analyze_result(state, pos, ast):
|
| - """Analyze match output"""
|
| - if pos is not None:
|
| - # Its possible that matching is successful but trailing
|
| - # whitespace remains, so skip it.
|
| - pos = _skip_whitespace(state, pos)
|
| - if pos == len(state.text):
|
| - # End of intput reached. Success!
|
| - return ast
|
| -
|
| - # Failure - analyze and raise an error.
|
| - (lineNumber, lineOffset, lineData) = \
|
| - calculate_line_number_and_offset(state.max_pos)
|
| - message = 'unexpected error'
|
| - if state.max_rule:
|
| - set = {}
|
| - map(set.__setitem__, state.max_rule, [])
|
| -
|
| - def to_str(item):
|
| - return item.__str__()
|
| -
|
| - expected = ' or '.join(map(to_str, set.keys()))
|
| - found = state.text[state.max_pos:state.max_pos + 1]
|
| - message = 'Expected %s but "%s" found: "%s"' % \
|
| - (expected, found, lineData)
|
| - raise SyntaxError(
|
| - 'At line %s offset %s: %s' % \
|
| - (lineNumber, lineOffset, message))
|
| -
|
| - # Initialize state
|
| - state = _PegParserState(text,
|
| - whitespace_rule=self._whitespace_rule,
|
| - strings_are_tokens=self._strings_are_tokens)
|
| -
|
| - # Match and analyze result
|
| - (pos, ast) = self._root_rule.match(state, start_pos)
|
| - return analyze_result(state, pos, ast)
|
|
|