tools/lexer_generator/regex_parser.py - Issue 138973007: Experimental parser: support subgraph inlining

Unified Diff: tools/lexer_generator/regex_parser.py

Issue 138973007: Experimental parser: support subgraph inlining (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 6 years, 10 months ago

Press n/p to move between diff chunks and N/P to move between comments. Draft comments are visible only to you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: tools/lexer_generator/regex_parser.py

diff --git a/tools/lexer_generator/regex_parser.py b/tools/lexer_generator/regex_parser.py

index 4053bec03fde1635231fa2da50fb7728da8cb563..d19a5432570508bef88dd5e8e99fb28ec4fe7ea8 100644

--- a/tools/lexer_generator/regex_parser.py

+++ b/tools/lexer_generator/regex_parser.py

@@ -25,11 +25,123 @@

# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+import ply.lex as lex

import ply.yacc as yacc

from types import ListType, TupleType

from regex_lexer import RegexLexer

from action import Term

+def build_escape_map(chars):

+ def add_escape(d, char):

+ d['\\' + char] = char

+ return d

+ return reduce(add_escape, chars,

+ {'\\t' : '\t', '\\r' : '\r', '\\n' : '\n', '\\v' : '\v', '\\f' : '\f'})

+class RegexLexer:

+ tokens = (

+ 'GROUP_BEGIN',

+ 'GROUP_END',

+ 'CLASS_BEGIN',

+ 'CLASS_END',

+ 'OR',

+ 'ONE_OR_MORE',

+ 'ZERO_OR_MORE',

+ 'ZERO_OR_ONE',

+ 'ANY',

+ 'REPEAT_BEGIN',

+ 'REPEAT_END',

+ 'NUMBER',

+ 'COMMA',

+ 'LITERAL',

+ 'RANGE',

+ 'NOT',

+ 'CLASS_LITERAL',

+ 'CLASS_LITERAL_AS_OCTAL',

+ 'CHARACTER_CLASS',

+ )

+ states = (

+ ('class','exclusive'),

+ ('repeat','exclusive'),

+ )

+ __escaped_literals = build_escape_map("(){}[]?+.*|'\"\\")

+ def t_ESCAPED_LITERAL(self, t):

+ r'\\.'

+ t.type = 'LITERAL'

+ t.value = RegexLexer.__escaped_literals[t.value]

+ return t

+ t_GROUP_BEGIN = r'\('

+ t_GROUP_END = r'\)'

+ t_OR = r'\|'

+ t_ONE_OR_MORE = r'\+'

+ t_ZERO_OR_MORE = r'\*'

+ t_ZERO_OR_ONE = r'\?'

+ t_ANY = r'\.'

+ t_LITERAL = r'.'

+ def t_CLASS_BEGIN(self, t):

+ r'\['

+ self.lexer.push_state('class')

+ return t

+ def t_class_CLASS_END(self, t):

+ r'\]'

+ self.lexer.pop_state()

+ return t

+ t_class_RANGE = '-'

+ t_class_NOT = '\^'

+ t_class_CHARACTER_CLASS = r':\w+:'

+ def t_class_CLASS_LITERAL_AS_OCTAL(self, t):

+ r'\\\d+'

+ return t

+ __escaped_class_literals = build_escape_map("^[]-:\\")

+ def t_class_ESCAPED_CLASS_LITERAL(self, t):

+ r'\\.'

+ t.type = 'CLASS_LITERAL'

+ t.value = RegexLexer.__escaped_class_literals[t.value]

+ return t

+ t_class_CLASS_LITERAL = r'[\w *$_+\'\"/]'

+ def t_REPEAT_BEGIN(self, t):

+ r'\{'

+ self.lexer.push_state('repeat')

+ return t

+ def t_repeat_REPEAT_END(self, t):

+ r'\}'

+ self.lexer.pop_state()

+ return t

+ t_repeat_NUMBER = r'[0-9]+'

+ t_repeat_COMMA = r','

+ t_ANY_ignore = '\n'

+ def t_ANY_error(self, t):

+ raise Exception("Illegal character '%s'" % t.value[0])

+ def build(self, **kwargs):

+ self.lexer = lex.lex(module=self, **kwargs)

class RegexParser:

tokens = RegexLexer.tokens

« No previous file with comments | « Previous file: tools/lexer_generator/regex_lexer.py ('k') | Next file: tools/lexer_generator/rule_lexer.py » ('j') | No next file with comments »