tools/lexer_generator/regex_parser.py - Issue 138973007: Experimental parser: support subgraph inlining

Side by Side Diff: tools/lexer_generator/regex_parser.py

Issue 138973007: Experimental parser: support subgraph inlining (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 6 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright 2013 the V8 project authors. All rights reserved.	1 # Copyright 2013 the V8 project authors. All rights reserved.

2 # Redistribution and use in source and binary forms, with or without	2 # Redistribution and use in source and binary forms, with or without

3 # modification, are permitted provided that the following conditions are	3 # modification, are permitted provided that the following conditions are

4 # met:	4 # met:

5 #	5 #

6 # * Redistributions of source code must retain the above copyright	6 # * Redistributions of source code must retain the above copyright

7 # notice, this list of conditions and the following disclaimer.	7 # notice, this list of conditions and the following disclaimer.

8 # * Redistributions in binary form must reproduce the above	8 # * Redistributions in binary form must reproduce the above

9 # copyright notice, this list of conditions and the following	9 # copyright notice, this list of conditions and the following

10 # disclaimer in the documentation and/or other materials provided	10 # disclaimer in the documentation and/or other materials provided

11 # with the distribution.	11 # with the distribution.

12 # * Neither the name of Google Inc. nor the names of its	12 # * Neither the name of Google Inc. nor the names of its

13 # contributors may be used to endorse or promote products derived	13 # contributors may be used to endorse or promote products derived

14 # from this software without specific prior written permission.	14 # from this software without specific prior written permission.

15 #	15 #

16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS	16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT	17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR	18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT	19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,	20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT	21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,	22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY	23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT	24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE	25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

27	27

	28 import ply.lex as lex

28 import ply.yacc as yacc	29 import ply.yacc as yacc

29 from types import ListType, TupleType	30 from types import ListType, TupleType

30 from regex_lexer import RegexLexer	31 from regex_lexer import RegexLexer

31 from action import Term	32 from action import Term

32	33

	34 def build_escape_map(chars):

	35 def add_escape(d, char):

	36 d['\\' + char] = char

	37 return d

	38 return reduce(add_escape, chars,

	39 {'\\t' : '\t', '\\r' : '\r', '\\n' : '\n', '\\v' : '\v', '\\f' : '\f'})

	40

	41 class RegexLexer:

	42

	43 tokens = (

	44

	45 'GROUP_BEGIN',

	46 'GROUP_END',

	47

	48 'CLASS_BEGIN',

	49 'CLASS_END',

	50

	51 'OR',

	52 'ONE_OR_MORE',

	53 'ZERO_OR_MORE',

	54 'ZERO_OR_ONE',

	55 'ANY',

	56

	57 'REPEAT_BEGIN',

	58 'REPEAT_END',

	59

	60 'NUMBER',

	61 'COMMA',

	62 'LITERAL',

	63

	64 'RANGE',

	65 'NOT',

	66 'CLASS_LITERAL',

	67 'CLASS_LITERAL_AS_OCTAL',

	68 'CHARACTER_CLASS',

	69 )

	70

	71 states = (

	72 ('class','exclusive'),

	73 ('repeat','exclusive'),

	74 )

	75

	76 __escaped_literals = build_escape_map("(){}[]?+.*\|'\"\\")

	77

	78 def t_ESCAPED_LITERAL(self, t):

	79 r'\\.'

	80 t.type = 'LITERAL'

	81 t.value = RegexLexer.__escaped_literals[t.value]

	82 return t

	83

	84 t_GROUP_BEGIN = r'\('

	85 t_GROUP_END = r'\)'

	86

	87 t_OR = r'\\|'

	88 t_ONE_OR_MORE = r'\+'

	89 t_ZERO_OR_MORE = r'\*'

	90 t_ZERO_OR_ONE = r'\?'

	91

	92 t_ANY = r'\.'

	93

	94 t_LITERAL = r'.'

	95

	96 def t_CLASS_BEGIN(self, t):

	97 r'\['

	98 self.lexer.push_state('class')

	99 return t

	100

	101 def t_class_CLASS_END(self, t):

	102 r'\]'

	103 self.lexer.pop_state()

	104 return t

	105

	106 t_class_RANGE = '-'

	107 t_class_NOT = '\^'

	108 t_class_CHARACTER_CLASS = r':\w+:'

	109

	110 def t_class_CLASS_LITERAL_AS_OCTAL(self, t):

	111 r'\\\d+'

	112 return t

	113

	114 __escaped_class_literals = build_escape_map("^[]-:\\")

	115

	116 def t_class_ESCAPED_CLASS_LITERAL(self, t):

	117 r'\\.'

	118 t.type = 'CLASS_LITERAL'

	119 t.value = RegexLexer.__escaped_class_literals[t.value]

	120 return t

	121

	122 t_class_CLASS_LITERAL = r'[\w *$_+\'\"/]'

	123

	124 def t_REPEAT_BEGIN(self, t):

	125 r'\{'

	126 self.lexer.push_state('repeat')

	127 return t

	128

	129 def t_repeat_REPEAT_END(self, t):

	130 r'\}'

	131 self.lexer.pop_state()

	132 return t

	133

	134 t_repeat_NUMBER = r'[0-9]+'

	135 t_repeat_COMMA = r','

	136

	137 t_ANY_ignore = '\n'

	138

	139 def t_ANY_error(self, t):

	140 raise Exception("Illegal character '%s'" % t.value[0])

	141

	142 def build(self, **kwargs):

	143 self.lexer = lex.lex(module=self, **kwargs)

	144

33 class RegexParser:	145 class RegexParser:

34	146

35 tokens = RegexLexer.tokens	147 tokens = RegexLexer.tokens

36	148

37 token_map = {	149 token_map = {

38 '+': 'ONE_OR_MORE',	150 '+': 'ONE_OR_MORE',

39 '?': 'ZERO_OR_ONE',	151 '?': 'ZERO_OR_ONE',

40 '*': 'ZERO_OR_MORE',	152 '*': 'ZERO_OR_MORE',

41 '\|': 'OR',	153 '\|': 'OR',

42 '.': 'ANY',	154 '.': 'ANY',

(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after
154 parser = RegexParser.__static_instance	266 parser = RegexParser.__static_instance

155 if not parser:	267 if not parser:

156 parser = RegexParser()	268 parser = RegexParser()

157 parser.build()	269 parser.build()

158 RegexParser.__static_instance = parser	270 RegexParser.__static_instance = parser

159 try:	271 try:

160 return parser.parser.parse(data, lexer=parser.lexer.lexer)	272 return parser.parser.parse(data, lexer=parser.lexer.lexer)

161 except Exception:	273 except Exception:

162 RegexParser.__static_instance = None	274 RegexParser.__static_instance = None

163 raise	275 raise

OLD	NEW

« no previous file with comments | « tools/lexer_generator/regex_lexer.py ('k') | tools/lexer_generator/rule_lexer.py » ('j') | no next file with comments »