tools/lexer_generator/regex_lexer.py - Issue 59263003: Experimental lexer generator: parse \000 etc. inside char classes.

Side by Side Diff: tools/lexer_generator/regex_lexer.py

Issue 59263003: Experimental lexer generator: parse \000 etc. inside char classes. (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: . Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # Copyright 2013 the V8 project authors. All rights reserved.	1 # Copyright 2013 the V8 project authors. All rights reserved.

2 # Redistribution and use in source and binary forms, with or without	2 # Redistribution and use in source and binary forms, with or without

3 # modification, are permitted provided that the following conditions are	3 # modification, are permitted provided that the following conditions are

4 # met:	4 # met:

5 #	5 #

6 # * Redistributions of source code must retain the above copyright	6 # * Redistributions of source code must retain the above copyright

7 # notice, this list of conditions and the following disclaimer.	7 # notice, this list of conditions and the following disclaimer.

8 # * Redistributions in binary form must reproduce the above	8 # * Redistributions in binary form must reproduce the above

9 # copyright notice, this list of conditions and the following	9 # copyright notice, this list of conditions and the following

10 # disclaimer in the documentation and/or other materials provided	10 # disclaimer in the documentation and/or other materials provided

(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
46 'REPEAT_BEGIN',	46 'REPEAT_BEGIN',

47 'REPEAT_END',	47 'REPEAT_END',

48	48

49 'NUMBER',	49 'NUMBER',

50 'COMMA',	50 'COMMA',

51 'LITERAL',	51 'LITERAL',

52	52

53 'RANGE',	53 'RANGE',

54 'NOT',	54 'NOT',

55 'CLASS_LITERAL',	55 'CLASS_LITERAL',

	56 'CLASS_LITERAL_AS_OCTAL',

56 'CHARACTER_CLASS',	57 'CHARACTER_CLASS',

57 )	58 )

58	59

59 states = (	60 states = (

60 ('class','exclusive'),	61 ('class','exclusive'),

61 ('repeat','exclusive'),	62 ('repeat','exclusive'),

62 )	63 )

63	64

64 def t_ESCAPED_LITERAL(self, t):	65 def t_ESCAPED_LITERAL(self, t):

65 r'\\\(|\\\)|\\\[|\\\]|\\\||\\\+|\\\*|\\\?|\\\.|\\\\|\\\{|\\\}'	66 r'\\\(|\\\)|\\\[|\\\]|\\\||\\\+|\\\*|\\\?|\\\.|\\\\|\\\{|\\\}'

(...skipping 20 matching lines...) Expand all Loading...
86	87

87 def t_class_CLASS_END(self, t):	88 def t_class_CLASS_END(self, t):

88 r'\]'	89 r'\]'

89 self.lexer.pop_state()	90 self.lexer.pop_state()

90 return t	91 return t

91	92

92 t_class_RANGE = '-'	93 t_class_RANGE = '-'

93 t_class_NOT = '\^'	94 t_class_NOT = '\^'

94 t_class_CHARACTER_CLASS = r':\w+:'	95 t_class_CHARACTER_CLASS = r':\w+:'

95	96

	97 def t_class_CLASS_LITERAL_AS_OCTAL(self, t):

	98 r'\\\d+'

	99 return t

	100

96 def t_class_ESCAPED_CLASS_LITERAL(self, t):	101 def t_class_ESCAPED_CLASS_LITERAL(self, t):

97 r'\\\^|\\-|\\\[|\\\]|\\\:|\\\w'	102 r'\\\^|\\-|\\\[|\\\]|\\\:|\\\w'

98 t.type = 'CLASS_LITERAL'	103 t.type = 'CLASS_LITERAL'

99 t.value = t.value[1:]	104 t.value = t.value[1:]

100 return t	105 return t

101	106

102 t_class_CLASS_LITERAL = r'[\w $_+]' # fix this	107 t_class_CLASS_LITERAL = r'[\w $_+]'

103	108

104 def t_REPEAT_BEGIN(self, t):	109 def t_REPEAT_BEGIN(self, t):

105 r'\{'	110 r'\{'

106 self.lexer.push_state('repeat')	111 self.lexer.push_state('repeat')

107 return t	112 return t

108	113

109 def t_repeat_REPEAT_END(self, t):	114 def t_repeat_REPEAT_END(self, t):

110 r'\}'	115 r'\}'

111 self.lexer.pop_state()	116 self.lexer.pop_state()

112 return t	117 return t

113	118

114 t_repeat_NUMBER = r'[0-9]+'	119 t_repeat_NUMBER = r'[0-9]+'

115 t_repeat_COMMA = r','	120 t_repeat_COMMA = r','

116	121

117 t_ANY_ignore = '\n'	122 t_ANY_ignore = '\n'

118	123

119 def t_ANY_error(self, t):	124 def t_ANY_error(self, t):

120 raise Exception("Illegal character '%s'" % t.value[0])	125 raise Exception("Illegal character '%s'" % t.value[0])

121	126

122 def build(self, **kwargs):	127 def build(self, **kwargs):

123 self.lexer = lex.lex(module=self, **kwargs)	128 self.lexer = lex.lex(module=self, **kwargs)

OLD	NEW

« no previous file with comments | « no previous file | tools/lexer_generator/regex_parser.py » ('j') | no next file with comments »