OLD | NEW |
1 # Copyright 2013 the V8 project authors. All rights reserved. | 1 # Copyright 2013 the V8 project authors. All rights reserved. |
2 # Redistribution and use in source and binary forms, with or without | 2 # Redistribution and use in source and binary forms, with or without |
3 # modification, are permitted provided that the following conditions are | 3 # modification, are permitted provided that the following conditions are |
4 # met: | 4 # met: |
5 # | 5 # |
6 # * Redistributions of source code must retain the above copyright | 6 # * Redistributions of source code must retain the above copyright |
7 # notice, this list of conditions and the following disclaimer. | 7 # notice, this list of conditions and the following disclaimer. |
8 # * Redistributions in binary form must reproduce the above | 8 # * Redistributions in binary form must reproduce the above |
9 # copyright notice, this list of conditions and the following | 9 # copyright notice, this list of conditions and the following |
10 # disclaimer in the documentation and/or other materials provided | 10 # disclaimer in the documentation and/or other materials provided |
(...skipping 12 matching lines...) Expand all Loading... |
23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 | 27 |
28 import ply.lex as lex | 28 import ply.lex as lex |
29 | 29 |
30 class RuleLexer: | 30 class RuleLexer: |
31 | 31 |
32 tokens = ( | 32 tokens = ( |
33 'ALIAS', | 33 'IDENTIFIER', |
34 'EQUALS', | 34 'STRING_REGEX', |
35 'REGEX', | 35 'CHARACTER_CLASS_REGEX', |
36 'CONDITION', | 36 'TRANSITION', |
37 'CONDITION_BEGIN', | 37 'TRANSITION_WITH_CODE', |
38 'CONDITION_END', | |
39 'REGEX_TRANSITION_BODY', | |
40 'REGEX_TRANSITION', | |
41 'REGEX_BODY', | |
42 ) | |
43 | 38 |
44 t_ANY_ignore = " \t\n" | 39 'PLUS', |
| 40 'QUESTION_MARK', |
| 41 'EQUALS', |
| 42 'OR', |
| 43 'STAR', |
| 44 'LEFT_PARENTHESIS', |
| 45 'RIGHT_PARENTHESIS', |
| 46 'LESS_THAN', |
| 47 'GREATER_THAN', |
| 48 'SEMICOLON', |
| 49 |
| 50 'LEFT_BRACKET', |
| 51 'RIGHT_BRACKET', |
| 52 |
| 53 'CODE_FRAGMENT', |
| 54 ) |
45 | 55 |
46 states = ( | 56 states = ( |
47 ('afterAlias', 'exclusive'), | 57 ('code','exclusive'), |
48 ('afterAliasEquals', 'exclusive'), | 58 ) |
49 ('inCondition', 'exclusive'), | |
50 ('seenCondition', 'exclusive'), | |
51 ('afterCondition', 'exclusive')) | |
52 | 59 |
53 def t_ALIAS(self, t): | 60 t_ignore = " \t\n\r" |
54 r'[a-zA-Z0-9_]+' | 61 t_code_ignore = "" |
55 self.lexer.begin('afterAlias') | 62 |
| 63 def t_COMMENT(self, t): |
| 64 r'\#.*[\n\r]+' |
| 65 pass |
| 66 |
| 67 t_IDENTIFIER = r'[a-zA-Z0-9_]+' |
| 68 t_STRING_REGEX = r'"((\\("|\w|\\))|[^\\"])+"' |
| 69 t_CHARACTER_CLASS_REGEX = r'\[([^\]]|\\\])+\]' |
| 70 t_TRANSITION = r':=>' |
| 71 t_TRANSITION_WITH_CODE = r'=>' |
| 72 |
| 73 t_PLUS = r'\+' |
| 74 t_QUESTION_MARK = r'\?' |
| 75 t_STAR = r'\*' |
| 76 t_OR = r'\|' |
| 77 t_EQUALS = r'=' |
| 78 t_LEFT_PARENTHESIS = r'\(' |
| 79 t_RIGHT_PARENTHESIS = r'\)' |
| 80 t_LESS_THAN = r'<' |
| 81 t_GREATER_THAN = r'>' |
| 82 t_SEMICOLON = r';' |
| 83 |
| 84 def t_LEFT_BRACKET(self, t): |
| 85 r'{' |
| 86 self.lexer.push_state('code') |
| 87 self.nesting = 1 |
56 return t | 88 return t |
57 | 89 |
58 def t_afterAlias_EQUALS(self, t): | 90 t_code_CODE_FRAGMENT = r'[^{}]+' |
59 r'=' | 91 |
60 self.lexer.begin('afterAliasEquals') | 92 def t_code_LEFT_BRACKET(self, t): |
| 93 r'{' |
| 94 self.nesting += 1 |
| 95 t.type = 'CODE_FRAGMENT' |
61 return t | 96 return t |
62 | 97 |
63 def t_afterAliasEquals_REGEX(self, t): | 98 def t_code_RIGHT_BRACKET(self, t): |
64 r'(?P<regex>.+)\s*;' | 99 r'}' |
65 self.lexer.begin('INITIAL') | 100 self.nesting -= 1 |
66 return t | 101 if self.nesting: |
67 | 102 t.type = 'CODE_FRAGMENT' |
68 def t_CONDITION_BEGIN(self, t): | 103 else: |
69 r'<' | 104 self.lexer.pop_state() |
70 self.lexer.begin('inCondition') | |
71 return t | |
72 | |
73 def t_inCondition_CONDITION(self, t): | |
74 r'[a-zA-Z0-9_]+' | |
75 self.lexer.begin('seenCondition') | |
76 return t | |
77 | |
78 def t_seenCondition_CONDITION_END(self, t): | |
79 r'>\s*' | |
80 self.lexer.begin('afterCondition') | |
81 return t | |
82 | |
83 def t_afterCondition_REGEX_TRANSITION_BODY(self, t): | |
84 r'(?P<regex>.+)\s*=>\s*(?P<new>.+)\s*{\s*(?P<body>.+)\s*}\s*' | |
85 self.lexer.begin('INITIAL') | |
86 return t | |
87 | |
88 def t_afterCondition_REGEX_TRANSITION(self, t): | |
89 r'(?P<regex>.+)\s*:=>\s*(?P<new>.+)\s*' | |
90 self.lexer.begin('INITIAL') | |
91 return t | |
92 | |
93 def t_afterCondition_REGEX_BODY(self, t): | |
94 r'(?P<regex>.+?)\s+{\s*(?P<body>.+)\s*}\s*' | |
95 self.lexer.begin('INITIAL') | |
96 return t | 105 return t |
97 | 106 |
98 def t_ANY_error(self, t): | 107 def t_ANY_error(self, t): |
99 raise Exception("Illegal character '%s'" % t.value[0]) | 108 raise Exception("Illegal character '%s'" % t.value[0]) |
100 | 109 |
101 def build(self, **kwargs): | 110 def build(self, **kwargs): |
102 self.lexer = lex.lex(module=self, **kwargs) | 111 self.lexer = lex.lex(module=self, **kwargs) |
OLD | NEW |