OLD | NEW |
(Empty) | |
| 1 # PLY based Lexer class, based on pycparser by Eli Bendersky. |
| 2 # |
| 3 # Copyright (c) 2012, Eli Bendersky |
| 4 # All rights reserved. |
| 5 # |
| 6 # Redistribution and use in source and binary forms, with or without modification, |
| 7 # are permitted provided that the following conditions are met: |
| 8 # |
| 9 # * Redistributions of source code must retain the above copyright notice, this |
| 10 # list of conditions and the following disclaimer. |
| 11 # * Redistributions in binary form must reproduce the above copyright notice, |
| 12 # this list of conditions and the following disclaimer in the documentation |
| 13 # and/or other materials provided with the distribution. |
| 14 # * Neither the name of Eli Bendersky nor the names of its contributors may |
| 15 # be used to endorse or promote products derived from this software without |
| 16 # specific prior written permission. |
| 17 # |
| 18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
| 19 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| 20 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| 21 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
| 22 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 23 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE |
| 24 # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 25 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 26 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
| 27 # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 28 |
| 29 import re |
| 30 import sys |
| 31 import os.path |
| 32 |
| 33 # Try to load the ply module, if not, then assume it is in the third_party |
| 34 # directory. |
| 35 try: |
| 36 # Disable lint check which fails to find the ply module. |
| 37 # pylint: disable=F0401 |
| 38 from ply.lex import TOKEN |
| 39 except ImportError: |
| 40 module_path, module_name = os.path.split(__file__) |
| 41 third_party = os.path.join( |
| 42 module_path, os.pardir, os.pardir, os.pardir, os.pardir, 'third_party') |
| 43 sys.path.append(third_party) |
| 44 # pylint: disable=F0401 |
| 45 from ply.lex import TOKEN |
| 46 |
| 47 |
| 48 class Lexer(object): |
| 49 ######################-- PRIVATE --###################### |
| 50 |
| 51 ## |
| 52 ## Internal auxiliary methods |
| 53 ## |
| 54 def _error(self, msg, token): |
| 55 print('%s at line %d' % (msg, token.lineno)) |
| 56 self.lexer.skip(1) |
| 57 |
| 58 ## |
| 59 ## Reserved keywords |
| 60 ## |
| 61 keywords = ( |
| 62 'HANDLE', |
| 63 'DATA_PIPE_CONSUMER', |
| 64 'DATA_PIPE_PRODUCER', |
| 65 'MESSAGE_PIPE', |
| 66 |
| 67 'MODULE', |
| 68 'STRUCT', |
| 69 'INTERFACE', |
| 70 'ENUM', |
| 71 'VOID', |
| 72 ) |
| 73 |
| 74 keyword_map = {} |
| 75 for keyword in keywords: |
| 76 keyword_map[keyword.lower()] = keyword |
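| # For example, keyword_map['struct'] == 'STRUCT'; t_NAME below uses this map |
| # to re-type identifiers that are reserved words. |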
| 77 |
| 78 ## |
| 79 ## All the tokens recognized by the lexer |
| 80 ## |
| 81 tokens = keywords + ( |
| 82 # Identifiers |
| 83 'NAME', |
| 84 |
| 85 # constants |
| 86 'ORDINAL', |
| 87 'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX', |
| 88 'FLOAT_CONST', 'HEX_FLOAT_CONST', |
| 89 'CHAR_CONST', |
| 90 'WCHAR_CONST', |
| 91 |
| 92 # String literals |
| 93 'STRING_LITERAL', |
| 94 'WSTRING_LITERAL', |
| 95 |
| 96 # Operators |
| 97 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD', |
| 98 'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT', |
| 99 'LOR', 'LAND', 'LNOT', |
| 100 'LT', 'LE', 'GT', 'GE', 'EQ', 'NE', |
| 101 |
| 102 # Assignment |
| 103 'EQUALS', |
| 104 |
| 105 # Conditional operator (?) |
| 106 'CONDOP', |
| 107 |
| 108 # Delimiters |
| 109 'LPAREN', 'RPAREN', # ( ) |
| 110 'LBRACKET', 'RBRACKET', # [ ] |
| 111 'LBRACE', 'RBRACE', # { } |
| 112 'SEMI', 'COLON', # ; : |
| 113 'COMMA', # , |
| 114 ) |
| 115 |
| 116 ## |
| 117 ## Regexes for use in tokens |
| 118 ## |
| 119 ## |
| 120 |
| 121 # valid C identifiers (K&R2: A.2.3), plus '$' (supported by some compilers) |
| 122 identifier = r'[a-zA-Z_$][0-9a-zA-Z_$]*' |
| 123 |
| 124 hex_prefix = '0[xX]' |
| 125 hex_digits = '[0-9a-fA-F]+' |
| 126 |
| 127 # integer constants (K&R2: A.2.5.1) |
| 128 integer_suffix_opt = \ |
| 129 r'(([uU]ll)|([uU]LL)|(ll[uU]?)|(LL[uU]?)|([uU][lL])|([lL][uU]?)|[uU])?' |
| 130 decimal_constant = \ |
| 131 '(0'+integer_suffix_opt+')|([1-9][0-9]*'+integer_suffix_opt+')' |
| 132 octal_constant = '0[0-7]*'+integer_suffix_opt |
| 133 hex_constant = hex_prefix+hex_digits+integer_suffix_opt |
| 134 |
| 135 bad_octal_constant = '0[0-7]*[89]' |
| 136 |
| 137 # character constants (K&R2: A.2.5.2) |
| 138 # Note: a-zA-Z and '.-~^_!=&;,' are allowed as escape chars to support #line |
| 139 # directives with Windows paths as filenames (..\..\dir\file) |
| 140 # For the same reason, decimal_escape allows all digit sequences. We want to |
| 141 # parse all correct code, even if that means also accepting some incorrect |
| 142 # code. |
| 143 # |
| 144 simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])""" |
| 145 decimal_escape = r"""(\d+)""" |
| 146 hex_escape = r"""(x[0-9a-fA-F]+)""" |
| 147 bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])""" |
| 148 |
| 149 escape_sequence = \ |
| 150 r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))' |
| 151 cconst_char = r"""([^'\\\n]|"""+escape_sequence+')' |
| 152 char_const = "'"+cconst_char+"'" |
| 153 wchar_const = 'L'+char_const |
| 154 unmatched_quote = "('"+cconst_char+"*\\n)|('"+cconst_char+"*$)" |
| 155 bad_char_const = \ |
| 156 r"""('"""+cconst_char+"""[^'\n]+')|('')|('"""+ \ |
| 157 bad_escape+r"""[^'\n]*')""" |
| 158 |
| 159 # string literals (K&R2: A.2.6) |
| 160 string_char = r"""([^"\\\n]|"""+escape_sequence+')' |
| 161 string_literal = '"'+string_char+'*"' |
| 162 wstring_literal = 'L'+string_literal |
| 163 bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"' |
| 164 |
| 165 # floating constants (K&R2: A.2.5.3) |
| 166 exponent_part = r"""([eE][-+]?[0-9]+)""" |
| 167 fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)""" |
| 168 floating_constant = \ |
| 169 '(((('+fractional_constant+')'+ \ |
| 170 exponent_part+'?)|([0-9]+'+exponent_part+'))[FfLl]?)' |
| 171 binary_exponent_part = r'''([pP][+-]?[0-9]+)''' |
| 172 hex_fractional_constant = \ |
| 173 '((('+hex_digits+r""")?\."""+hex_digits+')|('+hex_digits+r"""\.))""" |
| 174 hex_floating_constant = \ |
| 175 '('+hex_prefix+'('+hex_digits+'|'+hex_fractional_constant+')'+ \ |
| 176 binary_exponent_part+'[FfLl]?)' |
| 177 |
| 178 ## |
| 179 ## Rules for the normal state |
| 180 ## |
| 181 t_ignore = ' \t' |
| 182 |
| 183 # Newlines |
| 184 def t_NEWLINE(self, t): |
| 185 r'\n+' |
| 186 t.lexer.lineno += t.value.count("\n") |
| 187 |
| 188 # Operators |
| 189 t_PLUS = r'\+' |
| 190 t_MINUS = r'-' |
| 191 t_TIMES = r'\*' |
| 192 t_DIVIDE = r'/' |
| 193 t_MOD = r'%' |
| 194 t_OR = r'\|' |
| 195 t_AND = r'&' |
| 196 t_NOT = r'~' |
| 197 t_XOR = r'\^' |
| 198 t_LSHIFT = r'<<' |
| 199 t_RSHIFT = r'>>' |
| 200 t_LOR = r'\|\|' |
| 201 t_LAND = r'&&' |
| 202 t_LNOT = r'!' |
| 203 t_LT = r'<' |
| 204 t_GT = r'>' |
| 205 t_LE = r'<=' |
| 206 t_GE = r'>=' |
| 207 t_EQ = r'==' |
| 208 t_NE = r'!=' |
| 209 |
| 210 # = |
| 211 t_EQUALS = r'=' |
| 212 |
| 213 # ? |
| 214 t_CONDOP = r'\?' |
| 215 |
| 216 # Delimeters |
| 217 t_LPAREN = r'\(' |
| 218 t_RPAREN = r'\)' |
| 219 t_LBRACKET = r'\[' |
| 220 t_RBRACKET = r'\]' |
| 221 t_LBRACE = r'\{' |
| 222 t_RBRACE = r'\}' |
| 223 t_COMMA = r',' |
| 224 t_SEMI = r';' |
| 225 t_COLON = r':' |
| 226 |
| 227 t_STRING_LITERAL = string_literal |
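| # Ordinal values such as "@0" (note the regex as written also accepts a bare "@"). |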
| 228 t_ORDINAL = r'@[0-9]*' |
| 229 |
| 230 # The following floating and integer constants are defined as |
| 231 # functions to impose a strict order (otherwise PLY sorts plain string |
| 232 # rules by decreasing regex length, which would try the decimal rule |
| 233 # before the hex and octal rules and mis-lex those constants). |
| 234 # |
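| # For example, if the decimal rule were tried first, "0x1F" would lex as |
| # INT_CONST_DEC "0" followed by NAME "x1F" rather than one INT_CONST_HEX token. |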
| 235 @TOKEN(floating_constant) |
| 236 def t_FLOAT_CONST(self, t): |
| 237 return t |
| 238 |
| 239 @TOKEN(hex_floating_constant) |
| 240 def t_HEX_FLOAT_CONST(self, t): |
| 241 return t |
| 242 |
| 243 @TOKEN(hex_constant) |
| 244 def t_INT_CONST_HEX(self, t): |
| 245 return t |
| 246 |
| 247 @TOKEN(bad_octal_constant) |
| 248 def t_BAD_CONST_OCT(self, t): |
| 249 msg = "Invalid octal constant" |
| 250 self._error(msg, t) |
| 251 |
| 252 @TOKEN(octal_constant) |
| 253 def t_INT_CONST_OCT(self, t): |
| 254 return t |
| 255 |
| 256 @TOKEN(decimal_constant) |
| 257 def t_INT_CONST_DEC(self, t): |
| 258 return t |
| 259 |
| 260 # Must come before bad_char_const, to prevent it from |
| 261 # catching valid char constants as invalid |
| 262 # |
| 263 @TOKEN(char_const) |
| 264 def t_CHAR_CONST(self, t): |
| 265 return t |
| 266 |
| 267 @TOKEN(wchar_const) |
| 268 def t_WCHAR_CONST(self, t): |
| 269 return t |
| 270 |
| 271 @TOKEN(unmatched_quote) |
| 272 def t_UNMATCHED_QUOTE(self, t): |
| 273 msg = "Unmatched '" |
| 274 self._error(msg, t) |
| 275 |
| 276 @TOKEN(bad_char_const) |
| 277 def t_BAD_CHAR_CONST(self, t): |
| 278 msg = "Invalid char constant %s" % t.value |
| 279 self._error(msg, t) |
| 280 |
| 281 @TOKEN(wstring_literal) |
| 282 def t_WSTRING_LITERAL(self, t): |
| 283 return t |
| 284 |
| 285 # Unmatched string literals are not handled here; a stray '"' falls through to t_error |
| 286 |
| 287 @TOKEN(bad_string_literal) |
| 288 def t_BAD_STRING_LITERAL(self, t): |
| 289 msg = "String contains invalid escape code" |
| 290 self._error(msg, t) |
| 291 |
| 292 @TOKEN(identifier) |
| 293 def t_NAME(self, t): |
| 294 t.type = self.keyword_map.get(t.value, "NAME") |
| 295 return t |
| 296 |
| 297 # Ignore C and C++ style comments |
| 298 def t_COMMENT(self, t): |
| 299 r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)' |
| 300 t.lexer.lineno += t.value.count('\n')  # keep line numbers accurate across multi-line comments |
| 301 |
| 302 def t_error(self, t): |
| 303 msg = 'Illegal character %s' % repr(t.value[0]) |
| 304 self._error(msg, t) |
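
A minimal usage sketch (an illustration, not part of the change above): the Lexer class is written in PLY's object mode, so a caller would typically build the actual lexer from an instance with ply.lex. Assigning the built lexer back onto the instance mirrors _error()'s use of self.lexer; the original change may do this wiring elsewhere (e.g. in the parser), and the sample mojom input is illustrative only.

    import ply.lex as lex

    lexer_obj = Lexer()
    # Build the lexer from the token rules defined on the instance above.
    lexer_obj.lexer = lex.lex(object=lexer_obj)
    lexer_obj.lexer.input('module sample { interface Frobinator { frobinate(); }; };')
    for tok in iter(lexer_obj.lexer.token, None):
        print('%s %s' % (tok.type, tok.value))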