Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(679)

Side by Side Diff: tools/lexer_generator/transition_keys.py

Issue 132693018: Experimental parser: handling unique keys in merges (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/lexer_generator/rule_parser.py ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2013 the V8 project authors. All rights reserved. 1 # Copyright 2013 the V8 project authors. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without 2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are 3 # modification, are permitted provided that the following conditions are
4 # met: 4 # met:
5 # 5 #
6 # * Redistributions of source code must retain the above copyright 6 # * Redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer. 7 # notice, this list of conditions and the following disclaimer.
8 # * Redistributions in binary form must reproduce the above 8 # * Redistributions in binary form must reproduce the above
9 # copyright notice, this list of conditions and the following 9 # copyright notice, this list of conditions and the following
10 # disclaimer in the documentation and/or other materials provided 10 # disclaimer in the documentation and/or other materials provided
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after
119 119
120 @staticmethod 120 @staticmethod
121 def __is_unique_range(r): 121 def __is_unique_range(r):
122 return (r[0] == r[1] and 122 return (r[0] == r[1] and
123 r[0] < 0 and 123 r[0] < 0 and
124 r[1] > TransitionKey.__unique_key_counter) 124 r[1] > TransitionKey.__unique_key_counter)
125 125
126 @staticmethod 126 @staticmethod
127 def __verify_ranges(encoding, ranges, check_merged): 127 def __verify_ranges(encoding, ranges, check_merged):
128 assert ranges 128 assert ranges
129 if len(ranges) == 1 and TransitionKey.__is_unique_range(ranges[0]):
130 return
131 last = None 129 last = None
132 for r in ranges: 130 for r in ranges:
133 assert not TransitionKey.__is_unique_range(r) 131 r_is_class = (TransitionKey.__is_unique_range(r) or
134 r_is_class = encoding.is_class_range(r) 132 encoding.is_class_range(r))
135 # Assert that the ranges are in order. 133 # Assert that the ranges are in order.
136 if last != None and check_merged: 134 if last != None and check_merged:
137 assert last[1] + 1 < r[0] or r_is_class 135 assert last[1] + 1 < r[0] or r_is_class
138 last = r 136 if r_is_class:
139 137 last = None
140 def __is_unique(self): 138 else:
141 return len(self.__ranges) == 1 and self.__is_unique_range(self.__ranges[0]) 139 last = r
142 140
143 @staticmethod 141 @staticmethod
144 def __cached_key(encoding, name, bounds_getter): 142 def __cached_key(encoding, name, bounds_getter):
145 encoding_name = encoding.name() if encoding else 'no_encoding' 143 encoding_name = encoding.name() if encoding else 'no_encoding'
146 cache = TransitionKey.__cached_keys[encoding_name] 144 cache = TransitionKey.__cached_keys[encoding_name]
147 if not name in cache: 145 if not name in cache:
148 bounds = bounds_getter(name) 146 bounds = bounds_getter(name)
149 cache[name] = TransitionKey(encoding, bounds, name) 147 cache[name] = TransitionKey(encoding, bounds, name)
150 return cache[name] 148 return cache[name]
151 149
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
230 def matches_char(self, char): 228 def matches_char(self, char):
231 char = ord(char) 229 char = ord(char)
232 assert char < 128 230 assert char < 128
233 for r in self.__ranges: 231 for r in self.__ranges:
234 if r[0] <= char and char <= r[1]: return True 232 if r[0] <= char and char <= r[1]: return True
235 return False 233 return False
236 234
237 def is_superset_of_key(self, key): 235 def is_superset_of_key(self, key):
238 '''Returns true if 'key' is a sub-key of this TransitionKey.''' 236 '''Returns true if 'key' is a sub-key of this TransitionKey.'''
239 assert isinstance(key, self.__class__) 237 assert isinstance(key, self.__class__)
240 assert key != TransitionKey.epsilon() and not key.__is_unique() 238 assert key != TransitionKey.epsilon()
241 assert len(key.__ranges) == 1 239 assert len(key.__ranges) == 1
242 subkey = key.__ranges[0] 240 subkey = key.__ranges[0]
243 matches = False 241 matches = False
244 for k in self.__ranges: 242 for k in self.__ranges:
245 if k[0] <= subkey[0]: 243 if k[0] <= subkey[0]:
246 assert subkey[1] <= k[1] or subkey[0] > k[1] 244 assert subkey[1] <= k[1] or subkey[0] > k[1]
247 if subkey[0] < k[0]: 245 if subkey[0] < k[0]:
248 assert subkey[1] < k[0] 246 assert subkey[1] < k[0]
249 if k[0] <= subkey[0] and k[1] >= subkey[1]: 247 if k[0] <= subkey[0] and k[1] >= subkey[1]:
250 assert not matches 248 assert not matches
(...skipping 20 matching lines...) Expand all
271 if self.__cached_hash == None: 269 if self.__cached_hash == None:
272 self.__cached_hash = hash(self.__ranges) 270 self.__cached_hash = hash(self.__ranges)
273 return self.__cached_hash 271 return self.__cached_hash
274 272
275 def __eq__(self, other): 273 def __eq__(self, other):
276 return isinstance(other, self.__class__) and self.__ranges == other.__ranges 274 return isinstance(other, self.__class__) and self.__ranges == other.__ranges
277 275
278 @staticmethod 276 @staticmethod
279 def __class_name(encoding, r): 277 def __class_name(encoding, r):
280 for name, v in encoding.class_range_iter(): 278 for name, v in encoding.class_range_iter():
281 if r == v: return name 279 if r == v:
280 return name
281 assert False
282
283 @staticmethod
284 def __unique_name(r):
285 for name, v in TransitionKey.__cached_keys['no_encoding'].items():
286 if v.__ranges and r == v.__ranges[0]:
287 return name[2:]
282 assert False 288 assert False
283 289
284 def range_iter(self, encoding): 290 def range_iter(self, encoding):
285 assert not self == TransitionKey.epsilon() and not self.__is_unique() 291 assert not self == TransitionKey.epsilon()
286 for r in self.__ranges: 292 for r in self.__ranges:
287 if encoding.is_class_range(r): 293 if self.__is_unique_range(r):
288 yield ('CLASS', TransitionKey.__class_name(encoding, r)) 294 yield ('UNIQUE', self.__unique_name(r))
295 elif encoding.is_class_range(r):
296 yield ('CLASS', self.__class_name(encoding, r))
289 else: 297 else:
290 yield ('PRIMARY_RANGE', r) 298 yield ('PRIMARY_RANGE', r)
291 299
292 __printable_cache = { 300 __printable_cache = {
293 ord('\t') : '\\t', 301 ord('\t') : '\\t',
294 ord('\n') : '\\n', 302 ord('\n') : '\\n',
295 ord('\r') : '\\r', 303 ord('\r') : '\\r',
296 } 304 }
297 305
298 @staticmethod 306 @staticmethod
299 def __range_str(encoding, r): 307 def __range_str(encoding, r):
308 if TransitionKey.__is_unique_range(r):
309 return TransitionKey.__unique_name(r)
300 if encoding and encoding.is_class_range(r): 310 if encoding and encoding.is_class_range(r):
301 return TransitionKey.__class_name(encoding, r) 311 return TransitionKey.__class_name(encoding, r)
302 def to_str(x): 312 def to_str(x):
303 assert not encoding or encoding.in_primary_range(x) 313 assert not encoding or encoding.in_primary_range(x)
304 if x > 127: 314 if x > 127:
305 return str(x) 315 return str(x)
306 if not x in TransitionKey.__printable_cache: 316 if not x in TransitionKey.__printable_cache:
307 res = "'%s'" % chr(x) if chr(x) in printable else str(x) 317 res = "'%s'" % chr(x) if chr(x) in printable else str(x)
308 TransitionKey.__printable_cache[x] = res 318 TransitionKey.__printable_cache[x] = res
309 return TransitionKey.__printable_cache[x] 319 return TransitionKey.__printable_cache[x]
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
357 current = range_map[i + 1] 367 current = range_map[i + 1]
358 range_map[i + 1] = (current[0], sort(current[1] + to_push)) 368 range_map[i + 1] = (current[0], sort(current[1] + to_push))
359 return ranges 369 return ranges
360 370
361 @staticmethod 371 @staticmethod
362 def __disjoint_ranges_from_key_set(encoding, key_set): 372 def __disjoint_ranges_from_key_set(encoding, key_set):
363 if not key_set: 373 if not key_set:
364 return [] 374 return []
365 range_map = {} 375 range_map = {}
366 for x in key_set: 376 for x in key_set:
367 assert not x.__is_unique()
368 assert x != TransitionKey.epsilon() 377 assert x != TransitionKey.epsilon()
369 for r in x.__ranges: 378 for r in x.__ranges:
370 if not r[0] in range_map: 379 if not r[0] in range_map:
371 range_map[r[0]] = [] 380 range_map[r[0]] = []
372 range_map[r[0]].append(r[1]) 381 range_map[r[0]].append(r[1])
373 ranges = TransitionKey.__disjoint_keys(encoding, range_map) 382 ranges = TransitionKey.__disjoint_keys(encoding, range_map)
374 TransitionKey.__verify_ranges(encoding, ranges, False) 383 TransitionKey.__verify_ranges(encoding, ranges, False)
375 return ranges 384 return ranges
376 385
377 @staticmethod 386 @staticmethod
(...skipping 19 matching lines...) Expand all
397 ranges = TransitionKey.__disjoint_ranges_from_key_set(encoding, key_set) 406 ranges = TransitionKey.__disjoint_ranges_from_key_set(encoding, key_set)
398 inverse = TransitionKey.__invert_ranges(encoding, ranges) 407 inverse = TransitionKey.__invert_ranges(encoding, ranges)
399 if not inverse: 408 if not inverse:
400 return None 409 return None
401 return TransitionKey(encoding, inverse) 410 return TransitionKey(encoding, inverse)
402 411
403 @staticmethod 412 @staticmethod
404 def __key_from_ranges(encoding, invert, ranges): 413 def __key_from_ranges(encoding, invert, ranges):
405 range_map = {} 414 range_map = {}
406 for r in ranges: 415 for r in ranges:
416 assert r
407 if not r[0] in range_map: 417 if not r[0] in range_map:
408 range_map[r[0]] = [] 418 range_map[r[0]] = []
409 range_map[r[0]].append(r[1]) 419 range_map[r[0]].append(r[1])
410 ranges = TransitionKey.__disjoint_keys(encoding, range_map) 420 ranges = TransitionKey.__disjoint_keys(encoding, range_map)
411 ranges = TransitionKey.__merge_ranges(encoding, ranges) 421 ranges = TransitionKey.__merge_ranges(encoding, ranges)
412 if invert: 422 if invert:
413 ranges = TransitionKey.__invert_ranges(encoding, ranges) 423 ranges = TransitionKey.__invert_ranges(encoding, ranges)
414 return TransitionKey(encoding, ranges) 424 return TransitionKey(encoding, ranges)
415 425
416 @staticmethod 426 @staticmethod
417 def __merge_ranges(encoding, ranges): 427 def __merge_ranges(encoding, ranges):
418 merged = [] 428 merged = []
419 last = None 429 last = None
420 for r in ranges: 430 for r in ranges:
421 assert not TransitionKey.__is_unique_range(r)
422 if last == None: 431 if last == None:
423 last = r 432 last = r
424 elif (last[1] + 1 == r[0] and not encoding.is_class_range(r)): 433 elif (last[1] + 1 == r[0] and
434 not TransitionKey.__is_unique_range(r) and
435 not encoding.is_class_range(r)):
425 last = (last[0], r[1]) 436 last = (last[0], r[1])
426 else: 437 else:
427 merged.append(last) 438 merged.append(last)
428 last = r 439 last = r
429 if last != None: 440 if last != None:
430 merged.append(last) 441 merged.append(last)
431 return merged 442 return merged
432 443
433 @staticmethod 444 @staticmethod
434 def merged_key(encoding, keys): 445 def merged_key(encoding, keys):
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
525 self.class_range('non_primary_whitespace')]) 536 self.class_range('non_primary_whitespace')])
526 self.add_predefined_range( 537 self.add_predefined_range(
527 'letter', [(65, 90), (97, 122), self.class_range('non_primary_letter')]) 538 'letter', [(65, 90), (97, 122), self.class_range('non_primary_letter')])
528 self.add_predefined_range( 539 self.add_predefined_range(
529 'line_terminator', 540 'line_terminator',
530 [(10, 10), (13, 13), self.class_range('non_primary_line_terminator')]) 541 [(10, 10), (13, 13), self.class_range('non_primary_line_terminator')])
531 self.add_predefined_range( 542 self.add_predefined_range(
532 'identifier_part_not_letter', 543 'identifier_part_not_letter',
533 [(48, 57), (95, 95), 544 [(48, 57), (95, 95),
534 self.class_range('non_primary_identifier_part_not_letter')]) 545 self.class_range('non_primary_identifier_part_not_letter')])
OLDNEW
« no previous file with comments | « tools/lexer_generator/rule_parser.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698