Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(67)

Unified Diff: tools/lexer_generator/transition_keys.py

Issue 66283004: Experimental parser: unique transition key support (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/lexer_generator/transition_keys.py
diff --git a/tools/lexer_generator/transition_keys.py b/tools/lexer_generator/transition_keys.py
index 41a1712c5b054e2241dff1c116ca76e9e0550333..b18cd1085122a856e9c1146f311cd8560b011219 100644
--- a/tools/lexer_generator/transition_keys.py
+++ b/tools/lexer_generator/transition_keys.py
@@ -29,68 +29,95 @@ from string import printable
class TransitionKey:
+ __class_bounds = {
+ "latin_1" : (0, 255),
+ "whitespace" : (256, 256),
+ "literal" : (257, 257),
+ }
__lower_bound = 0
- __latin_1_upper_bound = 255
- __unicode_whitespace_bounds = (256, 256)
- __unicode_literal_bounds = (257, 257)
- __upper_bound = 257
+ __upper_bound = reduce(lambda acc, (k, v): max(acc, v[1]), __class_bounds.items(), 0)
+
+ __cached_keys = {}
+
+ __unique_key_counter = -1
+
+ @staticmethod
+ def __in_latin_1(char):
+ bound = TransitionKey.__class_bounds["latin_1"]
+ return (bound[0] <= char and char <= bound[1])
+
+ @staticmethod
+ def __is_class_range(r):
+ return r[0] == r[1] and not TransitionKey.__in_latin_1(r[0])
+
+ @staticmethod
+ def __is_unique_range(r):
+ return r[0] == r[1] and r[0] < TransitionKey.__lower_bound
@staticmethod
def __verify_ranges(ranges, check_merged):
assert ranges
+ if len(ranges) == 1 and TransitionKey.__is_class_range(ranges[0]):
+ return
last = None
for r in ranges:
assert TransitionKey.__lower_bound <= r[0]
assert r[1] <= TransitionKey.__upper_bound
assert r[0] <= r[1]
- r_is_class = (
- r == TransitionKey.__unicode_whitespace_bounds or
- r == TransitionKey.__unicode_literal_bounds)
+ r_is_class = TransitionKey.__is_class_range(r)
if last != None and check_merged:
assert last[1] + 1 < r[0] or r_is_class
- if (r[0] > TransitionKey.__latin_1_upper_bound or
- r[1] > TransitionKey.__latin_1_upper_bound):
+ if not TransitionKey.__in_latin_1(r[0]):
+ assert r_is_class
+ if not TransitionKey.__in_latin_1(r[1]):
assert r_is_class
last = r
@staticmethod
- def __create(ranges):
- TransitionKey.__verify_ranges(ranges, True)
+ def __create(ranges, name = None):
+ if not ranges:
+ assert name == 'epsilon'
+ assert not name in TransitionKey.__cached_keys
+ else:
+ TransitionKey.__verify_ranges(ranges, True)
key = TransitionKey()
key.__ranges = tuple(ranges) # immutable
+ key.__name = name
key.__cached_hash = None
return key
- __cached_epsilon = None
+ @staticmethod
+ def __cached_key(name, bounds_getter):
+ if not name in TransitionKey.__cached_keys:
+ bounds = bounds_getter(name)
+ TransitionKey.__cached_keys[name] = TransitionKey.__create(bounds, name)
+ return TransitionKey.__cached_keys[name]
+
@staticmethod
def epsilon():
- if (TransitionKey.__cached_epsilon == None):
- key = TransitionKey()
- key.__ranges = tuple([])
- key.__cached_hash = None
- TransitionKey.__cached_epsilon = key
- return TransitionKey.__cached_epsilon
+ return TransitionKey.__cached_key("epsilon", lambda name : [])
- __cached_any = None
@staticmethod
def any():
- if (TransitionKey.__cached_any == None):
- bounds = [
- (TransitionKey.__lower_bound, TransitionKey.__latin_1_upper_bound),
- TransitionKey.__unicode_whitespace_bounds,
- TransitionKey.__unicode_literal_bounds,
- ]
- TransitionKey.__cached_any = TransitionKey.__create(bounds)
- return TransitionKey.__cached_any
+ return TransitionKey.__cached_key("any",
+ lambda name : TransitionKey.__class_bounds.values())
@staticmethod
def single_char(char):
char = ord(char)
- assert (TransitionKey.__lower_bound <= char and
- char <= TransitionKey.__latin_1_upper_bound)
+ assert TransitionKey.__in_latin_1(char)
return TransitionKey.__create([(char, char)])
@staticmethod
+ def unique_key(name):
+ def get_bounds(name):
+ bound = TransitionKey.__unique_key_counter
+ TransitionKey.__unique_key_counter -= 1
+ return [(bound, bound)]
+ name = "__" + name
+ return TransitionKey.__cached_key(name, get_bounds)
+
+ @staticmethod
def __process_graph(graph, ranges, key_map):
key = graph[0]
if key == 'RANGE':
@@ -103,9 +130,9 @@ class TransitionKey:
elif key == 'CHARACTER_CLASS':
class_name = graph[1]
if class_name == 'ws':
- ranges.append(TransitionKey.__unicode_whitespace_bounds)
+ ranges.append(TransitionKey.__class_bounds["whitespace"])
elif class_name == 'lit':
- ranges.append(TransitionKey.__unicode_literal_bounds)
+ ranges.append(TransitionKey.__class_bounds["literal"])
elif class_name in key_map:
ranges += key_map[class_name].__ranges
else:
@@ -155,29 +182,26 @@ class TransitionKey:
}
@staticmethod
- def __print_range(r):
- def to_str(x):
- if x <= TransitionKey.__latin_1_upper_bound:
- if not x in TransitionKey.__printable_cache:
- res = "'%s'" % chr(x) if chr(x) in printable else str(x)
- TransitionKey.__printable_cache[x] = res
- return TransitionKey.__printable_cache[x]
- if x == TransitionKey.__unicode_literal_bounds[0]:
- return "literal"
- if x == TransitionKey.__unicode_whitespace_bounds[0]:
- return "whitespace"
+ def __range_str(r):
+ if TransitionKey.__is_class_range(r):
+ for name, v in TransitionKey.__class_bounds.items():
+ if r == v: return name
assert False
+ def to_str(x):
+ assert TransitionKey.__in_latin_1(x)
+ if not x in TransitionKey.__printable_cache:
+ res = "'%s'" % chr(x) if chr(x) in printable else str(x)
+ TransitionKey.__printable_cache[x] = res
+ return TransitionKey.__printable_cache[x]
if r[0] == r[1]:
return "%s" % to_str(r[0])
else:
return "[%s-%s]" % (to_str(r[0]), to_str(r[1]))
def __str__(self):
- if self == self.epsilon():
- return "epsilon"
- if self == self.any():
- return "any"
- return ", ".join(TransitionKey.__print_range(x) for x in self.__ranges)
+ if self.__name:
+ return self.__name
+ return ", ".join(TransitionKey.__range_str(x) for x in self.__ranges)
@staticmethod
def __disjoint_keys(range_map):
@@ -236,11 +260,10 @@ class TransitionKey:
merged = []
last = None
for r in ranges:
+ assert not TransitionKey.__is_unique_range(r)
if last == None:
last = r
- elif (last[1] + 1 == r[0] and
- r != TransitionKey.__unicode_whitespace_bounds and
- r != TransitionKey.__unicode_literal_bounds):
+ elif (last[1] + 1 == r[0] and not TransitionKey.__is_class_range(r)):
last = (last[0], r[1])
else:
merged.append(last)
@@ -258,14 +281,12 @@ class TransitionKey:
def __invert_ranges(ranges):
inverted = []
last = None
- contains_whitespace = False
- contains_literal = False
+ classes = set(TransitionKey.__class_bounds.values())
+ classes.remove(TransitionKey.__class_bounds["latin_1"])
for r in ranges:
- if r == TransitionKey.__unicode_whitespace_bounds:
- contains_whitespace = True
- continue
- if r == TransitionKey.__unicode_literal_bounds:
- contains_literal = True
+ assert not TransitionKey.__is_unique_range(r)
+ if TransitionKey.__is_class_range(r):
+ classes.remove(r)
continue
if last == None:
if r[0] != TransitionKey.__lower_bound:
@@ -273,10 +294,8 @@ class TransitionKey:
elif last[1] + 1 < r[0]:
inverted.append((last[1] + 1, r[0] - 1))
last = r
- if last != None and last[1] < TransitionKey.__latin_1_upper_bound:
- inverted.append((last[1] + 1, TransitionKey.__latin_1_upper_bound))
- if not contains_whitespace:
- inverted.append(TransitionKey.__unicode_whitespace_bounds)
- if not contains_literal:
- inverted.append(TransitionKey.__unicode_literal_bounds)
+ upper_bound = TransitionKey.__class_bounds["latin_1"][1]
+ if last != None and last[1] < upper_bound:
+ inverted.append((last[1] + 1, upper_bound))
+ inverted += list(classes)
return inverted
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698