Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(7)

Unified Diff: tools/lexer_generator/transition_keys.py

Issue 54773002: Experimental parser: key inversion (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 7 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tools/lexer_generator/transition_key_test.py ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/lexer_generator/transition_keys.py
diff --git a/tools/lexer_generator/transition_keys.py b/tools/lexer_generator/transition_keys.py
index da7e88be4f67044d51b1c1e72c00b34c405be820..3fd378cc787437344b4e502a09fc0c076bce8437 100644
--- a/tools/lexer_generator/transition_keys.py
+++ b/tools/lexer_generator/transition_keys.py
@@ -24,6 +24,7 @@
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
from string import printable
class TransitionKey:
@@ -35,19 +36,26 @@ class TransitionKey:
__upper_bound = 257
@staticmethod
- def __verify_ranges(ranges):
- last = (TransitionKey.__lower_bound - 1, TransitionKey.__lower_bound - 1)
+ def __verify_ranges(ranges, check_merged):
+ assert ranges
+ last = None
for r in ranges:
assert TransitionKey.__lower_bound <= r[0]
assert r[1] <= TransitionKey.__upper_bound
assert r[0] <= r[1]
- assert last[1] < r[0]
- # TODO check classes are always disjoint points
+ r_is_class = (
+ r == TransitionKey.__unicode_whitespace_bounds or
+ r == TransitionKey.__unicode_literal_bounds)
+ if last != None and check_merged:
+ assert last[1] + 1 < r[0] or r_is_class
+ if (r[0] > TransitionKey.__latin_1_upper_bound or
+ r[1] > TransitionKey.__latin_1_upper_bound):
+ assert r_is_class
last = r
@staticmethod
def __create(ranges):
- TransitionKey.__verify_ranges(ranges)
+ TransitionKey.__verify_ranges(ranges, True)
key = TransitionKey()
key.__ranges = tuple(ranges) # immutable
key.__cached_hash = None
@@ -57,7 +65,10 @@ class TransitionKey:
@staticmethod
def epsilon():
if (TransitionKey.__cached_epsilon == None):
- TransitionKey.__cached_epsilon = TransitionKey.__create([])
+ key = TransitionKey()
+ key.__ranges = tuple([])
+ key.__cached_hash = None
+ TransitionKey.__cached_epsilon = key
return TransitionKey.__cached_epsilon
__cached_any = None
@@ -80,9 +91,23 @@ class TransitionKey:
return TransitionKey.__create([(char, char)])
@staticmethod
+ def __process_graph(graph, ranges):
+ key = graph[0]
+ if key == 'RANGE':
+ ranges.append((ord(graph[1]), ord(graph[2])))
+ elif key == 'LITERAL':
+ ranges.append((ord(graph[1]), ord(graph[1])))
+ elif key == 'CAT':
+ for x in [graph[1], graph[2]]:
+ TransitionKey.__process_graph(x, ranges)
+ else:
+ assert False, "bad key %s" % key
+
+ @staticmethod
def character_class(invert, graph):
- # TODO
- return TransitionKey.__create([(129, 129)])
+ ranges = []
+ TransitionKey.__process_graph(graph, ranges)
+ return TransitionKey.__key_from_ranges(invert, ranges)
def matches_char(self, char):
char = ord(char)
@@ -139,13 +164,7 @@ class TransitionKey:
return ", ".join(TransitionKey.__print_range(x) for x in self.__ranges)
@staticmethod
- def disjoint_keys(key_set):
- range_map = {}
- for x in key_set:
- for r in x.__ranges:
- if not r[0] in range_map:
- range_map[r[0]] = []
- range_map[r[0]].append(r[1])
+ def __disjoint_keys(range_map):
sort = lambda x : sorted(set(x))
range_map = sorted(map(lambda (k, v): (k, sort(v)), range_map.items()))
ranges = []
@@ -165,5 +184,76 @@ class TransitionKey:
if to_push:
current = range_map[i + 1]
range_map[i + 1] = (current[0], sort(current[1] + to_push))
- TransitionKey.__verify_ranges(ranges)
+ return ranges
+
+ @staticmethod
+ def disjoint_keys(key_set):
+ if not key_set:
+ return []
+ range_map = {}
+ for x in key_set:
+ for r in x.__ranges:
+ if not r[0] in range_map:
+ range_map[r[0]] = []
+ range_map[r[0]].append(r[1])
+ ranges = TransitionKey.__disjoint_keys(range_map)
+ TransitionKey.__verify_ranges(ranges, False)
return map(lambda x : TransitionKey.__create([x]), ranges)
+
+ @staticmethod
+ def __key_from_ranges(invert, ranges):
+ range_map = {}
+ for r in ranges:
+ if not r[0] in range_map:
+ range_map[r[0]] = []
+ range_map[r[0]].append(r[1])
+ ranges = TransitionKey.__disjoint_keys(range_map)
+ ranges = TransitionKey.__merge_ranges(ranges)
+ if invert:
+ ranges = TransitionKey.__invert_ranges(ranges)
+ return TransitionKey.__create(ranges)
+
+ @staticmethod
+ def __merge_ranges(ranges):
+ merged = []
+ last = None
+ for r in ranges:
+ if last == None:
+ last = r
+ elif (last[1] + 1 == r[0] and
+ r != TransitionKey.__unicode_whitespace_bounds and
+ r != TransitionKey.__unicode_literal_bounds):
+ last = (last[0], r[1])
+ else:
+ merged.append(last)
+ last = r
+ if last != None:
+ merged.append(last)
+ return merged
+
+ @staticmethod
+ def __invert_ranges(ranges):
+ inverted = []
+ last = None
+ contains_whitespace = False
+ contains_literal = False
+ for r in ranges:
+ if r == TransitionKey.__unicode_whitespace_bounds:
+ contains_whitespace = True
+ continue
+ if r == TransitionKey.__unicode_literal_bounds:
+ contains_literal = True
+ continue
+ if last == None:
+ if r[0] != TransitionKey.__lower_bound:
+ inverted.append((TransitionKey.__lower_bound, r[0] - 1))
+ elif last[1] + 1 < r[0]:
+ inverted.append((last[1] + 1, r[0] - 1))
+ last = r
+ if last != None and last[1] < TransitionKey.__latin_1_upper_bound:
+ inverted.append((last[1] + 1, TransitionKey.__latin_1_upper_bound))
+ if not contains_whitespace:
+ inverted.append(TransitionKey.__unicode_whitespace_bounds)
+ if not contains_literal:
+ inverted.append(TransitionKey.__unicode_literal_bounds)
+ return inverted
« no previous file with comments | « tools/lexer_generator/transition_key_test.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698