Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(138)

Unified Diff: tools/lexer_generator/dfa_path_writer.py

Issue 209473005: Experimental parser: dfa path iteration test (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | tools/lexer_generator/generator.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/lexer_generator/dfa_path_writer.py
diff --git a/tools/lexer_generator/dfa_path_writer.py b/tools/lexer_generator/dfa_path_writer.py
new file mode 100644
index 0000000000000000000000000000000000000000..42523d0e9d6af22db3684caa694ef878f09f3439
--- /dev/null
+++ b/tools/lexer_generator/dfa_path_writer.py
@@ -0,0 +1,207 @@
+# Copyright 2014 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import logging
+from transition_key import TransitionKey
+
+class Path(object):
+
+ def __init__(self, keys, states):
+ self.__keys = keys
+ self.__states = states
+ assert self.__states
+ self.__hash = None
+
+ def sub_path(self):
+ if len(self) == 1:
+ return None
+ return Path(self.__keys[:-1], self.__states[:-1])
+
+ def key_iter(self):
+ return iter(self.__keys)
+
+ def state_at(self, index):
+ return self.__states[index]
+
+ @staticmethod
+ def __hash_function((reps, l), state):
+ n = state.node_number()
+ h = (reps >> (l % 31) + 1) ^ reps ^ n ^ (n << (l % 31) + 1)
+ return (h, l + 1)
+
+ def __hash__(self):
+ if self.__hash == None:
+ self.__hash = reduce(self.__hash_function, self.__states, (101, -74))[0]
+ return self.__hash
+
+ def __eq__(self, other):
+ return (isinstance(other, self.__class__) and
+ self.__states == other.__states)
+
+ def __len__(self):
+ return len(self.__states)
+
+class DfaPathWriter(object):
+
+ def __init__(self, dfa):
+ self.__dfa = dfa
+
+ def write_lexer_shell_test_file(self, f):
+ dfa = self.__dfa
+ complete_paths = set()
+ total_paths = 0
+ encoding = dfa.encoding()
+ start_state = dfa.start_state()
+ for path, states_in_path, is_non_looping in dfa.path_iter():
+ states = []
+ keys = []
+ for i, (key, state) in enumerate(path[1:]):
+ states.append(state)
+ keys.append(key)
+ if key == TransitionKey.omega() and is_non_looping:
+ assert i == len(path) - 2
+ p = Path(keys, states)
+ assert not p in complete_paths
+ add_tail_transition = False
+ total_paths += len(p)
+ while p != None and (p not in complete_paths):
+ complete_paths.add(p)
+ self.__write_lines(p, start_state, add_tail_transition, encoding, f)
+ p = p.sub_path()
+ add_tail_transition = True
+ logging.info("complete_paths %d" % len(complete_paths))
+ logging.info("total_paths %d" % total_paths)
+
+ __representative_map = {
+ 'non_primary_letter' : 256,
+ 'non_primary_whitespace' : 5760,
+ 'non_primary_line_terminator' : 8232,
+ 'non_primary_everything_else' : 706,
+ 'non_primary_identifier_part_not_letter' : 768,
+ }
+
+ __eos_rep = -1
+ __omega_rep = -2
+
+ @staticmethod
+ def get_representatives(key, remaining = -1):
+ reps = []
+ contains_special = False
+ for name, r in key.range_iter(None):
+ if name == 'PRIMARY_RANGE':
+ reps.append(r[0])
+ if r[0] != r[1]:
+ reps.append(r[1])
+ elif name == 'CLASS':
+ reps.append(DfaPathWriter.__representative_map[r])
+ elif name == 'UNIQUE':
+ if r == 'eos':
+ assert remaining == 1
+ reps.append(DfaPathWriter.__eos_rep)
+ contains_special = True
+ else:
+ raise Exception('unimplemented %s %s' % (name, str(r)))
+ elif name == 'OMEGA':
+ assert remaining == 0
+ reps.append(DfaPathWriter.__omega_rep)
+ contains_special = True
+ else:
+ raise Exception('unimplemented %s %s' % (name, str(r)))
+ if contains_special:
+ assert len(reps) == 1
+ return reps
+
+ @staticmethod
+ def no_transition_keys(state, encoding):
+ all_keys = set(state.key_iter())
+ eos_found = False
+ all_keys.discard(TransitionKey.omega())
+ eos_key = TransitionKey.unique('eos')
+ if eos_key in all_keys:
+ all_keys.remove(eos_key)
+ eos_found = True
+ # TODO(dcarney): have inverse key return uniques as a separate list
+ inverse_key = TransitionKey.inverse_key(encoding, all_keys)
+ if not inverse_key:
+ return []
+ reps = DfaPathWriter.get_representatives(inverse_key)
+ if not eos_found:
+ reps.append(DfaPathWriter.__eos_rep)
+ return reps
+
+ @staticmethod
+ def __write_line(head, tail, f):
+ length = len(head)
+ if tail != DfaPathWriter.__eos_rep:
+ length += 1
+ if not length:
+ return
+ if tail == DfaPathWriter.__eos_rep:
+ f.write('%s\n' % ' '.join(map(str, head)))
+ elif head:
+ f.write('%s %s\n' % (' '.join(map(str, head)), str(tail)))
+ else:
+ f.write("%s\n" % str(tail))
+
+ def __write_lines(self, path, start_state, add_tail_transition, encoding, f):
+ current_state = start_state
+ reps = []
+ last_index = len(path) - 1
+ if add_tail_transition:
+ last_index += 1
+ for i, key in enumerate(path.key_iter()):
+ # current_state = current_state.transition_state_for_key(key)
+ # assert current_state == self.__states[i]
+ reps.append(self.get_representatives(key, last_index - i))
+ head = []
+ for i, x in enumerate(reps):
+ assert x
+ if x[0] >= 0:
+ # TODO(dcarney): produce a file with x[-1] here
+ head.append(x[0])
+ elif x[0] == DfaPathWriter.__eos_rep:
+ # this is a trailing eos or a trailing omega following eos.
+ assert i == len(reps) - 1 or (
+ len(x) == 1 and i == len(reps) - 2 and
+ len(reps[-1]) == 1 and reps[-1][0] == DfaPathWriter.__omega_rep)
+ assert add_tail_transition == (i == len(reps) - 1)
+ if i == len(reps) - 1:
+ add_tail_transition = False
+ elif x[0] == DfaPathWriter.__omega_rep:
+ # this is a trailing omega
+ # TODO(dcarney): produce an omega transition, not eos.
+ assert not add_tail_transition
+ assert len(x) == 1 and i == len(reps) - 1
+ else:
+ raise Exception('unreachable')
+ if not add_tail_transition:
+ self.__write_line(head, self.__eos_rep, f)
+ return
+ no_transition_keys = self.no_transition_keys(path.state_at(-1), encoding)
+ # TODO(dcarney): don't test all edges here.
+ for tail in no_transition_keys:
+ self.__write_line(head, tail, f)
« no previous file with comments | « no previous file | tools/lexer_generator/generator.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698