OLD | NEW |
(Empty) | |
| 1 # |
| 2 # Plex - Transition Maps |
| 3 # |
| 4 # This version represents state sets direcly as dicts |
| 5 # for speed. |
| 6 # |
| 7 |
| 8 from sys import maxint as maxint |
| 9 |
| 10 class TransitionMap(object): |
| 11 """ |
| 12 A TransitionMap maps an input event to a set of states. |
| 13 An input event is one of: a range of character codes, |
| 14 the empty string (representing an epsilon move), or one |
| 15 of the special symbols BOL, EOL, EOF. |
| 16 |
| 17 For characters, this implementation compactly represents |
| 18 the map by means of a list: |
| 19 |
| 20 [code_0, states_0, code_1, states_1, code_2, states_2, |
| 21 ..., code_n-1, states_n-1, code_n] |
| 22 |
| 23 where |code_i| is a character code, and |states_i| is a |
| 24 set of states corresponding to characters with codes |c| |
| 25 in the range |code_i| <= |c| <= |code_i+1|. |
| 26 |
| 27 The following invariants hold: |
| 28 n >= 1 |
| 29 code_0 == -maxint |
| 30 code_n == maxint |
| 31 code_i < code_i+1 for i in 0..n-1 |
| 32 states_0 == states_n-1 |
| 33 |
| 34 Mappings for the special events '', BOL, EOL, EOF are |
| 35 kept separately in a dictionary. |
| 36 """ |
| 37 |
| 38 map = None # The list of codes and states |
| 39 special = None # Mapping for special events |
| 40 |
| 41 def __init__(self, map = None, special = None): |
| 42 if not map: |
| 43 map = [-maxint, {}, maxint] |
| 44 if not special: |
| 45 special = {} |
| 46 self.map = map |
| 47 self.special = special |
| 48 #self.check() ### |
| 49 |
| 50 def add(self, event, new_state, |
| 51 TupleType = tuple): |
| 52 """ |
| 53 Add transition to |new_state| on |event|. |
| 54 """ |
| 55 if type(event) is TupleType: |
| 56 code0, code1 = event |
| 57 i = self.split(code0) |
| 58 j = self.split(code1) |
| 59 map = self.map |
| 60 while i < j: |
| 61 map[i + 1][new_state] = 1 |
| 62 i = i + 2 |
| 63 else: |
| 64 self.get_special(event)[new_state] = 1 |
| 65 |
| 66 def add_set(self, event, new_set, |
| 67 TupleType = tuple): |
| 68 """ |
| 69 Add transitions to the states in |new_set| on |event|. |
| 70 """ |
| 71 if type(event) is TupleType: |
| 72 code0, code1 = event |
| 73 i = self.split(code0) |
| 74 j = self.split(code1) |
| 75 map = self.map |
| 76 while i < j: |
| 77 map[i + 1].update(new_set) |
| 78 i = i + 2 |
| 79 else: |
| 80 self.get_special(event).update(new_set) |
| 81 |
| 82 def get_epsilon(self, |
| 83 none = None): |
| 84 """ |
| 85 Return the mapping for epsilon, or None. |
| 86 """ |
| 87 return self.special.get('', none) |
| 88 |
| 89 def iteritems(self, |
| 90 len = len): |
| 91 """ |
| 92 Return the mapping as an iterable of ((code1, code2), state_set) and |
| 93 (special_event, state_set) pairs. |
| 94 """ |
| 95 result = [] |
| 96 map = self.map |
| 97 else_set = map[1] |
| 98 i = 0 |
| 99 n = len(map) - 1 |
| 100 code0 = map[0] |
| 101 while i < n: |
| 102 set = map[i + 1] |
| 103 code1 = map[i + 2] |
| 104 if set or else_set: |
| 105 result.append(((code0, code1), set)) |
| 106 code0 = code1 |
| 107 i = i + 2 |
| 108 for event, set in self.special.iteritems(): |
| 109 if set: |
| 110 result.append((event, set)) |
| 111 return iter(result) |
| 112 items = iteritems |
| 113 |
| 114 # ------------------- Private methods -------------------- |
| 115 |
| 116 def split(self, code, |
| 117 len = len, maxint = maxint): |
| 118 """ |
| 119 Search the list for the position of the split point for |code|, |
| 120 inserting a new split point if necessary. Returns index |i| such |
| 121 that |code| == |map[i]|. |
| 122 """ |
| 123 # We use a funky variation on binary search. |
| 124 map = self.map |
| 125 hi = len(map) - 1 |
| 126 # Special case: code == map[-1] |
| 127 if code == maxint: |
| 128 return hi |
| 129 # General case |
| 130 lo = 0 |
| 131 # loop invariant: map[lo] <= code < map[hi] and hi - lo >= 2 |
| 132 while hi - lo >= 4: |
| 133 # Find midpoint truncated to even index |
| 134 mid = ((lo + hi) // 2) & ~1 |
| 135 if code < map[mid]: |
| 136 hi = mid |
| 137 else: |
| 138 lo = mid |
| 139 # map[lo] <= code < map[hi] and hi - lo == 2 |
| 140 if map[lo] == code: |
| 141 return lo |
| 142 else: |
| 143 map[hi:hi] = [code, map[hi - 1].copy()] |
| 144 #self.check() ### |
| 145 return hi |
| 146 |
| 147 def get_special(self, event): |
| 148 """ |
| 149 Get state set for special event, adding a new entry if necessary. |
| 150 """ |
| 151 special = self.special |
| 152 set = special.get(event, None) |
| 153 if not set: |
| 154 set = {} |
| 155 special[event] = set |
| 156 return set |
| 157 |
| 158 # --------------------- Conversion methods ----------------------- |
| 159 |
| 160 def __str__(self): |
| 161 map_strs = [] |
| 162 map = self.map |
| 163 n = len(map) |
| 164 i = 0 |
| 165 while i < n: |
| 166 code = map[i] |
| 167 if code == -maxint: |
| 168 code_str = "-inf" |
| 169 elif code == maxint: |
| 170 code_str = "inf" |
| 171 else: |
| 172 code_str = str(code) |
| 173 map_strs.append(code_str) |
| 174 i = i + 1 |
| 175 if i < n: |
| 176 map_strs.append(state_set_str(map[i])) |
| 177 i = i + 1 |
| 178 special_strs = {} |
| 179 for event, set in self.special.iteritems(): |
| 180 special_strs[event] = state_set_str(set) |
| 181 return "[%s]+%s" % ( |
| 182 ','.join(map_strs), |
| 183 special_strs |
| 184 ) |
| 185 |
| 186 # --------------------- Debugging methods ----------------------- |
| 187 |
| 188 def check(self): |
| 189 """Check data structure integrity.""" |
| 190 if not self.map[-3] < self.map[-1]: |
| 191 print(self) |
| 192 assert 0 |
| 193 |
| 194 def dump(self, file): |
| 195 map = self.map |
| 196 i = 0 |
| 197 n = len(map) - 1 |
| 198 while i < n: |
| 199 self.dump_range(map[i], map[i + 2], map[i + 1], file) |
| 200 i = i + 2 |
| 201 for event, set in self.special.iteritems(): |
| 202 if set: |
| 203 if not event: |
| 204 event = 'empty' |
| 205 self.dump_trans(event, set, file) |
| 206 |
| 207 def dump_range(self, code0, code1, set, file): |
| 208 if set: |
| 209 if code0 == -maxint: |
| 210 if code1 == maxint: |
| 211 k = "any" |
| 212 else: |
| 213 k = "< %s" % self.dump_char(code1) |
| 214 elif code1 == maxint: |
| 215 k = "> %s" % self.dump_char(code0 - 1) |
| 216 elif code0 == code1 - 1: |
| 217 k = self.dump_char(code0) |
| 218 else: |
| 219 k = "%s..%s" % (self.dump_char(code0), |
| 220 self.dump_char(code1 - 1)) |
| 221 self.dump_trans(k, set, file) |
| 222 |
| 223 def dump_char(self, code): |
| 224 if 0 <= code <= 255: |
| 225 return repr(chr(code)) |
| 226 else: |
| 227 return "chr(%d)" % code |
| 228 |
| 229 def dump_trans(self, key, set, file): |
| 230 file.write(" %s --> %s\n" % (key, self.dump_set(set))) |
| 231 |
| 232 def dump_set(self, set): |
| 233 return state_set_str(set) |
| 234 |
| 235 # |
| 236 # State set manipulation functions |
| 237 # |
| 238 |
| 239 #def merge_state_sets(set1, set2): |
| 240 # for state in set2.keys(): |
| 241 # set1[state] = 1 |
| 242 |
| 243 def state_set_str(set): |
| 244 return "[%s]" % ','.join(["S%d" % state.number for state in set]) |
| 245 |
| 246 |
| 247 |
OLD | NEW |