| Index: tools/lexer_generator/transition_keys.py
|
| diff --git a/tools/lexer_generator/transition_keys.py b/tools/lexer_generator/transition_keys.py
|
| index 983b4870ca05c49168a27b41b519021bc51fc131..b3dc04d9560cd3d45c63729ed3d562cc28c4397b 100644
|
| --- a/tools/lexer_generator/transition_keys.py
|
| +++ b/tools/lexer_generator/transition_keys.py
|
| @@ -25,92 +25,9 @@
|
| # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
| -from types import IntType
|
| from itertools import chain
|
| +from encoding import KeyEncoding
|
| from action import Term
|
| -from string import printable
|
| -
|
| -class KeyEncoding(object):
|
| -
|
| - __encodings = {}
|
| -
|
| - @staticmethod
|
| - def get(name):
|
| - if not KeyEncoding.__encodings:
|
| - Latin1Encoding()
|
| - Utf16Encoding()
|
| - Utf8Encoding()
|
| - return KeyEncoding.__encodings[name]
|
| -
|
| - def __init__(self, name, primary_range, named_ranges, predefined_ranges):
|
| - assert not name in KeyEncoding.__encodings
|
| - assert primary_range[0] <= primary_range[1]
|
| - KeyEncoding.__encodings[name] = self
|
| - self.__name = name
|
| - self.__primary_range = primary_range
|
| - self.__lower_bound = primary_range[0]
|
| - self.__upper_bound = primary_range[1]
|
| - self.__primary_range_component = self.numeric_range_term(primary_range[0],
|
| - primary_range[1])
|
| - self.__named_ranges = {
|
| - k : Term('NAMED_RANGE_KEY', k) for k in named_ranges }
|
| - def f(v):
|
| - if len(v) == 2:
|
| - return self.numeric_range_term(v[0], v[1])
|
| - elif len(v) == 1:
|
| - assert v[0] in self.__named_ranges
|
| - return self.__named_ranges[v[0]]
|
| - else:
|
| - raise Exception()
|
| - self.__predefined_ranges = {
|
| - k : map(f, v) for k, v in predefined_ranges.iteritems() }
|
| -
|
| - def name(self):
|
| - return self.__name
|
| -
|
| - def lower_bound(self):
|
| - return self.__lower_bound
|
| -
|
| - def upper_bound(self):
|
| - return self.__upper_bound
|
| -
|
| - def primary_range(self):
|
| - return self.__primary_range
|
| -
|
| - def named_range(self, name):
|
| - ranges = self.__named_ranges
|
| - return Term.empty_term() if not name in ranges else ranges[name]
|
| -
|
| - def named_range_iter(self):
|
| - return self.__named_range.iteritems()
|
| -
|
| - def named_range_key_iter(self):
|
| - return self.__named_ranges.iterkeys()
|
| -
|
| - def named_range_value_iter(self):
|
| - return self.__named_ranges.itervalues()
|
| -
|
| - def predefined_range_iter(self, name):
|
| - ranges = self.__predefined_ranges
|
| - return None if not name in ranges else iter(ranges[name])
|
| -
|
| - def __primary_range_iter(self):
|
| - yield self.__primary_range_component
|
| -
|
| - def all_components_iter(self):
|
| - return chain(self.__primary_range_iter(), self.__named_ranges.itervalues())
|
| -
|
| - def is_primary_range(self, r):
|
| - assert len(r) == 2
|
| - return self.in_primary_range(r[0], r[1])
|
| -
|
| - def in_primary_range(self, a, b):
|
| - return self.lower_bound() <= a and b <= self.upper_bound()
|
| -
|
| - def numeric_range_term(self, a, b):
|
| - assert type(a) == IntType and type(b) == IntType
|
| - assert self.in_primary_range(a, b)
|
| - return Term('NUMERIC_RANGE_KEY', a, b)
|
|
|
| class TransitionKey(object):
|
| '''Represents a transition from a state in DFA or NFA to another state.
|
| @@ -299,20 +216,6 @@ class TransitionKey(object):
|
| def __eq__(self, other):
|
| return isinstance(other, TransitionKey) and self.__term == other.__term
|
|
|
| - @staticmethod
|
| - def __class_name(encoding, r):
|
| - for name, v in encoding.class_range_iter():
|
| - if r == v:
|
| - return name
|
| - assert False
|
| -
|
| - @staticmethod
|
| - def __unique_name(r):
|
| - for name, v in TransitionKey.__cached_keys['no_encoding'].items():
|
| - if v.__ranges and r == v.__ranges[0]:
|
| - return name[2:]
|
| - assert False
|
| -
|
| def range_iter(self, encoding):
|
| for c in self.__flatten():
|
| if c.name() == 'NUMERIC_RANGE_KEY':
|
| @@ -324,12 +227,6 @@ class TransitionKey(object):
|
| else:
|
| assert False, 'unimplemented %s' % c
|
|
|
| - __printable_cache = {
|
| - ord('\t') : '\\t',
|
| - ord('\n') : '\\n',
|
| - ord('\r') : '\\r',
|
| - }
|
| -
|
| @staticmethod
|
| def __component_str(encoding, component):
|
| if component.name() == 'TERM_KEY':
|
| @@ -340,21 +237,13 @@ class TransitionKey(object):
|
| return 'epsilon'
|
| elif component.name() == 'OMEGA_KEY':
|
| return 'omega'
|
| - elif component.name() != 'NUMERIC_RANGE_KEY':
|
| - raise Exception('unprintable %s' % component)
|
| - r = component.args()
|
| - def to_str(x):
|
| - assert not encoding or encoding.in_primary_range(x, x)
|
| - if x > 127:
|
| - return str(x)
|
| - if not x in TransitionKey.__printable_cache:
|
| - res = "'%s'" % chr(x) if chr(x) in printable else str(x)
|
| - TransitionKey.__printable_cache[x] = res
|
| - return TransitionKey.__printable_cache[x]
|
| - if r[0] == r[1]:
|
| - return '%s' % to_str(r[0])
|
| - else:
|
| + elif component.name() == 'NUMERIC_RANGE_KEY':
|
| + r = component.args()
|
| + to_str = lambda x: KeyEncoding.to_str(encoding, x)
|
| + if r[0] == r[1]:
|
| + return '%s' % to_str(r[0])
|
| return '[%s-%s]' % (to_str(r[0]), to_str(r[1]))
|
| + raise Exception('unprintable %s' % component)
|
|
|
| def __flatten(self):
|
| return self.__flatten_components([self.__term])
|
| @@ -416,6 +305,8 @@ class TransitionKey(object):
|
|
|
| @staticmethod
|
| def __construct(encoding, components):
|
| + if isinstance(components, Term):
|
| + components = [components]
|
| is_unique = False
|
| acc = []
|
| last = Term.empty_term()
|
| @@ -441,8 +332,6 @@ class TransitionKey(object):
|
| return acc[0] if len(acc) == 1 else Term('COMPOSITE_KEY', *acc)
|
|
|
| def __init__(self, encoding, components):
|
| - if isinstance(components, Term):
|
| - components = [components]
|
| self.__term = TransitionKey.__construct(encoding, components)
|
| self.__cached_hash = None
|
|
|
| @@ -559,8 +448,7 @@ class TransitionKey(object):
|
|
|
| @staticmethod
|
| def __invert_components(encoding, components):
|
| - def key(x, y):
|
| - return encoding.numeric_range_term(x, y)
|
| + key = lambda x, y: encoding.numeric_range_term(x, y)
|
| last = None
|
| classes = set(encoding.named_range_value_iter())
|
| for c in components:
|
| @@ -584,69 +472,3 @@ class TransitionKey(object):
|
| yield key(last[1] + 1, upper_bound)
|
| for c in sorted(classes, TransitionKey.__component_compare):
|
| yield c
|
| -
|
| -class Latin1Encoding(KeyEncoding):
|
| -
|
| - def __init__(self):
|
| - super(Latin1Encoding, self).__init__(
|
| - 'latin1',
|
| - (0, 255),
|
| - [],
|
| - {
|
| - 'whitespace':
|
| - [(9, 9), (11, 12), (32, 32), (133, 133), (160, 160)],
|
| - 'letter':
|
| - [(65, 90), (97, 122), (170, 170), (181, 181),
|
| - (186, 186), (192, 214), (216, 246), (248, 255)],
|
| - 'line_terminator':
|
| - [(10, 10), (13, 13)],
|
| - 'identifier_part_not_letter':
|
| - [(48, 57), (95, 95)]
|
| - })
|
| -
|
| -class Utf16Encoding(KeyEncoding):
|
| -
|
| - def __init__(self):
|
| - super(Utf16Encoding, self).__init__(
|
| - 'utf16',
|
| - (0, 255),
|
| - ['non_primary_whitespace',
|
| - 'non_primary_letter',
|
| - 'non_primary_identifier_part_not_letter',
|
| - 'non_primary_line_terminator',
|
| - 'non_primary_everything_else'],
|
| - {
|
| - 'whitespace':
|
| - [(9, 9), (11, 12), (32, 32), (133, 133), (160, 160),
|
| - ('non_primary_whitespace',)],
|
| - 'letter':
|
| - [(65, 90), (97, 122), (170, 170), (181, 181),
|
| - (186, 186), (192, 214), (216, 246), (248, 255),
|
| - ('non_primary_letter',)],
|
| - 'line_terminator':
|
| - [(10, 10), (13, 13), ('non_primary_line_terminator',)],
|
| - 'identifier_part_not_letter':
|
| - [(48, 57), (95, 95), ('non_primary_identifier_part_not_letter',)],
|
| - })
|
| -
|
| -class Utf8Encoding(KeyEncoding):
|
| -
|
| - def __init__(self):
|
| - super(Utf8Encoding, self).__init__(
|
| - 'utf8',
|
| - (0, 127),
|
| - ['non_primary_whitespace',
|
| - 'non_primary_letter',
|
| - 'non_primary_identifier_part_not_letter',
|
| - 'non_primary_line_terminator',
|
| - 'non_primary_everything_else'],
|
| - {
|
| - 'whitespace':
|
| - [(9, 9), (11, 12), (32, 32), ('non_primary_whitespace',)],
|
| - 'letter':
|
| - [(65, 90), (97, 122), ('non_primary_letter',)],
|
| - 'line_terminator':
|
| - [(10, 10), (13, 13), ('non_primary_line_terminator',)],
|
| - 'identifier_part_not_letter':
|
| - [(48, 57), (95, 95), ('non_primary_identifier_part_not_letter',)],
|
| - })
|
|
|