| OLD | NEW |
| (Empty) |
def lines_get(f):
    '''Read all lines from a file like object and strip '#' comments.

    Everything from the first '#' on a line onwards is discarded, together
    with the line's terminating newline.  Lines that are empty after this
    are dropped; lines that still contain characters (including plain
    whitespace) are returned unchanged.

    @f must provide readlines().  Returns a list of strings.'''
    def cut_comment(line):
        # partition() yields the whole line when no '#' is present.
        return line.partition('#')[0]

    def cut_newline(line):
        # Only remove a real newline: the last line of a file may not have
        # one, and slicing blindly would eat a significant character.
        if line.endswith('\n'):
            return line[:-1]
        return line

    stripped = [cut_comment(cut_newline(x)) for x in f.readlines()]
    return [x for x in stripped if x]
| 11 | |
def line_split(line):
    '''Break @line into its semicolon separated fields.

    Each field has leading and trailing whitespace removed.  A line
    without any semicolon yields a single element list.'''
    fields = line.split(';')
    return [field.strip() for field in fields]
| 17 | |
def codepoints_parse(token):
    '''Convert a Unicode data file code-point field to integers.

    @token is either one hexadecimal number ("0041") or two of them joined
    by ".." ("0041..005A").  A single number is returned as an int, a
    range as a (start, end) tuple.  Raises ValueError when there are more
    than two ".." separated parts or when a part is not valid hexadecimal.'''
    pieces = token.split('..')
    # split() always returns at least one piece, so only the upper bound
    # needs checking.
    if len(pieces) > 2:
        raise ValueError(token)

    numbers = tuple(int(piece, 16) for piece in pieces)
    if len(numbers) == 1:
        return numbers[0]
    return numbers
| 30 | |
def unicode_file_parse(input, map, default_value = None):
    '''Build a list of (start, end, value) tuples from a two column file.

    @input is a file like object handed to lines_get().  Every remaining
    line must split into exactly two fields: a code-point or code-point
    range, and a key that is looked up in the dict @map.  Entries whose
    mapped value equals @default_value are left out of the result.

    Raises ValueError for a line that does not have exactly two fields;
    a key missing from @map propagates as the KeyError from the lookup.'''
    result = []
    for fields in [line_split(text) for text in lines_get(input)]:
        if len(fields) != 2:
            raise ValueError(fields)

        span = codepoints_parse(fields[0])
        mapped = map[fields[1]]
        if mapped == default_value:
            continue

        # A lone code-point is stored as a range of length one.
        if isinstance(span, int):
            first = last = span
        else:
            first, last = span[0], span[1]

        result.append((first, last, mapped))

    return result
| 51 | |
def sort_and_merge(ranges):
    '''Sort a list of (start, end, value) and join neighbouring entries.

    Two consecutive entries are joined when the second starts exactly one
    past the end of the first and both carry the same value.  @ranges is
    sorted in place; the merged entries are returned as a new list.'''
    ranges.sort()

    merged = []
    for item in ranges:
        if merged:
            previous = merged[-1]
            adjacent = previous[1] + 1 == item[0]
            if adjacent and previous[2] == item[2]:
                # Extend the last emitted entry instead of adding a new one.
                merged[-1] = (previous[0], item[1], item[2])
                continue
        merged.append(item)

    return merged
| OLD | NEW |