OLD | NEW |
| (Empty) |
def lines_get(f):
    '''Parse a file like object, removing comments and returning a list of
    lines.

    Everything from the first '#' on a line up to its end is treated as a
    comment and discarded.  The trailing newline is removed and lines that end
    up empty are dropped.  Other whitespace is preserved, so a line made only
    of spaces is still returned.

    @f must provide readlines() returning text lines.'''
    def cut_comment(line):
        # partition() hands back the whole line when it contains no '#'.
        return line.partition('#')[0]

    def chomp(line):
        # Only remove a newline that is really there: the last line of a file
        # need not be terminated, and blindly dropping the final character
        # would eat real data in that case.
        if line.endswith('\n'):
            return line[:-1]
        return line

    stripped = (cut_comment(chomp(line)) for line in f.readlines())
    return [line for line in stripped if line]
11 | |
def line_split(line):
    '''Break a line into its semicolon separated fields, trimming the
    whitespace that surrounds each field.'''
    return [field.strip() for field in line.split(';')]
17 | |
def codepoints_parse(token):
    '''Decode a Unicode style code-point specification.

    A lone hexadecimal value yields an int, while "start..end" yields the
    tuple (start, end).  A token with more than one ".." separator, or with a
    part that is not valid hexadecimal, raises ValueError.'''
    pieces = token.split('..')
    # Reject malformed tokens before attempting any hex conversion.
    if len(pieces) not in (1, 2):
        raise ValueError(token)

    values = [int(piece, 16) for piece in pieces]
    if len(values) == 1:
        return values[0]
    return (values[0], values[1])
30 | |
def unicode_file_parse(input, map, default_value = None):
    '''Read code-point assignments from the file like object @input.

    Every line left once comments are removed must hold exactly two semicolon
    separated columns: a code-point or code-point range, followed by a key
    that is looked up in the dict @map.  Entries whose mapped value equals
    @default_value are left out of the result.

    Returns a list of (start, end, value) tuples in file order; a single
    code-point is reported with start == end.  Raises ValueError for a line
    that does not have exactly two columns; a key missing from @map lets the
    lookup error propagate.'''
    result = []
    for fields in [line_split(text) for text in lines_get(input)]:
        if len(fields) != 2:
            raise ValueError(fields)

        spec, key = fields
        # Parse the code-points before the lookup so a malformed range is
        # reported ahead of an unknown key.
        span = codepoints_parse(spec)
        value = map[key]
        if value == default_value:
            continue

        # A lone code-point is stored as a one element range.
        first, last = (span, span) if isinstance(span, int) else (span[0], span[1])
        result.append((first, last, value))

    return result
51 | |
def sort_and_merge(ranges):
    '''Given an iterable of (start, end, value), merge elements where the
    ranges are continuous and the values are the same.

    The elements are processed in sorted order.  Two neighbouring elements are
    merged only when the second starts exactly one past the end of the first
    and both carry an equal value; overlapping ranges are left untouched.

    Returns a new sorted list.  @ranges itself is not modified.'''
    merged = []
    # sorted() works on a copy, so the caller's list keeps its order, and any
    # iterable (not just a list) is accepted.
    for item in sorted(ranges):
        if merged:
            last = merged[-1]
            if last[1] + 1 == item[0] and last[2] == item[2]:
                # Directly adjacent with the same value: extend the previous
                # range instead of starting a new one.
                merged[-1] = (last[0], item[1], item[2])
                continue
        merged.append(item)

    return merged
OLD | NEW |