OLD | NEW |
| (Empty) |
1 #!/usr/bin/python | |
2 # Copyright 2008 The RE2 Authors. All Rights Reserved. | |
3 # Use of this source code is governed by a BSD-style | |
4 # license that can be found in the LICENSE file. | |
5 | |
6 """Generate C++ tables for Unicode Script and Category groups.""" | |
7 | |
8 import sys | |
9 import unicode | |
10 | |
# Text printed at the very top of the generated C++ file: the
# "generated, do not edit" banner, the include, and the namespace opener.
_header = """
// GENERATED BY make_unicode_groups.py; DO NOT EDIT.
// make_unicode_groups.py >unicode_groups.cc

#include "re2/unicode_groups.h"

namespace re2 {

"""

# Text printed at the very end of the generated C++ file; closes the
# namespace opened by _header.
_trailer = """

} // namespace re2

"""

# Running totals of the 16-bit and 32-bit ranges emitted so far.
# PrintGroup adds to them; main reports them in a comment in the output.
n16 = 0
n32 = 0
29 | |
def MakeRanges(codes):
    """Turn a list like [1,2,3,7,8,9] into a range list [[1,3], [7,9]].

    Args:
      codes: iterable of integers. Runs are detected only between
        adjacent elements, so consecutive values must be next to each
        other (e.g. the input is in ascending order) to be merged.

    Returns:
      A list of two-element [lo, hi] lists (both ends inclusive), one per
      run of consecutive values.  Empty input yields an empty list.
    """
    ranges = []
    for c in codes:
        # Extend the current run only when one actually exists.  Checking
        # `ranges` instead of comparing against a magic "previous value"
        # sentinel means no first element can be mistaken for the
        # continuation of a run that was never started.
        if ranges and c == ranges[-1][1] + 1:
            ranges[-1][1] = c
        else:
            ranges.append([c, c])
    return ranges
41 | |
def PrintRanges(type, name, ranges):
    """Print the ranges as a C++ array of element type `type` named `name`.

    Writes a `static const <type> <name>[] = { ... };` definition to
    standard output, with one tab-indented `{ lo, hi },` line per range.

    Args:
      type: C++ element type name to emit.  (The parameter shadows the
        Python builtin; the name is kept so existing callers still work.)
      name: identifier to give the emitted array.
      ranges: iterable of (lo, hi) integer pairs, printed in the order given.
    """
    # A parenthesized single-argument print produces the same output whether
    # it is parsed as a Python 2 print statement or called as the Python 3
    # print function, so this block runs under either interpreter.
    print("static const %s %s[] = {" % (type, name))
    for lo, hi in ranges:
        print("\t{ %d, %d }," % (lo, hi))
    print("};")
48 | |
49 # def PrintCodes(type, name, codes): | |
50 # """Print the codes as an array of type named name.""" | |
51 # print "static %s %s[] = {" % (type, name,) | |
52 # for c in codes: | |
53 # print "\t%d," % (c,) | |
54 # print "};" | |
55 | |
def PrintGroup(name, codes):
    """Emit the range tables for one group and return its UGroup literal.

    The code points are split at 65536: values below it go into a
    `<name>_range16` table and the rest into a `<name>_range32` table,
    each printed via PrintRanges only when non-empty.  The module-level
    counters n16 and n32 are increased by the number of ranges produced.

    Args:
      name: group name; used both as the quoted string in the literal and
        as the prefix of the emitted table identifiers.
      codes: iterable of integer code points belonging to the group.

    Returns:
      A string of the form `{ "<name>", +1, <r16>, <n16>, <r32>, <n32> }`
      where a missing table is written as `0, 0`.
    """
    # See unicode_groups.h for a description of the data structure.
    global n16, n32

    # Singleton code points are not pulled out separately; they stay in
    # the range tables as [c, c] entries produced by MakeRanges.
    narrow = MakeRanges([c for c in codes if c < 65536])
    wide = MakeRanges([c for c in codes if c >= 65536])
    n16 += len(narrow)
    n32 += len(wide)

    fields = ["\"%s\"" % (name,), "+1"]
    tables = (
        ("URange16", "_range16", narrow),
        ("URange32", "_range32", wide),
    )
    for ctype, suffix, table in tables:
        if table:
            PrintRanges(ctype, name + suffix, table)
            fields.append("%s%s, %d" % (name, suffix, len(table)))
        else:
            # No ranges of this width: null pointer and zero length.
            fields.append("0, 0")
    return "{ " + ", ".join(fields) + " }"
93 | |
def main():
    """Write the generated unicode_groups.cc source to standard output.

    Prints the header, the range tables for every Unicode category and
    script (via PrintGroup), a comment with the total range counts, the
    sorted `unicode_groups[]` array together with `num_unicode_groups`,
    and finally the trailer.
    """
    # Load both tables before printing anything, so a failure while
    # reading the Unicode data does not leave a partially written file.
    categories = unicode.Categories()
    scripts = unicode.Scripts()

    print(_header)
    ugroups = []
    # Walk the names in sorted order so the tables are emitted in a
    # reproducible order that does not depend on dict iteration order.
    # Iterating sorted keys also avoids the Python 2-only iteritems().
    for name in sorted(categories):
        ugroups.append(PrintGroup(name, categories[name]))
    for name in sorted(scripts):
        ugroups.append(PrintGroup(name, scripts[name]))
    print("// %d 16-bit ranges, %d 32-bit ranges" % (n16, n32))
    print("const UGroup unicode_groups[] = {")
    # Categories and scripts are merged into one array ordered by the
    # literal text, which begins with the quoted group name.
    ugroups.sort()
    for ug in ugroups:
        print("\t%s," % (ug,))
    print("};")
    print("const int num_unicode_groups = %d;" % (len(ugroups),))
    print(_trailer)
109 | |
# Generate the tables only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
OLD | NEW |