| OLD | NEW |
| (Empty) |
| 1 #!/usr/bin/python | |
| 2 # Copyright 2008 The RE2 Authors. All Rights Reserved. | |
| 3 # Use of this source code is governed by a BSD-style | |
| 4 # license that can be found in the LICENSE file. | |
| 5 | |
| 6 """Generate C++ tables for Unicode Script and Category groups.""" | |
| 7 | |
| 8 import sys | |
| 9 import unicode | |
| 10 | |
# Text printed before any generated table: a "generated file" banner,
# the include of re2/unicode_groups.h, and the opening of namespace re2.
_header = """
// GENERATED BY make_unicode_groups.py; DO NOT EDIT.
// make_unicode_groups.py >unicode_groups.cc

#include "re2/unicode_groups.h"

namespace re2 {

"""

# Text printed after all generated tables: closes namespace re2.
_trailer = """

} // namespace re2

"""

# Running totals of 16-bit and 32-bit ranges over every group.
# PrintGroup adds to them; main reports them in a generated comment.
n16 = 0
n32 = 0
| 29 | |
def MakeRanges(codes):
    """Turn a list like [1,2,3,7,8,9] into a range list [[1,3], [7,9]].

    Args:
      codes: iterable of integer codes. Runs are detected between
        neighbouring elements only, so the input should be in ascending
        order for the result to be minimal.

    Returns:
      A list of two-element [lo, hi] lists (inclusive bounds), in input
      order. An empty input gives an empty list.
    """
    ranges = []
    # None (rather than a magic number) marks "no previous code", so no
    # real input value can be mistaken for a continuation of a range
    # that does not exist yet.
    last = None
    for c in codes:
        if ranges and c == last:
            # A repeated code adds nothing; skipping it avoids emitting
            # a duplicate, overlapping [c, c] range.
            continue
        if ranges and c == last + 1:
            # c extends the current run: move its upper bound.
            ranges[-1][1] = c
        else:
            ranges.append([c, c])
        last = c
    return ranges
| 41 | |
def PrintRanges(type, name, ranges):
    """Print the ranges as an array of type named name.

    Writes a C++ definition of the form
    "static const <type> <name>[] = { ... };" to standard output, with
    one tab-indented "{ lo, hi }," entry per range.

    Args:
      type: C++ element type name used in the declaration. The parameter
        shadows the builtin of the same name; it is kept so existing
        callers are unaffected.
      name: C++ identifier for the array.
      ranges: iterable of (lo, hi) integer pairs.
    """
    # Every print() call receives exactly one pre-formatted string, so
    # the output is the same under Python 2 and Python 3.
    print("static const %s %s[] = {" % (type, name))
    for lo, hi in ranges:
        print("\t{ %d, %d }," % (lo, hi))
    print("};")
| 48 | |
| 49 # def PrintCodes(type, name, codes): | |
| 50 # """Print the codes as an array of type named name.""" | |
| 51 # print "static %s %s[] = {" % (type, name,) | |
| 52 # for c in codes: | |
| 53 # print "\t%d," % (c,) | |
| 54 # print "};" | |
| 55 | |
def PrintGroup(name, codes):
    """Emit the range tables for one group and build its UGroup entry.

    Codes below 65536 go into a URange16 table and the rest into a
    URange32 table; each non-empty table is printed via PrintRanges.
    The module-level counters n16 and n32 are increased by the number
    of ranges in each table.

    Returns the UGroup initializer text for the group, in which an
    empty table is written as ", 0, 0".
    """
    global n16
    global n32

    # See unicode_groups.h for a description of the data structure.
    range16 = MakeRanges([c for c in codes if c < 0x10000])
    range32 = MakeRanges([c for c in codes if c >= 0x10000])

    n16 += len(range16)
    n32 += len(range32)

    # Singleton ranges are not split out; they stay in the range tables
    # as [c, c] entries.
    fields = ["{ \"%s\", +1" % (name,)]
    tables = (
        ("URange16", "_range16", range16),
        ("URange32", "_range32", range32),
    )
    for ctype, suffix, table in tables:
        if not table:
            fields.append("0, 0")
            continue
        PrintRanges(ctype, name + suffix, table)
        fields.append("%s%s, %d" % (name, suffix, len(table)))
    return ", ".join(fields) + " }"
| 93 | |
def main():
    """Write the generated C++ source for all Unicode groups to stdout.

    Prints the file header, then the range tables for every category
    and every script (via PrintGroup), then a comment with the total
    range counts, the unicode_groups[] array of UGroup entries in
    sorted order, the num_unicode_groups constant, and the trailer.
    """
    print(_header)
    ugroups = []
    # unicode.Categories() and unicode.Scripts() are expected to return
    # dict-like mappings of group name -> list of codes. items() is used
    # because it exists on both Python 2 and Python 3 dicts.
    for name, codes in unicode.Categories().items():
        ugroups.append(PrintGroup(name, codes))
    for name, codes in unicode.Scripts().items():
        ugroups.append(PrintGroup(name, codes))
    print("// %d 16-bit ranges, %d 32-bit ranges" % (n16, n32))
    print("const UGroup unicode_groups[] = {")
    # Each entry starts with the quoted group name, so sorting the
    # entry strings orders the table by name.
    ugroups.sort()
    for ug in ugroups:
        print("\t%s," % (ug,))
    print("};")
    print("const int num_unicode_groups = %d;" % (len(ugroups),))
    print(_trailer)


if __name__ == '__main__':
    main()
| OLD | NEW |