Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Side by Side Diff: third_party/re2/re2/make_unicode_casefold.py

Issue 1544433002: Replace RE2 import with a dependency (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Re-Added LICENSE and OWNERS file Created 4 years, 12 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/re2/re2/make_perl_groups.pl ('k') | third_party/re2/re2/make_unicode_groups.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/python
2 # coding=utf-8
3 #
4 # Copyright 2008 The RE2 Authors. All Rights Reserved.
5 # Use of this source code is governed by a BSD-style
6 # license that can be found in the LICENSE file.
7
8 # See unicode_casefold.h for description of case folding tables.
9
10 """Generate C++ table for Unicode case folding."""
11
12 import sys
13 import unicode
14
# Text written before the generated tables: the "generated file" banner,
# the include of the table declarations, and the opening of namespace re2.
_header = """
// GENERATED BY make_unicode_casefold.py; DO NOT EDIT.
// make_unicode_casefold.py >unicode_casefold.cc

#include "re2/unicode_casefold.h"

namespace re2 {

"""

# Text written after the generated tables: closes namespace re2.
_trailer = """

} // namespace re2

"""
30
def _Delta(a, b):
    """Compute the delta that takes a to b.

    Adjacent values are not reported as +1/-1 but as a parity marker
    (see unicode_casefold.h for the description of the tables):

      'EvenOdd': an even value maps to value+1, an odd value to value-1.
      'OddEven': an odd value maps to value+1, an even value to value-1.

    Any other pair yields the plain integer difference b - a.
    """
    if a + 1 == b:
        # Stepping up by one: the marker is named after a's parity.
        return 'EvenOdd' if a % 2 == 0 else 'OddEven'
    if a == b + 1:
        # Stepping down by one: an even a pairs with the odd value below it.
        return 'OddEven' if a % 2 == 0 else 'EvenOdd'
    return b - a
45
def _AddDelta(a, delta):
    """Return a + delta, handling EvenOdd and OddEven specially.

    An integer delta is simply added to a.  'EvenOdd' maps an even a to
    a+1 and an odd a to a-1; 'OddEven' maps an odd a to a+1 and an even a
    to a-1.

    Raises:
      ValueError: if delta is neither an integer nor one of the two
        parity markers.  (Raising a bare string, as this function used to,
        is itself a TypeError on Python 2.6 and later.)
    """
    if isinstance(delta, int):
        return a + delta
    if delta == 'EvenOdd':
        return a + 1 if a % 2 == 0 else a - 1
    if delta == 'OddEven':
        return a + 1 if a % 2 == 1 else a - 1
    raise ValueError("Bad Delta: %r" % (delta,))
62
def _MakeRanges(pairs):
    """Turn a list like [(65,97), (66, 98), ..., (90,122)]
    into [(65, 90, +32)].

    Args:
      pairs: (a, b) pairs; the merging below only coalesces neighbours, so
        the pairs are expected to arrive sorted by a.

    Returns:
      A list of [lo, hi, delta] lists.  delta is whatever _Delta produced
      for the first pair of the range, with 'Skip' appended to a parity
      marker when the range was built from every other value (a stepping
      by 2) instead of consecutive values.
    """
    ranges = []
    last = -100  # placeholder; only consulted once ranges is non-empty

    def evenodd(last, a, b, r):
        """Extend range r by a if a is the very next value and fits r's delta."""
        if a != last + 1:
            return False
        delta = r[2]
        # A ...Skip range only contains every other value, so the value
        # right after its end can never belong to it.  (Handing the Skip
        # marker to _AddDelta would be rejected as a bad delta.)
        if isinstance(delta, str) and delta.endswith('Skip'):
            return False
        if b != _AddDelta(a, delta):
            return False
        r[1] = a
        return True

    def evenoddpair(last, a, b, r):
        """Extend range r by a if a is two past the end and fits r's parity delta."""
        if a != last + 2:
            return False
        delta = r[2]
        if not isinstance(delta, str):
            return False
        if delta.endswith('Skip'):
            base = delta[:-4]
        else:
            # Only a single-element range may turn into a Skip range; a
            # range that already holds consecutive values cannot also
            # describe every-other-value membership with one triple.
            if r[0] != r[1]:
                return False
            base = delta
            delta = base + 'Skip'
        if b != _AddDelta(a, base):
            return False
        r[1] = a
        r[2] = delta
        return True

    for a, b in pairs:
        merged = bool(ranges) and (evenodd(last, a, b, ranges[-1]) or
                                   evenoddpair(last, a, b, ranges[-1]))
        if not merged:
            ranges.append([a, a, _Delta(a, b)])
        last = a
    return ranges
101
# The maximum size of a case-folding group.
# Case folding is implemented in parse.cc by a recursive process
# with a recursion depth equal to the size of the largest
# case-folding group, so it is important that this bound be small.
# The current tables have no group bigger than 4.
# If there are ever groups bigger than 10 or so, it will be
# time to rework the code in parse.cc.
MaxCasefoldGroup = 4
110
def main():
    """Write the generated C++ case-folding tables to standard output.

    Reads (lowergroups, casegroups) from unicode.CaseGroups(), turns them
    into (from, to) pairs, compresses the pairs with _MakeRanges, and emits
    the "casefold" and "tolower" tables between _header and _trailer.

    Output goes through sys.stdout.write rather than the print statement
    so the script produces the same bytes under Python 2 and Python 3.

    Raises:
      unicode.Error: if a case group has more than MaxCasefoldGroup
        members, or if a value appears as the source of more than one pair.
    """
    lowergroups, casegroups = unicode.CaseGroups()
    foldpairs = []
    seen = set()
    for c in casegroups:
        if len(c) > MaxCasefoldGroup:
            raise unicode.Error("casefold group too long: %s" % (c,))
        # Chain every member to the next one; for i == 0 the index -1 wraps
        # around, so the last member is linked back to the first.
        for i in range(len(c)):
            if c[i-1] in seen:
                raise unicode.Error("bad casegroups %d -> %d" % (c[i-1], c[i]))
            seen.add(c[i-1])
            foldpairs.append([c[i-1], c[i]])

    lowerpairs = []
    for lower, group in lowergroups.items():
        for g in group:
            if g != lower:
                lowerpairs.append([g, lower])

    out = sys.stdout.write

    def printpairs(name, foldpairs):
        """Emit one CaseFold table named unicode_<name> plus its length."""
        foldpairs.sort()
        foldranges = _MakeRanges(foldpairs)
        # The group count reported is always len(casegroups), for both tables.
        out("// %d groups, %d pairs, %d ranges\n" %
            (len(casegroups), len(foldpairs), len(foldranges)))
        out("const CaseFold unicode_%s[] = {\n" % (name,))
        for lo, hi, delta in foldranges:
            out("\t{ %d, %d, %s },\n" % (lo, hi, delta))
        out("};\n")
        out("const int num_unicode_%s = %d;\n" % (name, len(foldranges),))
        out("\n")

    out(_header + "\n")
    printpairs("casefold", foldpairs)
    printpairs("tolower", lowerpairs)
    out(_trailer + "\n")


if __name__ == '__main__':
    main()
OLDNEW
« no previous file with comments | « third_party/re2/re2/make_perl_groups.pl ('k') | third_party/re2/re2/make_unicode_groups.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698