Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(499)

Side by Side Diff: build_tools/filter_data_for_size.py

Issue 1000163003: Generate the icu data binaries at compile time instead of checking in binaries Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: Fixed warnings in cross compiling Created 5 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « build_tools/copy_rename_file.py ('k') | build_tools/filter_data_for_size_unittest.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 """Rewrites data so that it becomes smaller after compilation."""
2
3 from __future__ import print_function
4
5 import argparse
6 import os
7 import shutil
8
def _mergetree(source_dir, dest_dir):
    """Copy every entry of source_dir into the existing directory dest_dir.

    Directories are handed to copytree(), which merges them into a
    same-named directory when one already exists at the destination.
    Everything else is copied with shutil.copy2 and overwrites any file
    of the same name.
    """
    for entry in os.listdir(source_dir):
        src_path = os.path.join(source_dir, entry)
        dest_path = os.path.join(dest_dir, entry)
        # Pick the copy routine by entry type, then apply it.
        copy = copytree if os.path.isdir(src_path) else shutil.copy2
        copy(src_path, dest_path)
18
def copytree(source, dest):
    """Copy source to dest, merging into dest when it already exists.

    Behaves like shutil.copytree, except that an existing destination
    directory is merged into (files with the same name are overwritten)
    instead of causing an error. A source that is not a directory is
    copied with shutil.copy2.
    """
    if not os.path.isdir(source):
        shutil.copy2(source, dest)
        return
    if os.path.isdir(dest):
        # Destination already present: merge entry by entry.
        _mergetree(source, dest)
        return
    shutil.copytree(source, dest)
28
def main(argv=None):
    """Parse the command line and write the copied or size-reduced data.

    In 'strip-for-size' mode every input is run through its filter; in
    'clean-copy' mode every input is copied to its output unchanged.
    The lang data is stripped only when 'strip-for-size' is combined with
    --remove-data-already-existing-in-android; otherwise it is copied.

    Args:
        argv: Optional list of command-line arguments. When None,
            argparse reads the process arguments.
    """
    parser = argparse.ArgumentParser(
        description=('Generates an icudata.lst file to be compiled by icu.'))

    parser.add_argument('--mode',
                        required=True,
                        choices=['clean-copy', 'strip-for-size'],
                        help='Whether the files should be changed or not.')

    parser.add_argument('--in-word-txt',
                        required=True,
                        help='The word.txt to filter')

    parser.add_argument('--out-word-txt',
                        required=True,
                        help='Where to write the resulting word.txt')

    parser.add_argument('--in-brkitr-root-txt',
                        required=True,
                        help='The brkitr/root.txt to filter')

    parser.add_argument('--out-brkitr-root-txt',
                        required=True,
                        help='Where to write the resulting brkitr/root.txt')

    parser.add_argument('--in-brkitr-ja-txt',
                        required=True,
                        help='The brkitr/ja.txt to filter')

    parser.add_argument('--out-brkitr-ja-txt',
                        required=True,
                        help='Where to write the resulting brkitr/ja.txt')

    parser.add_argument('--currency-keep-list',
                        required=True,
                        help="The file with currencies to keep")

    parser.add_argument('--in-curr-dir',
                        required=True,
                        help="The currency data files")

    parser.add_argument('--out-curr-dir',
                        required=True,
                        help="The filtered/copied currency data files")

    parser.add_argument('--minimize-language-list',
                        required=True,
                        help=("Comma separated list of languages (locales) " +
                              "to minimize the data for."))

    parser.add_argument('--in-locales-dir',
                        required=True,
                        help="The locales data files")

    parser.add_argument('--out-locales-dir',
                        required=True,
                        help="The filtered/copied locales data files")

    parser.add_argument('--in-lang-dir',
                        required=True,
                        help="The lang data files")

    parser.add_argument('--out-lang-dir',
                        required=True,
                        help="The filtered/copied lang data files")

    parser.add_argument('--remove-data-already-existing-in-android',
                        action="store_true",
                        help=("Removes data in lang/region/... that " +
                              "can be fetched from Android APIs"))

    parser.add_argument('--in-zone-dir',
                        required=True,
                        help="The zone data files")

    parser.add_argument('--out-zone-dir',
                        required=True,
                        help="The filtered/copied zone data files")

    args = parser.parse_args(argv)

    if args.mode == 'strip-for-size':
        fix_word_txt(args.in_word_txt, args.out_word_txt)
        fix_brkitr_root_txt(args.in_brkitr_root_txt, args.out_brkitr_root_txt)
        fix_brkitr_ja_txt(args.in_brkitr_ja_txt, args.out_brkitr_ja_txt)
        fix_currencies(args.currency_keep_list,
                       args.in_curr_dir,
                       args.out_curr_dir)
        minimize_data_for_locales(args.in_locales_dir,
                                  args.out_locales_dir,
                                  args.minimize_language_list)
        remove_zone_example_cities(args.in_zone_dir, args.out_zone_dir)
    else:
        # argparse's `choices` guarantees this is the only other mode.
        assert args.mode == 'clean-copy'
        shutil.copyfile(args.in_word_txt, args.out_word_txt)
        shutil.copyfile(args.in_brkitr_root_txt, args.out_brkitr_root_txt)
        # Copy ja.txt from its own input; the root.txt input was used
        # here before, which produced a ja.txt with root.txt's content.
        shutil.copyfile(args.in_brkitr_ja_txt, args.out_brkitr_ja_txt)
        copytree(args.in_curr_dir, args.out_curr_dir)
        copytree(args.in_locales_dir, args.out_locales_dir)
        copytree(args.in_zone_dir, args.out_zone_dir)

    if (args.mode == 'strip-for-size' and
            args.remove_data_already_existing_in_android):
        strip_android_langs(args.in_lang_dir, args.out_lang_dir)
    else:
        # Reached for 'clean-copy' and also for 'strip-for-size' without
        # the Android flag, so no assertion on the mode is valid here.
        copytree(args.in_lang_dir, args.out_lang_dir)
136
def fix_word_txt(source, dest):
    """Write a copy of the word.txt at source to dest with the CJK rules
    removed (references to dictionaryCJK, KanaKanji and HangulSyllable).

    The per-line work is delegated to _process_word_txt().
    """
    with open(source) as in_file, open(dest, "w") as out:
        _process_word_txt(in_file, out)
143
# (pattern, replacement) pairs applied to each line of word.txt. Only the
# first pattern found in a line is applied; a replacement of None means
# the whole line is dropped.
WORD_TXT_REPLACEMENTS = (
    ("$ALetter-$dictionaryCJK", "$ALetter"),
    ("$ComplexContext $dictionaryCJK", "$ComplexContext"),
    ("$dictionaryCJK", None),
    ("special handling for CJK", None),
    ("$HangulSyllable $HangulSyllable", None),
    ("$KanaKanji $KanaKanji", None),
)


def _process_word_txt(in_file, out):
    """Copy lines from in_file to out, applying WORD_TXT_REPLACEMENTS.

    For each line the first entry whose pattern occurs in the line
    decides the outcome: the line is either rewritten with the pattern
    replaced, or skipped entirely. Lines matching no pattern are copied
    unchanged.
    """
    for line in in_file:
        hit = next((rule for rule in WORD_TXT_REPLACEMENTS
                    if rule[0] in line), None)
        if hit is None:
            out.write(line)
            continue
        pattern, substitute = hit
        if substitute is not None:
            out.write(line.replace(pattern, substitute))
        # substitute is None: the line is deleted, so nothing is written.
166
def fix_brkitr_root_txt(source, dest):
    """Copy source to dest, leaving out every line that mentions
    "cjdict.dict".

    Asserts that at least one such line was found.
    """
    with open(source) as in_file, open(dest, "w") as out:
        saw_cjdict = False
        for line in in_file:
            if "cjdict.dict" not in line:
                out.write(line)
            else:
                saw_cjdict = True
        assert saw_cjdict
178
def fix_brkitr_ja_txt(source, dest):
    """Copy source to dest, rewriting the line_ja.brk dependency entry
    into a word_ja.brk entry.

    Asserts that the line_ja.brk entry was found at least once.
    """
    old_entry = 'line:process(dependency){"line_ja.brk"}'
    new_entry = 'word:process(dependency){"word_ja.brk"}'
    with open(source) as in_file, open(dest, "w") as out:
        replaced_any = False
        for line in in_file:
            patched = line.replace(old_entry, new_entry)
            # The two entries differ, so the text changes exactly when
            # old_entry occurred in the line.
            if patched != line:
                replaced_any = True
            out.write(patched)
        assert replaced_any
192
def _print_block(line, in_file, out):
    """Consume one block from in_file, echoing it to out if out is given.

    Args:
        line: The already-read line that opens the block.
        in_file: Iterator over the remaining lines.
        out: Object with a write() method, or None to discard the block.

    The block ends on its opening line when that line contains "}".
    Otherwise it ends at the first following line that starts with the
    opening line's leading spaces followed by "}".

    Raises:
        StopIteration: if in_file runs out before the block is closed.
    """
    # Count leading spaces only, so the closing brace is matched at the
    # same indentation as the opening line.
    indent_width = len(line) - len(line.lstrip(" "))
    closing_prefix = " " * indent_width + "}"

    if out is not None:
        out.write(line)
    if "}" in line:
        # Opened and closed on a single line.
        return

    # Builtin next() works on any iterator under Python 2.6+ and 3;
    # the .next() method exists only on Python 2 iterators.
    line = next(in_file)
    while not line.startswith(closing_prefix):
        if out is not None:
            out.write(line)
        line = next(in_file)
    if out is not None:
        out.write(line)
213
def _skip_block(line, in_file):
    """Consume the block opened by line from in_file without writing it."""
    _print_block(line, in_file, out=None)
216
def _print_or_skip_block(line, in_file, out, keep):
    """Consume the block opened by line; write it to out only if keep is
    true, otherwise discard it."""
    _print_block(line, in_file, out if keep else None)
221
def _block_name(line):
    """Return the text before the first "{" in line, with leading
    whitespace removed. Asserts that line contains a "{"."""
    name, brace, _ = line.partition("{")
    assert brace
    return name.lstrip()
225
def _copy_start_of_file(in_file, out, locale_name):
    """Copy lines from in_file to out up to and including the first line
    that starts with "<locale_name>{".

    Args:
        in_file: Iterator over the input lines.
        out: Object with a write() method.
        locale_name: Name expected to open the top-level block.

    Raises:
        StopIteration: if no such line exists. Every line read has been
            written to out by then.
    """
    opening = "%s{" % locale_name
    # Builtin next() instead of the Python-2-only .next() method.
    line = next(in_file)
    while not line.startswith(opening):
        out.write(line)
        line = next(in_file)
    out.write(line)
232
def fix_currencies(keep_list_file, in_curr_dir, out_curr_dir):
    """Write the currency data to out_curr_dir, keeping only the
    currencies named in keep_list_file.

    keep_list_file is read as whitespace-separated currency names.
    Files that do not end in ".txt" (for instance pool.res) and
    supplementalData.txt are copied unchanged. Every other file is
    filtered by _process_currency_file(), with the file name minus its
    extension as the locale name.
    """
    with open(keep_list_file) as list_file:
        currencies_to_keep = set(list_file.read().split())

    for file_name in os.listdir(in_curr_dir):
        src_path = os.path.join(in_curr_dir, file_name)
        dest_path = os.path.join(out_curr_dir, file_name)
        needs_filtering = (file_name.endswith(".txt") and
                           file_name != "supplementalData.txt")
        if needs_filtering:
            locale_name = os.path.splitext(file_name)[0]
            with open(src_path) as in_file, open(dest_path, "w") as out:
                _process_currency_file(in_file, out, locale_name,
                                       currencies_to_keep)
        else:
            shutil.copyfile(src_path, dest_path)
252
def _process_currency_file(in_file, out, locale_name, currencies_to_keep):
    """Copy one currency data file, dropping unwanted currencies/blocks.

    Args:
        in_file: Iterator over the input lines.
        out: Object with a write() method.
        locale_name: Name of the file's top-level block.
        currencies_to_keep: Container of currency names to retain.

    Everything up to the "<locale_name>{" line is copied verbatim. After
    that, lines without "{" are copied as-is and each block is handled by
    name:
      * "Currencies", "Currencies%narrow", "CurrencyPlurals": the block
        is kept, but a nested block survives only if its name is in
        currencies_to_keep.
      * Any other block is kept only if its name is in the allow-list
        below.
    Running out of input (StopIteration) ends processing normally.
    """
    filtered_blocks = ("Currencies",
                       "Currencies%narrow",
                       "CurrencyPlurals")
    kept_blocks = ('"%%ALIAS"',
                   "%%Parent",
                   "currencyMap",
                   "CurrencyMap",
                   "currencyMeta",
                   "CurrencyMeta",
                   "currencySpacing",
                   "CurrencySpacing",
                   "currencyUnitPatterns",
                   "CurrencyUnitPatterns",
                   "Version")
    try:
        _copy_start_of_file(in_file, out, locale_name)
        while True:  # Until StopIteration exception.
            # Builtin next() instead of the Python-2-only .next() method.
            line = next(in_file)
            if "{" not in line:
                out.write(line)
                continue

            block_name = _block_name(line)
            if block_name not in filtered_blocks:
                _print_or_skip_block(line, in_file, out,
                                     block_name in kept_blocks)
                continue

            # Keep only certain currencies.
            out.write(line)
            line = next(in_file)
            # NOTE(review): the closing-brace literal is kept exactly as
            # found; confirm its leading whitespace matches the
            # indentation used by the data files.
            while not line.startswith(" }"):
                if "{" in line:
                    currency_name = _block_name(line)
                    keep_curr = currency_name in currencies_to_keep
                    _print_or_skip_block(line, in_file, out, keep_curr)
                else:
                    out.write(line)
                line = next(in_file)
            out.write(line)
    except StopIteration:
        # End of input is the normal way out of the loop above.
        pass
292
def is_interesting_calendar(locale_name, calendar_name):
    """Return True if calendar_name should be kept for locale_name.

    "generic" and "gregorian" are kept for every locale. Any other
    calendar is kept only when it is the one mapped to the locale or to
    one of its "_"-separated prefixes (so "zh_Hant_TW" is checked as
    "zh", "zh_Hant" and "zh_Hant_TW").
    """
    if calendar_name in ("generic", "gregorian"):
        return True

    extra_calendar_for = {
        "th": "buddhist",
        "zh": "chinese",
        "zh_Hant": "roc",
        "ko": "dangi",
        "am": "ethiopic",
        "he": "hebrew",
        "ar": "arabic",
        "fa": "persian",
        "ja": "japanese",
    }
    parts = locale_name.split("_")
    prefixes = ("_".join(parts[:count])
                for count in range(1, len(parts) + 1))
    return any(calendar_name == extra_calendar_for.get(prefix)
               for prefix in prefixes)
313
def minimize_data_for_locales(locales_in_dir, locales_out_dir,
                              languages_to_minimize_str):
    """Write the locale data files from locales_in_dir to
    locales_out_dir, filtering each ".txt" file.

    languages_to_minimize_str is a comma-separated list of locale names.
    Files not ending in ".txt" (for instance pool.res) are copied
    unchanged. Every ".txt" file goes through _process_locale_file(),
    with the file name minus its extension as the locale name.
    """
    languages_to_minimize = set(languages_to_minimize_str.split(","))

    for file_name in os.listdir(locales_in_dir):
        src_path = os.path.join(locales_in_dir, file_name)
        dest_path = os.path.join(locales_out_dir, file_name)
        if file_name.endswith(".txt"):
            locale_name = os.path.splitext(file_name)[0]
            with open(src_path) as in_file, open(dest_path, "w") as out:
                _process_locale_file(in_file, out, locale_name,
                                     languages_to_minimize)
        else:
            shutil.copyfile(src_path, dest_path)
332
def _process_locale_file(in_file, out, locale_name, languages_to_minimize):
    """Copy one locale data file, dropping data that is not needed.

    Args:
        in_file: Iterator over the input lines.
        out: Object with a write() method.
        locale_name: Name of the file's top-level block.
        languages_to_minimize: Container of locale names to minimize.

    Everything up to the "<locale_name>{" line is copied verbatim. After
    that, lines without "{" are copied as-is and blocks are treated as
    follows:
      * locale_name not in languages_to_minimize: every block is kept,
        except that inside "calendar" a nested block survives only if
        is_interesting_calendar() accepts it.
      * locale_name in languages_to_minimize: only the allow-listed
        blocks below are kept.
    Running out of input (StopIteration) ends processing normally.
    """
    minimize = locale_name in languages_to_minimize
    # Do not include '%%Parent' line on purpose.
    kept_when_minimizing = ('"%%ALIAS"',
                            "AuxExemplarCharacters",
                            "ExemplarCharacters",
                            "LocaleScript",
                            "layout",
                            "Version")
    try:
        _copy_start_of_file(in_file, out, locale_name)
        while True:  # Until StopIteration exception.
            # Builtin next() instead of the Python-2-only .next() method.
            line = next(in_file)
            if "{" not in line:
                out.write(line)
                continue

            block_name = _block_name(line)
            if not minimize and block_name == "calendar":
                out.write(line)
                line = next(in_file)
                # NOTE(review): the closing-brace literal is kept exactly
                # as found; confirm its leading whitespace matches the
                # indentation used by the data files.
                while not line.startswith(" }"):
                    if "{" in line:
                        calendar_name = _block_name(line)
                        keep_cal = is_interesting_calendar(locale_name,
                                                           calendar_name)
                        _print_or_skip_block(line, in_file, out, keep_cal)
                    else:
                        out.write(line)
                    line = next(in_file)
                out.write(line)
                continue

            keep = not minimize or block_name in kept_when_minimizing
            _print_or_skip_block(line, in_file, out, keep)
    except StopIteration:
        # End of input is the normal way out of the loop above.
        pass
372
373 # Note: patch_locale.sh also has code to strip only some calendars
374 # but since the calendar block is removed above that makes no
375 # difference.
376
def strip_android_langs(in_lang_dir, out_lang_dir):
    """Write the lang data files from in_lang_dir to out_lang_dir,
    filtering each ".txt" file.

    Files not ending in ".txt" (for instance pool.res) are copied
    unchanged. Every ".txt" file goes through _process_lang_file(),
    with the file name minus its extension as the locale name.
    """
    for file_name in os.listdir(in_lang_dir):
        src_path = os.path.join(in_lang_dir, file_name)
        dest_path = os.path.join(out_lang_dir, file_name)
        if file_name.endswith(".txt"):
            locale_name = os.path.splitext(file_name)[0]
            with open(src_path) as in_file, open(dest_path, "w") as out:
                _process_lang_file(in_file, out, locale_name)
        else:
            shutil.copyfile(src_path, dest_path)
390
def _process_lang_file(in_file, out, locale_name):
    """Copy one lang data file, dropping selected blocks and entries.

    Args:
        in_file: Iterator over the input lines.
        out: Object with a write() method.
        locale_name: Name of the file's top-level block.

    Everything up to the "<locale_name>{" line is copied verbatim. After
    that, lines without "{" are copied as-is and blocks are handled by
    name:
      * "Languages" / "Scripts": the opening and closing lines are kept,
        and of the lines in between only those whose stripped text
        starts with one of the prefixes in `entries_to_keep`.
      * Blocks named in `dropped_blocks` are removed entirely.
      * All other blocks are copied.
    Running out of input (StopIteration) ends processing normally.
    """
    entries_to_keep = {
        "Languages": ("zh{",),
        "Scripts": ("Hans{", "Hant{"),
    }
    dropped_blocks = ("Keys",
                      "LanguagesShort",
                      "Types",
                      "Variants",
                      "calendar",
                      "codePatterns",
                      "localeDisplayPattern")
    try:
        _copy_start_of_file(in_file, out, locale_name)
        while True:  # Until StopIteration exception.
            # Builtin next() instead of the Python-2-only .next() method.
            line = next(in_file)
            if "{" not in line:
                out.write(line)
                continue

            block_name = _block_name(line)
            if block_name not in entries_to_keep:
                _print_or_skip_block(line, in_file, out,
                                     block_name not in dropped_blocks)
                continue

            out.write(line)
            parts_to_keep = entries_to_keep[block_name]
            line = next(in_file)
            # NOTE(review): the closing-brace literal is kept exactly as
            # found; confirm its leading whitespace matches the
            # indentation used by the data files.
            while not line.startswith(" }"):
                if line.strip().startswith(parts_to_keep):
                    out.write(line)
                line = next(in_file)
            out.write(line)
    except StopIteration:
        # End of input is the normal way out of the loop above.
        pass
425
def remove_zone_example_cities(in_zone_dir, out_zone_dir):
    """Write the zone data files from in_zone_dir to out_zone_dir without
    the section between the zoneStrings line and the first "meta:" line.

    A file whose name without extension is "root" is copied line by line
    with nothing removed.
    """
    # NOTE(review): the two prefix literals below are kept exactly as
    # found; confirm their leading whitespace matches the indentation
    # used by the data files.
    keep_head, dropping, keep_tail = 0, 1, 2
    for zone_file in os.listdir(in_zone_dir):
        locale_name = os.path.splitext(zone_file)[0]
        src_path = os.path.join(in_zone_dir, zone_file)
        dest_path = os.path.join(out_zone_dir, zone_file)
        with open(src_path) as in_file, open(dest_path, "w") as out:
            # Keep everything in root.txt.
            state = keep_tail if locale_name == "root" else keep_head
            for line in in_file:
                if state == keep_head:
                    # The zoneStrings line itself is still written.
                    out.write(line)
                    if line.startswith(" zoneStrings"):
                        state = dropping
                    continue
                if state == dropping:
                    if not line.startswith(" \"meta:"):
                        continue
                    # The first "meta:" line ends the removed section
                    # and is itself kept.
                    state = keep_tail
                out.write(line)
446
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
449
OLDNEW
« no previous file with comments | « build_tools/copy_rename_file.py ('k') | build_tools/filter_data_for_size_unittest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698