Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(977)

Side by Side Diff: net/tools/dafsa/make_dafsa.py

Issue 2649033004: [3 of 4] Speedup GetRegistryLengthImpl() by seeding the DAFSA in reverse
Patch Set: Language. Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright 2014 The Chromium Authors. All rights reserved. 2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """ 6 """
7 A Deterministic acyclic finite state automaton (DAFSA) is a compact 7 A Deterministic acyclic finite state automaton (DAFSA) is a compact
8 representation of an unordered word list (dictionary). 8 representation of an unordered word list (dictionary).
9 9
10 http://en.wikipedia.org/wiki/Deterministic_acyclic_finite_state_automaton 10 http://en.wikipedia.org/wiki/Deterministic_acyclic_finite_state_automaton
(...skipping 174 matching lines...) Expand 10 before | Expand all | Expand 10 after
185 185
186 5: 0x61 <char> label character 0x61 -> match "a" 186 5: 0x61 <char> label character 0x61 -> match "a"
187 6: 0x61 <char> label character 0x61 -> match "a" 187 6: 0x61 <char> label character 0x61 -> match "a"
188 7: 0x81 <return_value> 0x81 & 0x0F -> return 1 188 7: 0x81 <return_value> 0x81 & 0x0F -> return 1
189 189
190 8: 0x62 <char> label character 0x62 -> match "b" 190 8: 0x62 <char> label character 0x62 -> match "b"
191 9: 0x62 <char> label character 0x62 -> match "b" 191 9: 0x62 <char> label character 0x62 -> match "b"
192 10: 0x82 <return_value> 0x82 & 0x0F -> return 2 192 10: 0x82 <return_value> 0x82 & 0x0F -> return 2
193 """ 193 """
194 194
195 import argparse
195 import sys 196 import sys
196 197
198
199 parser = argparse.ArgumentParser()
200 parser.add_argument('infile', nargs='?', type=argparse.FileType('r'))
201 parser.add_argument('outfile', nargs='?', type=argparse.FileType('w'))
202 parser.add_argument('--reverse_inputs', action='store_true',
203 help='Generates a DAFSA on reversed input strings; this is '
204 'useful for suffix queries')
205 args = parser.parse_args()
206
207
197 class InputError(Exception): 208 class InputError(Exception):
198 """Exception raised for errors in the input file.""" 209 """Exception raised for errors in the input file."""
199 210
200 211
201 def to_dafsa(words): 212 def to_dafsa(words):
202 """Generates a DAFSA from a word list and returns the source node. 213 """Generates a DAFSA from a word list and returns the source node.
203 214
204 Each word is split into characters so that each character is represented by 215 Each word is split into characters so that each character is represented by
205 a unique node. It is assumed the word list is not empty. 216 a unique node. It is assumed the word list is not empty.
206 """ 217 """
(...skipping 239 matching lines...) Expand 10 before | Expand all | Expand 10 after
446 end = lines.index('%%', begin) 457 end = lines.index('%%', begin)
447 lines = lines[begin:end] 458 lines = lines[begin:end]
448 for line in lines: 459 for line in lines:
449 if line[-3:-1] != ', ': 460 if line[-3:-1] != ', ':
450 raise InputError('Expected "domainname, <digit>", found "%s"' % line) 461 raise InputError('Expected "domainname, <digit>", found "%s"' % line)
451 # Technically the DAFSA format can support return values in the range 462 # Technically the DAFSA format can support return values in the range
452 # [0-31], but only the first three bits have any defined meaning. 463 # [0-31], but only the first three bits have any defined meaning.
453 if not line.endswith(('0', '1', '2', '3', '4', '5', '6', '7')): 464 if not line.endswith(('0', '1', '2', '3', '4', '5', '6', '7')):
454 raise InputError('Expected value to be in the range of 0-7, found "%s"' % 465 raise InputError('Expected value to be in the range of 0-7, found "%s"' %
455 line[-1]) 466 line[-1])
456 return [line[:-3] + line[-1] for line in lines] 467 result = []
457 468 for line in lines:
469 word, result_code = line[:-3], line[-1]
470 if args.reverse_inputs:
471 word = word[::-1]
472 result.append(word + result_code)
473 result.sort()
474 return result
458 475
459 def main(): 476 def main():
460 if len(sys.argv) != 3: 477 with args.infile as infile, args.outfile as outfile:
461 print('usage: %s infile outfile' % sys.argv[0])
462 return 1
463 with open(sys.argv[1], 'r') as infile, open(sys.argv[2], 'w') as outfile:
464 outfile.write(words_to_cxx(parse_gperf(infile))) 478 outfile.write(words_to_cxx(parse_gperf(infile)))
465 return 0 479 return 0
466 480
467 481
468 if __name__ == '__main__': 482 if __name__ == '__main__':
469 sys.exit(main()) 483 sys.exit(main())
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698