Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(48)

Side by Side Diff: net/tools/dafsa/make_dafsa.py

Issue 2649033004: [3 of 4] Speedup GetRegistryLengthImpl() by seeding the DAFSA in reverse
Patch Set: rebase Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright 2014 The Chromium Authors. All rights reserved. 2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """ 6 """
7 A Deterministic acyclic finite state automaton (DAFSA) is a compact 7 A Deterministic acyclic finite state automaton (DAFSA) is a compact
8 representation of an unordered word list (dictionary). 8 representation of an unordered word list (dictionary).
9 9
10 http://en.wikipedia.org/wiki/Deterministic_acyclic_finite_state_automaton 10 http://en.wikipedia.org/wiki/Deterministic_acyclic_finite_state_automaton
(...skipping 174 matching lines...) Expand 10 before | Expand all | Expand 10 after
185 185
186 5: 0x61 <char> label character 0x61 -> match "a" 186 5: 0x61 <char> label character 0x61 -> match "a"
187 6: 0x61 <char> label character 0x61 -> match "a" 187 6: 0x61 <char> label character 0x61 -> match "a"
188 7: 0x81 <return_value> 0x81 & 0x0F -> return 1 188 7: 0x81 <return_value> 0x81 & 0x0F -> return 1
189 189
190 8: 0x62 <char> label character 0x62 -> match "b" 190 8: 0x62 <char> label character 0x62 -> match "b"
191 9: 0x62 <char> label character 0x62 -> match "b" 191 9: 0x62 <char> label character 0x62 -> match "b"
192 10: 0x82 <return_value> 0x82 & 0x0F -> return 2 192 10: 0x82 <return_value> 0x82 & 0x0F -> return 2
193 """ 193 """
194 194
195 import argparse
195 import sys 196 import sys
196 197
198
199 parser = argparse.ArgumentParser()
200 parser.add_argument('infile', nargs='?', type=argparse.FileType('r'))
201 parser.add_argument('outfile', nargs='?', type=argparse.FileType('w'))
202 parser.add_argument('--reverse_inputs', action='store_true', help='Generates a D AFSA on reversed input strings; this is useful for suffix queries')
203 args = parser.parse_args()
204
205
197 class InputError(Exception): 206 class InputError(Exception):
198 """Exception raised for errors in the input file.""" 207 """Exception raised for errors in the input file."""
199 208
200 209
201 def to_dafsa(words): 210 def to_dafsa(words):
202 """Generates a DAFSA from a word list and returns the source node. 211 """Generates a DAFSA from a word list and returns the source node.
203 212
204 Each word is split into characters so that each character is represented by 213 Each word is split into characters so that each character is represented by
205 a unique node. It is assumed the word list is not empty. 214 a unique node. It is assumed the word list is not empty.
206 """ 215 """
(...skipping 239 matching lines...) Expand 10 before | Expand all | Expand 10 after
446 end = lines.index('%%', begin) 455 end = lines.index('%%', begin)
447 lines = lines[begin:end] 456 lines = lines[begin:end]
448 for line in lines: 457 for line in lines:
449 if line[-3:-1] != ', ': 458 if line[-3:-1] != ', ':
450 raise InputError('Expected "domainname, <digit>", found "%s"' % line) 459 raise InputError('Expected "domainname, <digit>", found "%s"' % line)
451 # Technically the DAFSA format can support return values in the range 460 # Technically the DAFSA format can support return values in the range
452 # [0-31], but only the first three bits have any defined meaning. 461 # [0-31], but only the first three bits have any defined meaning.
453 if not line.endswith(('0', '1', '2', '3', '4', '5', '6', '7')): 462 if not line.endswith(('0', '1', '2', '3', '4', '5', '6', '7')):
454 raise InputError('Expected value to be in the range of 0-7, found "%s"' % 463 raise InputError('Expected value to be in the range of 0-7, found "%s"' %
455 line[-1]) 464 line[-1])
456 return [line[:-3] + line[-1] for line in lines] 465 result = []
457 466 for line in lines:
467 word, result_code = line[:-3], line[-1]
468 if args.reverse_inputs:
469 word = word[::-1]
470 result.append(word + result_code)
471 result.sort()
472 return result
458 473
459 def main(): 474 def main():
460 if len(sys.argv) != 3: 475 infile = args.infile
461 print('usage: %s infile outfile' % sys.argv[0]) 476 outfile = args.outfile
462 return 1 477 outfile.write(words_to_cxx(parse_gperf(infile)))
463 with open(sys.argv[1], 'r') as infile, open(sys.argv[2], 'w') as outfile:
464 outfile.write(words_to_cxx(parse_gperf(infile)))
465 return 0 478 return 0
466 479
467 480
468 if __name__ == '__main__': 481 if __name__ == '__main__':
469 sys.exit(main()) 482 sys.exit(main())
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698