OLD | NEW |
---|---|
(Empty) | |
1 #!/usr/bin/python | |
2 # Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 """Generates profile dictionaries for Autofill. | |
7 | |
8 Used to test autofill.AutofillTest.FormFillLatencyAfterSubmit. | |
9 Can be used as a standalone script; run it with -h to print the help text: | |
10 python autofill_dataset_generator.py -h | |
11 """ | |
12 | |
13 import codecs | |
14 import logging | |
15 from optparse import OptionParser | |
16 import os | |
17 import random | |
18 import re | |
19 import sys | |
20 | |
21 | |
class NullHandler(logging.Handler):
  """Logging handler that discards every record it is given."""

  def emit(self, record):
    """Ignores |record|; nothing is written anywhere."""
    return None
25 | |
26 | |
class DatasetGenerator(object):
  """Generates a dataset of Autofill profile dictionaries.

  The *_construct class attributes define the way the corresponding field is
  generated. Except for zip_construct, each of them is a list of
  function-args lists: the first item of every inner list is a callable (or
  None) and the remaining items are its arguments. See _GenerateField().
  """
  address_construct = [
      [random.randint, 1, 10000],
      [None, u'foobar'],
      [random.choice, [u'St', u'Ave', u'Ln', u'Ct']],
      [random.choice, [u'#1', u'#2', u'#3']],
  ]

  city_construct = [
      [random.choice, [u'San Jose', u'San Francisco', u'Sacramento',
                       u'Los Angeles']],
  ]

  state_construct = [
      [None, u'CA'],
  ]

  # Zip codes are matched to cities by position: zip_construct[i] is the zip
  # used for the i-th city listed in city_construct[0][1]. GenerateZip()
  # relies on both lists having the same order and length.
  zip_construct = [u'95110', u'94109', u'94203', u'90120']

  # Read by GenerateCountry(). Same value as the constant used for the
  # ADDRESS_HOME_COUNTRY entry of 'fields'.
  country_construct = [
      [None, u'United States'],
  ]

  logger = logging.getLogger(__name__)
  # Use the standard library's no-op handler when the interpreter provides
  # one; otherwise fall back to this module's own NullHandler class.
  logger.addHandler((getattr(logging, 'NullHandler', None) or NullHandler)())
  # Shared by all instances (like 'logger'), so that at most one console
  # handler is ever attached to the logger.
  log_handlers = {'StreamHandler': None}

  def __init__(self, output_filename=None, logging_level=None):
    """Constructs dataset generator object.

    Creates 'fields' data member which is a list of pair (two values) lists.
    These pairs are comprised of a field key e.g. u'NAME_FIRST' and a
    generator method e.g. self.GenerateNameFirst which will generate the
    value. If we want the value to always be the same e.g. u'John' we can use
    this instead of a method. We can even use None keyword which will give
    a value of u''.

    The order of 'fields' matters: GenerateZip() reads the city already
    stored in the dictionary being built, so ADDRESS_HOME_CITY has to come
    before ADDRESS_HOME_ZIP.

    'output_pattern' for one field would have been:
      {u"NAME_FIRST" : u"%s",}
    which is ready to accept a value for the 'NAME_FIRST' field key once
    this value is generated. 'output_pattern' is used in GenerateDataset()
    to format each generated dictionary as one output line.

    Args:
      output_filename: specified filename of generated dataset to be saved.
        Default value is None and no saving takes place.
      logging_level: set verbosity levels, default is None. A false value
        detaches the console handler (if one is attached).
    """
    if logging_level:
      if not self.log_handlers['StreamHandler']:
        console = logging.StreamHandler()
        console.setLevel(logging.INFO)
        self.log_handlers['StreamHandler'] = console
        self.logger.addHandler(console)
      self.logger.setLevel(logging_level)
    else:
      if self.log_handlers['StreamHandler']:
        self.logger.removeHandler(self.log_handlers['StreamHandler'])
        self.log_handlers['StreamHandler'] = None

    self.output_filename = output_filename

    self.dict_no = 0
    self.fields = [
        [u'NAME_FIRST', self.GenerateNameFirst],
        [u'NAME_MIDDLE', None],
        [u'NAME_LAST', None],
        [u'EMAIL_ADDRESS', self.GenerateEmail],
        [u'COMPANY_NAME', None],
        [u'ADDRESS_HOME_LINE1', self.GenerateAddress],
        [u'ADDRESS_HOME_LINE2', None],
        [u'ADDRESS_HOME_CITY', self.GenerateCity],
        [u'ADDRESS_HOME_STATE', self.GenerateState],
        [u'ADDRESS_HOME_ZIP', self.GenerateZip],
        [u'ADDRESS_HOME_COUNTRY', u'United States'],
        [u'PHONE_HOME_WHOLE_NUMBER', None],
        [u'PHONE_FAX_WHOLE_NUMBER', u'6501234555'],
    ]

    self.next_dict = {}
    # '%%s' survives the first substitution as '%s', leaving one placeholder
    # per field for the generated value. The pattern opens with a bare '{'
    # (no stray quote) so that every output line is a valid dict literal.
    field_patterns = [u'u"%s" : u"%%s"' % key for key, _ in self.fields]
    self.output_pattern = u'{' + u', '.join(field_patterns) + u',}'

  def _GenerateField(self, field_construct):
    """Generates the value of one field from its construct.

    Args:
      field_construct: it is a list of lists.
        The first value (index 0) of each containing list is a function or
        None. The remaining values are the args. If function is None then
        the (single) arg is just returned.

        Example 1: state_construct = [[None, u'CA']]. There is one
        containing list only and function here is None and arg is u'CA'.
        This just returns u'CA'.

        Example 2: address_construct = [[random.randint, 1, 10000],
        [None, u'foobar']]. This has two containing lists and it will return
        the result of:
          random.randint(1, 10000) + ' ' + u'foobar'
        which could be u'7832 foobar'

    Returns:
      The generated parts converted to unicode text and joined by single
      spaces.
    """
    def _Identity(value):
      return value

    parts = []
    for function_and_args in field_construct:
      function = function_and_args[0] or _Identity
      args = function_and_args[1:]
      # u'%s' keeps the result unicode and also handles non-ASCII values,
      # which str() cannot do on Python 2.
      parts.append(u'%s' % (function(*args),))
    return u' '.join(parts)

  def GenerateAddress(self):
    """Uses _GenerateField() and address_construct to gen a random address.

    Returns:
      A random address.
    """
    return self._GenerateField(self.address_construct)

  def GenerateCity(self):
    """Uses _GenerateField() and city_construct to gen a random city.

    Returns:
      A random city.
    """
    return self._GenerateField(self.city_construct)

  def GenerateState(self):
    """Uses _GenerateField() and state_construct to generate a state.

    Returns:
      A state.
    """
    return self._GenerateField(self.state_construct)

  def GenerateZip(self):
    """Looks up the zip code paired with the city of the current dictionary.

    The city is read from self.next_dict, so ADDRESS_HOME_CITY must already
    have been generated for the dictionary being built. The zip code is the
    entry of zip_construct at the same position the city has in
    city_construct[0][1].

    Returns:
      The zip code that corresponds to the selected city.

    Raises:
      KeyError: if no city has been stored in self.next_dict yet.
      ValueError: if the stored city is not listed in city_construct.
    """
    city_selected = self.next_dict[u'ADDRESS_HOME_CITY']
    index = self.city_construct[0][1].index(city_selected)
    return self.zip_construct[index]

  def GenerateCountry(self):
    """Uses _GenerateField() and country_construct to generate a country.

    Returns:
      A country.
    """
    return self._GenerateField(self.country_construct)

  def GenerateNameFirst(self):
    """Generates a numerical first name.

    The name is the number of the current dict.
    i.e. u'1', u'2', u'3'

    Returns:
      A numerical first name.
    """
    return u'%s' % self.dict_no

  def GenerateEmail(self):
    """Generates an email that corresponds to the first name.

    i.e. u'1@example.com', u'2@example.com', u'3@example.com'

    Returns:
      An email address that corresponds to the first name.
    """
    return u'%s@example.com' % self.dict_no

  def GenerateNextDict(self):
    """Generates next dictionary of the dataset.

    Increments dict_no and replaces self.next_dict with a new dictionary
    that is filled in the order given by 'fields'.

    Returns:
      The output dictionary.
    """
    self.dict_no += 1
    self.next_dict = {}
    for key, method_or_value in self.fields:
      if not method_or_value:
        self.next_dict[key] = u''
      elif callable(method_or_value):
        self.next_dict[key] = method_or_value()
      else:
        # A constant: use its text form as the value.
        self.next_dict[key] = u'%s' % (method_or_value,)
    return self.next_dict

  def GenerateDataset(self, num_of_dict_to_generate=10):
    """Generates a list of dictionaries.

    Dictionaries are generated until dict_no reaches
    num_of_dict_to_generate. dict_no is not reset here, so dictionaries
    already generated by this object count towards the total. When an output
    filename was given, the dataset is also written to that file, one
    dictionary per line between '[' and ']' lines.

    Args:
      num_of_dict_to_generate: The number of dictionaries to be generated.
        Default value is 10.

    Returns:
      The dictionary list.
    """
    random.seed(0)  # All randomly generated values are reproducible.
    # sys.stdout.encoding may be None (e.g. when the output is piped on
    # Python 2); encoding with None would raise, so fall back to UTF-8.
    console_encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'

    output_file = None
    if self.output_filename:
      output_file = codecs.open(
          self.output_filename, mode='wb', encoding='utf-8-sig')

    def _WriteLine(text):
      """Writes |text| followed by a line separator, if saving to a file."""
      if output_file:
        output_file.write(text)
        output_file.write(os.linesep)

    try:
      list_of_dict = []
      _WriteLine(u'[')

      while self.dict_no < num_of_dict_to_generate:
        output_dict = self.GenerateNextDict()
        list_of_dict.append(output_dict)
        output_line = self.output_pattern % tuple(
            [output_dict[key] for key, _ in self.fields])
        _WriteLine(output_line)
        self.logger.info(
            '%d: %s', self.dict_no,
            output_line.encode(console_encoding, 'ignore'))

      _WriteLine(u']')
      self.logger.info('%d dictionaries generated SUCCESSFULLY!', self.dict_no)
      self.logger.info('--- FINISHED ---')
      return list_of_dict
    finally:
      if output_file:
        output_file.close()
267 | |
268 | |
def main():
  """Parses the command line and generates the requested dataset.

  Exits with status 1 (after printing the help text) when positional
  arguments are given. An unknown log level is reported through
  parser.error(), which prints the message and exits with status 2.
  """
  # Command line options.
  parser = OptionParser()
  parser.add_option(
      '-o', '--output', dest='output_filename', default='',
      help='write output to FILE [optional]', metavar='FILE')
  parser.add_option(
      '-d', '--dict', type='int', dest='dict_no', metavar='DICT_NO',
      default=10,
      help='DICT_NO: number of dictionaries to be generated '
           '[default: %default]')
  parser.add_option(
      '-l', '--log_level', dest='log_level', default='debug',
      metavar='LOG_LEVEL',
      help='LOG_LEVEL: "debug", "info", "warning" or "error" '
           '[default: %default]')

  (options, args) = parser.parse_args()
  if args:
    parser.print_help()
    sys.exit(1)

  # Level names are matched case-insensitively.
  log_levels = {
      'debug': logging.DEBUG,
      'info': logging.INFO,
      'warning': logging.WARNING,
      'error': logging.ERROR,
  }
  log_level = log_levels.get(options.log_level.lower())
  if log_level is None:
    # parser.error() prints the usage line plus this message and exits, so
    # nothing placed after it would ever run.
    parser.error('Wrong log_level argument.')

  gen = DatasetGenerator(options.output_filename, log_level)
  gen.GenerateDataset(options.dict_no)
303 | |
304 | |
305 if __name__ == '__main__': | |
306 main() | |
OLD | NEW |