OLD | NEW |
---|---|
(Empty) | |
1 #!/usr/bin/python | |
2 # Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 """Generates profile dictionaries for Autofill. | |
7 | |
8 Used to test autofill.AutoFillTest.FormFillLatencyAfterSubmit. | |
9 Can be used as a stand alone script with -h to print out help text by running: | |
10 python autofill_dataset_generator.py -h | |
11 """ | |
12 | |
13 import codecs | |
14 import logging | |
15 import random | |
16 import re | |
17 import sys | |
18 import os | |
19 | |
20 | |
class NullHandler(logging.Handler):
  """Logging handler that silently discards every record handed to it.

  Attaching it to a logger gives that logger at least one handler, so that
  library-style code can log without producing any output by default.
  """

  def emit(self, record):
    """Drops `record` without writing it anywhere."""
    return None
24 | |
25 | |
class DatasetGenerator(object):
  """Generates a dataset of Autofill profile dictionaries.

  The class-level `*_construct` lists (address_construct, city_construct, ...)
  describe how the corresponding field value is assembled. Each one is a list
  of "function-args" lists: the first element is a callable (or None) and the
  remaining elements are the arguments passed to it. The results of all the
  entries are joined with single spaces by _GenerateField().

  The logger and the `log_handlers` registry are class attributes, so they are
  shared by every instance of this class.
  """
  address_construct = [
    [random.randint, 1, 10000],
    [None, u'foobar'],
    [random.choice, [u'St', u'Ave', u'Ln', u'Ct']],
    [random.choice, [u'#1', u'#2', u'#3']],
  ]

  city_construct = [
    [random.choice, [u'San Jose', u'San Francisco', u'Sacramento',
                     u'Los Angeles']],
  ]

  state_construct = [
    [None, u'CA'],
  ]

  zip_construct = [
    [None, u'95110'],
  ]

  # GenerateCountry() reads this list; it mirrors the constant used for the
  # ADDRESS_HOME_COUNTRY entry in `fields`.
  country_construct = [
    [None, u'United States'],
  ]

  re_single_quote = re.compile("'", re.UNICODE)
  logger = logging.getLogger(__name__)
  # Prefer the standard library's NullHandler where it exists; the `or` only
  # falls back to this module's NullHandler shim on interpreters without it.
  logger.addHandler((getattr(logging, 'NullHandler', None) or NullHandler)())
  log_handlers = {'StreamHandler': None}

  def __init__(self, output_filename=None, logging_level=None):
    """Constructs dataset generator object.

    Creates the 'fields' data member, a list of [key, generator] pairs. The
    key is a field name such as u'NAME_FIRST'; the generator is either a
    method such as self.GenerateNameFirst that produces the value, a constant
    such as u'John' that is used as-is, or None, which yields an empty string.

    Also builds 'output_pattern', the format string for one output line. For a
    single field it would be "{u'NAME_FIRST': u'%s',}," and it is filled in by
    GenerateDataset() once the values of a dictionary have been generated.

    Args:
      output_filename: filename the generated dataset is saved to. Default is
        None, in which case nothing is written to disk.
      logging_level: logging level for console output. A falsy value (the
        default, None) removes the console handler if one was installed.
    """
    console = self.log_handlers['StreamHandler']
    if logging_level:
      if not console:
        console = logging.StreamHandler()
        console.setLevel(logging.INFO)
        self.log_handlers['StreamHandler'] = console
        self.logger.addHandler(console)
      self.logger.setLevel(logging_level)
    elif console:
      # Look the handler up in `log_handlers` (the registry defined on the
      # class) so that a previously installed console handler is detached.
      self.logger.removeHandler(console)
      self.log_handlers['StreamHandler'] = None

    self.output_filename = output_filename

    self.dict_no = 0
    self.fields = [
      [u'NAME_FIRST', self.GenerateNameFirst],
      [u'NAME_MIDDLE', None],
      [u'NAME_LAST', None],
      [u'EMAIL_ADDRESS', self.GenerateEmail],
      [u'COMPANY_NAME', None],
      [u'ADDRESS_HOME_LINE1', self.GenerateAddress],
      [u'ADDRESS_HOME_LINE2', None],
      [u'ADDRESS_HOME_CITY', self.GenerateCity],
      [u'ADDRESS_HOME_STATE', self.GenerateState],
      [u'ADDRESS_HOME_ZIP', self.GenerateZip],
      [u'ADDRESS_HOME_COUNTRY', u'United States'],
      [u'PHONE_HOME_WHOLE_NUMBER', None],
      [u'PHONE_FAX_WHOLE_NUMBER', u'6501234555'],
    ]

    self.dict_length = len(self.fields)
    # One "u'KEY': u'%s'" slot per field; the doubled %% leaves a literal %s
    # placeholder behind for the value that is substituted later.
    self.output_pattern = u'{%s,},' % u', '.join(
        u"u'%s': u'%%s'" % key for key, _ in self.fields)

  def _GenerateField(self, field_construct):
    """Generates the value of one field from its construct list.

    Args:
      field_construct: a list of lists. The first value (index 0) of each
        inner list is a function or None; the remaining values are its args.
        If the function is None the single arg is returned unchanged.

        Example 1: zip_construct = [[None, u'95110']] has one inner list with
        no function, so the result is just u'95110'.

        Example 2: [[random.randint, 1, 10000], [None, u'foobar']] has two
        inner lists and returns
          random.randint(1, 10000) + ' ' + u'foobar'
        which could be u'7832 foobar'.

    Returns:
      The generated parts, each formatted as text and joined by one space.
    """
    parts = []
    for function_and_args in field_construct:
      function = function_and_args[0]
      args = function_and_args[1:]
      if not function:
        # No function means "use the single argument as a literal value".
        function = lambda value: value
      parts.append(u'%s' % function(*args))
    return u' '.join(parts)

  def GenerateAddress(self):
    """Uses _GenerateField() and address_construct to gen a random address.

    Returns:
      A random street address.
    """
    return self._GenerateField(self.address_construct)

  def GenerateCity(self):
    """Uses _GenerateField() and city_construct to gen a random city.

    Returns:
      A city picked at random from city_construct.
    """
    return self._GenerateField(self.city_construct)

  def GenerateState(self):
    """Uses _GenerateField() and state_construct to generate a state.

    Returns:
      The state described by state_construct.
    """
    return self._GenerateField(self.state_construct)

  def GenerateZip(self):
    """Uses _GenerateField() and zip_construct to generate a zip code.

    Returns:
      The zip code described by zip_construct.
    """
    return self._GenerateField(self.zip_construct)

  def GenerateCountry(self):
    """Uses _GenerateField() and country_construct to generate a country.

    Returns:
      The country described by country_construct.
    """
    return self._GenerateField(self.country_construct)

  def GenerateNameFirst(self):
    """Generates numerical first names.

    The name is the number of the current dict, i.e. u'1', u'2', u'3'.

    Returns:
      The current dictionary number as text.
    """
    return u'%s' % self.dict_no

  def GenerateEmail(self):
    """Generates emails that correspond to the first name.

    i.e. u'1@example.com', u'2@example.com', u'3@example.com'

    Returns:
      An example.com address whose local part is the current dictionary
      number.
    """
    return u'%s@example.com' % self.dict_no

  def GenerateNextDict(self):
    """Generates the next dictionary of the dataset.

    Increments the zero-based dict_no first, so the first dictionary is
    number 1. Single quotes in every value are escaped ("'" -> "\\'") so the
    value can be embedded in the single-quoted slots of output_pattern.

    Returns:
      A dict mapping every field key to its generated value.
    """
    self.dict_no += 1
    output_dict = {}
    for key, method_or_value in self.fields:
      if not method_or_value:
        value = ''
      elif callable(method_or_value):
        value = method_or_value()
      else:
        # Constants are used as-is; formatting keeps them textual.
        value = u'%s' % method_or_value
      # Escaping single quote: "'" -> "\'"
      output_dict[key] = self.re_single_quote.sub(r"\'", value)
    return output_dict

  def GenerateDataset(self, num_of_dict_to_generate=10):
    """Generates a list of dictionaries.

    Dictionaries are generated until dict_no reaches the requested count.
    dict_no is not reset here, so a second call on the same instance only
    produces the dictionaries beyond those already generated. When
    output_filename is set, every dictionary is also written to that file as
    one line of a Python list literal, encoded as UTF-8 with a BOM.

    Args:
      num_of_dict_to_generate: The number of dictionaries to be generated.
        Default value is 10.

    Returns:
      The list of dictionaries generated by this call.
    """
    if self.output_filename:
      output_file = codecs.open(
          self.output_filename, mode='wb', encoding='utf-8-sig')
    else:
      output_file = None
    try:
      list_of_dict = []
      if output_file:
        output_file.write('[')
        output_file.write(os.linesep)

      while self.dict_no < num_of_dict_to_generate:
        output_dict = self.GenerateNextDict()
        list_of_dict.append(output_dict)
        output_line = self.output_pattern % tuple(
            output_dict[key] for key, _ in self.fields)
        if output_file:
          output_file.write(output_line)
          output_file.write(os.linesep)
        # Hand the text to logging unformatted: nothing is built when logging
        # is off, and no console encoding has to be known here.
        self.logger.info("%d: %s", self.dict_no, output_line)

      if output_file:
        output_file.write(']')
        output_file.write(os.linesep)
      self.logger.info("%d dictionaries generated SUCCESSFULLY!", self.dict_no)
      self.logger.info("--- FINISHED ---")
      return list_of_dict
    finally:
      if output_file:
        output_file.close()
260 | |
dennis_jeffrey
2011/03/18 21:44:01
Add one more blank line before the start of the main() function.
dyu1
2011/03/24 19:46:51
Done.
| |
def main(argv=None):
  """Parses the command line and generates a dataset of 100 dictionaries.

  Args:
    argv: list of command-line arguments, excluding the program name. The
      default, None, makes the parser read sys.argv[1:].

  Exits with status 1 (after printing the help text) when positional
  arguments are given, and with status 2 when the LOG_LEVEL value is not
  recognized.
  """
  # Command line options.
  from optparse import OptionParser
  parser = OptionParser()
  parser.add_option("-o", "--output",
                    dest="output_filename", default="",
                    help="write output to FILE [optional]", metavar="FILE")
  parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
                    default=True,
                    help="display all [default]")
  parser.add_option("-q", "--quiet", action="store_false", dest="verbose",
                    help="display nothing")
  parser.add_option("-l", "--log", dest="logging_level", default=None,
                    help="LOG_LEVEL: 'debug', 'info', 'warning' or 'error'",
                    metavar="LOG_LEVEL")

  (options, args) = parser.parse_args(argv)
  if args:
    parser.print_help()
    sys.exit(1)
  if not options.verbose:
    options.logging_level = None
  if options.verbose and not options.logging_level:
    options.logging_level = 'info'
  if options.logging_level:
    # Keywords are matched as substrings, in this order, against the
    # lower-cased option value; the first hit wins.
    keyword_levels = (
        ('debug', logging.DEBUG),
        ('info', logging.INFO),
        ('warn', logging.WARNING),
        ('error', logging.ERROR),
        ('critical', logging.CRITICAL),
    )
    requested = options.logging_level.lower()
    for keyword, level in keyword_levels:
      if keyword in requested:
        options.logging_level = level
        break
    else:
      # Report a bad value as a usage error instead of handing an arbitrary
      # string on to Logger.setLevel().
      parser.error("unknown LOG_LEVEL %r; expected 'debug', 'info', "
                   "'warning' or 'error'" % options.logging_level)

  gen = DatasetGenerator(options.output_filename, options.logging_level)
  gen.GenerateDataset(100)
297 | |
298 | |
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
  main()
OLD | NEW |