Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 #!/usr/bin/python | |
| 2 # Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 """Generates profile dictionaries for Autofill. | |
| 7 | |
| 8 Used to test autofill.AutofillTest.FormFillLatencyAfterSubmit. | |
|
Ilya Sherman
2011/03/22 04:08:58
nit: AutoFill -> Autofill
dyu1
2011/03/24 19:46:51
Done.
| |
| 9 Can be used as a stand alone script with -h to print out help text by running: | |
| 10 python autofill_dataset_generator.py -h | |
| 11 """ | |
| 12 | |
| 13 import codecs | |
| 14 import logging | |
| 15 from optparse import OptionParser | |
| 16 import os | |
| 17 import random | |
| 18 import re | |
| 19 import sys | |
| 20 | |
| 21 | |
class NullHandler(logging.Handler):
  """Logging handler that discards every record it is given."""

  def emit(self, record):
    """Ignores record; nothing is formatted or written."""
    return None
| 25 | |
| 26 | |
class DatasetGenerator(object):
  """Generates a dataset of Autofill profile dictionaries.

  The class-level *_construct lists (such as address_construct and
  city_construct) define the way the corresponding field is generated. Each
  one is a list of function-args lists: the first element of an inner list is
  a callable (or None) and the remaining elements are its arguments. See
  _GenerateField() for how they are evaluated.
  """
  address_construct = [
      [random.randint, 1, 10000],
      [None, u'foobar'],
      [random.choice, [u'St', u'Ave', u'Ln', u'Ct']],
      [random.choice, [u'#1', u'#2', u'#3']],
  ]

  city_construct = [
      [random.choice, [u'San Jose', u'San Francisco', u'Sacramento',
                       u'Los Angeles']],
  ]

  state_construct = [
      [None, u'CA'],
  ]

  zip_construct = [
      [None, u'95110'],
  ]

  # GenerateCountry() reads this list; it was previously undefined, so the
  # method raised AttributeError.
  country_construct = [
      [None, u'United States'],
  ]

  # No longer used inside this class; kept so that existing references to the
  # attribute keep working.
  re_single_quote = re.compile('\'', re.UNICODE)
  logger = logging.getLogger(__name__)
  logger.addHandler(NullHandler())
  # Shared by all instances: remembers the console handler (if any) that has
  # been attached to the class-level logger.
  log_handlers = {'StreamHandler': None}

  def __init__(self, output_filename=None, logging_level=None):
    """Constructs dataset generator object.

    Creates the 'fields' data member, a list of [key, source] pairs. The key
    is a field key such as u'NAME_FIRST'. The source is either a generator
    method such as self.GenerateNameFirst, a constant string such as
    u'United States' that is used as-is, or None, which yields u''.

    Also builds 'output_pattern', the format string used by
    GenerateDataset() to serialize one dictionary per line. For a single
    field it would be: {u"NAME_FIRST": u"%s",},

    Args:
      output_filename: filename the generated dataset is saved to.
          Default value is None and no saving takes place.
      logging_level: logging level for the class logger. When set, a console
          handler is attached (once) to the shared logger; when not set, a
          previously attached console handler is removed. Default is None.
    """
    console = self.log_handlers['StreamHandler']
    if logging_level:
      if not console:
        console = logging.StreamHandler()
        # The handler threshold is fixed at INFO; the logger level set below
        # can only make the output less verbose than that.
        console.setLevel(logging.INFO)
        self.log_handlers['StreamHandler'] = console
        self.logger.addHandler(console)
      self.logger.setLevel(logging_level)
    elif console:
      self.logger.removeHandler(console)
      self.log_handlers['StreamHandler'] = None

    self.output_filename = output_filename

    # Number of dictionaries generated so far by this instance.
    self.dict_no = 0
    self.fields = [
        [u'NAME_FIRST', self.GenerateNameFirst],
        [u'NAME_MIDDLE', None],
        [u'NAME_LAST', None],
        [u'EMAIL_ADDRESS', self.GenerateEmail],
        [u'COMPANY_NAME', None],
        [u'ADDRESS_HOME_LINE1', self.GenerateAddress],
        [u'ADDRESS_HOME_LINE2', None],
        [u'ADDRESS_HOME_CITY', self.GenerateCity],
        [u'ADDRESS_HOME_STATE', self.GenerateState],
        [u'ADDRESS_HOME_ZIP', self.GenerateZip],
        [u'ADDRESS_HOME_COUNTRY', u'United States'],
        [u'PHONE_HOME_WHOLE_NUMBER', None],
        [u'PHONE_FAX_WHOLE_NUMBER', u'6501234555'],
    ]

    # One 'u"KEY": u"%s",' entry per field, separated by single spaces and
    # wrapped in '{' ... '},'.
    self.output_pattern = u'{%s},' % u' '.join(
        u'u"%s": u"%%s",' % key for key, _ in self.fields)

  def _GenerateField(self, field_construct):
    """Generates the value of one field from its construct list.

    Args:
      field_construct: a list of lists. The first value (index 0) of each
          inner list is a function or None; the remaining values are the
          args. If the function is None the first arg is used as the value.

          Example 1: zip_construct = [[None, u'95110']]. There is a single
          inner list whose function is None, so this returns u'95110'.

          Example 2: [[random.randint, 1, 10000], [None, u'foobar']] has two
          inner lists and returns the space-joined results, which could be
          u'7832 foobar'.

    Returns:
      The generated parts, each converted to text, joined by single spaces.
    """
    parts = []
    for function_and_args in field_construct:
      function = function_and_args[0]
      args = function_and_args[1:]
      value = function(*args) if function else args[0]
      # Parts may be non-strings (e.g. the int from random.randint), so each
      # one is formatted to text before joining.
      parts.append(u'%s' % value)
    return u' '.join(parts)

  def GenerateAddress(self):
    """Uses _GenerateField() and address_construct to gen a random address.

    Returns:
      A random address.
    """
    return self._GenerateField(self.address_construct)

  def GenerateCity(self):
    """Uses _GenerateField() and city_construct to gen a random city.

    Returns:
      A random city.
    """
    return self._GenerateField(self.city_construct)

  def GenerateState(self):
    """Uses _GenerateField() and state_construct to generate a state.

    Returns:
      The state; state_construct currently holds the single value u'CA'.
    """
    return self._GenerateField(self.state_construct)

  def GenerateZip(self):
    """Uses _GenerateField() and zip_construct to generate a zip code.

    Returns:
      The zip code; zip_construct currently holds the single value u'95110'.
    """
    return self._GenerateField(self.zip_construct)

  def GenerateCountry(self):
    """Uses _GenerateField() and country_construct to generate a country.

    Returns:
      The country; country_construct currently holds the single value
      u'United States'.
    """
    return self._GenerateField(self.country_construct)

  def GenerateNameFirst(self):
    """Generates a numerical first name.

    The name is the number of the current dict.
    i.e. u'1', u'2', u'3'

    Returns:
      The current dict number as a string.
    """
    return u'%s' % self.dict_no

  def GenerateEmail(self):
    """Generates an email that corresponds to the first name.

    i.e. u'1@example.com', u'2@example.com', u'3@example.com'

    Returns:
      An example.com address whose local part is the current dict number.
    """
    return u'%s@example.com' % self.dict_no

  def GenerateNextDict(self):
    """Generates next dictionary of the dataset.

    Increments dict_no, then fills one entry per pair in self.fields.

    Returns:
      The output dictionary, mapping each field key to its raw (unescaped)
      value.
    """
    self.dict_no += 1
    output_dict = {}
    for key, method_or_value in self.fields:
      if not method_or_value:
        output_dict[key] = u''
      elif callable(method_or_value):
        output_dict[key] = method_or_value()
      else:
        output_dict[key] = u'%s' % method_or_value
    return output_dict

  @staticmethod
  def _EscapeForOutput(value):
    """Escapes value for use inside a double-quoted u"..." literal."""
    text = u'%s' % value
    return text.replace(u'\\', u'\\\\').replace(u'"', u'\\"')

  def GenerateDataset(self, num_of_dict_to_generate=10):
    """Generates a list of dictionaries.

    Dictionaries are generated until dict_no reaches
    num_of_dict_to_generate. dict_no is kept on the instance, so an instance
    that has already generated that many dictionaries returns an empty list.
    If output_filename was given, the dataset is also written to that file
    as a '[' line, one serialized dictionary per line, and a ']' line.

    Args:
      num_of_dict_to_generate: The number of dictionaries to be generated.
          Default value is 10.

    Returns:
      The dictionary list.
    """
    output_file = None
    if self.output_filename:
      output_file = codecs.open(
          self.output_filename, mode='wb', encoding='utf-8-sig')
    try:
      list_of_dict = []
      if output_file:
        output_file.write(u'[' + os.linesep)

      while self.dict_no < num_of_dict_to_generate:
        output_dict = self.GenerateNextDict()
        list_of_dict.append(output_dict)
        # Escaping happens only here, so the returned dictionaries keep
        # their raw values.
        output_line = self.output_pattern % tuple(
            self._EscapeForOutput(output_dict[key])
            for key, _ in self.fields)
        if output_file:
          output_file.write(output_line + os.linesep)
        # Arguments are passed lazily and un-encoded: encoding with
        # sys.stdout.encoding fails when that attribute is None.
        self.logger.info('%d: %s', self.dict_no, output_line)

      if output_file:
        output_file.write(u']' + os.linesep)
      self.logger.info('%d dictionaries generated SUCCESSFULLY!', self.dict_no)
      self.logger.info('--- FINISHED ---')
      return list_of_dict
    finally:
      if output_file:
        output_file.close()
| 270 | |
def main():
  """Parses the command line and generates the requested dataset.

  Options:
    -o/--output: file the dataset is written to (optional).
    -d/--dict: number of dictionaries to generate (default 10).
    -l/--log_level: "debug", "info", "warning" or "error", matched
        case-insensitively (default "debug").

  Exits with status 1 after printing the help text if positional arguments
  are given. An unknown log level prints the help text and then exits
  through parser.error().
  """
  log_levels = {
      'debug': logging.DEBUG,
      'info': logging.INFO,
      'warning': logging.WARNING,
      'error': logging.ERROR,
  }

  # Command line options.
  parser = OptionParser()
  parser.add_option(
      '-o', '--output', dest='output_filename', default='',
      help='write output to FILE [optional]', metavar='FILE')
  parser.add_option(
      '-d', '--dict', type='int', dest='dict_no', metavar='DICT_NO',
      default=10,
      help='DICT_NO: number of dictionaries to be generated '
           '[default: %default]')
  parser.add_option(
      '-l', '--log_level', dest='log_level', default='debug',
      metavar='LOG_LEVEL',
      help='LOG_LEVEL: "debug", "info", "warning" or "error" '
           '[default: %default]')

  (options, args) = parser.parse_args()
  if args:
    parser.print_help()
    sys.exit(1)

  log_level = log_levels.get(options.log_level.lower())
  if log_level is None:
    # Show the full help before the error so the valid values are visible;
    # parser.error() does not return.
    parser.print_help()
    parser.error('Wrong log_level argument.')

  gen = DatasetGenerator(options.output_filename, log_level)
  gen.GenerateDataset(options.dict_no)


if __name__ == '__main__':
  main()
| OLD | NEW |