OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/python |
| 2 # Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 """Generates profile dictionaries for Autofill. |
| 7 |
| 8 Used to test autofill.AutofillTest.FormFillLatencyAfterSubmit. |
| 9 Can be used as a stand alone script with -h to print out help text by running: |
| 10 python autofill_dataset_generator.py -h |
| 11 """ |
| 12 |
| 13 import codecs |
| 14 import logging |
| 15 from optparse import OptionParser |
| 16 import os |
| 17 import random |
| 18 import re |
| 19 import sys |
| 20 |
| 21 |
class NullHandler(logging.Handler):
  """Logging handler that discards every record it receives.

  An instance is attached to DatasetGenerator.logger so that the logger has a
  handler even when console logging is not enabled.
  """

  def emit(self, record):
    """Ignores the given log record."""
    pass


class DatasetGenerator(object):
  """Generates a dataset of Autofill profile dictionaries.

  The lists address_construct, city_construct, state_construct and
  country_construct define the way the corresponding field is generated. Each
  one is a list of function-args lists: the first item of every inner list is
  a callable (or None to use the single argument verbatim) and the remaining
  items are the arguments passed to it. zip_construct is a plain list of zip
  codes that is index-matched to the cities in city_construct.
  """
  address_construct = [
      [random.randint, 1, 10000],
      [None, u'foobar'],
      [random.choice, [u'St', u'Ave', u'Ln', u'Ct']],
      [random.choice, [u'#1', u'#2', u'#3']],
  ]

  city_construct = [
      [random.choice, [u'San Jose', u'San Francisco', u'Sacramento',
                       u'Los Angeles']],
  ]

  state_construct = [
      [None, u'CA'],
  ]

  # Read by GenerateCountry(); same value as the constant country field.
  country_construct = [
      [None, u'United States'],
  ]

  # These zip codes are matched, by position, to the cities in city_construct.
  zip_construct = [u'95110', u'94109', u'94203', u'90120']

  logger = logging.getLogger(__name__)
  logger.addHandler(NullHandler())
  # Class-level on purpose: every instance shares the logger, so the console
  # handler must be tracked in one place to avoid attaching it twice.
  log_handlers = {'StreamHandler': None}

  def __init__(self, output_filename=None, logging_level=None):
    """Constructs dataset generator object.

    Creates 'fields' data member which is a list of pair (two values) lists.
    These pairs are comprised of a field key e.g. u'NAME_FIRST' and a
    generator method e.g. self.GenerateNameFirst which will generate the value.
    If we want the value to always be the same e.g. u'John' we can use this
    instead of a method. We can even use None keyword which will give
    a value of u''.

    'output_pattern' for one field would be: '{u"NAME_FIRST" : u"%s",}'
    which is ready to accept a value for the 'NAME_FIRST' field key once
    this value is generated.
    'output_pattern' is used in 'GenerateDataset()' to format each generated
    dict as one output line.

    Args:
      output_filename: specified filename of generated dataset to be saved.
          Default value is None and no saving takes place.
      logging_level: set verbosity levels, default is None. A true value
          attaches a console handler (once) and sets the logger level; a false
          value detaches a previously attached console handler.
    """
    console = self.log_handlers['StreamHandler']
    if logging_level:
      if not console:
        console = logging.StreamHandler()
        console.setLevel(logging.INFO)
        self.log_handlers['StreamHandler'] = console
        self.logger.addHandler(console)
      self.logger.setLevel(logging_level)
    elif console:
      self.logger.removeHandler(console)
      self.log_handlers['StreamHandler'] = None

    self.output_filename = output_filename

    self.dict_no = 0
    # Order matters: GenerateZip() reads the city generated earlier in the
    # same dict, so ADDRESS_HOME_CITY must come before ADDRESS_HOME_ZIP.
    self.fields = [
        [u'NAME_FIRST', self.GenerateNameFirst],
        [u'NAME_MIDDLE', None],
        [u'NAME_LAST', None],
        [u'EMAIL_ADDRESS', self.GenerateEmail],
        [u'COMPANY_NAME', None],
        [u'ADDRESS_HOME_LINE1', self.GenerateAddress],
        [u'ADDRESS_HOME_LINE2', None],
        [u'ADDRESS_HOME_CITY', self.GenerateCity],
        [u'ADDRESS_HOME_STATE', self.GenerateState],
        [u'ADDRESS_HOME_ZIP', self.GenerateZip],
        [u'ADDRESS_HOME_COUNTRY', u'United States'],
        [u'PHONE_HOME_WHOLE_NUMBER', None],
        [u'PHONE_FAX_WHOLE_NUMBER', u'6501234555'],
    ]

    self.next_dict = {}
    # One u"KEY" : u"%s" slot per field. The pattern must start with a bare
    # '{' so that a formatted line reads as a dict literal.
    self.output_pattern = u'{%s,}' % u', '.join(
        [u'u"%s" : u"%%s"' % key for key, _ in self.fields])

  def _GenerateField(self, field_construct):
    """Generates each field in each dictionary.

    Args:
      field_construct: it is a list of lists.
          The first value (index 0) of each containing list is a function or
          None. The remaining values are the args. If function is None then
          the first arg is just returned.

          Example 1: state_construct = [[ None, u'CA']]. There is one
          containing list only and function here is None and arg is u'CA'.
          This just returns u'CA'.

          Example 2: address_construct = [ [ random.randint, 1, 10000],
          [ None, u'foobar'] ] This has two containing lists and it will
          return the result of:
            random.randint(1, 10000) + ' ' + u'foobar'
          which could be u'7832 foobar'

    Returns:
      The generated parts joined by single spaces, as a unicode string.
    """
    parts = []
    for function_and_args in field_construct:
      function = function_and_args[0]
      args = function_and_args[1:]
      value = function(*args) if function else args[0]
      # u'%s' keeps non-ASCII text intact where str() would try to encode it.
      parts.append(u'%s' % (value,))
    return u' '.join(parts)

  def GenerateAddress(self):
    """Uses _GenerateField() and address_construct to gen a random address.

    Returns:
      A random address.
    """
    return self._GenerateField(self.address_construct)

  def GenerateCity(self):
    """Uses _GenerateField() and city_construct to gen a random city.

    Returns:
      A random city.
    """
    return self._GenerateField(self.city_construct)

  def GenerateState(self):
    """Uses _GenerateField() and state_construct to generate a state.

    Returns:
      A state.
    """
    return self._GenerateField(self.state_construct)

  def GenerateZip(self):
    """Uses zip_construct and generated cities to return a matched zip code.

    Must be called after the city of the current dict has been generated.

    Returns:
      A zip code matched to the corresponding city.

    Raises:
      KeyError: the current dict has no u'ADDRESS_HOME_CITY' value yet.
      ValueError: the generated city is not listed in city_construct.
    """
    city_selected = self.next_dict[u'ADDRESS_HOME_CITY']
    cities = self.city_construct[0][1]
    return self.zip_construct[cities.index(city_selected)]

  def GenerateCountry(self):
    """Uses _GenerateField() and country_construct to generate a country.

    Returns:
      A country.
    """
    return self._GenerateField(self.country_construct)

  def GenerateNameFirst(self):
    """Generates a numerical first name.

    The name is the number of the current dict.
    i.e. u'1', u'2', u'3'

    Returns:
      A numerical first name.
    """
    return u'%s' % self.dict_no

  def GenerateEmail(self):
    """Generates an email that corresponds to the first name.

    i.e. u'1@example.com', u'2@example.com', u'3@example.com'

    Returns:
      An email address that corresponds to the first name.
    """
    return u'%s@example.com' % self.dict_no

  def GenerateNextDict(self):
    """Generates next dictionary of the dataset.

    Increments dict_no and fills next_dict field by field, in the order given
    by 'fields', so that later generators can read earlier values.

    Returns:
      The output dictionary.
    """
    self.dict_no += 1
    self.next_dict = {}
    for key, method_or_value in self.fields:
      if not method_or_value:
        value = u''
      elif callable(method_or_value):
        value = method_or_value()
      else:
        # A constant: used as the value of this field in every dict.
        value = u'%s' % (method_or_value,)
      self.next_dict[key] = value
    return self.next_dict

  def GenerateDataset(self, num_of_dict_to_generate=10):
    """Generates a list of dictionaries.

    Dicts are generated until dict_no reaches num_of_dict_to_generate.
    dict_no is not reset here, so a generator that has already produced some
    dicts only produces the remaining ones. When output_filename is set, each
    dict is also written to that file as one line, between a '[' line and a
    ']' line.

    Args:
      num_of_dict_to_generate: The number of dictionaries to be generated.
          Default value is 10.

    Returns:
      The dictionary list.
    """
    random.seed(0)  # All randomly generated values are reproducible.
    output_file = None
    if self.output_filename:
      output_file = codecs.open(
          self.output_filename, mode='wb', encoding='utf-8-sig')
    try:
      list_of_dict = []
      if output_file:
        output_file.write(u'[')
        output_file.write(os.linesep)

      while self.dict_no < num_of_dict_to_generate:
        output_dict = self.GenerateNextDict()
        list_of_dict.append(output_dict)
        output_line = self.output_pattern % tuple(
            [output_dict[key] for key, _ in self.fields])
        if output_file:
          output_file.write(output_line)
          output_file.write(os.linesep)
        # Lazy logger arguments: no formatting cost when the record is
        # filtered out, and no dependency on sys.stdout.encoding.
        self.logger.info('%d: %s', self.dict_no, output_line)

      if output_file:
        output_file.write(u']')
        output_file.write(os.linesep)
      self.logger.info('%d dictionaries generated SUCCESSFULLY!', self.dict_no)
      self.logger.info('--- FINISHED ---')
      return list_of_dict
    finally:
      if output_file:
        output_file.close()
| 269 |
| 270 |
def main(argv=None):
  """Parses the command line options and generates the requested dataset.

  Args:
    argv: list of command line arguments, without the program name. Default
        value is None, in which case optparse reads them from sys.argv[1:].

  Exits with status 1, after printing the help text, when positional arguments
  are given. Exits through parser.error() when the log level is not one of the
  supported names.
  """
  log_levels = {
      'debug': logging.DEBUG,
      'info': logging.INFO,
      'warning': logging.WARNING,
      'error': logging.ERROR,
  }

  # Command line options.
  parser = OptionParser()
  parser.add_option(
      '-o', '--output', dest='output_filename', default='',
      help='write output to FILE [optional]', metavar='FILE')
  parser.add_option(
      '-d', '--dict', type='int', dest='dict_no', metavar='DICT_NO', default=10,
      help='DICT_NO: number of dictionaries to be generated [default: %default]')
  parser.add_option(
      '-l', '--log_level', dest='log_level', default='debug',
      metavar='LOG_LEVEL',
      help='LOG_LEVEL: "debug", "info", "warning" or "error" [default: %default]')

  (options, args) = parser.parse_args(argv)
  if args:
    parser.print_help()
    sys.exit(1)

  log_level = log_levels.get(options.log_level.lower())
  if log_level is None:
    # parser.error() prints the usage text with this message and exits, so
    # nothing placed after it would run.
    parser.error('Wrong log_level argument.')

  gen = DatasetGenerator(options.output_filename, log_level)
  gen.GenerateDataset(options.dict_no)


# Run the generator only when executed as a script, not when imported.
if __name__ == '__main__':
  main()
OLD | NEW |