Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 #!/usr/bin/python | |
| 2 # Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 """Generates profile dictionaries for Autofill. | |
| 7 | |
| 8 Used to test autofill.AutoFillTest.FormFillLatencyAfterSubmit. | |
| 9 Can be used as a stand alone script with -h to print out help text by running: | |
| 10 python autofill_dataset_generator.py -h | |
| 11 """ | |
| 12 | |
| 13 import codecs | |
| 14 import logging | |
| 15 import random | |
| 16 import re | |
| 17 import sys | |
| 18 import os | |
|
dennis_jeffrey
2011/03/18 21:44:01
"import os" should come between "logging" and "ran
dyu1
2011/03/21 18:42:35
Done.
| |
| 19 | |
| 20 | |
| 21 class NullHandler(logging.Handler): | |
| 22 def emit(self, record): | |
| 23 pass | |
| 24 | |
| 25 | |
| 26 class DatasetGenerator(object): | |
| 27 """Generates a dataset of dictionaries. | |
| 28 | |
| 29 The lists (such as address_construct, city_construct) define the way the | |
| 30 corresponding field is generated. They accomplish this by specifying a | |
| 31 list of function-args lists. | |
| 32 """ | |
| 33 address_construct = [ | |
| 34 [ random.randint, 1, 10000], | |
|
Ilya Sherman
2011/03/18 23:17:11
Do we really need randomness? Deterministic tests
dyu1
2011/03/21 18:42:35
This creates what you recommended...
{u'NAME_FIRS
Ilya Sherman
2011/03/22 01:18:58
So, what's the purpose of the random.randint then?
dyu1
2011/03/22 02:52:35
Purpose of [random.randint, 1, 10000] is to genera
Ilya Sherman
2011/03/22 04:08:57
We have no plans to validate user-entered addresse
| |
| 35 [ None, u'foobar'], | |
| 36 [ random.choice, [ u'St', u'Ave', u'Ln', u'Ct', ]], | |
| 37 [ random.choice, [ u'#1', u'#2', u'#3', ]], | |
| 38 ] | |
| 39 | |
| 40 city_construct = [ | |
| 41 [ random.choice, [ u'San Jose', u'San Francisco', u'Sacramento', | |
| 42 u'Los Angeles', ]], | |
| 43 ] | |
| 44 | |
| 45 state_construct = [ | |
| 46 [ None, u'CA'] | |
| 47 ] | |
| 48 | |
| 49 zip_construct = [ | |
| 50 [ None, u'95110'], | |
| 51 ] | |
|
dennis_jeffrey
2011/03/18 21:44:01
Should we have more choices for cities, states, an
dyu1
2011/03/21 18:42:35
I don't think it makes a difference since the mini
Ilya Sherman
2011/03/22 01:18:58
In fact, it's fine to have the whole profile just
| |
| 52 | |
| 53 re_single_quote = re.compile("'", re.UNICODE) | |
|
dennis_jeffrey
2011/03/18 21:44:01
Prefer single quotes for strings:
'\''
Ilya Sherman
2011/03/18 23:17:11
Curious: What's the motivation for this?
dennis_jeffrey
2011/03/18 23:25:39
Consistency; most strings I've seen in python test
Ilya Sherman
2011/03/19 00:41:52
In general, I too prefer single quotes for consist
dyu1
2011/03/21 18:42:35
Done.
| |
| 54 logger = logging.getLogger(__name__) | |
| 55 logger.addHandler(NullHandler()) | |
| 56 log_handlers = {'StreamHandler': None} | |
| 57 | |
| 58 def __init__(self, output_filename=None, logging_level=None): | |
| 59 """Constructs dataset generator object. | |
| 60 | |
| 61 Creates 'fields' data member which is a list of pair (two values) lists. | |
| 62 These pairs are comprised of a field key e.g. u'NAME_FIRST' and a | |
| 63 generator method e.g. self.GenerateNameFirst which will generate the value. | |
| 64 If we want the value to always be the same .e.g. u'John' we can use this | |
|
dennis_jeffrey
2011/03/18 21:44:01
".e.g." --> "e.g."
dyu1
2011/03/21 18:42:35
Done.
| |
| 65 instead a a method. We can even use None keyword which will give | |
|
dennis_jeffrey
2011/03/18 21:44:01
"a a method" --> "of a method"
dyu1
2011/03/21 18:42:35
Done.
| |
| 66 a value of u''. | |
| 67 | |
| 68 'output_pattern' for one field would have been: "{u'NAME_FIRST': u'%s',}" | |
|
Ilya Sherman
2011/03/18 23:17:11
I don't understand why there is both a trailing co
dyu1
2011/03/21 18:42:35
I thought the trailing ,} is required for the last
Ilya Sherman
2011/03/22 01:18:58
Both the comma and the whitespace are optional. I
| |
| 69 which is ready to accept a value for the 'NAME_FIRST' field key once | |
| 70 this value is generated. | |
| 71 'output_pattern' is used in 'GenerateNextDict()' to generate the next | |
| 72 dict line. | |
| 73 | |
| 74 Args: | |
| 75 output_filename: specified filename of generated dataset to be saved. | |
| 76 Default value is None and no saving takes place. | |
| 77 logging_level: set verbosity levels, default is None. | |
| 78 """ | |
| 79 if logging_level: | |
| 80 if not self.log_handlers['StreamHandler']: | |
| 81 console = logging.StreamHandler() | |
| 82 console.setLevel(logging.INFO) | |
| 83 self.log_handlers['StreamHandler'] = console | |
| 84 self.logger.addHandler(console) | |
| 85 self.logger.setLevel(logging_level) | |
| 86 else: | |
| 87 if self.log_handlers['StreamHandler']: | |
| 88 self.logger.removeHandler(self._log_handlers['StreamHandler']) | |
| 89 self.log_handlers['StreamHandler'] = None | |
| 90 | |
| 91 self.output_filename = output_filename | |
| 92 | |
| 93 self.dict_no = 0 | |
| 94 self.fields = [ | |
| 95 [u'NAME_FIRST', self.GenerateNameFirst], | |
| 96 [u'NAME_MIDDLE', None], | |
| 97 [u'NAME_LAST', None], | |
| 98 [u'EMAIL_ADDRESS', self.GenerateEmail], | |
| 99 [u'COMPANY_NAME', None], | |
| 100 [u'ADDRESS_HOME_LINE1', self.GenerateAddress], | |
| 101 [u'ADDRESS_HOME_LINE2', None], | |
| 102 [u'ADDRESS_HOME_CITY', self.GenerateCity], | |
| 103 [u'ADDRESS_HOME_STATE', self.GenerateState], | |
| 104 [u'ADDRESS_HOME_ZIP', self.GenerateZip], | |
| 105 [u'ADDRESS_HOME_COUNTRY', u'United States'], | |
| 106 [u'PHONE_HOME_WHOLE_NUMBER', None], | |
| 107 [u'PHONE_FAX_WHOLE_NUMBER', u'6501234555'], | |
| 108 ] | |
| 109 | |
| 110 self.dict_length = len(self.fields) | |
| 111 self.output_pattern = u"{" | |
|
dennis_jeffrey
2011/03/18 21:44:01
Prefer single quotes over double quotes in strings
dyu1
2011/03/21 18:42:35
Done.
| |
| 112 for key_and_method in self.fields: | |
| 113 self.output_pattern += u"u'%s': u'%s', " %(key_and_method[0], "%s") | |
|
dennis_jeffrey
2011/03/18 21:44:01
Prefer single quotes over double quotes in strings
dyu1
2011/03/21 18:42:35
Done.
| |
| 114 self.output_pattern = self.output_pattern[:-1] + "}," | |
|
dennis_jeffrey
2011/03/18 21:44:01
Prefer single quotes over double quotes in strings
Ilya Sherman
2011/03/18 23:17:11
This line is confusing -- why are we trimming a sp
dyu1
2011/03/21 18:42:35
Done.
dyu1
2011/03/21 18:42:35
This is because in the previous "for key_and_metho
Ilya Sherman
2011/03/22 01:18:58
It's not clear to me why the space matters, and wh
dyu1
2011/03/22 02:52:35
This is more for aesthetic reasons to remove unnee
| |
| 115 | |
| 116 def _GenerateField(self, field_construct): | |
| 117 """Generates each field in each dictionary. | |
| 118 | |
| 119 Args: | |
| 120 field_construct: it is a list of lists. | |
| 121 The first value (index 0) of each containing list is a function or None. | |
| 122 The rest values are the args. If function is None then arg is just | |
|
dennis_jeffrey
2011/03/18 21:44:01
"rest values" --> "remaining values"
dyu1
2011/03/21 18:42:35
Done.
| |
| 123 returned. | |
| 124 | |
| 125 Example 1: zip_construct = [[ None, u'95110']]. There is one | |
| 126 containing list only and function here is None and arg is u'95110'. | |
| 127 This just returns u'95110'. | |
| 128 | |
| 129 Example 2: address_construct = [ [ random.randint, 1, 10000], | |
| 130 [ None, u'foobar'] ] This has two containing lists and it will return | |
| 131 the result of: | |
| 132 random.randint(1, 10000) + ' ' + u'foobar' | |
| 133 which could be u'7832 foobar' | |
| 134 """ | |
| 135 parts = [] | |
| 136 for function_and_args in field_construct: | |
| 137 function = function_and_args[0] | |
| 138 args = function_and_args[1:] | |
| 139 if not function: | |
| 140 function = lambda x: x | |
| 141 parts.append(u'%s' %function(*args)) | |
|
dennis_jeffrey
2011/03/18 21:44:01
Put a space after the '%' operator.
dyu1
2011/03/21 18:42:35
Done.
| |
| 142 return (' ').join(parts) | |
|
dennis_jeffrey
2011/03/18 21:44:01
I think you can remove the parens around the ' '
dyu1
2011/03/21 18:42:35
Seems weird to me without it. Does it hurt to leav
dennis_jeffrey
2011/03/22 23:28:52
No, I don't think it hurts except for requiring tw
| |
| 143 | |
| 144 def GenerateAddress(self): | |
| 145 """Uses _GenerateField() and address_construct to gen a random address. | |
| 146 | |
| 147 Returns random address. | |
| 148 """ | |
| 149 return self._GenerateField(self.address_construct) | |
| 150 | |
| 151 def GenerateCity(self): | |
| 152 """Uses _GenerateField() and city_construct to gen a random city. | |
| 153 | |
| 154 Returns random city. | |
| 155 """ | |
| 156 return self._GenerateField(self.city_construct) | |
| 157 | |
| 158 def GenerateState(self): | |
| 159 """Uses _GenerateField() and state_construct to generate a state. | |
| 160 | |
| 161 Returns random state. | |
| 162 """ | |
| 163 return self._GenerateField(self.state_construct) | |
| 164 | |
| 165 def GenerateZip(self): | |
| 166 """Uses _GenerateField() and zip_construct to generate a zip code. | |
| 167 | |
| 168 Returns random zip code. | |
| 169 """ | |
| 170 return self._GenerateField(self.zip_construct) | |
| 171 | |
| 172 def GenerateCountry(self): | |
| 173 """Uses _GenerateField() and country_construct to generate a country. | |
| 174 | |
| 175 Returns random country. | |
| 176 """ | |
| 177 return self._GenerateField(self.country_construct) | |
| 178 | |
| 179 def GenerateNameFirst(self): | |
| 180 """Generates Numerical First Names. | |
|
dennis_jeffrey
2011/03/18 21:44:01
"Numerical First Names" --> "a numerical first nam
dyu1
2011/03/21 18:42:35
Done.
| |
| 181 | |
| 182 Its Name is the number of the current dict. | |
|
dennis_jeffrey
2011/03/18 21:44:01
"Its Name" --> "The name"
dyu1
2011/03/21 18:42:35
Done.
| |
| 183 i.e. u'1', u'2', u'3' | |
| 184 | |
| 185 Returns random first names. | |
|
dennis_jeffrey
2011/03/18 21:44:01
"random first names" --> "a random first name"
dyu1
2011/03/21 18:42:35
Done.
| |
| 186 """ | |
| 187 return u'%s' % self.dict_no | |
| 188 | |
| 189 def GenerateEmail(self): | |
| 190 """Generates emails that correspond to the First Name. | |
|
dennis_jeffrey
2011/03/18 21:44:01
"emails that correspond to the First Name" -->
"an
dyu1
2011/03/21 18:42:35
Done.
| |
| 191 | |
| 192 i.e. u'1@example.com', u'2@example.com', u'3@example.com' | |
| 193 | |
| 194 Returns random email addresses. | |
|
dennis_jeffrey
2011/03/18 21:44:01
"random email addresses" --> "a random email addre
dyu1
2011/03/21 18:42:35
Done.
| |
| 195 """ | |
| 196 return u'%s@example.com' % self.dict_no | |
| 197 | |
| 198 | |
| 199 def GenerateNextDict(self): | |
| 200 """Generates next dictionary of the dataset. | |
| 201 | |
| 202 It first increments zero starting dict_no. | |
|
dennis_jeffrey
2011/03/18 21:44:01
Probably don't need to mention this in the functio
dyu1
2011/03/21 18:42:35
Done.
| |
| 203 | |
| 204 Returns the output dictionary. | |
|
dennis_jeffrey
2011/03/18 21:44:01
Returns:
The output dictionary.
dyu1
2011/03/21 18:42:35
Done.
| |
| 205 """ | |
| 206 self.dict_no += 1 | |
| 207 output_dict = {} | |
| 208 for key, method_or_value in self.fields: | |
| 209 if not method_or_value: | |
| 210 output_dict[key] = '' | |
| 211 elif type(method_or_value) in [str, unicode]: | |
| 212 output_dict[key] = '%s' % method_or_value | |
| 213 else: | |
| 214 output_dict[key] = method_or_value() | |
| 215 output_dict[key] = self.re_single_quote.sub( | |
| 216 r"\'", output_dict[key]) # escaping single quote: "'" -> "\'" | |
|
dennis_jeffrey
2011/03/18 21:44:01
Put one more space before the "#" that starts the
dennis_jeffrey
2011/03/18 21:44:01
Prefer single quotes to double quotes in strings.
dyu1
2011/03/21 18:42:35
Done.
dyu1
2011/03/21 18:42:35
Done.
| |
| 217 return output_dict | |
| 218 | |
| 219 def GenerateDataset(self, num_of_dict_to_generate=10): | |
| 220 """Generates a list of dictionaries. | |
| 221 | |
| 222 Args: | |
| 223 num_of_dict_to_generate: The number of dictionaries to be generated. | |
| 224 Default value is 10. | |
| 225 | |
| 226 Returns the dictionary list. | |
| 227 """ | |
| 228 if self.output_filename: | |
| 229 output_file = codecs.open( | |
| 230 self.output_filename, mode = 'wb', encoding = 'utf-8-sig') | |
|
dennis_jeffrey
2011/03/18 21:44:01
Remove the spaces around the two "=" in this line.
dyu1
2011/03/21 18:42:35
Done.
| |
| 231 else: | |
| 232 output_file = None | |
| 233 try: | |
| 234 list_of_dict = [] | |
| 235 if output_file: | |
| 236 output_file.write('[') | |
| 237 output_file.write(os.linesep) | |
| 238 | |
| 239 while self.dict_no < num_of_dict_to_generate: | |
| 240 output_dict = self.GenerateNextDict() | |
| 241 list_of_dict.append(output_dict) | |
| 242 output_line = self.output_pattern %tuple( | |
|
dennis_jeffrey
2011/03/18 21:44:01
Put a space after the "%"
dyu1
2011/03/21 18:42:35
Done.
| |
| 243 [output_dict[key_and_method[0]] for key_and_method in self.fields]) | |
| 244 if output_file: | |
| 245 output_file.write(output_line) | |
| 246 output_file.write(os.linesep) | |
| 247 self.logger.info( | |
| 248 "%d: %s" %(self.dict_no, output_line.encode(sys.stdout.encoding, | |
|
dennis_jeffrey
2011/03/18 21:44:01
Put a space after the "%"
dennis_jeffrey
2011/03/18 21:44:01
Prefer single quotes to double quotes in strings.
| |
| 249 'ignore'))) | |
| 250 | |
| 251 if output_file: | |
| 252 output_file.write(']') | |
| 253 output_file.write(os.linesep) | |
| 254 self.logger.info("%d dictionaries generated SUCCESSFULLY!", self.dict_no) | |
| 255 self.logger.info("--- FINISHED ---") | |
|
dennis_jeffrey
2011/03/18 21:44:01
Prefer single quotes to double quotes in strings (
dyu1
2011/03/21 18:42:35
Done.
| |
| 256 return list_of_dict | |
| 257 finally: | |
| 258 if output_file: | |
| 259 output_file.close() | |
| 260 | |
| 261 def main(): | |
| 262 # Command line options. | |
| 263 from optparse import OptionParser | |
|
dennis_jeffrey
2011/03/18 21:44:01
This should be up with the rest of the imports at
dyu1
2011/03/21 18:42:35
Done.
| |
| 264 parser = OptionParser() | |
| 265 parser.add_option("-o", "--output", | |
| 266 dest="output_filename", default="", | |
|
dennis_jeffrey
2011/03/18 21:44:01
This should be indented to line up underneath the
dyu1
2011/03/21 18:42:35
Done.
| |
| 267 help="write output to FILE [optional]", metavar="FILE") | |
| 268 parser.add_option("-v", "--verbose", action="store_true", dest="verbose", | |
| 269 default=True, | |
|
dennis_jeffrey
2011/03/18 21:44:01
Default should probably be False if the action is
dyu1
2011/03/21 18:42:35
The default should be True since this is the defau
dennis_jeffrey
2011/03/22 23:28:52
Yes, I recommend removing the -v and -q options an
| |
| 270 help="display all [default]") | |
| 271 parser.add_option("-q", "--quiet", action="store_false", dest="verbose", | |
| 272 help="display nothing") | |
|
dennis_jeffrey
2011/03/18 21:44:01
What's the default here?
dyu1
2011/03/21 18:42:35
The default is True since both options -v and -q u
| |
| 273 parser.add_option("-l", "--log", dest="logging_level", default = None, | |
| 274 help="LOG_LEVEL: 'debug, 'info', 'warning' or 'error'", | |
| 275 metavar="LOG_LEVEL") | |
|
dennis_jeffrey
2011/03/18 21:44:01
Prefer single quotes to double quotes in strings (
dyu1
2011/03/21 18:42:35
Done.
| |
| 276 | |
| 277 (options, args) = parser.parse_args() | |
| 278 if args: | |
| 279 parser.print_help() | |
| 280 sys.exit(1) | |
| 281 if not options.verbose: | |
| 282 options.logging_level = None | |
|
dennis_jeffrey
2011/03/18 21:44:01
This doesn't seem to make sense. Why would we cle
dyu1
2011/03/21 18:42:35
Deleted
| |
| 283 if options.verbose and not options.logging_level: | |
| 284 options.logging_level = 'info' | |
|
dennis_jeffrey
2011/03/18 21:44:01
"Verbose" usually means the most logging messages.
dyu1
2011/03/21 18:42:35
Deleted.
| |
| 285 if options.logging_level: | |
| 286 if 'debug' in options.logging_level.lower(): | |
| 287 options.logging_level = logging.DEBUG | |
| 288 elif 'info' in options.logging_level.lower(): | |
| 289 options.logging_level = logging.INFO | |
| 290 elif 'warn' in options.logging_level.lower(): | |
| 291 options.logging_level = logging.WARNING | |
| 292 elif 'error' in options.logging_level.lower(): | |
| 293 options.logging_level = logging.ERROR | |
| 294 | |
| 295 gen = DatasetGenerator(options.output_filename, options.logging_level) | |
| 296 gen.GenerateDataset(100) | |
|
dennis_jeffrey
2011/03/18 21:44:01
Maybe this value "100" should be configurable with
dyu1
2011/03/21 18:42:35
Done.
| |
| 297 | |
| 298 | |
| 299 if __name__ == '__main__': | |
| 300 main() | |
| OLD | NEW |