Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(193)

Side by Side Diff: chrome/test/functional/autofill_dataset_converter.py

Issue 6246147: Test Autofill's ability to merge duplicate profiles and... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 #!/usr/bin/python
2 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Converts profile datasets to dictionary list for Autofill profiles.
7
8 Used for test autofill.AutoFillTest.testMergeDuplicateProfilesInAutofill.
9 """
10
11 import codecs
12 import logging
13 import os
14 import re
15 import sys
16
17
class NullHandler(logging.Handler):
  """Logging handler that silently discards every record it is given."""

  def emit(self, record):
    """Drops |record| without writing it anywhere."""
21
22
class DatasetConverter(object):
  """Converts a '|'-separated profile dataset into Autofill profile dicts.

  Each relevant input line holds exactly len(_fields) values separated by
  '|', in the order listed in |_fields|:

    NAME_FIRST|NAME_MIDDLE|NAME_LAST|EMAIL_ADDRESS|COMPANY_NAME|
    ADDRESS_HOME_LINE1|ADDRESS_HOME_LINE2|ADDRESS_HOME_CITY|
    ADDRESS_HOME_STATE|ADDRESS_HOME_ZIP|ADDRESS_HOME_COUNTRY|
    PHONE_HOME_WHOLE_NUMBER|PHONE_FAX_WHOLE_NUMBER

  Each converted record is written to the output file as one line of the form:

    {u'NAME_FIRST': u'%s', u'NAME_MIDDLE': u'%s', u'NAME_LAST': u'%s',
    u'EMAIL_ADDRESS': u'%s', u'COMPANY_NAME': u'%s', u'ADDRESS_HOME_LINE1':
    u'%s', u'ADDRESS_HOME_LINE2': u'%s', u'ADDRESS_HOME_CITY': u'%s',
    u'ADDRESS_HOME_STATE': u'%s', u'ADDRESS_HOME_ZIP': u'%s',
    u'ADDRESS_HOME_COUNTRY': u'%s', u'PHONE_HOME_WHOLE_NUMBER': u'%s',
    u'PHONE_FAX_WHOLE_NUMBER': u'%s',},
  """

  # Field names, in the order the values appear in an input record.
  _fields = [
      u'NAME_FIRST',
      u'NAME_MIDDLE',
      u'NAME_LAST',
      u'EMAIL_ADDRESS',
      u'COMPANY_NAME',
      u'ADDRESS_HOME_LINE1',
      u'ADDRESS_HOME_LINE2',
      u'ADDRESS_HOME_CITY',
      u'ADDRESS_HOME_STATE',
      u'ADDRESS_HOME_ZIP',
      u'ADDRESS_HOME_COUNTRY',
      u'PHONE_HOME_WHOLE_NUMBER',
      u'PHONE_FAX_WHOLE_NUMBER',
  ]
  _record_length = len(_fields)
  # Format string with one u'%s' placeholder per field; yields a dictionary
  # literal followed by a comma.
  _output_pattern = (
      u'{' +
      u' '.join(u"u'%s': u'%%s'," % field for field in _fields) +
      u'},')
  _re_single_quote = re.compile("'", re.UNICODE)
  # The logger is shared by every instance of the class.
  _logger = logging.getLogger(__name__)
  _logger.addHandler(NullHandler())
  # Console handler attached to |_logger|; created at most once so that
  # constructing several converters does not duplicate every log message.
  _console_handler = None
  # Aliases of the logging module's levels, kept for existing callers.
  info_level = logging.INFO
  warning_level = logging.WARNING
  error_level = logging.ERROR

  def __init__(self, input_filename, output_filename=None, logging_level=None):
    """Constructs a dataset converter object.

    Args:
      input_filename: path of the input dataset, resolved relative to the
          directory of the running script (sys.argv[0]).
      output_filename: path of the converted file, used exactly as given.
          Default is None, in which case no file is written.
      logging_level: verbosity level for console logging (e.g. logging.INFO).
          Default is None, which leaves the logger configuration untouched.

    Raises:
      IOError: error if input file does not exist.
    """
    if logging_level:
      if DatasetConverter._console_handler is None:
        # No level is set on the handler itself: the logger's level below
        # already decides which records get through.
        DatasetConverter._console_handler = logging.StreamHandler()
        self._logger.addHandler(DatasetConverter._console_handler)
      self._logger.setLevel(logging_level)

    self._input_filename = os.path.join(os.path.dirname(sys.argv[0]),
                                        input_filename)
    if not os.path.isfile(self._input_filename):
      msg = 'File "%s" does not exist' % self._input_filename
      self._logger.error(msg)
      raise IOError(msg)
    self._output_filename = output_filename

  def _CreateDictionaryFromRecord(self, record):
    """Constructs and returns a dictionary from a record in the dataset file.

    Escapes single quotation first and uses split('|') to separate values.
    Note that the escaping is applied to the values stored in the returned
    dictionary as well, not only to the text written to the output file.

    Example (shortened to three fields; a real record must contain one value
    for every entry in |_fields|):
      Take an argument as a string u'John|Doe|Mountain View'
      and returns a dictionary
      {
        u'NAME_FIRST': u'John',
        u'NAME_LAST': u'Doe',
        u'ADDRESS_HOME_CITY': u'Mountain View',
      }

    Args:
      record: row of record from the dataset file.

    Returns:
      A dictionary representing a single record from the dataset file, or
      None if the record contains no '|' or does not have exactly as many
      fields as |_fields|.
    """
    # Ignore irrelevant record lines that do not contain '|'.
    if '|' not in record:
      return None
    # Escaping single quote: "'" -> "\'"
    record = self._re_single_quote.sub(r"\'", record)
    values = record.split('|')
    # Check for case when a record may have more or less fields than expected.
    if len(values) != self._record_length:
      self._logger.warning(
          'A "|" separated line has %d fields instead of %d: %s',
          len(values), self._record_length, record)
      return None
    return dict(zip(self._fields, values))

  @staticmethod
  def _EncodeForConsole(text):
    """Returns |text| with characters the console cannot display removed.

    Falls back to UTF-8 when sys.stdout reports no encoding (for example when
    the output is redirected), instead of failing on encode(None).
    """
    encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'
    encoded = text.encode(encoding, 'ignore')
    if isinstance(encoded, str):
      # Python 2: the encoded byte string is what gets logged.
      return encoded
    # Python 3: go back to text so the log does not show a b'...' repr.
    return encoded.decode(encoding)

  def Convert(self):
    """Converts the input dataset into a list of profile dictionaries.

    Blank lines, lines without '|' and lines with an unexpected number of
    fields are skipped. If an output filename was given, the converted
    records are also written to that file (UTF-8 with BOM) as a bracketed
    list, one dictionary literal per line.

    Returns:
      A list of dictionaries, one per successfully converted record, in the
      order the records appear in the input file.
    """
    # Binary mode plus an explicit decode keeps the UTF-8 handling identical
    # on Python 2 and Python 3.
    with open(self._input_filename, 'rb') as input_file:
      if self._output_filename:
        output_file = codecs.open(self._output_filename, mode='wb',
                                  encoding='utf-8-sig')
      else:
        output_file = None
      try:
        list_of_dict = []
        log_details = self._logger.isEnabledFor(logging.INFO)
        if output_file:
          output_file.write('[')
          output_file.write(os.linesep)
        for raw_line in input_file:
          raw_line = raw_line.strip()
          if not raw_line:
            continue
          line = raw_line.decode('utf-8')
          output_record = self._CreateDictionaryFromRecord(line)
          if not output_record:
            continue
          list_of_dict.append(output_record)
          output_line = self._output_pattern % tuple(
              [output_record[key] for key in self._fields])
          if output_file:
            output_file.write(output_line)
            output_file.write(os.linesep)
          if log_details:
            self._logger.info('%d: %s', len(list_of_dict),
                              self._EncodeForConsole(line))
            self._logger.info('\tconverted to: %s',
                              self._EncodeForConsole(output_line))
        if output_file:
          output_file.write(']')
          output_file.write(os.linesep)
        self._logger.info('%d lines converted SUCCESSFULLY!', len(list_of_dict))
        self._logger.info('--- FINISHED ---')
        return list_of_dict
      finally:
        if output_file:
          output_file.close()
172
173
def main():
  """Converts the Autofill dataset file, logging progress at INFO level.

  The input path is resolved by DatasetConverter; the output path is passed
  through as given.
  """
  converter = DatasetConverter(
      r'../data/autofill/dataset.txt',
      r'../data/autofill/dataset_duplicate-profiles.txt',
      logging.INFO)
  converter.Convert()


if __name__ == '__main__':
  main()
OLDNEW
« chrome/test/functional/autofill.py ('K') | « chrome/test/functional/autofill.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698