Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(30)

Side by Side Diff: chrome/test/functional/dataset_converter.py

Issue 6246147: Test Autofill's ability to merge duplicate profiles and... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 #!/usr/bin/python
2 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Converts profile datasets to dictionary list for Autofill profiles.
7
8 Used for test autofill.AutoFillTest.testMergeDuplicateProfilesInAutofill.
9 """
10
11 import codecs
12 import logging
13 import os
14 import re
15 import sys
16
17
class NullHandler(logging.Handler):
    """Logging handler that silently discards every record.

    Attached to this module's logger so that the converter never produces
    output by itself; an application that wants to see the messages
    configures logging on its own.
    """

    def emit(self, record):
        """Drops the record.

        Args:
          record: the logging.LogRecord that would have been emitted.
        """
        pass
dennis_jeffrey 2011/02/16 19:43:29 Right now it looks like you will never see any log
dyu1 2011/02/17 20:38:06 Done.
21
dennis_jeffrey 2011/02/16 19:43:29 Put one more blank line here, to separate these tw
dyu1 2011/02/17 20:38:06 Done.
class DatasetConverter(object):
    """Converts a "|"-separated profile dataset into Autofill profile dicts.

    Every non-blank line of the input file is one record whose values are
    listed in the order of ``_fields``.  Records are returned as a list of
    dictionaries and, when an output filename is given, are also written out
    as the text of a Python list literal (UTF-8 with a byte order mark).
    """

    # Dictionary keys, in the order their values appear in a dataset record.
    _fields = [
        u'NAME_FIRST',
        u'NAME_MIDDLE',
        u'NAME_LAST',
        u'EMAIL_ADDRESS',
        u'COMPANY_NAME',
        u'ADDRESS_HOME_LINE1',
        u'ADDRESS_HOME_LINE2',
        u'ADDRESS_HOME_CITY',
        u'ADDRESS_HOME_STATE',
        u'ADDRESS_HOME_ZIP',
        u'ADDRESS_HOME_COUNTRY',
        u'PHONE_HOME_WHOLE_NUMBER',
        u'PHONE_FAX_WHOLE_NUMBER',
    ]
    _record_length = len(_fields)
    # Template for one output line: "{u'KEY': u'%s', ..., u'KEY': u'%s',},".
    # Built with a generator expression so that no loop variable is left
    # behind as a class attribute.
    _output_pattern = (
        u'{' + u' '.join(u"u'%s': u'%%s'," % name for name in _fields) + u'},')
    _re_single_quote = re.compile("'", re.UNICODE)
    _logger = logging.getLogger(__name__)
    # Attach a no-op handler so the converter stays silent unless the
    # application configures logging.  Prefer the standard library's handler
    # and fall back to this module's own NullHandler where logging lacks one.
    try:
        _logger.addHandler(logging.NullHandler())
    except AttributeError:
        _logger.addHandler(NullHandler())

    def __init__(self, input_filename, output_filename=None):
        """Constructs a dataset converter object.

        Full input pattern:
          '(?P<NAME_FIRST>.*?)\\|(?P<NAME_MIDDLE>.*?)\\|(?P<NAME_LAST>.*?)\\|
          (?P<EMAIL_ADDRESS>.*?)\\|(?P<COMPANY_NAME>.*?)\\|
          (?P<ADDRESS_HOME_LINE1>.*?)\\|(?P<ADDRESS_HOME_LINE2>.*?)\\|
          (?P<ADDRESS_HOME_CITY>.*?)\\|(?P<ADDRESS_HOME_STATE>.*?)\\|
          (?P<ADDRESS_HOME_ZIP>.*?)\\|(?P<ADDRESS_HOME_COUNTRY>.*?)\\|
          (?P<PHONE_HOME_WHOLE_NUMBER>.*?)\\|(?P<PHONE_FAX_WHOLE_NUMBER>.*?)$'

        Full output pattern:
          "{u'NAME_FIRST': u'%s', u'NAME_MIDDLE': u'%s', u'NAME_LAST': u'%s',
          u'EMAIL_ADDRESS': u'%s', u'COMPANY_NAME': u'%s',
          u'ADDRESS_HOME_LINE1': u'%s', u'ADDRESS_HOME_LINE2': u'%s',
          u'ADDRESS_HOME_CITY': u'%s', u'ADDRESS_HOME_STATE': u'%s',
          u'ADDRESS_HOME_ZIP': u'%s', u'ADDRESS_HOME_COUNTRY': u'%s',
          u'PHONE_HOME_WHOLE_NUMBER': u'%s', u'PHONE_FAX_WHOLE_NUMBER': u'%s',},"

        Args:
          input_filename: path of the input dataset.  It is joined onto the
              directory of the running script (sys.argv[0]), so a relative
              path is relative to that directory.
          output_filename: path of the converted file, used exactly as given.
              Defaults to None, in which case no file is written.

        Raises:
          IOError: if the resolved input file does not exist.
        """
        self._input_filename = os.path.join(os.path.dirname(sys.argv[0]),
                                            input_filename)
        if not os.path.isfile(self._input_filename):
            raise IOError('File "%s" does not exist' % self._input_filename)
        self._output_filename = output_filename

    def _CreateDictionaryFromRecord(self, line):
        """Constructs and returns a dictionary from one dataset record.

        Single quotes are backslash-escaped first, then the record is split
        on '|' and the values are paired with ``_fields`` in order.

        Example (shortened; a real record needs one value per field):
          u"John||O'Doe|..." gives
          {
            u'NAME_FIRST': u'John',
            u'NAME_MIDDLE': u'',
            u'NAME_LAST': u"O\\'Doe",
            ...
          }

        Args:
          line: one record (a text string) from the dataset file.

        Returns:
          A dictionary mapping every field name to its value, or None when
          the record has no '|' or does not have exactly one value per field.
        """
        # Ignore irrelevant record lines that do not contain '|'.
        if '|' not in line:
            return None
        # Escaping single quote: "'" -> "\'", so that the value can be placed
        # between single quotes in the output pattern.
        # NOTE(review): the escaped text is also what ends up in the returned
        # dictionary, and backslashes in the input are not escaped.
        line = self._re_single_quote.sub(r"\'", line)
        values = line.split('|')
        # A record may have more or fewer fields than expected; skip it.
        if len(values) != self._record_length:
            self._logger.warning(
                'A "|" separated line has %d fields instead of %d: %s',
                len(values), self._record_length, line)
            return None
        return dict(zip(self._fields, values))

    def _Convert(self, input_file, output_file):
        """Converts every record of an open dataset file.

        Blank lines and lines rejected by _CreateDictionaryFromRecord are
        skipped.  Each accepted record is formatted with the output pattern;
        when an output file is given the formatted lines are written to it,
        wrapped between a '[' line and a ']' line.

        Args:
          input_file: iterable of dataset lines.  Byte strings are decoded
              as UTF-8; text strings are used as they are.
          output_file: writable text file for the dictionary list, or None
              to skip writing.

        Returns:
          A list with one dictionary per converted record.
        """
        list_of_dict = []
        if output_file is not None:
            output_file.write('[')
            output_file.write(os.linesep)
        for line in input_file:
            line = line.strip()
            if not line:
                continue
            if isinstance(line, bytes):
                line = line.decode('utf-8')
            output_record = self._CreateDictionaryFromRecord(line)
            if not output_record:
                continue
            list_of_dict.append(output_record)
            output_line = self._output_pattern % tuple(
                output_record[key] for key in self._fields)
            if output_file is not None:
                output_file.write(output_line)
                output_file.write(os.linesep)
            # Text is handed to logging unencoded: encoding it with
            # sys.stdout.encoding fails when stdout has no encoding.
            self._logger.info('%d: %s', len(list_of_dict), line)
            self._logger.info('\tconverted to: %s', output_line)
        if output_file is not None:
            output_file.write(']')
            output_file.write(os.linesep)
        self._logger.info('%d lines converted SUCCESSFULLY!',
                          len(list_of_dict))
        self._logger.info('--- FINISHED ---')
        return list_of_dict

    def Convert(self):
        """Converts the input dataset given to the constructor.

        Reads the input file and, if an output filename was given, writes the
        dictionary list to that file encoded as UTF-8 with a byte order mark.

        Returns:
          A list with one dictionary per converted record.
        """
        # Binary mode: _Convert decodes each line as UTF-8 itself, so the
        # result does not depend on the platform's default text encoding.
        with open(self._input_filename, 'rb') as input_file:
            if not self._output_filename:
                return self._Convert(input_file, None)
            with codecs.open(self._output_filename, mode='wb',
                             encoding='utf-8-sig') as output_file:
                return self._Convert(input_file, output_file)
168
169
def main():
    """Converts the Autofill test dataset when this file is run as a script.

    Reads ../data/autofill/dataset.txt (DatasetConverter resolves the input
    path against the script's directory) and writes the dictionary list to
    ../data/autofill/dataset_duplicate-profiles.txt (used as given).
    """
    # DatasetConverter reports progress through logging only; without a
    # configured handler a command-line run would show nothing.
    logging.basicConfig(level=logging.INFO)
    converter = DatasetConverter(
        r'../data/autofill/dataset.txt',
        r'../data/autofill/dataset_duplicate-profiles.txt')
    converter.Convert()


if __name__ == '__main__':
    main()
OLDNEW
« chrome/test/functional/autofill.py ('K') | « chrome/test/functional/autofill.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698