Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(143)

Side by Side Diff: chrome/test/functional/autofill_dataset_converter.py

Issue 6246147: Test Autofill's ability to merge duplicate profiles and... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « chrome/test/functional/autofill.py ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/python
2 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Converts profile datasets to dictionary list for Autofill profiles.
7
8 Used for test autofill.AutoFillTest.testMergeDuplicateProfilesInAutofill.
9 """
10
11 import codecs
12 import logging
13 import os
14 import re
15 import sys
16
17
class DatasetConverter(object):
  """Converts a "|"-separated profile dataset into Autofill profile dicts.

  Every valid input line holds exactly one value per entry of _fields, in
  that order. Convert() returns the parsed records and, when an output
  filename was given, also writes them as dictionary literals inside a list.
  """

  # Profile keys, in the order their values appear in an input record.
  _fields = [
      u'NAME_FIRST',
      u'NAME_MIDDLE',
      u'NAME_LAST',
      u'EMAIL_ADDRESS',
      u'COMPANY_NAME',
      u'ADDRESS_HOME_LINE1',
      u'ADDRESS_HOME_LINE2',
      u'ADDRESS_HOME_CITY',
      u'ADDRESS_HOME_STATE',
      u'ADDRESS_HOME_ZIP',
      u'ADDRESS_HOME_COUNTRY',
      u'PHONE_HOME_WHOLE_NUMBER',
      u'PHONE_FAX_WHOLE_NUMBER',
  ]
  _record_length = len(_fields)
  # One "u'KEY': u'%s'," entry per field, wrapped in "{" and "},". Built with
  # a join so that no loop variable is left behind as a class attribute.
  _output_pattern = (
      u'{' + u' '.join(u"u'%s': u'%%s'," % name for name in _fields) + u'},')
  _re_single_quote = re.compile("'", re.UNICODE)
  _logger = logging.getLogger(__name__)
  # Console handler shared by all instances; created by the first __init__.
  _console_handler = None

  def __init__(self, input_filename, output_filename=None,
               logging_level=logging.ERROR):
    """Constructs a dataset converter object.

    Input format, one record per line, 13 values separated by "|":
      NAME_FIRST|NAME_MIDDLE|NAME_LAST|EMAIL_ADDRESS|COMPANY_NAME|
      ADDRESS_HOME_LINE1|ADDRESS_HOME_LINE2|ADDRESS_HOME_CITY|
      ADDRESS_HOME_STATE|ADDRESS_HOME_ZIP|ADDRESS_HOME_COUNTRY|
      PHONE_HOME_WHOLE_NUMBER|PHONE_FAX_WHOLE_NUMBER

    Full output pattern:
      "{u'NAME_FIRST': u'%s', u'NAME_MIDDLE': u'%s', u'NAME_LAST': u'%s',
      u'EMAIL_ADDRESS': u'%s', u'COMPANY_NAME': u'%s', u'ADDRESS_HOME_LINE1':
      u'%s', u'ADDRESS_HOME_LINE2': u'%s', u'ADDRESS_HOME_CITY': u'%s',
      u'ADDRESS_HOME_STATE': u'%s', u'ADDRESS_HOME_ZIP': u'%s',
      u'ADDRESS_HOME_COUNTRY': u'%s', u'PHONE_HOME_WHOLE_NUMBER': u'%s',
      u'PHONE_FAX_WHOLE_NUMBER': u'%s',},"

    Args:
      input_filename: name and path of the input dataset. It is joined onto
          the directory of sys.argv[0], so a relative path is resolved
          against the running script's directory.
      output_filename: name and path of the converted file, default is None
          (no file is written). Used exactly as given.
      logging_level: console verbosity, default is ERROR. The console handler
          is shared by all instances, so the most recently constructed
          converter decides its level.

    Raises:
      IOError: error if input file does not exist.
    """
    # The logger is a class attribute, so attach the console handler only
    # once; adding one per instance would print every message repeatedly.
    if DatasetConverter._console_handler is None:
      DatasetConverter._console_handler = logging.StreamHandler()
      self._logger.addHandler(DatasetConverter._console_handler)
    DatasetConverter._console_handler.setLevel(logging_level)
    # A handler level alone is not enough: records below the logger's
    # effective level never reach any handler. Lower the logger threshold
    # when needed, but never raise it.
    if not self._logger.isEnabledFor(logging_level):
      self._logger.setLevel(logging_level)

    self._input_filename = os.path.join(os.path.dirname(sys.argv[0]),
                                        input_filename)
    if not os.path.isfile(self._input_filename):
      msg = 'File "%s" does not exist' % self._input_filename
      self._logger.error(msg)
      raise IOError(msg)
    self._output_filename = output_filename

  def _CreateDictionaryFromRecord(self, record):
    """Constructs and returns a dictionary from a record in the dataset file.

    Escapes single quotes first ("'" becomes "\\'"), then uses split('|') to
    separate the values. A record is valid only when it yields exactly one
    value per entry of _fields.
    Example:
      With 13 "|"-separated values such as
        u'John|Q|Doe|...|555-1234|555-5678'
      the result maps every key of _fields to the value at its position:
        {
          u'NAME_FIRST': u'John',
          u'NAME_MIDDLE': u'Q',
          u'NAME_LAST': u'Doe',
          ...
        }

    Args:
      record: row of record from the dataset file.

    Returns:
      None if the current record line is invalid or a dictionary representing a
      single record from the dataset file.
    """
    # Ignore irrelevant record lines that do not contain '|'.
    if '|' not in record:
      return None
    # Escaping single quote: "'" -> "\'"
    record = self._re_single_quote.sub(r"\'", record)
    values = record.split('|')
    # Reject records that have more or fewer fields than expected.
    if len(values) != self._record_length:
      self._logger.warning(
          'A "|" separated line has %d fields instead of %d: %s',
          len(values), self._record_length, record)
      return None
    return dict(zip(self._fields, values))

  def Convert(self):
    """Function to convert input data into the desired output format.

    Reads the input file as UTF-8, skips blank and invalid lines, and, when an
    output filename was given, writes one formatted dictionary per line
    between "[" and "]" using the utf-8-sig encoding.

    Returns:
      List that holds all the dictionaries.
    """
    # sys.stdout.encoding can be None (for example when output is
    # redirected); encode() would then raise TypeError, so fall back to UTF-8.
    console_encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'
    # Read bytes and decode explicitly so decoding is always UTF-8.
    with open(self._input_filename, 'rb') as input_file:
      output_file = None
      if self._output_filename:
        output_file = codecs.open(self._output_filename, mode='wb',
                                  encoding='utf-8-sig')
      try:
        list_of_dict = []
        if output_file:
          output_file.write(u'[' + os.linesep)
        for raw_line in input_file:
          raw_line = raw_line.strip()
          if not raw_line:
            continue
          line = raw_line.decode('utf-8')
          output_record = self._CreateDictionaryFromRecord(line)
          if output_record is None:
            continue
          list_of_dict.append(output_record)
          output_line = self._output_pattern % tuple(
              output_record[key] for key in self._fields)
          if output_file:
            output_file.write(output_line + os.linesep)
          # Characters the console cannot represent are dropped from the log.
          self._logger.info('%d: %s', len(list_of_dict),
                            line.encode(console_encoding, 'ignore'))
          self._logger.info('\tconverted to: %s',
                            output_line.encode(console_encoding, 'ignore'))
        if output_file:
          output_file.write(u']' + os.linesep)
        self._logger.info('%d lines converted SUCCESSFULLY!',
                          len(list_of_dict))
        self._logger.info('--- FINISHED ---')
        return list_of_dict
      finally:
        if output_file:
          output_file.close()
167
168
def main():
  """Runs the converter on ../data/autofill/dataset.txt.

  The converted records are written to
  ../data/autofill/dataset_duplicate-profiles.txt and INFO-level logging is
  requested from the converter.
  """
  converter = DatasetConverter(
      r'../data/autofill/dataset.txt',
      output_filename=r'../data/autofill/dataset_duplicate-profiles.txt',
      logging_level=logging.INFO)
  converter.Convert()


# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
  main()
OLDNEW
« no previous file with comments | « chrome/test/functional/autofill.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698