Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(7363)

Unified Diff: chrome/test/functional/dataset_converter.py

Issue 6246147: Test Autofill's ability to merge duplicate profiles and... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« chrome/test/functional/autofill.py ('K') | « chrome/test/functional/autofill.py ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: chrome/test/functional/dataset_converter.py
===================================================================
--- chrome/test/functional/dataset_converter.py (revision 0)
+++ chrome/test/functional/dataset_converter.py (revision 0)
@@ -0,0 +1,176 @@
+#!/usr/bin/python
+# Copyright (c) 2011 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Converts profile datasets to dictionary list for Autofill profiles.
+
+Used for test autofill.AutoFillTest.testMergeDuplicateProfilesInAutofill.
+"""
+
+import codecs
+import logging
+import os
+import re
+import sys
+
+
+class NullHandler(logging.Handler):
+ def emit(self, record):
+ pass
dennis_jeffrey 2011/02/16 19:43:29 Right now it looks like you will never see any log
dyu1 2011/02/17 20:38:06 Done.
+
dennis_jeffrey 2011/02/16 19:43:29 Put one more blank line here, to separate these tw
dyu1 2011/02/17 20:38:06 Done.
+class DatasetConverter(object):
+ _fields = [
+ u'NAME_FIRST',
+ u'NAME_MIDDLE',
+ u'NAME_LAST',
+ u'EMAIL_ADDRESS',
+ u'COMPANY_NAME',
+ u'ADDRESS_HOME_LINE1',
+ u'ADDRESS_HOME_LINE2',
+ u'ADDRESS_HOME_CITY',
+ u'ADDRESS_HOME_STATE',
+ u'ADDRESS_HOME_ZIP',
+ u'ADDRESS_HOME_COUNTRY',
+ u'PHONE_HOME_WHOLE_NUMBER',
+ u'PHONE_FAX_WHOLE_NUMBER',
+ ]
+ _record_length = len(_fields)
+ _output_pattern = u'{'
+ for key in _fields:
+ _output_pattern += u"u'%s': u'%%s', " % key
+ _output_pattern = _output_pattern[:-1] + '},'
+ _re_single_quote = re.compile("'", re.UNICODE)
+ _logger = logging.getLogger(__name__)
+ _logger.addHandler(NullHandler())
+
+ def __init__(self, input_filename, output_filename=None):
+ """Constructs a dataset converter object.
+
+ Full input pattern:
+ '(?P<NAME_FIRST>.*?)\|(?P<NAME_MIDDLE>.*?)\|(?P<NAME_LAST>.*?)\|
+ (?P<EMAIL_ADDRESS>.*?)\|(?P<COMPANY_NAME>.*?)\|(?P<ADDRESS_HOME_LINE1>.*?)
+ \|(?P<ADDRESS_HOME_LINE2>.*?)\|(?P<ADDRESS_HOME_CITY>.*?)\|
+ (?P<ADDRESS_HOME_STATE>.*?)\|(?P<ADDRESS_HOME_ZIP>.*?)\|
+ (?P<ADDRESS_HOME_COUNTRY>.*?)\|
+ (?P<PHONE_HOME_WHOLE_NUMBER>.*?)\|(?P<PHONE_FAX_WHOLE_NUMBER>.*?)$'
+
+ Full output pattern:
+ "{u'NAME_FIRST': u'%s', u'NAME_MIDDLE': u'%s', u'NAME_LAST': u'%s',
+ u'EMAIL_ADDRESS': u'%s', u'COMPANY_NAME': u'%s', u'ADDRESS_HOME_LINE1':
+ u'%s', u'ADDRESS_HOME_LINE2': u'%s', u'ADDRESS_HOME_CITY': u'%s',
+ u'ADDRESS_HOME_STATE': u'%s', u'ADDRESS_HOME_ZIP': u'%s',
+ u'ADDRESS_HOME_COUNTRY': u'%s', u'PHONE_HOME_WHOLE_NUMBER': u'%s',
+ u'PHONE_FAX_WHOLE_NUMBER': u'%s',},"
+
+ Args:
dennis_jeffrey 2011/02/16 19:43:29 Capitalize "a" in "args".
dyu1 2011/02/17 20:38:06 Done.
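+ input_filename: path of the input dataset; it is joined onto the
+ directory of the running script (sys.argv[0]).
+ output_filename: path of the converted file, used as given. Defaults to
+ None, in which case no output file is written.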
dennis_jeffrey 2011/02/16 19:43:29 Since this method can now possibly raise "IOError"
dyu1 2011/02/17 20:38:06 Done.
+ """
+ self._input_filename = os.path.join(os.path.dirname(sys.argv[0]),
+ input_filename)
+ if not os.path.isfile(self._input_filename):
+ raise IOError('File "%s" does not exist' % self._input_filename)
+ self._output_filename = output_filename
+
+ def _CreateDictionaryFromRecord(self, line):
+ """Constructs and returns a dictionary from a record in the dataset file.
+
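+ Escapes single quotes first, then uses split('|') to separate the values.
+ Example (abbreviated to three fields; a real record must contain every
+ field listed in _fields, otherwise nothing is returned):
+ Takes a string such as u'John|Doe|Mountain View'
+ and returns a dictionary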
+ {
+ u'NAME_FIRST': u'John',
+ u'NAME_LAST': u'Doe',
+ u'ADDRESS_HOME_CITY': u'Mountain View',
+ }
+
+ Args:
+ line: row of record from the dataset file.
dennis_jeffrey 2011/02/16 19:43:29 Maybe a variable name of "record" might be better
dyu1 2011/02/17 20:38:06 Done.
+
+ Returns:
+ A dictionary mapping each field name in _fields to its value, or None if
+ the line contains no '|' or does not have the expected number of fields.
dennis_jeffrey 2011/02/16 19:43:29 In the "Returns:" section, I think you don't need
dyu1 2011/02/17 20:38:06 Done.
+ """
+ # Ignore irrelevant record lines that do not contain '|'.
dennis_jeffrey 2011/02/16 19:43:29 "does" --> "do"
dyu1 2011/02/17 20:38:06 Done.
+ if not '|' in line:
+ return
+ # Escaping single quote: "'" -> "\'"
+ line = self._re_single_quote.sub(r"\'", line)
+ line_list = line.split('|')
+ if line_list:
+ # Check for case when a line may have more or fewer fields than expected.
+ if len(line_list) != self._record_length:
+ self._logger.warning(
+ 'A "|" seperated line has %d fields instead of %d: %s' % (
dennis_jeffrey 2011/02/16 19:43:29 "seperated" --> "separated"
dyu1 2011/02/17 20:38:06 Done.
+ len(line_list), self._record_length, line))
+ return
+ out_record = {}
+ i = 0
+ for key in self._fields:
+ out_record[key] = line_list[i]
+ i += 1
+ return out_record
+
+ def _Convert(self, input_file, output_file):
+ """The real conversion takes place here.
+
+ The output pattern is applied in this function. Each field needs to be
dennis_jeffrey 2011/02/16 19:43:29 What does it mean for an "output pattern" to "take
dyu1 2011/02/17 20:38:06 Removed this function. On 2011/02/16 19:43:29, de
+ formatted in order to give the converted line.
+
+ Args:
+ input_file: dataset input file.
+ output_file: the converted dictionary list output file.
+
+ Returns:
+ A list holding one dictionary per converted record.
dennis_jeffrey 2011/02/16 19:43:29 Can remove the returned variable name "list_of_dic
dyu1 2011/02/17 20:38:06 Done.
+ """
+ list_of_dict = []
+ i = 0
+ if output_file:
+ output_file.write('[')
+ output_file.write(os.linesep)
+ for line in input_file.readlines():
+ line = line.strip()
+ if not line:
+ continue
+ line = unicode(line, 'UTF-8')
+ output_record = self._CreateDictionaryFromRecord(line)
+ if output_record:
+ i += 1
+ list_of_dict.append(output_record)
+ output_line = self._output_pattern % tuple(
+ [output_record[key] for key in self._fields])
+ if output_file:
+ output_file.write(output_line)
+ output_file.write(os.linesep)
+ self._logger.info('%d: %s' % (i, line.encode(sys.stdout.encoding,
+ 'ignore')))
+ self._logger.info('\tconverted to: %s' %
+ output_line.encode(sys.stdout.encoding, 'ignore'))
+ if output_file:
+ output_file.write(']')
+ output_file.write(os.linesep)
+ self._logger.info('%d lines converted SUCCESSFULLY!' % i)
+ self._logger.info('--- FINISHED ---')
+ return list_of_dict
+
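+ def Convert(self):
+ """Converts the input dataset file to a list of dictionaries.
+
+ The converted list is also written to the output file if an output
+ filename was given to the constructor.
+
+ Returns:
+ A list of dictionaries, one per converted record.
+ """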
dennis_jeffrey 2011/02/16 19:43:29 I think a more descriptive comment might be someth
dyu1 2011/02/17 20:38:06 Done.
+ with open(self._input_filename) as input_file:
+ if self._output_filename:
+ with codecs.open(self._output_filename, mode='wb',
+ encoding='utf-8-sig') as output_file:
+ return self._Convert(input_file, output_file)
+ else:
+ return self._Convert(input_file, None)
+
+
+def main():
+ c = DatasetConverter(r'../data/autofill/dataset.txt',
+ r'../data/autofill/dataset_duplicate-profiles.txt')
+ c.Convert()
+
+if __name__ == '__main__':
+ main()
« chrome/test/functional/autofill.py ('K') | « chrome/test/functional/autofill.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698