Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(871)

Unified Diff: chrome/test/functional/autofill_dataset_converter.py

Issue 6246147: Test Autofill's ability to merge duplicate profiles and... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « chrome/test/functional/autofill.py ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: chrome/test/functional/autofill_dataset_converter.py
===================================================================
--- chrome/test/functional/autofill_dataset_converter.py (revision 0)
+++ chrome/test/functional/autofill_dataset_converter.py (revision 0)
@@ -0,0 +1,176 @@
+#!/usr/bin/python
+# Copyright (c) 2011 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Converts profile datasets to dictionary list for Autofill profiles.
+
+Used for test autofill.AutoFillTest.testMergeDuplicateProfilesInAutofill.
+"""
+
+import codecs
+import logging
+import os
+import re
+import sys
+
+
+class DatasetConverter(object):
+ _fields = [
+ u'NAME_FIRST',
+ u'NAME_MIDDLE',
+ u'NAME_LAST',
+ u'EMAIL_ADDRESS',
+ u'COMPANY_NAME',
+ u'ADDRESS_HOME_LINE1',
+ u'ADDRESS_HOME_LINE2',
+ u'ADDRESS_HOME_CITY',
+ u'ADDRESS_HOME_STATE',
+ u'ADDRESS_HOME_ZIP',
+ u'ADDRESS_HOME_COUNTRY',
+ u'PHONE_HOME_WHOLE_NUMBER',
+ u'PHONE_FAX_WHOLE_NUMBER',
+ ]
+ _record_length = len(_fields)
+ _output_pattern = u'{'
+ for key in _fields:
+ _output_pattern += u"u'%s': u'%%s', " % key
+ _output_pattern = _output_pattern[:-1] + '},'
+ _re_single_quote = re.compile("'", re.UNICODE)
+ _logger = logging.getLogger(__name__)
+
+ def __init__(self, input_filename, output_filename=None,
+ logging_level=logging.ERROR):
+ """Constructs a dataset converter object.
+
+ Full input pattern:
+ '(?P<NAME_FIRST>.*?)\|(?P<MIDDLE_NAME>.*?)\|(?P<NAME_LAST>.*?)\|
+ (?P<EMAIL_ADDRESS>.*?)\|(?P<COMPANY_NAME>.*?)\|(?P<ADDRESS_HOME_LINE1>.*?)
+ \|(?P<ADDRESS_HOME_LINE2>.*?)\|(?P<ADDRESS_HOME_CITY>.*?)\|
+ (?P<ADDRESS_HOME_STATE>.*?)\|(?P<ADDRESS_HOME_ZIP>.*?)\|
+ (?P<ADDRESS_HOME_COUNTRY>.*?)\|
+ (?P<PHONE_HOME_WHOLE_NUMBER>.*?)\|(?P<PHONE_FAX_WHOLE_NUMBER>.*?)$'
+
+ Full ouput pattern:
+ "{u'NAME_FIRST': u'%s', u'NAME_MIDDLE': u'%s', u'NAME_LAST': u'%s',
+ u'EMAIL_ADDRESS': u'%s', u'COMPANY_NAME': u'%s', u'ADDRESS_HOME_LINE1':
+ u'%s', u'ADDRESS_HOME_LINE2': u'%s', u'ADDRESS_HOME_CITY': u'%s',
+ u'ADDRESS_HOME_STATE': u'%s', u'ADDRESS_HOME_ZIP': u'%s',
+ u'ADDRESS_HOME_COUNTRY': u'%s', u'PHONE_HOME_WHOLE_NUMBER': u'%s',
+ u'PHONE_FAX_WHOLE_NUMBER': u'%s',},"
+
+ Args:
+ input_filename: name and path of the input dataset.
+ output_filename: name and path of the converted file, default is none.
+ logging_level: set verbosity levels, default is ERROR.
+
+ Raises:
+ IOError: error if input file does not exist.
+ """
+ console = logging.StreamHandler()
+ console.setLevel(logging_level)
+ self._logger.addHandler(console)
+
+ self._input_filename = os.path.join(os.path.dirname(sys.argv[0]),
+ input_filename)
+ if not os.path.isfile(self._input_filename):
+ msg = 'File "%s" does not exist' % self._input_filename
+ self._logger.error(msg)
+ raise IOError(msg)
+ self._output_filename = output_filename
+
+ def _CreateDictionaryFromRecord(self, record):
+ """Constructs and returns a dictionary from a record in the dataset file.
+
+ Escapes single quotation first and uses split('|') to separate values.
+ The method assumes a valid record always contains at least one "|"
+ character.
+ Example:
+ Take an argument as a string u'John|Doe|Mountain View'
+ and returns a dictionary
+ {
+ u'NAME_FIRST': u'John',
+ u'NAME_LAST': u'Doe',
+ u'ADDRESS_HOME_CITY': u'Mountain View',
+ }
+
+ Args:
+ record: row of record from the dataset file.
+
+ Returns:
+ None if the current record line is invalid or a dictionary representing a
+ single record from the dataset file.
+ """
+ # Ignore irrelevant record lines that do not contain '|'.
+ if not '|' in record:
+ return
+ # Escaping single quote: "'" -> "\'"
+ record = self._re_single_quote.sub(r"\'", record)
+ record_list = record.split('|')
+ if record_list:
+ # Check for case when a record may have more or less fields than expected.
+ if len(record_list) != self._record_length:
+ self._logger.warning(
+ 'A "|" separated line has %d fields instead of %d: %s' % (
+ len(record_list), self._record_length, record))
+ return
+ out_record = {}
+ for i, key in enumerate(self._fields):
+ out_record[key] = record_list[i]
+ return out_record
+
+ def Convert(self):
+ """Function to convert input data into the desired output format.
+
+ Returns:
+ List that holds all the dictionaries.
+ """
+ with open(self._input_filename) as input_file:
+ if self._output_filename:
+ output_file = codecs.open(self._output_filename, mode='wb',
+ encoding='utf-8-sig')
+ else:
+ output_file = None
+ try:
+ list_of_dict = []
+ i = 0
+ if output_file:
+ output_file.write('[')
+ output_file.write(os.linesep)
+ for line in input_file.readlines():
+ line = line.strip()
+ if not line:
+ continue
+ line = unicode(line, 'UTF-8')
+ output_record = self._CreateDictionaryFromRecord(line)
+ if output_record:
+ i += 1
+ list_of_dict.append(output_record)
+ output_line = self._output_pattern % tuple(
+ [output_record[key] for key in self._fields])
+ if output_file:
+ output_file.write(output_line)
+ output_file.write(os.linesep)
+ self._logger.info('%d: %s' % (i, line.encode(sys.stdout.encoding,
+ 'ignore')))
+ self._logger.info('\tconverted to: %s' %
+ output_line.encode(sys.stdout.encoding, 'ignore'))
+ if output_file:
+ output_file.write(']')
+ output_file.write(os.linesep)
+ self._logger.info('%d lines converted SUCCESSFULLY!' % i)
+ self._logger.info('--- FINISHED ---')
+ return list_of_dict
+ finally:
+ if output_file:
+ output_file.close()
+
+
+def main():
+ c = DatasetConverter(r'../data/autofill/dataset.txt',
+ r'../data/autofill/dataset_duplicate-profiles.txt',
+ logging.INFO)
+ c.Convert()
+
+if __name__ == '__main__':
+ main()
« no previous file with comments | « chrome/test/functional/autofill.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698