Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4678)

Unified Diff: chrome/test/functional/autofill_dataset_converter.py

Issue 6246147: Test Autofill's ability to merge duplicate profiles and... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« chrome/test/functional/autofill.py ('K') | « chrome/test/functional/autofill.py ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: chrome/test/functional/autofill_dataset_converter.py
===================================================================
--- chrome/test/functional/autofill_dataset_converter.py (revision 0)
+++ chrome/test/functional/autofill_dataset_converter.py (revision 0)
@@ -0,0 +1,181 @@
+#!/usr/bin/python
+# Copyright (c) 2011 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Converts profile datasets to dictionary list for Autofill profiles.
+
+Used for test autofill.AutoFillTest.testMergeDuplicateProfilesInAutofill.
+"""
+
+import codecs
+import logging
+import os
+import re
+import sys
+
+
class NullHandler(logging.Handler):
  """A logging handler that silently discards every record it receives.

  Attached to the module logger so that, when no logging level is
  configured, the logging package does not complain about a logger
  with no handlers.
  """

  def emit(self, record):
    """Drop |record| without writing it anywhere (intentional no-op)."""
    return
+
+
class DatasetConverter(object):
  """Converts a '|'-delimited profile dataset into Autofill dictionaries.

  Each non-empty input line holds one profile record whose fields are
  separated by '|', in the column order of |_fields|. Convert() returns one
  dict per record and can optionally write the records, formatted as Python
  dict literals, to an output file.

  NOTE: this is Python 2 era code (uses the `unicode` builtin in Convert()).
  """

  # Autofill field names, in the column order used by the dataset file.
  _fields = [
      u'NAME_FIRST',
      u'NAME_MIDDLE',
      u'NAME_LAST',
      u'EMAIL_ADDRESS',
      u'COMPANY_NAME',
      u'ADDRESS_HOME_LINE1',
      u'ADDRESS_HOME_LINE2',
      u'ADDRESS_HOME_CITY',
      u'ADDRESS_HOME_STATE',
      u'ADDRESS_HOME_ZIP',
      u'ADDRESS_HOME_COUNTRY',
      u'PHONE_HOME_WHOLE_NUMBER',
      u'PHONE_FAX_WHOLE_NUMBER',
  ]
  _record_length = len(_fields)
  # Output template, one '%s' slot per field:
  #   "{u'NAME_FIRST': u'%s', ... u'PHONE_FAX_WHOLE_NUMBER': u'%s',},"
  # Built with a join so no loop variable leaks into the class namespace.
  _output_pattern = (
      u'{' + u' '.join([u"u'%s': u'%%s'," % f for f in _fields]) + u'},')
  _re_single_quote = re.compile("'", re.UNICODE)
  _logger = logging.getLogger(__name__)
  # Stdlib no-op handler (Python >= 2.7) keeps the logger quiet when the
  # caller configures no logging level.
  _logger.addHandler(logging.NullHandler())
  # Convenience aliases so callers need not import logging themselves.
  info_level = logging.INFO
  warning_level = logging.WARNING
  error_level = logging.ERROR

  def __init__(self, input_filename, output_filename=None, logging_level=None):
    """Constructs a dataset converter object.

    Args:
      input_filename: name and path of the input dataset, relative to the
          directory containing this script.
      output_filename: name and path of the converted file; default is None
          (no file output).
      logging_level: verbosity level (e.g. logging.INFO); default is None,
          which leaves the converter silent.

    Raises:
      IOError: if the input file does not exist.
    """
    if logging_level:
      console = logging.StreamHandler()
      # Match the handler's threshold to the requested level. (Previously
      # it was pinned at logging.INFO, so finer levels such as DEBUG were
      # filtered out by the handler even though the logger accepted them.)
      console.setLevel(logging_level)
      self._logger.addHandler(console)
      self._logger.setLevel(logging_level)

    # Resolve the input path relative to the script's own directory.
    self._input_filename = os.path.join(os.path.dirname(sys.argv[0]),
                                        input_filename)
    if not os.path.isfile(self._input_filename):
      msg = 'File "%s" does not exist' % self._input_filename
      self._logger.error(msg)
      raise IOError(msg)
    self._output_filename = output_filename

  def _CreateDictionaryFromRecord(self, record):
    """Constructs and returns a dictionary from a record in the dataset file.

    Escapes single quotes first, then splits the record on '|'.
    Example: the string u"John|Doe|Mountain View" (with the matching field
    list) yields
      {
        u'NAME_FIRST': u'John',
        u'NAME_LAST': u'Doe',
        u'ADDRESS_HOME_CITY': u'Mountain View',
      }

    Args:
      record: one row (line) of the dataset file.

    Returns:
      A dict mapping each field name to its value, or None if the line
      contains no '|' or has the wrong number of fields.
    """
    # Ignore irrelevant record lines that do not contain '|'.
    if not '|' in record:
      return None
    # Escape single quotes ("'" -> "\'") so values embed safely inside the
    # single-quoted strings of the output pattern.
    record = self._re_single_quote.sub(r"\'", record)
    record_list = record.split('|')
    # Guard against records with more or fewer fields than expected.
    # (split() always returns a non-empty list, so no emptiness check.)
    if len(record_list) != self._record_length:
      self._logger.warning(
          'A "|" separated line has %d fields instead of %d: %s' % (
              len(record_list), self._record_length, record))
      return None
    # Pair field names with values positionally.
    return dict(zip(self._fields, record_list))

  def Convert(self):
    """Converts the input dataset into the desired output format.

    Returns:
      A list of dicts, one per successfully parsed record. If an output
      filename was supplied to the constructor, the records are also written
      there as a bracketed list of Python dict literals (UTF-8 with BOM).
    """
    with open(self._input_filename) as input_file:
      if self._output_filename:
        output_file = codecs.open(self._output_filename, mode='wb',
                                  encoding='utf-8-sig')
      else:
        output_file = None
      try:
        list_of_dict = []
        i = 0
        if output_file:
          output_file.write('[')
          output_file.write(os.linesep)
        # Iterate the file object directly; readlines() would needlessly
        # materialize the whole file in memory first.
        for line in input_file:
          line = line.strip()
          if not line:
            continue
          line = unicode(line, 'UTF-8')
          output_record = self._CreateDictionaryFromRecord(line)
          if output_record:
            i += 1
            list_of_dict.append(output_record)
            output_line = self._output_pattern % tuple(
                [output_record[key] for key in self._fields])
            if output_file:
              output_file.write(output_line)
              output_file.write(os.linesep)
            self._logger.info('%d: %s' % (i, line.encode(sys.stdout.encoding,
                                                         'ignore')))
            self._logger.info('\tconverted to: %s' %
                              output_line.encode(sys.stdout.encoding, 'ignore'))
        if output_file:
          output_file.write(']')
          output_file.write(os.linesep)
        self._logger.info('%d lines converted SUCCESSFULLY!' % i)
        self._logger.info('--- FINISHED ---')
        return list_of_dict
      finally:
        if output_file:
          output_file.close()
+
+
def main():
  """Converts the checked-in dataset into the duplicate-profiles data file."""
  c = DatasetConverter(r'../data/autofill/dataset.txt',
                       r'../data/autofill/dataset_duplicate-profiles.txt',
                       # Use the logging constant directly rather than the
                       # DatasetConverter.info_level alias (same value).
                       logging.INFO)
  c.Convert()


if __name__ == '__main__':
  main()
« chrome/test/functional/autofill.py ('K') | « chrome/test/functional/autofill.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698