Chromium Code Reviews| Index: chrome/test/functional/autofill_dataset_converter.py |
| =================================================================== |
| --- chrome/test/functional/autofill_dataset_converter.py (revision 0) |
| +++ chrome/test/functional/autofill_dataset_converter.py (revision 0) |
| @@ -0,0 +1,181 @@ |
| +#!/usr/bin/python |
| +# Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +"""Converts profile datasets to dictionary list for Autofill profiles. |
| + |
| +Used for test autofill.AutoFillTest.testMergeDuplicateProfilesInAutofill. |
| +""" |
| + |
| +import codecs |
| +import logging |
| +import os |
| +import re |
| +import sys |
| + |
| + |
class NullHandler(logging.Handler):
  """Logging handler that silently discards every record it receives.

  Installed on the module logger so that importing code does not trigger
  Python 2's 'no handlers could be found' warning when no real handler
  has been configured.
  """

  def emit(self, record):
    """Drop *record* without producing any output."""
| + |
class DatasetConverter(object):
  """Converts a '|'-separated profile dataset into Autofill dictionaries."""

  # Autofill field names, in the order their values appear in each record.
  _fields = [
    u'NAME_FIRST',
    u'NAME_MIDDLE',
    u'NAME_LAST',
    u'EMAIL_ADDRESS',
    u'COMPANY_NAME',
    u'ADDRESS_HOME_LINE1',
    u'ADDRESS_HOME_LINE2',
    u'ADDRESS_HOME_CITY',
    u'ADDRESS_HOME_STATE',
    u'ADDRESS_HOME_ZIP',
    u'ADDRESS_HOME_COUNTRY',
    u'PHONE_HOME_WHOLE_NUMBER',
    u'PHONE_FAX_WHOLE_NUMBER',
  ]
  _record_length = len(_fields)
  # Build the "{u'FIELD': u'%s', ...}," output template once, at
  # class-definition time.
  _output_pattern = u'{'
  for key in _fields:
    _output_pattern += u"u'%s': u'%%s', " % key
  _output_pattern = _output_pattern[:-1] + '},'
  _re_single_quote = re.compile("'", re.UNICODE)
  _logger = logging.getLogger(__name__)
  # Swallow log records unless the caller installs a real handler.
  _logger.addHandler(NullHandler())

  # Verbosity levels callers may pass as |logging_level| without having to
  # import the logging module themselves.
  info_level = logging.INFO
  warning_level = logging.WARNING
  error_level = logging.ERROR

  def __init__(self, input_filename, output_filename=None, logging_level=None):
    """Constructs a dataset converter object.

    Full input pattern:
    '(?P<NAME_FIRST>.*?)\|(?P<MIDDLE_NAME>.*?)\|(?P<NAME_LAST>.*?)\|
    (?P<EMAIL_ADDRESS>.*?)\|(?P<COMPANY_NAME>.*?)\|(?P<ADDRESS_HOME_LINE1>.*?)
    \|(?P<ADDRESS_HOME_LINE2>.*?)\|(?P<ADDRESS_HOME_CITY>.*?)\|
    (?P<ADDRESS_HOME_STATE>.*?)\|(?P<ADDRESS_HOME_ZIP>.*?)\|
    (?P<ADDRESS_HOME_COUNTRY>.*?)\|
    (?P<PHONE_HOME_WHOLE_NUMBER>.*?)\|(?P<PHONE_FAX_WHOLE_NUMBER>.*?)$'

    Full output pattern:
    "{u'NAME_FIRST': u'%s', u'NAME_MIDDLE': u'%s', u'NAME_LAST': u'%s',
    u'EMAIL_ADDRESS': u'%s', u'COMPANY_NAME': u'%s', u'ADDRESS_HOME_LINE1':
    u'%s', u'ADDRESS_HOME_LINE2': u'%s', u'ADDRESS_HOME_CITY': u'%s',
    u'ADDRESS_HOME_STATE': u'%s', u'ADDRESS_HOME_ZIP': u'%s',
    u'ADDRESS_HOME_COUNTRY': u'%s', u'PHONE_HOME_WHOLE_NUMBER': u'%s',
    u'PHONE_FAX_WHOLE_NUMBER': u'%s',},"

    Args:
      input_filename: name and path of the input dataset, relative to the
          directory containing this script.
      output_filename: name and path of the converted file; defaults to None,
          in which case no output file is written.
      logging_level: verbosity level for console output; defaults to None,
          which leaves console logging disabled.

    Raises:
      IOError: error if input file does not exist.
    """
    if logging_level:
      console = logging.StreamHandler()
      # Honor the requested level on the handler too; a hard-coded INFO here
      # would silently drop messages below INFO even when the caller asked
      # for a more verbose level.
      console.setLevel(logging_level)
      self._logger.addHandler(console)
      self._logger.setLevel(logging_level)

    self._input_filename = os.path.join(os.path.dirname(sys.argv[0]),
                                        input_filename)
    if not os.path.isfile(self._input_filename):
      msg = 'File "%s" does not exist' % self._input_filename
      self._logger.error(msg)
      raise IOError(msg)
    self._output_filename = output_filename

  def _CreateDictionaryFromRecord(self, record):
    """Constructs and returns a dictionary from a record in the dataset file.

    Escapes single quotation first and uses split('|') to separate values.
    Example:
      Take an argument as a string u'John|Doe|Mountain View'
      and returns a dictionary
      {
        u'NAME_FIRST': u'John',
        u'NAME_LAST': u'Doe',
        u'ADDRESS_HOME_CITY': u'Mountain View',
      }

    Args:
      record: row of record from the dataset file.

    Returns:
      A dictionary representing a single record from the dataset file, or
      None if the record contains no '|' separator or does not have the
      expected number of fields.
    """
    # Ignore irrelevant record lines that do not contain '|'.
    if '|' not in record:
      return None
    # Escaping single quote: "'" -> "\'"
    record = self._re_single_quote.sub(r"\'", record)
    record_list = record.split('|')
    # Check for case when a record may have more or less fields than expected.
    # (split() always returns at least one element, so no emptiness check is
    # needed.)
    if len(record_list) != self._record_length:
      self._logger.warning(
          'A "|" separated line has %d fields instead of %d: %s' % (
              len(record_list), self._record_length, record))
      return None
    # Pair each field name with its value positionally.
    return dict(zip(self._fields, record_list))

  def Convert(self):
    """Converts the input dataset into the desired output format.

    Writes the converted records to the output file when one was requested
    at construction time.

    Returns:
      A list of dictionaries, one per successfully converted record.
    """
    with open(self._input_filename) as input_file:
      if self._output_filename:
        output_file = codecs.open(self._output_filename, mode='wb',
                                  encoding='utf-8-sig')
      else:
        output_file = None
      try:
        list_of_dict = []
        i = 0
        # sys.stdout.encoding is None when stdout is redirected; fall back
        # to UTF-8 so .encode() below cannot raise.
        console_encoding = sys.stdout.encoding or 'utf-8'
        if output_file:
          output_file.write('[')
          output_file.write(os.linesep)
        # Iterate the file lazily instead of materializing it via readlines().
        for line in input_file:
          line = line.strip()
          if not line:
            continue
          line = unicode(line, 'UTF-8')
          output_record = self._CreateDictionaryFromRecord(line)
          if output_record:
            i += 1
            list_of_dict.append(output_record)
            output_line = self._output_pattern % tuple(
                [output_record[key] for key in self._fields])
            if output_file:
              output_file.write(output_line)
              output_file.write(os.linesep)
            self._logger.info(
                '%d: %s' % (i, line.encode(console_encoding, 'ignore')))
            self._logger.info(
                '\tconverted to: %s' %
                output_line.encode(console_encoding, 'ignore'))
        if output_file:
          output_file.write(']')
          output_file.write(os.linesep)
        self._logger.info('%d lines converted SUCCESSFULLY!' % i)
        self._logger.info('--- FINISHED ---')
        return list_of_dict
      finally:
        if output_file:
          output_file.close()
| + |
| + |
def main():
  """Converts the default Autofill dataset and writes the duplicate-profile
  output file next to it."""
  # Pass the standard logging level directly rather than going through the
  # DatasetConverter.info_level alias.
  converter = DatasetConverter(
      r'../data/autofill/dataset.txt',
      r'../data/autofill/dataset_duplicate-profiles.txt',
      logging.INFO)
  converter.Convert()


if __name__ == '__main__':
  main()