chrome/test/functional/dataset_converter.py - Issue 6246147: Test Autofill's ability to merge duplicate profiles and...

Unified Diff: chrome/test/functional/dataset_converter.py

Issue 6246147: Test Autofill's ability to merge duplicate profiles and... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 9 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: chrome/test/functional/dataset_converter.py

===================================================================

--- chrome/test/functional/dataset_converter.py (revision 0)

+++ chrome/test/functional/dataset_converter.py (revision 0)

@@ -0,0 +1,176 @@

+#!/usr/bin/python

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+"""Converts profile datasets to dictionary list for Autofill profiles.

+Used for test autofill.AutoFillTest.testMergeDuplicateProfilesInAutofill.

+"""

+import codecs

+import logging

+import os

+import re

+import sys

+class NullHandler(logging.Handler):

+ def emit(self, record):

+ pass

dennis_jeffrey 2011/02/16 19:43:29 Right now it looks like you will never see any log

dyu1 2011/02/17 20:38:06 Done.

dennis_jeffrey 2011/02/16 19:43:29 Put one more blank line here, to separate these tw

dyu1 2011/02/17 20:38:06 Done.

+class DatasetConverter(object):

+ _fields = [

+ u'NAME_FIRST',

+ u'NAME_MIDDLE',

+ u'NAME_LAST',

+ u'EMAIL_ADDRESS',

+ u'COMPANY_NAME',

+ u'ADDRESS_HOME_LINE1',

+ u'ADDRESS_HOME_LINE2',

+ u'ADDRESS_HOME_CITY',

+ u'ADDRESS_HOME_STATE',

+ u'ADDRESS_HOME_ZIP',

+ u'ADDRESS_HOME_COUNTRY',

+ u'PHONE_HOME_WHOLE_NUMBER',

+ u'PHONE_FAX_WHOLE_NUMBER',

+ ]

+ _record_length = len(_fields)

+ _output_pattern = u'{'

+ for key in _fields:

+ _output_pattern += u"u'%s': u'%%s', " % key

+ _output_pattern = _output_pattern[:-1] + '},'

+ _re_single_quote = re.compile("'", re.UNICODE)

+ _logger = logging.getLogger(__name__)

+ _logger.addHandler(NullHandler())

+ def __init__(self, input_filename, output_filename=None):

+ """Constructs a dataset converter object.

+ Full input pattern:

+ '(?P<NAME_FIRST>.*?)\|(?P<NAME_MIDDLE>.*?)\|(?P<NAME_LAST>.*?)\|

+ (?P<EMAIL_ADDRESS>.*?)\|(?P<COMPANY_NAME>.*?)\|(?P<ADDRESS_HOME_LINE1>.*?)

+ \|(?P<ADDRESS_HOME_LINE2>.*?)\|(?P<ADDRESS_HOME_CITY>.*?)\|

+ (?P<ADDRESS_HOME_STATE>.*?)\|(?P<ADDRESS_HOME_ZIP>.*?)\|

+ (?P<ADDRESS_HOME_COUNTRY>.*?)\|

+ (?P<PHONE_HOME_WHOLE_NUMBER>.*?)\|(?P<PHONE_FAX_WHOLE_NUMBER>.*?)$'

+ Full output pattern:

+ "{u'NAME_FIRST': u'%s', u'NAME_MIDDLE': u'%s', u'NAME_LAST': u'%s',

+ u'EMAIL_ADDRESS': u'%s', u'COMPANY_NAME': u'%s', u'ADDRESS_HOME_LINE1':

+ u'%s', u'ADDRESS_HOME_LINE2': u'%s', u'ADDRESS_HOME_CITY': u'%s',

+ u'ADDRESS_HOME_STATE': u'%s', u'ADDRESS_HOME_ZIP': u'%s',

+ u'ADDRESS_HOME_COUNTRY': u'%s', u'PHONE_HOME_WHOLE_NUMBER': u'%s',

+ u'PHONE_FAX_WHOLE_NUMBER': u'%s',},"

+ args:

dennis_jeffrey 2011/02/16 19:43:29 Capitalize "a" in "args".

dyu1 2011/02/17 20:38:06 Done.

+ input_filename: name and path of the input dataset.

+ output_filename: name and path of the converted file, default is None.

dennis_jeffrey 2011/02/16 19:43:29 Since this method can now possibly raise "IOError"

dyu1 2011/02/17 20:38:06 Done.

+ """

+ self._input_filename = os.path.join(os.path.dirname(sys.argv[0]),

+ input_filename)

+ if not os.path.isfile(self._input_filename):

+ raise IOError('File "%s" does not exist' % self._input_filename)

+ self._output_filename = output_filename

+ def _CreateDictionaryFromRecord(self, line):

+ """Constructs and returns a dictionary from a record in the dataset file.

+ Escapes single quotation first and uses split('|') to separate values.

+ Example:

+ Takes a string argument such as u'John|Doe|Mountain View' (abbreviated

+ here; a real record must carry all 13 fields) and returns a dictionary

+ {

+ u'NAME_FIRST': u'John',

+ u'NAME_LAST': u'Doe',

+ u'ADDRESS_HOME_CITY': u'Mountain View',

+ }

+ Args:

+ line: row of record from the dataset file.

dennis_jeffrey 2011/02/16 19:43:29 Maybe a variable name of "record" might be better

dyu1 2011/02/17 20:38:06 Done.

+ Returns:

+ out_record: a dictionary that comes from conversion of a single line.

+ same as the output_record.

dennis_jeffrey 2011/02/16 19:43:29 In the "Returns:" section, I think you don't need

dyu1 2011/02/17 20:38:06 Done.

+ """

+ # Ignore irrelevant record lines that does not contain '|'.

dennis_jeffrey 2011/02/16 19:43:29 "does" --> "do"

dyu1 2011/02/17 20:38:06 Done.

+ if not '|' in line:

+ return

+ # Escaping single quote: "'" -> "\'"

+ line = self._re_single_quote.sub(r"\'", line)

+ line_list = line.split('|')

+ if line_list:

+ # Check for case when a line may have more or less fields than expected.

+ if len(line_list) != self._record_length:

+ self._logger.warning(

+ 'A "|" seperated line has %d fields instead of %d: %s' % (

dennis_jeffrey 2011/02/16 19:43:29 "seperated" --> "separated"

dyu1 2011/02/17 20:38:06 Done.

+ len(line_list), self._record_length, line))

+ return

+ out_record = {}

+ i = 0

+ for key in self._fields:

+ out_record[key] = line_list[i]

+ i += 1

+ return out_record

+ def _Convert(self, input_file, output_file):

+ """The real conversion takes place here.

+ The output pattern takes place in this function. Each field needs to be

dennis_jeffrey 2011/02/16 19:43:29 What does it mean for an "output pattern" to "take

dyu1 2011/02/17 20:38:06 Removed this function. On 2011/02/16 19:43:29, de

+ formatted in order to give the converted line.

+ Args:

+ input_file: dataset input file.

+ output_file: the converted dictionary list output file.

+ Returns:

+ list_of_dict: list that holds all the dictionaries.

dennis_jeffrey 2011/02/16 19:43:29 Can remove the returned variable name "list_of_dic

dyu1 2011/02/17 20:38:06 Done.

+ """

+ list_of_dict = []

+ i = 0

+ if output_file:

+ output_file.write('[')

+ output_file.write(os.linesep)

+ for line in input_file.readlines():

+ line = line.strip()

+ if not line:

+ continue

+ line = unicode(line, 'UTF-8')

+ output_record = self._CreateDictionaryFromRecord(line)

+ if output_record:

+ i += 1

+ list_of_dict.append(output_record)

+ output_line = self._output_pattern % tuple(

+ [output_record[key] for key in self._fields])

+ if output_file:

+ output_file.write(output_line)

+ output_file.write(os.linesep)

+ self._logger.info('%d: %s' % (i, line.encode(sys.stdout.encoding,

+ 'ignore')))

+ self._logger.info('\tconverted to: %s' %

+ output_line.encode(sys.stdout.encoding, 'ignore'))

+ if output_file:

+ output_file.write(']')

+ output_file.write(os.linesep)

+ self._logger.info('%d lines converted SUCCESSFULLY!' % i)

+ self._logger.info('--- FINISHED ---')

+ return list_of_dict

+ def Convert(self):

+ """Uses values of the two data attributes of the current objects."""

dennis_jeffrey 2011/02/16 19:43:29 I think a more descriptive comment might be someth

dyu1 2011/02/17 20:38:06 Done.

+ with open(self._input_filename) as input_file:

+ if self._output_filename:

+ with codecs.open(self._output_filename, mode='wb',

+ encoding='utf-8-sig') as output_file:

+ return self._Convert(input_file, output_file)

+ else:

+ return self._Convert(input_file, None)

+def main():

+ c = DatasetConverter(r'../data/autofill/dataset.txt',

+ r'../data/autofill/dataset_duplicate-profiles.txt')

+ c.Convert()

+if __name__ == '__main__':

+ main()

« chrome/test/functional/autofill.py ('K') | « chrome/test/functional/autofill.py ('k') | no next file » | no next file with comments »