chrome/test/functional/autofill_dataset_converter.py - Issue 6246147: Test Autofill's ability to merge duplicate profiles and...

Side by Side Diff: chrome/test/functional/autofill_dataset_converter.py

Issue 6246147: Test Autofill's ability to merge duplicate profiles and... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 9 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 #!/usr/bin/python

	2 # Copyright (c) 2011 The Chromium Authors. All rights reserved.

	3 # Use of this source code is governed by a BSD-style license that can be

	4 # found in the LICENSE file.

	5

	6 """Converts profile datasets to dictionary list for Autofill profiles.

	7

	8 Used for test autofill.AutoFillTest.testMergeDuplicateProfilesInAutofill.

	9 """

	10

	11 import codecs

	12 import logging

	13 import os

	14 import re

	15 import sys

	16

	17

	18 class DatasetConverter(object):

	19 _fields = [

	20 u'NAME_FIRST',

	21 u'NAME_MIDDLE',

	22 u'NAME_LAST',

	23 u'EMAIL_ADDRESS',

	24 u'COMPANY_NAME',

	25 u'ADDRESS_HOME_LINE1',

	26 u'ADDRESS_HOME_LINE2',

	27 u'ADDRESS_HOME_CITY',

	28 u'ADDRESS_HOME_STATE',

	29 u'ADDRESS_HOME_ZIP',

	30 u'ADDRESS_HOME_COUNTRY',

	31 u'PHONE_HOME_WHOLE_NUMBER',

	32 u'PHONE_FAX_WHOLE_NUMBER',

	33 ]

	34 _record_length = len(_fields)

	35 _output_pattern = u'{'

	36 for key in _fields:

	37 _output_pattern += u"u'%s': u'%%s', " % key

	38 _output_pattern = _output_pattern[:-1] + '},'

	39 _re_single_quote = re.compile("'", re.UNICODE)

	40 _logger = logging.getLogger(__name__)

	41

	42 def __init__(self, input_filename, output_filename=None,

	43 logging_level=logging.ERROR):

	44 """Constructs a dataset converter object.

	45

	46 Full input pattern:

	47 '(?P<NAME_FIRST>.?)\\|(?P<MIDDLE_NAME>.?)\\|(?P<NAME_LAST>.*?)\\|

	48 (?P<EMAIL_ADDRESS>.?)\\|(?P<COMPANY_NAME>.?)\\|(?P<ADDRESS_HOME_LINE1>.*?)

	49 \\|(?P<ADDRESS_HOME_LINE2>.?)\\|(?P<ADDRESS_HOME_CITY>.?)\\|

	50 (?P<ADDRESS_HOME_STATE>.?)\\|(?P<ADDRESS_HOME_ZIP>.?)\\|

	51 (?P<ADDRESS_HOME_COUNTRY>.*?)\\|

	52 (?P<PHONE_HOME_WHOLE_NUMBER>.?)\\|(?P<PHONE_FAX_WHOLE_NUMBER>.?)$'

	53

	54 Full ouput pattern:

	55 "{u'NAME_FIRST': u'%s', u'NAME_MIDDLE': u'%s', u'NAME_LAST': u'%s',

	56 u'EMAIL_ADDRESS': u'%s', u'COMPANY_NAME': u'%s', u'ADDRESS_HOME_LINE1':

	57 u'%s', u'ADDRESS_HOME_LINE2': u'%s', u'ADDRESS_HOME_CITY': u'%s',

	58 u'ADDRESS_HOME_STATE': u'%s', u'ADDRESS_HOME_ZIP': u'%s',

	59 u'ADDRESS_HOME_COUNTRY': u'%s', u'PHONE_HOME_WHOLE_NUMBER': u'%s',

	60 u'PHONE_FAX_WHOLE_NUMBER': u'%s',},"

	61

	62 Args:

	63 input_filename: name and path of the input dataset.

	64 output_filename: name and path of the converted file, default is none.

	65 logging_level: set verbosity levels, default is ERROR.

	66

	67 Raises:

	68 IOError: error if input file does not exist.

	69 """

	70 console = logging.StreamHandler()

	71 console.setLevel(logging_level)

	72 self._logger.addHandler(console)

	73

	74 self._input_filename = os.path.join(os.path.dirname(sys.argv[0]),

	75 input_filename)

	76 if not os.path.isfile(self._input_filename):

	77 msg = 'File "%s" does not exist' % self._input_filename

	78 self._logger.error(msg)

	79 raise IOError(msg)

	80 self._output_filename = output_filename

	81

	82 def _CreateDictionaryFromRecord(self, record):

	83 """Constructs and returns a dictionary from a record in the dataset file.

	84

	85 Escapes single quotation first and uses split('\|') to separate values.

	86 The method assumes a valid record always contains at least one "\|"

	87 character.

	88 Example:

	89 Take an argument as a string u'John\|Doe\|Mountain View'

	90 and returns a dictionary

	91 {

	92 u'NAME_FIRST': u'John',

	93 u'NAME_LAST': u'Doe',

	94 u'ADDRESS_HOME_CITY': u'Mountain View',

	95 }

	96

	97 Args:

	98 record: row of record from the dataset file.

	99

	100 Returns:

	101 None if the current record line is invalid or a dictionary representing a

	102 single record from the dataset file.

	103 """

	104 # Ignore irrelevant record lines that do not contain '\|'.

	105 if not '\|' in record:

	106 return

	107 # Escaping single quote: "'" -> "\'"

	108 record = self._re_single_quote.sub(r"\'", record)

	109 record_list = record.split('\|')

	110 if record_list:

	111 # Check for case when a record may have more or less fields than expected.

	112 if len(record_list) != self._record_length:

	113 self._logger.warning(

	114 'A "\|" separated line has %d fields instead of %d: %s' % (

	115 len(record_list), self._record_length, record))

	116 return

	117 out_record = {}

	118 for i, key in enumerate(self._fields):

	119 out_record[key] = record_list[i]

	120 return out_record

	121

	122 def Convert(self):

	123 """Function to convert input data into the desired output format.

	124

	125 Returns:

	126 List that holds all the dictionaries.

	127 """

	128 with open(self._input_filename) as input_file:

	129 if self._output_filename:

	130 output_file = codecs.open(self._output_filename, mode='wb',

	131 encoding='utf-8-sig')

	132 else:

	133 output_file = None

	134 try:

	135 list_of_dict = []

	136 i = 0

	137 if output_file:

	138 output_file.write('[')

	139 output_file.write(os.linesep)

	140 for line in input_file.readlines():

	141 line = line.strip()

	142 if not line:

	143 continue

	144 line = unicode(line, 'UTF-8')

	145 output_record = self._CreateDictionaryFromRecord(line)

	146 if output_record:

	147 i += 1

	148 list_of_dict.append(output_record)

	149 output_line = self._output_pattern % tuple(

	150 [output_record[key] for key in self._fields])

	151 if output_file:

	152 output_file.write(output_line)

	153 output_file.write(os.linesep)

	154 self._logger.info('%d: %s' % (i, line.encode(sys.stdout.encoding,

	155 'ignore')))

	156 self._logger.info('\tconverted to: %s' %

	157 output_line.encode(sys.stdout.encoding, 'ignore'))

	158 if output_file:

	159 output_file.write(']')

	160 output_file.write(os.linesep)

	161 self._logger.info('%d lines converted SUCCESSFULLY!' % i)

	162 self._logger.info('--- FINISHED ---')

	163 return list_of_dict

	164 finally:

	165 if output_file:

	166 output_file.close()

	167

	168

	169 def main():

	170 c = DatasetConverter(r'../data/autofill/dataset.txt',

	171 r'../data/autofill/dataset_duplicate-profiles.txt',

	172 logging.INFO)

	173 c.Convert()

	174

	175 if __name__ == '__main__':

	176 main()

OLD	NEW

« no previous file with comments | « chrome/test/functional/autofill.py ('k') | no next file » | no next file with comments »