chrome/test/functional/dataset-converter.py - Issue 6246147: Test Autofill's ability to merge duplicate profiles and...

Side by Side Diff: chrome/test/functional/dataset-converter.py

Issue 6246147: Test Autofill's ability to merge duplicate profiles and... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: Created 9 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 #!/usr/bin/python

	2 # Copyright (c) 2010 The Chromium Authors. All rights reserved.

	3 # Use of this source code is governed by a BSD-style license that can be

	4 # found in the LICENSE file.

	5

	6

	7 """Takes in a CSV profiles file and outputs to a pyAuto dictionary list format

	8 for converting Autofill Profile datasets.

	9

	10 Used for test autofill.AutoFillTest.testMergeDuplicateProfilesInAutofill.

	11 """

	12

	13 # Specify input and output filename can be full

	14 # path: 'c:\folder\file' or '/home/folder/file'

	15 INPUT_FILE = r"../data/autofill/dataset.txt"

	16 OUTPUT_FILE = r"../data/autofill/dataset_duplicate-profiles.txt"

	17

	18 # Controls output display on the screen.

	19 DISPLAY_INPUT_LINES = True

	20 DISPLAY_CONVERTED_LINES = False

	21

	22 FIELDS = [

	23 u'NAME_FIRST',

	24 u'NAME_MIDDLE',

	25 u'NAME_LAST',

	26 u'EMAIL_ADDRESS',

	27 u'COMPANY_NAME',

	28 u'ADDRESS_HOME_LINE1',

	29 u'ADDRESS_HOME_LINE2',

	30 u'ADDRESS_HOME_CITY',

	31 u'ADDRESS_HOME_STATE',

	32 u'ADDRESS_HOME_ZIP',

	33 u'ADDRESS_HOME_COUNTRY',

	34 u'PHONE_HOME_WHOLE_NUMBER',

	35 u'PHONE_FAX_WHOLE_NUMBER',

	36 ]

	37

	38 import codecs

	39 import re

	40 import sys

	41

	42 class Converter(object):

	43 def __init__(self, fields, filein, fileout):

	44 """

	45 The pattern is a regular expression which has named parenthesis groups

	46 like this (?P<name>...) in order to match the '\|' separated fields.

	47 If we had only the NAME_FIRST and NAME_MIDDLE fields (e.g 'Jared\|JV') our

	48 pattern would be: "(?P<NAME_FIRST>.?)\\|(?P<NAME_MIDDLE>.?)$"

	49

	50 This means that '(?P<NAME_FIRST> regexp)\\|' matches whatever regular

	51 expression is inside the parentheses, and indicates the start and end of a

	52 group; the contents of a group can be retrieved after a match has been

	53 performed using the symbolic group name 'NAME_FIRST'.

	54

	55 The regexp is '.?'. '.' which means to match 0 or more repetitions of any

	56 character. The following '?' makes the regexp non-greedy meaning it will

	57 stop at the first occurrence of the '\|' character (escaped in the pattern).

	58

	59 For '(?P<NAME_MIDDLE>.*?)$' there is no '\|' at the end, so we have '$' to

	60 indicate the end of the line.

	61

	62 From the full pattern, we construct once from the FIELDS list.

	63

	64 The out_line_pattern for one field: "{u'NAME_FIRST': u'%s',"

	65 is ready to accept the value for the 'NAME_FIRST' field once it is extracted

	66 from an input line using the above group pattern.

	67

	68 'pattern' is used in __gerRec(line) to construct and return a dictionary

	69 from a line.

	70

	71 'out_line_pattern' is used in 'convert()' to construct the final dataset

	72 line that will be printed to the output file.

	73 """

	74 self.fields = fields[:]

	75 self.pattern = '(?P<%s>.*?)' %fields[0]

	76 for key in fields[1:]:

	77 self.pattern += '\\|(?P<%s>.*?)' %key

	78 self.pattern = self.pattern + "$"

	79

	80 self.out_line_pattern = u"{"

	81 for key in fields:

	82 self.out_line_pattern += u"u'%s': u'%s', " %(key, "%s")

	83 self.out_line_pattern = self.out_line_pattern[:-1] + "},\n"

	84

	85 self.filein = filein

	86 self.fileout = fileout

	87

	88 def __getRec(self, line):

	89 """

	90 Constructs and returns a dictionary from a line using patterns.

	91 See constructor above.

	92 """

	93 rePat = re.compile("'", re.UNICODE)

	94 line = rePat.sub(r"\'", line)

	95 rePat = re.compile(self.pattern, re.UNICODE)

	96 m = rePat.match(line)

	97 if m:

	98 outrec = {}

	99 for key in self.fields:

	100 outrec[key] = m.group(key)

	101 return outrec

	102

	103 def convert(self, display_input_lines, display_converted_lines):

	104 """

	105 The out_line_pattern is here. Each field needs to be formatted with a tuple

	106 of values for each containing '%s'.

	107 This is done in the line:

	108 out_line = self.out_line_pattern %tuple(

	109 [outrec[key] for key in self.fields])

	110 For two fiels, translates to:

	111 out_line = "{u'NAME_FIRST': u'%s', u'MIDDLE_NAME': u'%s',}," % (

	112 outrec['NAME_FIRST'], outrec['MIDDLE_NAME'])

	113 """

	114 with open(self.filein) as fin:

	115 with codecs.open(self.fileout, mode = "wb",

	116 encoding = "utf-8-sig") as fout:

	117 i = 0

	118 fout.write("[\n")

	119 for line in fin.readlines():

	120 line = line.strip()

	121 if not line:

	122 continue

	123 line = unicode(line, 'UTF-8')

	124 outrec = self.__getRec(line)

	125 if outrec:

	126 i += 1

	127 out_line = self.out_line_pattern %tuple(

	128 [outrec[key] for key in self.fields])

	129 fout.write(out_line)

	130 if display_input_lines:

	131 print "\n%d: %s" %(i, line.encode(sys.stdout.encoding, 'ignore'))

	132 if display_converted_lines:

	133 print "\tconverted to: %s" %out_line.encode(

	134 sys.stdout.encoding, 'ignore')

	135 else:

	136 if not display_input_lines and not i % 10:

	137 print "\t%d lines converted so far!" %i

	138

	139 fout.write("]\n")

	140 print

	141 print "%d lines converted SUCCESSFULLY!" %i

	142 print "--- FINISHED ---"

	143 print

	144

	145

	146 def main():

	147 c = Converter(FIELDS, INPUT_FILE, OUTPUT_FILE)

	148 c.convert(DISPLAY_INPUT_LINES, DISPLAY_CONVERTED_LINES)

	149

	150 if __name__ == '__main__':

	151 main()

OLD	NEW

« chrome/test/functional/autofill.py ('K') | « chrome/test/functional/autofill.py ('k') | no next file » | no next file with comments »