chrome/test/functional/dataset-converter.py - Issue 6246147: Test Autofill's ability to merge duplicate profiles and...

Unified Diff: chrome/test/functional/dataset-converter.py

Issue 6246147: Test Autofill's ability to merge duplicate profiles and... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 9 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: chrome/test/functional/dataset-converter.py

===================================================================

--- chrome/test/functional/dataset-converter.py (revision 0)

+++ chrome/test/functional/dataset-converter.py (revision 0)

@@ -0,0 +1,155 @@

+#!/usr/bin/python

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+"""Takes in a CSV profiles file and outputs to a pyAuto dictionary list format

Nirnimesh 2011/02/07 22:20:02 what's a pyauto dictionary list format?

dyu1 2011/02/07 23:06:59 Changed the wording. Input is a csv file with a bu

+for converting Autofill Profile datasets.

+Used for test autofill.AutoFillTest.testMergeDuplicateProfilesInAutofill.

+"""

Nirnimesh 2011/02/07 22:20:02 Do you really need dataset.txt? Why not just have

dyu1 2011/02/07 23:06:59 Then I have to manually create this list. I was gi

+# Specify the input and output filenames. Each can be a full

+# path, e.g. 'c:\folder\file' or '/home/folder/file'.

+INPUT_FILE = r"../data/autofill/dataset.txt"

+OUTPUT_FILE = r"../data/autofill/dataset_duplicate-profiles.txt"

+# Controls output display on the screen.

+DISPLAY_INPUT_LINES = True

+DISPLAY_CONVERTED_LINES = False

+FIELDS = [

+ u'NAME_FIRST',

+ u'NAME_MIDDLE',

+ u'NAME_LAST',

+ u'EMAIL_ADDRESS',

+ u'COMPANY_NAME',

+ u'ADDRESS_HOME_LINE1',

+ u'ADDRESS_HOME_LINE2',

+ u'ADDRESS_HOME_CITY',

+ u'ADDRESS_HOME_STATE',

+ u'ADDRESS_HOME_ZIP',

+ u'ADDRESS_HOME_COUNTRY',

+ u'PHONE_HOME_WHOLE_NUMBER',

+ u'PHONE_FAX_WHOLE_NUMBER',

+import codecs

+import os

+import re

+import sys

+class Converter(object):

+ def __init__(self, fields, filein, fileout):

+ """

+ The pattern is a regular expression which has named parenthesis groups

+ like this (?P<name>...) in order to match the '|' separated fields.

+ If we had only the NAME_FIRST and NAME_MIDDLE fields (e.g. 'Jared|JV') our

+ pattern would be: "(?P<NAME_FIRST>.*?)\|(?P<NAME_MIDDLE>.*?)$"

+ This means that '(?P<NAME_FIRST> regexp)\|' matches whatever regular

+ expression is inside the parentheses, and indicates the start and end of a

+ group; the contents of a group can be retrieved after a match has been

+ performed using the symbolic group name 'NAME_FIRST'.

+ The regexp is '.*?'. The '.*' part means to match 0 or more repetitions of any

+ character. The following '?' makes the regexp non-greedy meaning it will

+ stop at the first occurrence of the '|' character (escaped in the pattern).

+ For '(?P<NAME_MIDDLE>.*?)$' there is no '|' at the end, so we have '$' to

+ indicate the end of the line.

+ The full pattern is constructed once, here in the constructor, from the FIELDS list.

+ The out_line_pattern for one field: "{u'NAME_FIRST': u'%s',"

+ is ready to accept the value for the 'NAME_FIRST' field once it is extracted

+ from an input line using the above group pattern.

+ 'pattern' is used in __getRec(line) to construct and return a dictionary

+ from a line.

+ 'out_line_pattern' is used in 'convert()' to construct the final dataset

+ line that will be printed to the output file.

+ """

+ self.fields = fields[:]

+ self.pattern = '(?P<%s>.*?)' %fields[0]

+ for key in fields[1:]:

+ self.pattern += '\|(?P<%s>.*?)' %key

+ self.pattern = self.pattern + "$"

+ self.out_line_pattern = u"{"

+ for key in fields:

+ self.out_line_pattern += u"u'%s': u'%s', " %(key, "%s")

+ self.out_line_pattern = self.out_line_pattern[:-1] + "},\n"

+ self.filein = filein

+ self.fileout = fileout

+ def __getRec(self, line):

+ """

+ Constructs and returns a dictionary from a line using patterns.

+ See constructor above.

+ """

+ rePat = re.compile("'", re.UNICODE)

+ line = rePat.sub(r"\'", line)

+ rePat = re.compile(self.pattern, re.UNICODE)

+ m = rePat.match(line)

+ if m:

+ outrec = {}

+ for key in self.fields:

+ outrec[key] = m.group(key)

+ return outrec

+ def convert(self, display_input_lines, display_converted_lines):

+ """

+ The out_line_pattern is applied here. It contains one '%s' placeholder per

+ field, so it must be formatted with a tuple holding one value per field.

+ This is done in the line:

+ out_line = self.out_line_pattern %tuple(

+ [outrec[key] for key in self.fields])

+ For two fields, this translates to:

+ out_line = "{u'NAME_FIRST': u'%s', u'NAME_MIDDLE': u'%s',}," % (

+ outrec['NAME_FIRST'], outrec['NAME_MIDDLE'])

+ """

+ with open(self.filein) as fin:

+ with codecs.open(self.fileout, mode = "wb",

+ encoding = "utf-8-sig") as fout:

+ i = 0

+ fout.write("[")

+ fout.write(os.linesep)

+ for line in fin.readlines():

+ line = line.strip()

+ if not line:

+ continue

+ line = unicode(line, 'UTF-8')

+ outrec = self.__getRec(line)

+ if outrec:

+ i += 1

+ out_line = self.out_line_pattern %tuple(

+ [outrec[key] for key in self.fields])

+ fout.write(out_line)

+ fout.write(os.linesep)

+ if display_input_lines:

+ print "\n%d: %s" %(i, line.encode(sys.stdout.encoding, 'ignore'))

+ if display_converted_lines:

+ print "\tconverted to: %s" %out_line.encode(

+ sys.stdout.encoding, 'ignore')

+ else:

+ if not display_input_lines and not i % 10:

+ print "\t%d lines converted so far!" %i

+ fout.write("]")

+ fout.write(os.linesep)

+ print

+ print "%d lines converted SUCCESSFULLY!" %i

+ print "--- FINISHED ---"

+ print

+def main():

+ c = Converter(FIELDS, INPUT_FILE, OUTPUT_FILE)

+ c.convert(DISPLAY_INPUT_LINES, DISPLAY_CONVERTED_LINES)

+if __name__ == '__main__':

+ main()

« chrome/test/functional/autofill.py ('K') | « chrome/test/functional/autofill.py ('k') | no next file » | no next file with comments »