Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(255)

Unified Diff: chrome/test/functional/dataset-converter.py

Issue 6246147: Test Autofill's ability to merge duplicate profiles and... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« chrome/test/functional/autofill.py ('K') | « chrome/test/functional/autofill.py ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: chrome/test/functional/dataset-converter.py
===================================================================
--- chrome/test/functional/dataset-converter.py (revision 0)
+++ chrome/test/functional/dataset-converter.py (revision 0)
@@ -0,0 +1,151 @@
+#!/usr/bin/python
+# Copyright (c) 2010 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+
+"""Takes in a CSV profiles file and outputs to a pyAuto dictionary list format
+for converting Autofill Profile datasets.
+
+Used for test autofill.AutoFillTest.testMergeDuplicateProfilesInAutofill.
+"""
+
+# Input and output filenames. Each may be a relative path or a full
+# path such as 'c:\folder\file' or '/home/folder/file'.
+INPUT_FILE = r"../data/autofill/dataset.txt"
+OUTPUT_FILE = r"../data/autofill/dataset_duplicate-profiles.txt"
+
+# Controls output display on the screen.
+DISPLAY_INPUT_LINES = True
+DISPLAY_CONVERTED_LINES = False
+
+# Field names, listed in the order in which their values appear in each
+# '|'-separated input line. Converter builds its matching pattern and its
+# output line format from this list, in this order.
+FIELDS = [
+ u'NAME_FIRST',
+ u'NAME_MIDDLE',
+ u'NAME_LAST',
+ u'EMAIL_ADDRESS',
+ u'COMPANY_NAME',
+ u'ADDRESS_HOME_LINE1',
+ u'ADDRESS_HOME_LINE2',
+ u'ADDRESS_HOME_CITY',
+ u'ADDRESS_HOME_STATE',
+ u'ADDRESS_HOME_ZIP',
+ u'ADDRESS_HOME_COUNTRY',
+ u'PHONE_HOME_WHOLE_NUMBER',
+ u'PHONE_FAX_WHOLE_NUMBER',
+]
+
+import codecs
+import re
+import sys
+
+class Converter(object):
+ """Converts '|'-separated profile lines into dictionary-literal lines."""
+ def __init__(self, fields, filein, fileout):
+ """
+ Builds the input matching pattern and the output line format.
+
+ Args:
+ fields: list of field names, in the order they appear in an input line.
+ filein: path of the input file to read.
+ fileout: path of the output file to write.
+
+ The pattern is a regular expression which has named parenthesis groups
+ like this (?P<name>...) in order to match the '|' separated fields.
+ If we had only the NAME_FIRST and NAME_MIDDLE fields (e.g 'Jared|JV') our
+ pattern would be: "(?P<NAME_FIRST>.*?)\|(?P<NAME_MIDDLE>.*?)$"
+
+ This means that '(?P<NAME_FIRST> regexp)\|' matches whatever regular
+ expression is inside the parentheses, and indicates the start and end of a
+ group; the contents of a group can be retrieved after a match has been
+ performed using the symbolic group name 'NAME_FIRST'.
+
+ The regexp is '.*?'. The '.*' part means to match 0 or more repetitions of
+ any character. The following '?' makes the regexp non-greedy, meaning it
+ will stop at the first occurrence of the '|' character (escaped in the
+ pattern).
+
+ For '(?P<NAME_MIDDLE>.*?)$' there is no '|' at the end, so we have '$' to
+ indicate the end of the line.
+
+ The full pattern is constructed once, here, from the given fields list.
+
+ The out_line_pattern for one field: "{u'NAME_FIRST': u'%s',"
+ is ready to accept the value for the 'NAME_FIRST' field once it is extracted
+ from an input line using the above group pattern.
+
+ 'pattern' is used in __getRec(line) to construct and return a dictionary
+ from a line.
+
+ 'out_line_pattern' is used in 'convert()' to construct the final dataset
+ line that will be printed to the output file.
+ """
+ # Keep a private copy so later changes to the caller's list have no effect.
+ self.fields = fields[:]
+ self.pattern = '(?P<%s>.*?)' %fields[0]
+ for key in fields[1:]:
+ self.pattern += '\|(?P<%s>.*?)' %key
+ self.pattern = self.pattern + "$"
+
+ self.out_line_pattern = u"{"
+ for key in fields:
+ # The second '%s' is substituted by a literal '%s', leaving a placeholder
+ # for the field value that is filled in later by convert().
+ self.out_line_pattern += u"u'%s': u'%s', " %(key, "%s")
+ # Drop the trailing space; the trailing comma stays before the closing '}'.
+ self.out_line_pattern = self.out_line_pattern[:-1] + "},\n"
+
+ self.filein = filein
+ self.fileout = fileout
+
+ def __getRec(self, line):
+ """
+ Constructs and returns a dictionary from a line using self.pattern.
+ See the constructor above for how the pattern is built.
+
+ Every single quote in the line is backslash-escaped before matching, so
+ the returned values can be placed inside single-quoted string literals.
+
+ Returns a dictionary mapping each field name to its matched text, or None
+ if the line does not match the pattern.
+ """
+ rePat = re.compile("'", re.UNICODE)
+ line = rePat.sub(r"\'", line)
+ rePat = re.compile(self.pattern, re.UNICODE)
+ m = rePat.match(line)
+ if m:
+ outrec = {}
+ for key in self.fields:
+ outrec[key] = m.group(key)
+ return outrec
+
+ def convert(self, display_input_lines, display_converted_lines):
+ """
+ Reads self.filein, converts every matching line and writes the result to
+ self.fileout as a list: a '[' line, one dictionary line per converted
+ input line, and a closing ']' line.
+
+ Args:
+ display_input_lines: if true, print each converted input line, numbered.
+ display_converted_lines: if true, print each converted output line.
+
+ Blank lines are skipped, and lines that do not match the pattern are
+ skipped without any message. When input lines are not being displayed, a
+ progress message is printed at most once every 10 converted lines.
+
+ The out_line_pattern is applied here. It contains one '%s' per field, so
+ it must be formatted with a tuple holding one value per field.
+ This is done in the line:
+ out_line = self.out_line_pattern %tuple(
+ [outrec[key] for key in self.fields])
+ For two fields, this translates to:
+ out_line = "{u'NAME_FIRST': u'%s', u'NAME_MIDDLE': u'%s',}," % (
+ outrec['NAME_FIRST'], outrec['NAME_MIDDLE'])
+ """
+ with open(self.filein) as fin:
+ with codecs.open(self.fileout, mode = "wb",
+ encoding = "utf-8-sig") as fout:
+ # i counts the lines converted so far.
+ i = 0
+ fout.write("[\n")
+ for line in fin.readlines():
+ line = line.strip()
+ if not line:
+ continue
+ # Input bytes are decoded as UTF-8 before matching.
+ line = unicode(line, 'UTF-8')
+ outrec = self.__getRec(line)
+ if outrec:
+ i += 1
+ out_line = self.out_line_pattern %tuple(
+ [outrec[key] for key in self.fields])
+ fout.write(out_line)
+ if display_input_lines:
+ print "\n%d: %s" %(i, line.encode(sys.stdout.encoding, 'ignore'))
+ if display_converted_lines:
+ print "\tconverted to: %s" %out_line.encode(
+ sys.stdout.encoding, 'ignore')
+ else:
+ if not display_input_lines and not i % 10:
+ print "\t%d lines converted so far!" %i
+
+ fout.write("]\n")
+ print
+ print "%d lines converted SUCCESSFULLY!" %i
+ print "--- FINISHED ---"
+ print
+
+
+def main():
+ """Converts INPUT_FILE into OUTPUT_FILE using the module-level settings."""
+ c = Converter(FIELDS, INPUT_FILE, OUTPUT_FILE)
+ c.convert(DISPLAY_INPUT_LINES, DISPLAY_CONVERTED_LINES)
+
+if __name__ == '__main__':
+ main()
« chrome/test/functional/autofill.py ('K') | « chrome/test/functional/autofill.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698