Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(53)

Side by Side Diff: chrome/test/functional/dataset-converter.py

Issue 6246147: Test Autofill's ability to merge duplicate profiles and... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 #!/usr/bin/python
2 # Copyright (c) 2010 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6
7 """Takes in a CSV profiles file and outputs to a pyAuto dictionary list format
8 for converting Autofill Profile datasets.
9
10 Used for test autofill.AutoFillTest.testMergeDuplicateProfilesInAutofill.
11 """
12
# Input and output file names. Either one may be a full path, for example
# 'c:\folder\file' or '/home/folder/file'.
INPUT_FILE = "../data/autofill/dataset.txt"
OUTPUT_FILE = "../data/autofill/dataset_duplicate-profiles.txt"

# Console output switches: echo every input line / every converted line.
DISPLAY_INPUT_LINES = True
DISPLAY_CONVERTED_LINES = False

# Profile field names, in the order their values appear in each
# '|'-separated input line.
FIELDS = [
    u'NAME_FIRST', u'NAME_MIDDLE', u'NAME_LAST',
    u'EMAIL_ADDRESS', u'COMPANY_NAME',
    u'ADDRESS_HOME_LINE1', u'ADDRESS_HOME_LINE2',
    u'ADDRESS_HOME_CITY', u'ADDRESS_HOME_STATE',
    u'ADDRESS_HOME_ZIP', u'ADDRESS_HOME_COUNTRY',
    u'PHONE_HOME_WHOLE_NUMBER', u'PHONE_FAX_WHOLE_NUMBER',
]
37
38 import codecs
39 import re
40 import sys
41
class Converter(object):
    """Converts '|'-separated profile records into a list of dict literals.

    Every non-blank line of the input file holds one record whose values
    appear in the same order as the field names given to the constructor.
    The output file receives a Python list literal containing one
    dictionary literal per record that matched.
    """

    def __init__(self, fields, filein, fileout):
        r"""Builds the matching regexp and the output template from 'fields'.

        The pattern is a regular expression which has named parenthesis
        groups like this (?P<name>...) in order to match the '|' separated
        fields. If we had only the NAME_FIRST and NAME_MIDDLE fields
        (e.g. 'Jared|JV') our pattern would be:
          "(?P<NAME_FIRST>.*?)\|(?P<NAME_MIDDLE>.*?)$"

        '(?P<NAME_FIRST>regexp)' matches whatever regular expression is
        inside the parentheses; the contents of the group can be retrieved
        after a match using the symbolic group name 'NAME_FIRST'.

        The regexp is '.*?'. '.*' means match 0 or more repetitions of any
        character. The following '?' makes it non-greedy, so it stops at
        the first occurrence of the '|' character (escaped in the pattern).
        The last group is followed by '$' instead of '|' to anchor it to the
        end of the line.

        The out_line_pattern for one field, "{u'NAME_FIRST': u'%s',", is
        ready to accept the value extracted for that field.

        'pattern' is used in __getRec(line) to construct and return a
        dictionary from a line. 'out_line_pattern' is used in convert() to
        construct the line that is written to the output file.

        Args:
          fields: list of field names; copied, and used as group names.
          filein: path of the '|'-separated input file.
          fileout: path of the file the converted records are written to.
        """
        self.fields = fields[:]
        self.pattern = r'\|'.join(
            '(?P<%s>.*?)' % key for key in self.fields) + '$'
        # Compiled once here instead of once per input line.
        self._record_re = re.compile(self.pattern, re.UNICODE)

        # One "u'KEY': u'%s'," entry per field, separated by single spaces.
        self.out_line_pattern = (
            u"{" +
            u" ".join(u"u'%s': u'%%s'," % key for key in self.fields) +
            u"},\n")

        self.filein = filein
        self.fileout = fileout

    @staticmethod
    def __printable(text):
        """Returns 'text' in a form that is safe to print on the console.

        Characters the console encoding cannot represent are dropped.
        sys.stdout.encoding can be None (for example when output is piped),
        in which case UTF-8 is used instead of failing.
        """
        encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'
        encoded = text.encode(encoding, 'ignore')
        if isinstance(encoded, str):
            # Python 2: print expects the encoded byte string.
            return encoded
        return encoded.decode(encoding)

    def __getRec(self, line):
        """Constructs and returns a dictionary from a line using 'pattern'.

        Backslashes and single quotes are escaped first, because each value
        ends up inside a u'...' literal in the output file. Backslashes must
        be doubled before the quotes are escaped, otherwise the backslash
        added for a quote would itself be doubled.

        Args:
          line: one stripped, decoded input record.

        Returns:
          A dictionary mapping each field name to its escaped value, or
          None when the line does not match the pattern.
        """
        escaped = line.replace(u"\\", u"\\\\").replace(u"'", u"\\'")
        match = self._record_re.match(escaped)
        if match is None:
            return None
        return dict((key, match.group(key)) for key in self.fields)

    def convert(self, display_input_lines, display_converted_lines):
        """Converts the input file and writes the dataset to the output file.

        Each record is formatted with a tuple holding one value per '%s' in
        out_line_pattern:
          out_line = self.out_line_pattern % tuple(
              outrec[key] for key in self.fields)
        For two fields this translates to:
          out_line = "{u'NAME_FIRST': u'%s', u'NAME_MIDDLE': u'%s',}," % (
              outrec['NAME_FIRST'], outrec['NAME_MIDDLE'])

        Blank lines and lines that do not match the pattern are skipped.

        Args:
          display_input_lines: print every converted input line.
          display_converted_lines: print every resulting output line.
        """
        converted = 0
        # Read bytes and decode each line explicitly so the records are text
        # regardless of the interpreter's default encoding.
        with open(self.filein, 'rb') as fin:
            with codecs.open(self.fileout, mode="wb",
                             encoding="utf-8-sig") as fout:
                fout.write(u"[\n")
                for raw_line in fin:
                    raw_line = raw_line.strip()
                    if not raw_line:
                        continue
                    line = raw_line.decode('UTF-8')
                    outrec = self.__getRec(line)
                    if not outrec:
                        continue
                    converted += 1
                    out_line = self.out_line_pattern % tuple(
                        outrec[key] for key in self.fields)
                    fout.write(out_line)
                    if display_input_lines:
                        print("\n%d: %s" % (converted,
                                            self.__printable(line)))
                    if display_converted_lines:
                        print("\tconverted to: %s" %
                              self.__printable(out_line))
                    elif not display_input_lines and not converted % 10:
                        # Progress ticker, only when nothing is echoed.
                        print("\t%d lines converted so far!" % converted)

                fout.write(u"]\n")
        print("")
        print("%d lines converted SUCCESSFULLY!" % converted)
        print("--- FINISHED ---")
        print("")
144
145
def main():
    """Converts INPUT_FILE into OUTPUT_FILE using the module-level settings."""
    converter = Converter(FIELDS, INPUT_FILE, OUTPUT_FILE)
    converter.convert(display_input_lines=DISPLAY_INPUT_LINES,
                      display_converted_lines=DISPLAY_CONVERTED_LINES)


if __name__ == '__main__':
    main()
OLDNEW
« chrome/test/functional/autofill.py ('K') | « chrome/test/functional/autofill.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698