Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(276)

Side by Side Diff: chrome/test/functional/dataset-converter.py

Issue 6246147: Test Autofill's ability to merge duplicate profiles and... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 #!/usr/bin/python
2 # Copyright (c) 2010 The Chromium Authors. All rights reserved.
Nirnimesh 2011/02/07 23:26:20 2011
dyu1 2011/02/09 19:44:56 Done.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
Nirnimesh 2011/02/07 23:26:20 Remove blank line
dyu1 2011/02/09 19:44:56 Done.
6
7 """Reads a '|'-separated profiles file and writes it out as a list of
8 dictionaries, for converting Autofill profile datasets.
9
10 Used for test autofill.AutoFillTest.testMergeDuplicateProfilesInAutofill.
11 """
12
# Input and output filenames. Each may be a relative path or a full path such
# as 'c:\folder\file' or '/home/folder/file'.
INPUT_FILE = "../data/autofill/dataset.txt"
OUTPUT_FILE = "../data/autofill/dataset_duplicate-profiles.txt"

# Control what is displayed on the screen during conversion.
DISPLAY_INPUT_LINES = True
DISPLAY_CONVERTED_LINES = False

# Field names, in the order their '|'-separated values appear on each line.
FIELDS = [
  u'NAME_FIRST',
  u'NAME_MIDDLE',
  u'NAME_LAST',
  u'EMAIL_ADDRESS',
  u'COMPANY_NAME',
  u'ADDRESS_HOME_LINE1',
  u'ADDRESS_HOME_LINE2',
  u'ADDRESS_HOME_CITY',
  u'ADDRESS_HOME_STATE',
  u'ADDRESS_HOME_ZIP',
  u'ADDRESS_HOME_COUNTRY',
  u'PHONE_HOME_WHOLE_NUMBER',
  u'PHONE_FAX_WHOLE_NUMBER',
]
37
38 import codecs
39 import os
40 import re
41 import sys
42
Nirnimesh 2011/02/07 23:26:20 Leave another blank line
dyu1 2011/02/09 19:44:56 Done.
class Converter(object):
  """Converts '|'-separated profile lines into Python dictionary literals.

  Every non-blank input line holds one value per entry of `fields`, separated
  by '|' (e.g. 'Jared|JV' for the fields NAME_FIRST and NAME_MIDDLE). Each
  parsed line is written to the output file as one dictionary literal; the
  output file as a whole is a list literal of those dictionaries.
  """

  def __init__(self, fields, filein, fileout):
    r"""Builds the parsing pattern and the output template from `fields`.

    Args:
      fields: Ordered list of field names, one per '|'-separated column.
      filein: Path of the UTF-8 encoded input file.
      fileout: Path of the output file; it is created or overwritten.

    `pattern` is a regular expression with one named group, (?P<name>...),
    per field. If we had only the NAME_FIRST and NAME_MIDDLE fields
    (e.g. 'Jared|JV') the pattern would be:
      (?P<NAME_FIRST>.*?)\|(?P<NAME_MIDDLE>.*?)$

    '.*' matches 0 or more repetitions of any character and the following '?'
    makes the match non-greedy, so a group stops at the first '|' (escaped in
    the pattern) that lets the rest of the pattern match. The last group has
    no '|' after it, so '$' anchors it to the end of the line. The value of a
    group is retrieved after a match through its name, e.g. 'NAME_FIRST'.

    `out_line_pattern` is the output template with one '%s' per field. For the
    same two fields it is:
      {u'NAME_FIRST': u'%s', u'NAME_MIDDLE': u'%s',},

    `pattern` is used in __getRec(line) to construct a dictionary from a line.
    `out_line_pattern` is used in convert() to construct the dataset line that
    is written to the output file.
    """
    self.fields = fields[:]
    # Raw strings keep the backslash of the escaped '|' literal.
    self.pattern = r'\|'.join('(?P<%s>.*?)' % key for key in fields) + '$'
    self.out_line_pattern = (
        u"{" +
        u" ".join(u"u'%s': u'%s'," % (key, "%s") for key in fields) +
        u"},\n")
    self.filein = filein
    self.fileout = fileout

  @staticmethod
  def _console_text(text):
    """Returns `text` in a form that can be printed to stdout without errors.

    Characters the console encoding cannot represent are dropped. When stdout
    reports no encoding (e.g. it is piped or redirected), UTF-8 is used
    instead of passing None to encode(), which would raise a TypeError.
    """
    encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'
    encoded = text.encode(encoding, 'ignore')
    if isinstance(encoded, str):
      # Byte strings are the native str type here (Python 2): print them as is.
      return encoded
    return encoded.decode(encoding)

  def __getRec(self, line):
    """Constructs a dictionary from one input line using `pattern`.

    Args:
      line: One unicode input line, without surrounding whitespace.

    Returns:
      A dictionary mapping every field name to its value, escaped so it can be
      placed inside a single-quoted string literal, or None if the line does
      not match `pattern`.
    """
    # Escape backslashes first so the backslashes added in front of the
    # apostrophes are not doubled afterwards.
    escaped = line.replace(u'\\', u'\\\\').replace(u"'", u"\\'")
    match = re.match(self.pattern, escaped, re.UNICODE)
    if match is None:
      return None
    return dict((key, match.group(key)) for key in self.fields)

  def convert(self, display_input_lines, display_converted_lines):
    """Converts the input file and writes the result to the output file.

    Each parsed line is formatted with `out_line_pattern`, which needs a tuple
    holding one value per field:
      out_line = self.out_line_pattern % tuple(
          [outrec[key] for key in self.fields])
    For two fields this translates to:
      out_line = "{u'NAME_FIRST': u'%s', u'NAME_MIDDLE': u'%s',}," % (
          outrec['NAME_FIRST'], outrec['NAME_MIDDLE'])

    Blank lines are ignored. Lines that do not match `pattern` are skipped and
    their total is reported on stderr.

    Args:
      display_input_lines: If True, prints every converted input line.
      display_converted_lines: If True, prints every generated output line.
    """
    converted = 0
    skipped = 0
    # The input is read as bytes and decoded per line, so lines are split on
    # '\n' only and values never pass through the platform default encoding.
    with open(self.filein, 'rb') as fin:
      with codecs.open(self.fileout, mode="wb",
                       encoding="utf-8-sig") as fout:
        fout.write(u"[")
        fout.write(os.linesep)
        for raw_line in fin:
          raw_line = raw_line.strip()
          if not raw_line:
            continue
          line = raw_line.decode('UTF-8')
          outrec = self.__getRec(line)
          if not outrec:
            skipped += 1
            continue
          converted += 1
          out_line = self.out_line_pattern % tuple(
              [outrec[key] for key in self.fields])
          fout.write(out_line)
          fout.write(os.linesep)
          if display_input_lines:
            print("\n%d: %s" % (converted, self._console_text(line)))
          # NOTE(review): the indentation of the original was lost; this
          # assumes the progress message is the fallback used when neither
          # display flag is set -- confirm.
          if display_converted_lines:
            print("\tconverted to: %s" % self._console_text(out_line))
          elif not display_input_lines and not converted % 10:
            print("\t%d lines converted so far!" % converted)
        fout.write(u"]")
        fout.write(os.linesep)
    if skipped:
      sys.stderr.write("%d malformed lines skipped\n" % skipped)
    print("")
    print("%d lines converted SUCCESSFULLY!" % converted)
    print("--- FINISHED ---")
    print("")
148
149
def main():
  """Converts INPUT_FILE into OUTPUT_FILE using the module-level settings."""
  converter = Converter(FIELDS, INPUT_FILE, OUTPUT_FILE)
  converter.convert(DISPLAY_INPUT_LINES, DISPLAY_CONVERTED_LINES)


if __name__ == '__main__':
  main()
OLDNEW
« chrome/test/functional/autofill.py ('K') | « chrome/test/functional/autofill.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698