OLD | NEW |
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 # Copyright (c) 2008-2010 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2008-2010 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Generate index.html files for a Google Storage for Developers directory. | 6 """Generate index.html files for a Google Storage for Developers directory. |
7 | 7 |
8 Google Storage for Developers provides only a raw set of objects. | 8 Google Storage for Developers provides only a raw set of objects. |
9 For some buckets we would like to be able to support browsing of the directory | 9 For some buckets we would like to be able to support browsing of the directory |
10 tree. This utility will generate the needed index and upload/update it. | 10 tree. This utility will generate the needed index and upload/update it. |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
51 """Collect size, date, md5 for a give gsd path.""" | 51 """Collect size, date, md5 for a give gsd path.""" |
52 # Check current state. | 52 # Check current state. |
53 cmd = [options.gsutil, 'ls', '-L', path] | 53 cmd = [options.gsutil, 'ls', '-L', path] |
54 p = subprocess.Popen(cmd, stdout=subprocess.PIPE) | 54 p = subprocess.Popen(cmd, stdout=subprocess.PIPE) |
55 p_stdout, _ = p.communicate() | 55 p_stdout, _ = p.communicate() |
56 assert p.returncode == 0 | 56 assert p.returncode == 0 |
57 # Extract interesting fields. | 57 # Extract interesting fields. |
58 fields = {} | 58 fields = {} |
59 fields['size'] = FixupSize(re.search('\tObject size:\t([0-9]+)\n', | 59 fields['size'] = FixupSize(re.search('\tObject size:\t([0-9]+)\n', |
60 p_stdout).group(1)) | 60 p_stdout).group(1)) |
61 fields['md5'] = re.search('\tMD5:\t([^\n]+)\n', p_stdout).group(1) | 61 fields['md5'] = re.search('\t(MD5|Etag):\t([^\n]+)\n', p_stdout).group(2) |
62 fields['date'] = re.search('\tLast mod:\t([^\n]+)\n', p_stdout).group(1) | 62 fields['date'] = re.search('\tLast mod:\t([^\n]+)\n', p_stdout).group(1) |
63 return fields | 63 return fields |
64 | 64 |
65 | 65 |
66 def GenerateIndex(path, children, directories, options): | 66 def GenerateIndex(path, children, directories, options): |
67 """Generate index for a given path as needed.""" | 67 """Generate index for a given path as needed.""" |
68 # Generate index content. | 68 # Generate index content. |
69 index = '' | 69 index = '' |
70 index += '<html>' | 70 index += '<html>' |
71 index += '<head>' | 71 index += '<head>' |
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
133 # Optionally update acl. | 133 # Optionally update acl. |
134 if options.acl: | 134 if options.acl: |
135 cmd = [options.gsutil, 'setacl', options.acl] | 135 cmd = [options.gsutil, 'setacl', options.acl] |
136 cmd += [posixpath.join(path, GENERATED_INDEX)] | 136 cmd += [posixpath.join(path, GENERATED_INDEX)] |
137 p = subprocess.Popen(cmd) | 137 p = subprocess.Popen(cmd) |
138 p.communicate() | 138 p.communicate() |
139 assert p.returncode == 0 | 139 assert p.returncode == 0 |
140 print '%s -- updated index' % path | 140 print '%s -- updated index' % path |
141 | 141 |
142 | 142 |
def IndexWorker(index_list, errors, mutex, directories, objects, options):
  """Worker loop: generate indexes until the shared work list is empty.

  Args:
    index_list: shared list of directory paths still to be indexed; entries
        are popped from the front while holding mutex.
    errors: shared list that collects exceptions raised by GenerateIndex;
        appended to while holding mutex.
    mutex: lock guarding index_list, errors and the error printout.
    directories: passed through unchanged to GenerateIndex.
    objects: list of paths; the ones whose posixpath.dirname equals the
        directory being processed are handed to GenerateIndex as children.
    options: passed through unchanged to GenerateIndex.
  """
  while True:
    # Pluck out one index to work on, or quit if no more work left.
    # 'with' guarantees the lock is released on every exit path.
    with mutex:
      if not index_list:
        return
      d = index_list.pop(0)
    # Find just this directory's children.
    children = [o for o in objects if posixpath.dirname(o) == d]
    # Generate it.
    try:
      GenerateIndex(d, children, directories, options)
    except Exception as e:
      # Record the failure and move on to the next directory. Lists have no
      # push() method; using it here would raise inside the handler while the
      # lock is held and leave the other workers blocked forever.
      with mutex:
        errors.append(e)
        print(str(e))
156 | 162 |
157 | 163 |
def GenerateIndexes(path, options):
  """Generate all relevant indexes for a given gsd path.

  Lists everything under path with gsutil, derives the set of directories
  from the object names, and hands those directories to NUM_THREADS
  IndexWorker threads.

  Args:
    path: base path to list; objects are listed under posixpath.join(path, '*').
    options: parsed options; options.gsutil is the gsutil executable and
        options.path, when set, restricts which directories get indexed.

  Returns:
    2 if any worker recorded an error, 0 otherwise.
  """
  # Ask gsutil for every object below the prefix.
  listing = subprocess.Popen(
      [options.gsutil, 'ls', posixpath.join(path, '*')],
      stdout=subprocess.PIPE)
  listing_out, _ = listing.communicate()
  assert listing.returncode == 0
  # Previously generated index files are not part of the listing we publish.
  objects = [line for line in str(listing_out).splitlines()
             if posixpath.basename(line) != GENERATED_INDEX]
  # Every ancestor of an object, up to and including path, is a directory.
  directories = set()
  for obj in objects:
    ancestor = posixpath.dirname(obj)
    while ancestor.startswith(path):
      directories.add(ancestor)
      ancestor = posixpath.dirname(ancestor)
  # Directories are listed alongside plain objects as potential children.
  objects.extend(directories)
  # Decide which directories need an index.
  if options.path:
    index_list = [d for d in directories if options.path.startswith(d)]
  else:
    index_list = list(directories)
  # Shared state for the worker threads.
  mutex = threading.Lock()
  errors = []
  worker_args = (index_list, errors, mutex, directories, objects, options)
  workers = []
  for _ in range(NUM_THREADS):
    workers.append(threading.Thread(target=IndexWorker, args=worker_args))
  # Launch every worker, then block until all of them are done.
  for worker in workers:
    worker.start()
  for worker in workers:
    worker.join()
  return 2 if errors else 0
191 | 200 |
192 | 201 |
def main(argv):
  """Parse the command line and generate indexes for the requested base dir.

  Args:
    argv: full argument vector, including the program name at index 0.

  Returns:
    1 (after printing help) unless exactly one positional argument starting
    with 'gs://' is given; otherwise the result of GenerateIndexes.
  """
  parser = optparse.OptionParser(usage='usage: %prog [options] gs://base-dir')
  parser.add_option(
      '-p', '--path', dest='path',
      help='only update indexes on a given path')
  parser.add_option('-a', dest='acl', help='acl to set on indexes')
  parser.add_option(
      '-f', '--force', action='store_true', default=False, dest='force',
      help='upload all indexes even on match')
  parser.add_option(
      '', '--gsutil', default='gsutil', dest='gsutil',
      help='path to gsutil')
  options, positional = parser.parse_args(argv)
  # positional[0] is the program name, so a single base dir means length 2.
  if len(positional) == 2 and positional[1].startswith('gs://'):
    return GenerateIndexes(positional[1], options)
  parser.print_help()
  return 1
207 | 216 |
208 | 217 |
# When run as a script, use main()'s return value as the process exit status.
if __name__ == '__main__':
  sys.exit(main(sys.argv))
OLD | NEW |