Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(49)

Side by Side Diff: gsd_generate_index.py

Issue 3034014: Generate index in parallel.... (Closed) Base URL: svn://chrome-svn/chrome/trunk/tools/gsd_generate_index/
Patch Set: Created 10 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 #!/usr/bin/python 1 #!/usr/bin/python
2 # Copyright (c) 2008-2010 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2008-2010 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """Generate index.html files for a Google Storage for Developers directory. 6 """Generate index.html files for a Google Storage for Developers directory.
7 7
8 Google Storage for Developers provides only a raw set of objects. 8 Google Storage for Developers provides only a raw set of objects.
9 For some buckets we would like to be able to support browsing of the directory 9 For some buckets we would like to be able to support browsing of the directory
10 tree. This utility will generate the needed index and upload/update it. 10 tree. This utility will generate the needed index and upload/update it.
11 """ 11 """
12 12
13 import optparse 13 import optparse
14 import posixpath 14 import posixpath
15 import re 15 import re
16 import subprocess 16 import subprocess
17 import sys 17 import sys
18 import tempfile 18 import tempfile
19 import threading
19 20
20 21
21 GENERATED_INDEX = '_index.html' 22 GENERATED_INDEX = '_index.html'
23 NUM_THREADS = 100
22 24
23 25
24 def PathToLink(path): 26 def PathToLink(path):
25 return path.replace('gs://', 'https://sandbox.google.com/storage/') 27 return path.replace('gs://', 'https://sandbox.google.com/storage/')
26 28
27 29
28 def FixupSize(sz): 30 def FixupSize(sz):
29 """Convert a size string in bytes to human readable form. 31 """Convert a size string in bytes to human readable form.
30 32
31 Arguments: 33 Arguments:
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after
131 # Optionally update acl. 133 # Optionally update acl.
132 if options.acl: 134 if options.acl:
133 cmd = [options.gsutil, 'setacl', options.acl] 135 cmd = [options.gsutil, 'setacl', options.acl]
134 cmd += [posixpath.join(path, GENERATED_INDEX)] 136 cmd += [posixpath.join(path, GENERATED_INDEX)]
135 p = subprocess.Popen(cmd) 137 p = subprocess.Popen(cmd)
136 p.communicate() 138 p.communicate()
137 assert p.returncode == 0 139 assert p.returncode == 0
138 print '%s -- updated index' % path 140 print '%s -- updated index' % path
139 141
140 142
143 def IndexWorker(index_list, mutex, directories, objects, options):
144 while True:
145 # Pluck out one index to work on, or quit if no more work left.
146 mutex.acquire()
147 if not len(index_list):
148 mutex.release()
149 return
150 d = index_list.pop(0)
151 mutex.release()
152 # Find just this directory's children.
153 children = [o for o in objects if posixpath.dirname(o) == d]
154 # Generate it.
155 GenerateIndex(d, children, directories, options)
156
157
141 def GenerateIndexes(path, options): 158 def GenerateIndexes(path, options):
142 """Generate all relevant indexes for a given gsd path.""" 159 """Generate all relevant indexes for a given gsd path."""
143 # Get a list of objects under this prefix. 160 # Get a list of objects under this prefix.
144 cmd = [options.gsutil, 'ls', posixpath.join(path, '*')] 161 cmd = [options.gsutil, 'ls', posixpath.join(path, '*')]
145 p = subprocess.Popen(cmd, stdout=subprocess.PIPE) 162 p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
146 p_stdout, _ = p.communicate() 163 p_stdout, _ = p.communicate()
147 assert p.returncode == 0 164 assert p.returncode == 0
148 objects = str(p_stdout).splitlines() 165 objects = str(p_stdout).splitlines()
149 objects = [o for o in objects if posixpath.basename(o) != GENERATED_INDEX] 166 objects = [o for o in objects if posixpath.basename(o) != GENERATED_INDEX]
150 # Find common prefixes. 167 # Find common prefixes.
151 directories = set() 168 directories = set()
152 for o in objects: 169 for o in objects:
153 part = posixpath.dirname(o) 170 part = posixpath.dirname(o)
154 while part.startswith(path): 171 while part.startswith(path):
155 directories.add(part) 172 directories.add(part)
156 part = posixpath.dirname(part) 173 part = posixpath.dirname(part)
157 objects += list(directories) 174 objects += list(directories)
158 # Generate index for each directory. 175 # Generate index for each directory.
159 for d in directories: 176 index_list = [i for i in directories
160 # Skip directories not on the target path if any. 177 if not options.path or options.path.startswith(i)]
161 if options.path and not options.path.startswith(d): 178 # Spawn workers
162 continue 179 mutex = threading.Lock()
163 # Find just this directory's children. 180 workers = [threading.Thread(target=IndexWorker,
164 children = [o for o in objects if posixpath.dirname(o) == d] 181 args=(index_list, mutex,
165 # Generate this directory's index if needed. 182 directories, objects, options))
166 GenerateIndex(d, children, directories, options) 183 for _ in range(0, NUM_THREADS)]
184 # Start threads.
185 for w in workers:
186 w.start()
187 # Wait for them to finish.
188 for w in workers:
189 w.join()
167 return 0 190 return 0
168 191
169 192
170 def main(argv): 193 def main(argv):
171 parser = optparse.OptionParser(usage='usage: %prog [options] gs://base-dir') 194 parser = optparse.OptionParser(usage='usage: %prog [options] gs://base-dir')
172 parser.add_option('-p', '--path', dest='path', 195 parser.add_option('-p', '--path', dest='path',
173 help='only update indexes on a given path') 196 help='only update indexes on a given path')
174 parser.add_option('-a', dest='acl', help='acl to set on indexes') 197 parser.add_option('-a', dest='acl', help='acl to set on indexes')
175 parser.add_option('-f', '--force', action='store_true', default=False, 198 parser.add_option('-f', '--force', action='store_true', default=False,
176 dest='force', help='upload all indexes even on match') 199 dest='force', help='upload all indexes even on match')
177 parser.add_option('', '--gsutil', default='gsutil', 200 parser.add_option('', '--gsutil', default='gsutil',
178 dest='gsutil', help='path to gsutil') 201 dest='gsutil', help='path to gsutil')
179 options, args = parser.parse_args(argv) 202 options, args = parser.parse_args(argv)
180 if len(args) != 2 or not args[1].startswith('gs://'): 203 if len(args) != 2 or not args[1].startswith('gs://'):
181 parser.print_help() 204 parser.print_help()
182 return 1 205 return 1
183 return GenerateIndexes(args[1], options) 206 return GenerateIndexes(args[1], options)
184 207
185 208
186 if __name__ == '__main__': 209 if __name__ == '__main__':
187 sys.exit(main(sys.argv)) 210 sys.exit(main(sys.argv))
OLD | NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698