OLD | NEW |
---|---|
1 # Copyright 2014 The Chromium Authors. All rights reserved. | 1 # Copyright 2014 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 from third_party.cloudstorage import cloudstorage_api | 5 import json |
6 from third_party.cloudstorage import common | 6 import logging |
7 from third_party.cloudstorage import errors | 7 import posixpath |
| 8 import traceback |
| 9 import urllib |
8 | 10 |
9 from docs_server_utils import StringIdentity | 11 from docs_server_utils import StringIdentity |
| 12 from environment_wrappers import CreateUrlFetcher |
Ken Rockot(use gerrit already)
2015/05/26 00:26:24
Unfortunately cloudstorage_api only works from App Engine.
10 from file_system import FileSystem, FileNotFoundError, StatInfo | 13 from file_system import FileSystem, FileNotFoundError, StatInfo |
11 from future import Future | 14 from future import Future |
12 from path_util import ( | 15 from path_util import ( |
13 AssertIsDirectory, AssertIsFile, AssertIsValid, IsDirectory, Join) | 16 AssertIsDirectory, AssertIsFile, AssertIsValid, IsDirectory, Join) |
14 | 17 |
15 import logging | |
16 import traceback | |
17 | |
18 | 18 |
19 # See gcs_file_system_provider.py for documentation on using Google Cloud | 19 # See gcs_file_system_provider.py for documentation on using Google Cloud |
20 # Storage as a filesystem. | 20 # Storage as a filesystem. |
21 # | 21 # |
22 # Note that the path requirements for GCS are different for the docserver; | 22 # Note that the path requirements for GCS are different for the docserver; |
23 # GCS requires that paths start with a /, we require that they don't. | 23 # GCS requires that paths start with a /, we require that they don't. |
24 | 24 |
25 | 25 |
26 # Name of the file containing the Git hash of the latest commit sync'ed | 26 # Name of the file containing the Git hash of the latest commit sync'ed |
27 # to Cloud Storage. This file is generated by the Github->GCS sync script | 27 # to Cloud Storage. This file is generated by the Github->GCS sync script |
28 LAST_COMMIT_HASH_FILENAME = '.__lastcommit.txt' | 28 _LAST_COMMIT_HASH_FILENAME = '.__lastcommit.txt' |
29 | 29 |
30 def _ReadFile(filename): | |
31 AssertIsFile(filename) | |
32 try: | |
33 with cloudstorage_api.open('/' + filename, 'r') as f: | |
34 return f.read() | |
35 except errors.Error: | |
36 raise FileNotFoundError('Read failed for %s: %s' % (filename, | |
37 traceback.format_exc())) | |
38 | 30 |
39 def _ListDir(dir_name, recursive=False): | 31 # Base URL for GCS requests. |
40 AssertIsDirectory(dir_name) | 32 _STORAGE_API_BASE = 'https://www.googleapis.com/storage/v1' |
41 try: | |
42 # The listbucket method uses a prefix approach to simulate hierarchy. | |
43 # Calling it with the "delimiter" argument set to '/' gets only files | |
44 # directly inside the directory, not all recursive content. | |
45 delimiter = None if recursive else '/' | |
46 files = cloudstorage_api.listbucket('/' + dir_name, delimiter=delimiter) | |
47 return [os_path.filename.lstrip('/')[len(dir_name):] for os_path in files] | |
48 except errors.Error: | |
49 raise FileNotFoundError('cloudstorage.listbucket failed for %s: %s' % | |
50 (dir_name, traceback.format_exc())) | |
51 | 33 |
52 def _CreateStatInfo(bucket, path): | |
53 full_path = Join(bucket, path) | |
54 last_commit_file = Join(bucket, LAST_COMMIT_HASH_FILENAME) | |
55 try: | |
56 last_commit = _ReadFile(last_commit_file) | |
57 if IsDirectory(full_path): | |
58 child_versions = dict((filename, last_commit) | |
59 for filename in _ListDir(full_path)) | |
60 else: | |
61 child_versions = None | |
62 return StatInfo(last_commit, child_versions) | |
63 except (TypeError, errors.Error): | |
64 raise FileNotFoundError('cloudstorage.stat failed for %s: %s' % (path, | |
65 traceback.format_exc())) | |
66 | 34 |
67 class CloudStorageFileSystem(FileSystem): | 35 class CloudStorageFileSystem(FileSystem): |
68 '''FileSystem implementation which fetches resources from Google Cloud | 36 '''FileSystem implementation which fetches resources from Google Cloud |
69 Storage. | 37 Storage. |
70 ''' | 38 ''' |
71 def __init__(self, bucket, debug_access_token=None, debug_bucket_prefix=None): | 39 def __init__(self, bucket, debug_bucket_prefix=None): |
72 self._bucket = bucket | 40 self._bucket = bucket |
73 if debug_access_token: | 41 self._access_token = None |
74 logging.debug('gcs: using debug access token: %s' % debug_access_token) | 42 self._last_commit_hash = None |
75 common.set_access_token(debug_access_token) | |
76 if debug_bucket_prefix: | |
77 logging.debug('gcs: prefixing all bucket names with %s' % | |
78 debug_bucket_prefix) | |
79 self._bucket = debug_bucket_prefix + self._bucket | |
80 AssertIsValid(self._bucket) | 43 AssertIsValid(self._bucket) |
81 | 44 |
82 def Read(self, paths, skip_not_found=False): | 45 def Read(self, paths, skip_not_found=False): |
83 def resolve(): | 46 def resolve(): |
84 try: | 47 result = {} |
85 result = {} | 48 for path in paths: |
86 for path in paths: | 49 if IsDirectory(path): |
87 full_path = Join(self._bucket, path) | 50 result[path] = self._ListDir(path) |
88 logging.debug('gcs: requested path "%s", reading "%s"' % | 51 else: |
89 (path, full_path)) | 52 result[path] = self._ReadFile(path) |
90 if IsDirectory(path): | 53 return result |
91 result[path] = _ListDir(full_path) | |
92 else: | |
93 result[path] = _ReadFile(full_path) | |
94 return result | |
95 except errors.AuthorizationError: | |
96 self._warnAboutAuthError() | |
97 raise | |
98 | 54 |
99 return Future(callback=resolve) | 55 return Future(callback=resolve) |
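Read() is lazy: the GCS fetches only run once the returned Future is evaluated. A usage sketch (the bucket name and paths are invented):

```python
# Hypothetical usage; 'example-docs-bucket' and the paths are made up.
fs = CloudStorageFileSystem('example-docs-bucket')
future = fs.Read(['docs/', 'docs/index.html'])
# The fetches happen here: directories resolve to listings, files to contents.
data = future.Get()
```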
100 | 56 |
101 def Refresh(self): | 57 def Refresh(self): |
102 return Future(value=()) | 58 return Future(value=()) |
103 | 59 |
104 def Stat(self, path): | 60 def Stat(self, path): |
105 AssertIsValid(path) | 61 AssertIsValid(path) |
106 try: | 62 return self._CreateStatInfo(path) |
107 return _CreateStatInfo(self._bucket, path) | |
108 except errors.AuthorizationError: | |
109 self._warnAboutAuthError() | |
110 raise | |
111 | 63 |
112 def GetIdentity(self): | 64 def GetIdentity(self): |
113 return '@'.join((self.__class__.__name__, StringIdentity(self._bucket))) | 65 return '@'.join((self.__class__.__name__, StringIdentity(self._bucket))) |
114 | 66 |
| 67 def _CreateStatInfo(self, path): |
| 68 if not self._last_commit_hash: |
| 69 self._last_commit_hash = self._ReadFile(_LAST_COMMIT_HASH_FILENAME) |
| 70 if IsDirectory(path): |
| 71 child_versions = dict((filename, self._last_commit_hash) |
| 72 for filename in self._ListDir(path)) |
| 73 else: |
| 74 child_versions = None |
| 75 return StatInfo(self._last_commit_hash, child_versions) |
| 76 |
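Because the whole bucket is synced in one pass, Stat() reports the same version, the last synced commit hash, for a directory and for each of its children. An illustration (the hash and file names are invented):

```python
# Illustrative values only.
info = fs.Stat('docs/')
info.version         # e.g. '9f37a0b...', the contents of .__lastcommit.txt
info.child_versions  # {'index.html': '9f37a0b...', 'static/': '9f37a0b...'}
```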
| 77 def _ReadFile(self, path): |
| 78 AssertIsFile(path) |
| 79 return self._FetchObjectData(path) |
| 80 |
| 81 def _ListDir(self, path, recursive=False): |
| 82 AssertIsDirectory(path) |
| 83 # The listbucket method uses a prefix approach to simulate hierarchy. |
| 84 # Calling it with the "delimiter" argument set to '/' gets only files |
| 85 # directly inside the directory, not all recursive content. |
| 86 |
| 87 # Subdirectories are returned in the 'prefixes' property, but they are |
| 88 # full paths from the root. This plucks off the name of the leaf with a |
| 89 # trailing slash. |
| 90 def path_from_prefix(prefix): |
| 91 return posixpath.split(posixpath.split(prefix)[0])[1] + '/' |
| 92 |
| 93 query = { 'prefix': path } |
| 94 if not recursive: |
| 95 query['delimiter'] = '/' |
| 96 root_object = json.loads(self._FetchObject('', query=query)) |
| 97 files = [posixpath.basename(o['name']) |
| 98 for o in root_object.get('items', [])] |
| 99 dirs = [path_from_prefix(prefix) |
| 100 for prefix in root_object.get('prefixes', [])] |
| 101 return files + dirs |
| 102 |
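The prefix/delimiter handling is easiest to see against a sample objects.list response from the GCS JSON API; the object names below are invented, but the response shape is the API's:

```python
import json
import posixpath

# Invented response for GET .../b/<bucket>/o?prefix=docs/&delimiter=/
raw = json.dumps({
    'items': [{'name': 'docs/index.html'}, {'name': 'docs/README'}],
    'prefixes': ['docs/static/', 'docs/examples/'],
})
root_object = json.loads(raw)
files = [posixpath.basename(o['name']) for o in root_object.get('items', [])]
dirs = [posixpath.split(posixpath.split(p)[0])[1] + '/'
        for p in root_object.get('prefixes', [])]
assert files == ['index.html', 'README']
assert dirs == ['static/', 'examples/']
```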
| 103 def _FetchObject(self, path, query={}): |
| 104 # Escape the path, including slashes. |
| 105 url_path = urllib.quote(path.lstrip('/'), safe='') |
| 106 fetcher = CreateUrlFetcher() |
| 107 object_url = '%s/b/%s/o/%s' % (_STORAGE_API_BASE, self._bucket, url_path) |
| 108 response = fetcher.Fetch(object_url, query=query) |
| 109 if response.status_code != 200: |
| 110 raise FileNotFoundError( |
| 111 'Path %s not found in GCS bucket %s' % (path, self._bucket)) |
| 112 return response.content |
| 113 |
| 114 def _FetchObjectData(self, path, query={}): |
| 115 q = query.copy() |
| 116 q.update({ 'alt': 'media' }) |
| 117 return self._FetchObject(path, query=q) |
| 118 |
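The two fetch helpers differ only in the alt=media query parameter, which asks GCS for the object body rather than its JSON metadata. A sketch of the URLs they produce (bucket and object names invented):

```python
import urllib

path = 'docs/index.html'
url_path = urllib.quote(path.lstrip('/'), safe='')   # 'docs%2Findex.html'
base = 'https://www.googleapis.com/storage/v1'
# _FetchObject: object metadata as JSON.
meta_url = '%s/b/%s/o/%s' % (base, 'example-docs-bucket', url_path)
# _FetchObjectData adds alt=media to retrieve the raw contents instead.
media_url = meta_url + '?alt=media'
```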
115 def __repr__(self): | 119 def __repr__(self): |
116 return 'CloudStorageFileSystem(%s)' % self._bucket | 120 return 'CloudStorageFileSystem(%s)' % self._bucket |
117 | |
118 def _warnAboutAuthError(self): | |
Ken Rockot(use gerrit already)
2015/05/26 00:26:24
Auth is no longer required. The github repos are public.
119 logging.warn(('Authentication error on Cloud Storage. Check if your' | |
120 ' appengine project has permissions to Read the GCS' | |
121 ' buckets. If you are running a local appengine server,' | |
122 ' you need to set an access_token in' | |
123 ' local_debug/gcs_debug.conf.' | |
124 ' Remember that this token expires in less than 10' | |
125 ' minutes, so keep it updated. See' | |
126 ' gcs_file_system_provider.py for instructions.')); | |
127 logging.debug(traceback.format_exc()) | |