OLD | NEW |
---|---|
1 # Copyright 2014 The Chromium Authors. All rights reserved. | 1 # Copyright 2014 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 from third_party.cloudstorage import cloudstorage_api | 5 import json |
6 from third_party.cloudstorage import common | 6 import logging |
7 from third_party.cloudstorage import errors | 7 import posixpath |
| 8 import traceback |
| 9 import urllib |
8 | 10 |
9 from docs_server_utils import StringIdentity | 11 from docs_server_utils import StringIdentity |
| 12 from environment_wrappers import CreateUrlFetcher |
Ken Rockot(use gerrit already)
2015/05/26 00:26:24
Unfortunately cloudstorage_api only works from App Engine.
10 from file_system import FileSystem, FileNotFoundError, StatInfo | 13 from file_system import FileSystem, FileNotFoundError, StatInfo |
11 from future import Future | 14 from future import Future |
12 from path_util import ( | 15 from path_util import ( |
13 AssertIsDirectory, AssertIsFile, AssertIsValid, IsDirectory, Join) | 16 AssertIsDirectory, AssertIsFile, AssertIsValid, IsDirectory, Join) |
14 | 17 |
15 import logging | |
16 import traceback | |
17 | |
18 | 18 |
19 # See gcs_file_system_provider.py for documentation on using Google Cloud | 19 # See gcs_file_system_provider.py for documentation on using Google Cloud |
20 # Storage as a filesystem. | 20 # Storage as a filesystem. |
21 # | 21 # |
22 # Note that the path requirements for GCS are different for the docserver; | 22 # Note that the path requirements for GCS are different for the docserver; |
23 # GCS requires that paths start with a /, we require that they don't. | 23 # GCS requires that paths start with a /, we require that they don't. |
24 | 24 |
25 | 25 |
26 # Name of the file containing the Git hash of the latest commit sync'ed | 26 # Name of the file containing the Git hash of the latest commit sync'ed |
27 # to Cloud Storage. This file is generated by the Github->GCS sync script | 27 # to Cloud Storage. This file is generated by the Github->GCS sync script |
28 LAST_COMMIT_HASH_FILENAME = '.__lastcommit.txt' | 28 _LAST_COMMIT_HASH_FILENAME = '.__lastcommit.txt' |
29 | 29 |
30 def _ReadFile(filename): | |
31 AssertIsFile(filename) | |
32 try: | |
33 with cloudstorage_api.open('/' + filename, 'r') as f: | |
34 return f.read() | |
35 except errors.Error: | |
36 raise FileNotFoundError('Read failed for %s: %s' % (filename, | |
37 traceback.format_exc())) | |
38 | 30 |
39 def _ListDir(dir_name, recursive=False): | 31 # Base URL for GCS requests. |
40 AssertIsDirectory(dir_name) | 32 _STORAGE_API_BASE = 'https://www.googleapis.com/storage/v1' |
41 try: | |
42 # The listbucket method uses a prefix approach to simulate hierarchy. | |
43 # Calling it with the "delimiter" argument set to '/' gets only files | |
44 # directly inside the directory, not all recursive content. | |
45 delimiter = None if recursive else '/' | |
46 files = cloudstorage_api.listbucket('/' + dir_name, delimiter=delimiter) | |
47 return [os_path.filename.lstrip('/')[len(dir_name):] for os_path in files] | |
48 except errors.Error: | |
49 raise FileNotFoundError('cloudstorage.listbucket failed for %s: %s' % | |
50 (dir_name, traceback.format_exc())) | |
51 | 33 |
52 def _CreateStatInfo(bucket, path): | |
53 full_path = Join(bucket, path) | |
54 last_commit_file = Join(bucket, LAST_COMMIT_HASH_FILENAME) | |
55 try: | |
56 last_commit = _ReadFile(last_commit_file) | |
57 if IsDirectory(full_path): | |
58 child_versions = dict((filename, last_commit) | |
59 for filename in _ListDir(full_path)) | |
60 else: | |
61 child_versions = None | |
62 return StatInfo(last_commit, child_versions) | |
63 except (TypeError, errors.Error): | |
64 raise FileNotFoundError('cloudstorage.stat failed for %s: %s' % (path, | |
65 traceback.format_exc())) | |
66 | 34 |
67 class CloudStorageFileSystem(FileSystem): | 35 class CloudStorageFileSystem(FileSystem): |
68 '''FileSystem implementation which fetches resources from Google Cloud | 36 '''FileSystem implementation which fetches resources from Google Cloud |
69 Storage. | 37 Storage. |
70 ''' | 38 ''' |
71 def __init__(self, bucket, debug_access_token=None, debug_bucket_prefix=None): | 39 def __init__(self, bucket, debug_bucket_prefix=None): |
72 self._bucket = bucket | 40 self._bucket = bucket |
73 if debug_access_token: | 41 self._access_token = None |
74 logging.debug('gcs: using debug access token: %s' % debug_access_token) | 42 self._last_commit_hash = None |
75 common.set_access_token(debug_access_token) | |
76 if debug_bucket_prefix: | |
77 logging.debug('gcs: prefixing all bucket names with %s' % | |
78 debug_bucket_prefix) | |
79 self._bucket = debug_bucket_prefix + self._bucket | |
80 AssertIsValid(self._bucket) | 43 AssertIsValid(self._bucket) |
81 | 44 |
82 def Read(self, paths, skip_not_found=False): | 45 def Read(self, paths, skip_not_found=False): |
83 def resolve(): | 46 def resolve(): |
84 try: | 47 result = {} |
85 result = {} | 48 for path in paths: |
86 for path in paths: | 49 if IsDirectory(path): |
87 full_path = Join(self._bucket, path) | 50 result[path] = self._ListDir(path) |
88 logging.debug('gcs: requested path "%s", reading "%s"' % | 51 else: |
89 (path, full_path)) | 52 result[path] = self._ReadFile(path) |
90 if IsDirectory(path): | 53 return result |
91 result[path] = _ListDir(full_path) | |
92 else: | |
93 result[path] = _ReadFile(full_path) | |
94 return result | |
95 except errors.AuthorizationError: | |
96 self._warnAboutAuthError() | |
97 raise | |
98 | 54 |
99 return Future(callback=resolve) | 55 return Future(callback=resolve) |
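Read() is lazy: the GCS fetches only run once the returned Future is evaluated. A usage sketch (the bucket name and paths are invented):

```python
# Hypothetical usage; 'example-docs-bucket' and the paths are made up.
fs = CloudStorageFileSystem('example-docs-bucket')
future = fs.Read(['docs/', 'docs/index.html'])
# The fetches happen here: directories resolve to listings, files to contents.
data = future.Get()
```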
100 | 56 |
101 def Refresh(self): | 57 def Refresh(self): |
102 return Future(value=()) | 58 return Future(value=()) |
103 | 59 |
104 def Stat(self, path): | 60 def Stat(self, path): |
105 AssertIsValid(path) | 61 AssertIsValid(path) |
106 try: | 62 return self._CreateStatInfo(path) |
107 return _CreateStatInfo(self._bucket, path) | |
108 except errors.AuthorizationError: | |
109 self._warnAboutAuthError() | |
110 raise | |
111 | 63 |
112 def GetIdentity(self): | 64 def GetIdentity(self): |
113 return '@'.join((self.__class__.__name__, StringIdentity(self._bucket))) | 65 return '@'.join((self.__class__.__name__, StringIdentity(self._bucket))) |
114 | 66 |
| 67 def _CreateStatInfo(self, path): |
| 68 if not self._last_commit_hash: |
| 69 self._last_commit_hash = self._ReadFile(_LAST_COMMIT_HASH_FILENAME) |
| 70 if IsDirectory(path): |
| 71 child_versions = dict((filename, self._last_commit_hash) |
| 72 for filename in self._ListDir(path)) |
| 73 else: |
| 74 child_versions = None |
| 75 return StatInfo(self._last_commit_hash, child_versions) |
| 76 |
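Because the whole bucket is synced in one pass, Stat() reports the same version, the last synced commit hash, for a directory and for each of its children. An illustration (the hash and file names are invented):

```python
# Illustrative values only.
info = fs.Stat('docs/')
info.version         # e.g. '9f37a0b...', the contents of .__lastcommit.txt
info.child_versions  # {'index.html': '9f37a0b...', 'static/': '9f37a0b...'}
```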
| 77 def _ReadFile(self, path): |
| 78 AssertIsFile(path) |
| 79 return self._FetchObjectData(path) |
| 80 |
| 81 def _ListDir(self, path, recursive=False): |
| 82 AssertIsDirectory(path) |
| 83 # The listbucket method uses a prefix approach to simulate hierarchy. |
| 84 # Calling it with the "delimiter" argument set to '/' gets only files |
| 85 # directly inside the directory, not all recursive content. |
| 86 |
| 87 # Subdirectories are returned in the 'prefixes' property, but they are |
| 88 # full paths from the root. This plucks off the name of the leaf with a |
| 89 # trailing slash. |
| 90 def path_from_prefix(prefix): |
| 91 return posixpath.split(posixpath.split(prefix)[0])[1] + '/' |
| 92 |
| 93 query = { 'prefix': path } |
| 94 if not recursive: |
| 95 query['delimiter'] = '/' |
| 96 root_object = json.loads(self._FetchObject('', query=query)) |
| 97 files = [posixpath.basename(o['name']) |
| 98 for o in root_object.get('items', [])] |
| 99 dirs = [path_from_prefix(prefix) |
| 100 for prefix in root_object.get('prefixes', [])] |
| 101 return files + dirs |
| 102 |
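The prefix/delimiter handling is easiest to see against a sample objects.list response from the GCS JSON API; the object names below are invented, but the response shape is the API's:

```python
import json
import posixpath

# Invented response for GET .../b/<bucket>/o?prefix=docs/&delimiter=/
raw = json.dumps({
    'items': [{'name': 'docs/index.html'}, {'name': 'docs/README'}],
    'prefixes': ['docs/static/', 'docs/examples/'],
})
root_object = json.loads(raw)
files = [posixpath.basename(o['name']) for o in root_object.get('items', [])]
dirs = [posixpath.split(posixpath.split(p)[0])[1] + '/'
        for p in root_object.get('prefixes', [])]
assert files == ['index.html', 'README']
assert dirs == ['static/', 'examples/']
```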
| 103 def _FetchObject(self, path, query={}): |
| 104 # Escape the path, including slashes. |
| 105 url_path = urllib.quote(path.lstrip('/'), safe='') |
| 106 fetcher = CreateUrlFetcher() |
| 107 object_url = '%s/b/%s/o/%s' % (_STORAGE_API_BASE, self._bucket, url_path) |
| 108 response = fetcher.Fetch(object_url, query=query) |
| 109 if response.status_code != 200: |
| 110 raise FileNotFoundError( |
| 111 'Path %s not found in GCS bucket %s' % (path, self._bucket)) |
| 112 return response.content |
| 113 |
| 114 def _FetchObjectData(self, path, query={}): |
| 115 q = query.copy() |
| 116 q.update({ 'alt': 'media' }) |
| 117 return self._FetchObject(path, query=q) |
| 118 |
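The two fetch helpers differ only in the alt=media query parameter, which asks GCS for the object body rather than its JSON metadata. A sketch of the URLs they produce (bucket and object names invented):

```python
import urllib

path = 'docs/index.html'
url_path = urllib.quote(path.lstrip('/'), safe='')   # 'docs%2Findex.html'
base = 'https://www.googleapis.com/storage/v1'
# _FetchObject: object metadata as JSON.
meta_url = '%s/b/%s/o/%s' % (base, 'example-docs-bucket', url_path)
# _FetchObjectData adds alt=media to retrieve the raw contents instead.
media_url = meta_url + '?alt=media'
```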
115 def __repr__(self): | 119 def __repr__(self): |
116 return 'CloudStorageFileSystem(%s)' % self._bucket | 120 return 'CloudStorageFileSystem(%s)' % self._bucket |
117 | |
118 def _warnAboutAuthError(self): | |
Ken Rockot(use gerrit already)
2015/05/26 00:26:24
Auth is no longer required. The github repos are public.
119 logging.warn(('Authentication error on Cloud Storage. Check if your' | |
120 ' appengine project has permissions to Read the GCS' | |
121 ' buckets. If you are running a local appengine server,' | |
122 ' you need to set an access_token in' | |
123 ' local_debug/gcs_debug.conf.' | |
124 ' Remember that this token expires in less than 10' | |
125 ' minutes, so keep it updated. See' | |
126 ' gcs_file_system_provider.py for instructions.')); | |
127 logging.debug(traceback.format_exc()) | |