OLD | NEW |
| (Empty) |
1 # -*- coding: utf-8 -*- | |
2 # Copyright 2014 Google Inc. All Rights Reserved. | |
3 # | |
4 # Licensed under the Apache License, Version 2.0 (the "License"); | |
5 # you may not use this file except in compliance with the License. | |
6 # You may obtain a copy of the License at | |
7 # | |
8 # http://www.apache.org/licenses/LICENSE-2.0 | |
9 # | |
10 # Unless required by applicable law or agreed to in writing, software | |
11 # distributed under the License is distributed on an "AS IS" BASIS, | |
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 # See the License for the specific language governing permissions and | |
14 # limitations under the License. | |
15 """Utility functions and class for listing commands such as ls and du.""" | |
16 | |
17 from __future__ import absolute_import | |
18 | |
19 import fnmatch | |
20 | |
21 from gslib.exception import CommandException | |
22 from gslib.plurality_checkable_iterator import PluralityCheckableIterator | |
23 from gslib.util import UTF8 | |
24 from gslib.wildcard_iterator import StorageUrlFromString | |
25 | |
26 | |
def PrintNewLine():
  """Default function for printing new lines between directories."""
  # Bare print statement (this module uses Python 2 print statements): it
  # writes only a line terminator to standard output.
  print
30 | |
31 | |
def PrintDirHeader(bucket_listing_ref):
  """Writes the heading line shown before a prefix's contents are listed.

  The heading is the prefix's URL string, encoded via the UTF8 constant and
  followed by a colon.

  Args:
    bucket_listing_ref: BucketListingRef of type PREFIX.
  """
  encoded_url = bucket_listing_ref.url_string.encode(UTF8)
  # Single parenthesized expression: still the print statement under Python 2.
  print('%s:' % encoded_url)
41 | |
42 | |
def PrintBucketHeader(bucket_listing_ref):  # pylint: disable=unused-argument
  """Default bucket header callback; intentionally prints nothing.

  It is invoked before the contents of a bucket are listed. Callers that want
  a header line supply their own function with the same signature.

  Args:
    bucket_listing_ref: BucketListingRef of type BUCKET (ignored).
  """
52 | |
53 | |
def PrintDir(bucket_listing_ref):
  """Writes a bucket or prefix as a single line containing its URL string.

  Args:
    bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX.
  """
  encoded_url = bucket_listing_ref.url_string.encode(UTF8)
  # Single parenthesized expression: still the print statement under Python 2.
  print(encoded_url)
61 | |
62 | |
# pylint: disable=unused-argument
def PrintDirSummary(num_bytes, bucket_listing_ref):
  """Default size-summary callback; intentionally prints nothing.

  Summaries are off by default. Callers that want per-directory totals supply
  their own function with the same signature.

  Args:
    num_bytes: Number of bytes contained in the directory (ignored).
    bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX (ignored).
  """
72 | |
73 | |
def PrintObject(bucket_listing_ref):
  """Writes an object as a single line containing its URL string.

  Args:
    bucket_listing_ref: BucketListingRef of type OBJECT.

  Returns:
    (num_objects, num_bytes), which is always (1, 0) for this default
    implementation: one object is counted and no size is reported.
  """
  encoded_url = bucket_listing_ref.url_string.encode(UTF8)
  # Single parenthesized expression: still the print statement under Python 2.
  print(encoded_url)
  num_objects, num_bytes = 1, 0
  return (num_objects, num_bytes)
85 | |
86 | |
class LsHelper(object):
  """Helper class for ls and du.

  Walks the listing produced by an iterator factory and reports every object
  and prefix through pluggable print callbacks, while accumulating directory,
  object and byte counts.
  """

  def __init__(self, iterator_func, logger,
               print_object_func=PrintObject,
               print_dir_func=PrintDir,
               print_dir_header_func=PrintDirHeader,
               print_bucket_header_func=PrintBucketHeader,
               print_dir_summary_func=PrintDirSummary,
               print_newline_func=PrintNewLine,
               all_versions=False, should_recurse=False,
               exclude_patterns=None, fields=('name',)):
    """Initializes the helper class to prepare for listing.

    Args:
      iterator_func: Function for instantiating iterator.
                     Inputs-
                       url_string- Url string to iterate on. May include
                                   wildcards.
                       all_versions=False- If true, iterate over all object
                                           versions.
      logger: Logger for outputting warnings / errors.
      print_object_func: Function for printing objects. Called with the
                         object's BucketListingRef; must return
                         (num_objects, num_bytes).
      print_dir_func: Function for printing buckets/prefixes. Called with the
                      BucketListingRef of each prefix that is not expanded.
      print_dir_header_func: Function for printing the header line of a
                             prefix before its contents are listed.
      print_bucket_header_func: Function for printing the header line of a
                                bucket. Called with the URL handed to
                                ExpandUrlAndPrint when that URL is a bucket.
      print_dir_summary_func: Function for printing size summaries about
                              buckets/prefixes. Called as
                              (num_bytes, bucket_listing_ref).
      print_newline_func: Function for printing new lines between dirs.
      all_versions: If true, list all object versions.
      should_recurse: If true, recursively listing buckets/prefixes.
      exclude_patterns: Patterns to exclude when listing. Matched against
                        each entry's URL string with fnmatch.
      fields: Fields to request from bucket listings; this should
              include all fields that need to be populated in
              objects so they can be listed. Can be set to None
              to retrieve all object fields. Defaults to short
              listing fields.
    """
    self._iterator_func = iterator_func
    self.logger = logger
    self._print_object_func = print_object_func
    self._print_dir_func = print_dir_func
    self._print_dir_header_func = print_dir_header_func
    self._print_bucket_header_func = print_bucket_header_func
    self._print_dir_summary_func = print_dir_summary_func
    self._print_newline_func = print_newline_func
    self.all_versions = all_versions
    self.should_recurse = should_recurse
    self.exclude_patterns = exclude_patterns
    self.bucket_listing_fields = fields

  def ExpandUrlAndPrint(self, url):
    """Iterates over the given URL and calls print functions.

    Args:
      url: StorageUrl to iterate over.

    Returns:
      (num_dirs, num_objects, num_bytes) total number of directories, objects
      and bytes iterated.

    Raises:
      CommandException: if the listing yields an entry that is neither an
                        object nor a prefix.
    """
    if url.IsBucket() or self.should_recurse:
      # IsBucket() implies a top-level listing.
      if url.IsBucket():
        self._print_bucket_header_func(url)
      return self._RecurseExpandUrlAndPrint(url.url_string,
                                            print_initial_newline=False)

    num_objects = 0
    num_dirs = 0
    num_bytes = 0
    print_newline = False

    # User provided a prefix or object URL, but it's impossible to tell
    # which until we do a listing and see what matches.
    top_level_iterator = PluralityCheckableIterator(self._iterator_func(
        url.CreatePrefixUrl(wildcard_suffix=None),
        all_versions=self.all_versions).IterAll(
            expand_top_level_buckets=True,
            bucket_listing_fields=self.bucket_listing_fields))
    # Prefix headers are only printed when more than one entry matched.
    plurality = top_level_iterator.HasPlurality()

    for blr in top_level_iterator:
      if self._MatchesExcludedPattern(blr):
        continue
      if blr.IsObject():
        nd = 0
        no, nb = self._print_object_func(blr)
        print_newline = True
      elif blr.IsPrefix():
        # Separate this prefix from whatever was printed before it; nothing
        # is printed ahead of the very first entry.
        if print_newline:
          self._print_newline_func()
        else:
          print_newline = True
        if plurality:
          self._print_dir_header_func(blr)
        nd, no, nb = self._ExpandPrefixAndPrintSummary(blr)
      else:
        # We handle all buckets at the top level, so this should never happen.
        raise CommandException(
            'Sub-level iterator returned a CsBucketListingRef of type Bucket')
      num_objects += no
      num_dirs += nd
      num_bytes += nb
    return num_dirs, num_objects, num_bytes

  def _ExpandPrefixAndPrintSummary(self, blr):
    """Lists everything directly under a prefix, then prints its summary.

    Args:
      blr: BucketListingRef of type PREFIX.

    Returns:
      (num_dirs, num_objects, num_bytes) iterated beneath the prefix.
    """
    expansion_url_str = StorageUrlFromString(
        blr.url_string).CreatePrefixUrl(wildcard_suffix='*')
    nd, no, nb = self._RecurseExpandUrlAndPrint(expansion_url_str)
    self._print_dir_summary_func(nb, blr)
    return nd, no, nb

  def _RecurseExpandUrlAndPrint(self, url_str, print_initial_newline=True):
    """Iterates over the given URL string and calls print functions.

    Args:
      url_str: String describing StorageUrl to iterate over.
               Must be of depth one or higher.
      print_initial_newline: If true, print a newline before recursively
                             expanded prefixes.

    Returns:
      (num_dirs, num_objects, num_bytes) total number of directories, objects
      and bytes iterated.

    Raises:
      CommandException: if the listing yields an entry that is neither an
                        object nor a prefix.
    """
    num_objects = 0
    num_dirs = 0
    num_bytes = 0
    for blr in self._iterator_func(
        '%s' % url_str, all_versions=self.all_versions).IterAll(
            expand_top_level_buckets=True,
            bucket_listing_fields=self.bucket_listing_fields):
      if self._MatchesExcludedPattern(blr):
        continue

      if blr.IsObject():
        nd = 0
        no, nb = self._print_object_func(blr)
      elif blr.IsPrefix():
        if self.should_recurse:
          # Only the first expanded prefix may skip its leading newline.
          if print_initial_newline:
            self._print_newline_func()
          else:
            print_initial_newline = True
          self._print_dir_header_func(blr)
          nd, no, nb = self._ExpandPrefixAndPrintSummary(blr)
        else:
          # Not recursing: the prefix is reported as a single directory.
          nd, no, nb = 1, 0, 0
          self._print_dir_func(blr)
      else:
        # We handle all buckets at the top level, so this should never happen.
        raise CommandException(
            'Sub-level iterator returned a bucketListingRef of type Bucket')
      num_dirs += nd
      num_objects += no
      num_bytes += nb

    return num_dirs, num_objects, num_bytes

  def _MatchesExcludedPattern(self, blr):
    """Checks bucket listing reference against patterns to exclude.

    Args:
      blr: BucketListingRef to check.

    Returns:
      True if reference matches a pattern and should be excluded.
    """
    if not self.exclude_patterns:
      return False
    tomatch = blr.url_string
    return any(fnmatch.fnmatch(tomatch, pattern)
               for pattern in self.exclude_patterns)
OLD | NEW |