| OLD | NEW |
| (Empty) |
| 1 # -*- coding: utf-8 -*- | |
| 2 # Copyright 2014 Google Inc. All Rights Reserved. | |
| 3 # | |
| 4 # Licensed under the Apache License, Version 2.0 (the "License"); | |
| 5 # you may not use this file except in compliance with the License. | |
| 6 # You may obtain a copy of the License at | |
| 7 # | |
| 8 # http://www.apache.org/licenses/LICENSE-2.0 | |
| 9 # | |
| 10 # Unless required by applicable law or agreed to in writing, software | |
| 11 # distributed under the License is distributed on an "AS IS" BASIS, | |
| 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 13 # See the License for the specific language governing permissions and | |
| 14 # limitations under the License. | |
| 15 """Utility functions and class for listing commands such as ls and du.""" | |
| 16 | |
| 17 from __future__ import absolute_import | |
| 18 | |
| 19 import fnmatch | |
| 20 | |
| 21 from gslib.exception import CommandException | |
| 22 from gslib.plurality_checkable_iterator import PluralityCheckableIterator | |
| 23 from gslib.util import UTF8 | |
| 24 from gslib.wildcard_iterator import StorageUrlFromString | |
| 25 | |
| 26 | |
def PrintNewLine():
  """Default function for printing new lines between directories.

  Writes a single newline to standard output and returns None.
  """
  # A bare `print` is only a statement on Python 2; on Python 3 it is an
  # expression that evaluates the name and prints nothing. Printing an empty
  # string emits exactly one newline under both interpreters.
  print('')
| 30 | |
| 31 | |
def PrintDirHeader(bucket_listing_ref):
  """Default function for printing the header line of a prefix.

  The header is the UTF8-encoded URL string of the prefix followed by a
  colon. It is meant to be printed before the contents of the prefix.

  Args:
    bucket_listing_ref: BucketListingRef of type PREFIX.
  """
  encoded_url = bucket_listing_ref.url_string.encode(UTF8)
  print('%s:' % encoded_url)
| 41 | |
| 42 | |
def PrintBucketHeader(bucket_listing_ref):  # pylint: disable=unused-argument
  """Default function for printing headers for buckets; prints nothing.

  This is the hook invoked before the contents of a bucket are listed. The
  default implementation is intentionally a no-op.

  Args:
    bucket_listing_ref: BucketListingRef of type BUCKET (ignored).
  """
| 52 | |
| 53 | |
def PrintDir(bucket_listing_ref):
  """Default function for printing a bucket or prefix entry.

  Prints the UTF8-encoded URL string of the reference on its own line.

  Args:
    bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX.
  """
  encoded_url = bucket_listing_ref.url_string.encode(UTF8)
  print(encoded_url)
| 61 | |
| 62 | |
| 63 # pylint: disable=unused-argument | |
def PrintDirSummary(num_bytes, bucket_listing_ref):
  """Off-by-default hook for printing bucket or prefix size summaries.

  The default implementation is intentionally a no-op and ignores both
  arguments.

  Args:
    num_bytes: Number of bytes contained in the directory.
    bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX.
  """
| 72 | |
| 73 | |
def PrintObject(bucket_listing_ref):
  """Default function for printing an object entry.

  Prints the UTF8-encoded URL string of the object on its own line.

  Args:
    bucket_listing_ref: BucketListingRef of type OBJECT.

  Returns:
    (num_objects, num_bytes); this default always reports one object and
    zero bytes.
  """
  encoded_url = bucket_listing_ref.url_string.encode(UTF8)
  print(encoded_url)
  counts = (1, 0)
  return counts
| 85 | |
| 86 | |
class LsHelper(object):
  """Helper class for ls and du.

  Iterates over the listing produced for a URL and dispatches each result to
  the configured print callbacks while accumulating directory, object and
  byte totals.
  """

  def __init__(self, iterator_func, logger,
               print_object_func=PrintObject,
               print_dir_func=PrintDir,
               print_dir_header_func=PrintDirHeader,
               print_bucket_header_func=PrintBucketHeader,
               print_dir_summary_func=PrintDirSummary,
               print_newline_func=PrintNewLine,
               all_versions=False, should_recurse=False,
               exclude_patterns=None, fields=('name',)):
    """Initializes the helper class to prepare for listing.

    Args:
      iterator_func: Function for instantiating iterator.
                     Inputs-
                       url_string- Url string to iterate on. May include
                                   wildcards.
                       all_versions=False- If true, iterate over all object
                                   versions.
                     The returned iterator must provide
                     IterAll(expand_top_level_buckets, bucket_listing_fields).
      logger: Logger for outputting warnings / errors.
      print_object_func: Function for printing objects. Called with the
                         object's BucketListingRef; must return
                         (num_objects, num_bytes).
      print_dir_func: Function for printing buckets/prefixes.
      print_dir_header_func: Function for printing header line for prefixes.
      print_bucket_header_func: Function for printing header line for
                                buckets.
      print_dir_summary_func: Function for printing size summaries about
                              buckets/prefixes. Called with
                              (num_bytes, bucket_listing_ref).
      print_newline_func: Function for printing new lines between dirs.
      all_versions: If true, list all object versions.
      should_recurse: If true, recursively listing buckets/prefixes.
      exclude_patterns: Patterns to exclude when listing.
      fields: Fields to request from bucket listings; this should
              include all fields that need to be populated in
              objects so they can be listed. Can be set to None
              to retrieve all object fields. Defaults to short
              listing fields.
    """
    self._iterator_func = iterator_func
    self.logger = logger
    self._print_object_func = print_object_func
    self._print_dir_func = print_dir_func
    self._print_dir_header_func = print_dir_header_func
    self._print_bucket_header_func = print_bucket_header_func
    self._print_dir_summary_func = print_dir_summary_func
    self._print_newline_func = print_newline_func
    self.all_versions = all_versions
    self.should_recurse = should_recurse
    self.exclude_patterns = exclude_patterns
    self.bucket_listing_fields = fields

  def ExpandUrlAndPrint(self, url):
    """Iterates over the given URL and calls print functions.

    Args:
      url: StorageUrl to iterate over.

    Returns:
      (num_dirs, num_objects, num_bytes) total number of directories, objects
      and bytes iterated.

    Raises:
      CommandException: If the listing yields a reference that is neither an
                        object nor a prefix.
    """
    if url.IsBucket() or self.should_recurse:
      # IsBucket() implies a top-level listing.
      if url.IsBucket():
        self._print_bucket_header_func(url)
      return self._RecurseExpandUrlAndPrint(url.url_string,
                                            print_initial_newline=False)

    num_objects = 0
    num_dirs = 0
    num_bytes = 0
    # Becomes True once anything has been printed, so that every prefix after
    # the first piece of output is preceded by a separating newline.
    print_newline = False

    # User provided a prefix or object URL, but it's impossible to tell
    # which until we do a listing and see what matches.
    top_level_iterator = PluralityCheckableIterator(self._iterator_func(
        url.CreatePrefixUrl(wildcard_suffix=None),
        all_versions=self.all_versions).IterAll(
            expand_top_level_buckets=True,
            bucket_listing_fields=self.bucket_listing_fields))
    # Prefix headers are only printed when more than one result matched.
    plurality = top_level_iterator.HasPlurality()

    for blr in top_level_iterator:
      if self._MatchesExcludedPattern(blr):
        continue
      if blr.IsObject():
        nd = 0
        no, nb = self._print_object_func(blr)
        print_newline = True
      elif blr.IsPrefix():
        if print_newline:
          self._print_newline_func()
        else:
          print_newline = True
        if plurality:
          self._print_dir_header_func(blr)
        expansion_url_str = StorageUrlFromString(
            blr.url_string).CreatePrefixUrl(wildcard_suffix='*')
        nd, no, nb = self._RecurseExpandUrlAndPrint(expansion_url_str)
        self._print_dir_summary_func(nb, blr)
      else:
        # We handle all buckets at the top level, so this should never happen.
        raise CommandException(
            'Sub-level iterator returned a CsBucketListingRef of type Bucket')
      num_objects += no
      num_dirs += nd
      num_bytes += nb
    return num_dirs, num_objects, num_bytes

  def _RecurseExpandUrlAndPrint(self, url_str, print_initial_newline=True):
    """Iterates over the given URL string and calls print functions.

    Args:
      url_str: String describing StorageUrl to iterate over.
               Must be of depth one or higher.
      print_initial_newline: If true, print a newline before recursively
                             expanded prefixes.

    Returns:
      (num_dirs, num_objects, num_bytes) total number of directories, objects
      and bytes iterated. A prefix that is printed without being expanded
      counts as one directory.

    Raises:
      CommandException: If the listing yields a reference that is neither an
                        object nor a prefix.
    """
    num_objects = 0
    num_dirs = 0
    num_bytes = 0
    for blr in self._iterator_func(
        '%s' % url_str, all_versions=self.all_versions).IterAll(
            expand_top_level_buckets=True,
            bucket_listing_fields=self.bucket_listing_fields):
      if self._MatchesExcludedPattern(blr):
        continue

      if blr.IsObject():
        nd = 0
        no, nb = self._print_object_func(blr)
      elif blr.IsPrefix():
        if self.should_recurse:
          # Only the first expanded prefix may skip the separating newline;
          # every later one is always preceded by it.
          if print_initial_newline:
            self._print_newline_func()
          else:
            print_initial_newline = True
          self._print_dir_header_func(blr)
          expansion_url_str = StorageUrlFromString(
              blr.url_string).CreatePrefixUrl(wildcard_suffix='*')

          nd, no, nb = self._RecurseExpandUrlAndPrint(expansion_url_str)
          self._print_dir_summary_func(nb, blr)
        else:
          nd, no, nb = 1, 0, 0
          self._print_dir_func(blr)
      else:
        # We handle all buckets at the top level, so this should never happen.
        raise CommandException(
            'Sub-level iterator returned a bucketListingRef of type Bucket')
      num_dirs += nd
      num_objects += no
      num_bytes += nb

    return num_dirs, num_objects, num_bytes

  def _MatchesExcludedPattern(self, blr):
    """Checks bucket listing reference against patterns to exclude.

    Each pattern is matched with fnmatch against the full URL string of the
    reference.

    Args:
      blr: BucketListingRef to check.

    Returns:
      True if reference matches a pattern and should be excluded.
    """
    if not self.exclude_patterns:
      return False
    tomatch = blr.url_string
    return any(fnmatch.fnmatch(tomatch, pattern)
               for pattern in self.exclude_patterns)
| OLD | NEW |