OLD | NEW |
(Empty) | |
| 1 # -*- coding: utf-8 -*- |
| 2 # Copyright 2014 Google Inc. All Rights Reserved. |
| 3 # |
| 4 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 # you may not use this file except in compliance with the License. |
| 6 # You may obtain a copy of the License at |
| 7 # |
| 8 # http://www.apache.org/licenses/LICENSE-2.0 |
| 9 # |
| 10 # Unless required by applicable law or agreed to in writing, software |
| 11 # distributed under the License is distributed on an "AS IS" BASIS, |
| 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 # See the License for the specific language governing permissions and |
| 14 # limitations under the License. |
| 15 """Utility functions and class for listing commands such as ls and du.""" |
| 16 |
| 17 from __future__ import absolute_import |
| 18 |
| 19 import fnmatch |
| 20 |
| 21 from gslib.exception import CommandException |
| 22 from gslib.plurality_checkable_iterator import PluralityCheckableIterator |
| 23 from gslib.util import UTF8 |
| 24 from gslib.wildcard_iterator import StorageUrlFromString |
| 25 |
| 26 |
def PrintNewLine():
  """Default function for printing new lines between directories.

  Takes no arguments and returns nothing; its only effect is the output.
  """
  # A bare Python 2 print statement writes a single newline to stdout.
  print
| 30 |
| 31 |
| 32 def PrintDirHeader(bucket_listing_ref): |
| 33 """Default function for printing headers for prefixes. |
| 34 |
| 35 Header is printed prior to listing the contents of the prefix. |
| 36 |
| 37 Args: |
| 38 bucket_listing_ref: BucketListingRef of type PREFIX. |
| 39 """ |
| 40 print '%s:' % bucket_listing_ref.url_string.encode(UTF8) |
| 41 |
| 42 |
def PrintBucketHeader(bucket_listing_ref):  # pylint: disable=unused-argument
  """Default printer for bucket headers; intentionally prints nothing.

  This hook is invoked before the contents of a bucket are listed. The
  default implementation ignores its argument and produces no output.

  Args:
    bucket_listing_ref: BucketListingRef of type BUCKET.
  """
| 52 |
| 53 |
| 54 def PrintDir(bucket_listing_ref): |
| 55 """Default function for printing buckets or prefixes. |
| 56 |
| 57 Args: |
| 58 bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX. |
| 59 """ |
| 60 print bucket_listing_ref.url_string.encode(UTF8) |
| 61 |
| 62 |
# pylint: disable=unused-argument
def PrintDirSummary(num_bytes, bucket_listing_ref):
  """Default size-summary printer for buckets/prefixes; prints nothing.

  Summaries are off by default, so this implementation ignores both
  arguments and produces no output.

  Args:
    num_bytes: Number of bytes contained in the directory.
    bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX.
  """
| 72 |
| 73 |
| 74 def PrintObject(bucket_listing_ref): |
| 75 """Default printing function for objects. |
| 76 |
| 77 Args: |
| 78 bucket_listing_ref: BucketListingRef of type OBJECT. |
| 79 |
| 80 Returns: |
| 81 (num_objects, num_bytes). |
| 82 """ |
| 83 print bucket_listing_ref.url_string.encode(UTF8) |
| 84 return (1, 0) |
| 85 |
| 86 |
class LsHelper(object):
  """Shared listing engine used by the ls and du commands.

  Iterates over the listing produced by an iterator factory and delegates all
  output to pluggable print callbacks, accumulating directory, object and
  byte counts as it goes.
  """

  def __init__(self, iterator_func, logger,
               print_object_func=PrintObject,
               print_dir_func=PrintDir,
               print_dir_header_func=PrintDirHeader,
               print_bucket_header_func=PrintBucketHeader,
               print_dir_summary_func=PrintDirSummary,
               print_newline_func=PrintNewLine,
               all_versions=False, should_recurse=False,
               exclude_patterns=None, fields=('name',)):
    """Stores the iterator factory, callbacks and options used for listing.

    Args:
      iterator_func: Function for instantiating an iterator. It is called
                     with a URL string (which may include wildcards) and an
                     all_versions keyword argument; the result must provide
                     an IterAll method.
      logger: Logger for outputting warnings / errors.
      print_object_func: Function for printing objects. Called with the
                         object's listing reference; must return a
                         (num_objects, num_bytes) tuple.
      print_dir_func: Function for printing buckets/prefixes that are not
                      expanded.
      print_dir_header_func: Function for printing the header line of a
                             prefix before its contents are listed.
      print_bucket_header_func: Function for printing the header line of a
                                bucket before its contents are listed.
      print_dir_summary_func: Function for printing size summaries about
                              buckets/prefixes. Called with the byte count
                              and the listing reference.
      print_newline_func: Function for printing new lines between dirs.
      all_versions: If true, list all object versions.
      should_recurse: If true, recursively list buckets/prefixes.
      exclude_patterns: Patterns to exclude when listing.
      fields: Fields to request from bucket listings; this should include
              all fields that need to be populated in objects so they can
              be listed. Can be set to None to retrieve all object fields.
              Defaults to short listing fields.
    """
    # Listing source and diagnostics.
    self._iterator_func = iterator_func
    self.logger = logger

    # Listing options.
    self.all_versions = all_versions
    self.should_recurse = should_recurse
    self.exclude_patterns = exclude_patterns
    self.bucket_listing_fields = fields

    # Output callbacks.
    self._print_object_func = print_object_func
    self._print_dir_func = print_dir_func
    self._print_dir_header_func = print_dir_header_func
    self._print_bucket_header_func = print_bucket_header_func
    self._print_dir_summary_func = print_dir_summary_func
    self._print_newline_func = print_newline_func

  def ExpandUrlAndPrint(self, url):
    """Lists the given URL, invoking the print callbacks for each result.

    Bucket URLs, and every URL when recursion is enabled, are handed
    straight to the recursive listing. Any other URL is listed one level
    deep, and each matching prefix is expanded to show its contents.

    Args:
      url: StorageUrl to iterate over.

    Returns:
      (num_dirs, num_objects, num_bytes) totals for everything iterated.

    Raises:
      CommandException: If the listing yields a reference that is neither
                        an object nor a prefix.
    """
    if url.IsBucket() or self.should_recurse:
      # IsBucket() implies a top-level listing.
      if url.IsBucket():
        self._print_bucket_header_func(url)
      return self._RecurseExpandUrlAndPrint(url.url_string,
                                            print_initial_newline=False)

    # User provided a prefix or object URL, but it's impossible to tell
    # which until we do a listing and see what matches.
    prefix_url = url.CreatePrefixUrl(wildcard_suffix=None)
    wildcard_iterator = self._iterator_func(
        prefix_url, all_versions=self.all_versions)
    top_level_iterator = PluralityCheckableIterator(
        wildcard_iterator.IterAll(
            expand_top_level_buckets=True,
            bucket_listing_fields=self.bucket_listing_fields))
    # Prefix headers are only printed when more than one entry matched.
    has_multiple_matches = top_level_iterator.HasPlurality()

    total_dirs = 0
    total_objects = 0
    total_bytes = 0
    # Becomes true once anything was printed, so that each later prefix
    # listing is preceded by a separating newline.
    need_separator = False

    for blr in top_level_iterator:
      if self._MatchesExcludedPattern(blr):
        continue

      if blr.IsObject():
        dir_count = 0
        object_count, byte_count = self._print_object_func(blr)
        need_separator = True
      elif not blr.IsPrefix():
        # We handle all buckets at the top level, so this should never happen.
        raise CommandException(
            'Sub-level iterator returned a CsBucketListingRef of type Bucket')
      else:
        if need_separator:
          self._print_newline_func()
        need_separator = True
        if has_multiple_matches:
          self._print_dir_header_func(blr)
        prefix_storage_url = StorageUrlFromString(blr.url_string)
        expansion_url_str = prefix_storage_url.CreatePrefixUrl(
            wildcard_suffix='*')
        dir_count, object_count, byte_count = self._RecurseExpandUrlAndPrint(
            expansion_url_str)
        self._print_dir_summary_func(byte_count, blr)

      total_dirs += dir_count
      total_objects += object_count
      total_bytes += byte_count

    return total_dirs, total_objects, total_bytes

  def _RecurseExpandUrlAndPrint(self, url_str, print_initial_newline=True):
    """Lists the given URL string, descending into prefixes when recursing.

    Args:
      url_str: String describing StorageUrl to iterate over.
               Must be of depth one or higher.
      print_initial_newline: If true, print a newline before recursively
                             expanded prefixes. If false, the newline is
                             skipped for the first expanded prefix only.

    Returns:
      (num_dirs, num_objects, num_bytes) totals for everything iterated.

    Raises:
      CommandException: If the listing yields a reference that is neither
                        an object nor a prefix.
    """
    total_dirs = 0
    total_objects = 0
    total_bytes = 0

    wildcard_iterator = self._iterator_func(
        '%s' % url_str, all_versions=self.all_versions)
    listing = wildcard_iterator.IterAll(
        expand_top_level_buckets=True,
        bucket_listing_fields=self.bucket_listing_fields)

    for blr in listing:
      if self._MatchesExcludedPattern(blr):
        continue

      if blr.IsObject():
        dir_count = 0
        object_count, byte_count = self._print_object_func(blr)
      elif not blr.IsPrefix():
        # We handle all buckets at the top level, so this should never happen.
        raise CommandException(
            'Sub-level iterator returned a bucketListingRef of type Bucket')
      elif not self.should_recurse:
        # Non-recursive listing: show the prefix itself and count it as a
        # single directory.
        self._print_dir_func(blr)
        dir_count, object_count, byte_count = 1, 0, 0
      else:
        if print_initial_newline:
          self._print_newline_func()
        # Every prefix after the first is always preceded by a newline.
        print_initial_newline = True
        self._print_dir_header_func(blr)
        prefix_storage_url = StorageUrlFromString(blr.url_string)
        expansion_url_str = prefix_storage_url.CreatePrefixUrl(
            wildcard_suffix='*')
        dir_count, object_count, byte_count = self._RecurseExpandUrlAndPrint(
            expansion_url_str)
        self._print_dir_summary_func(byte_count, blr)

      total_dirs += dir_count
      total_objects += object_count
      total_bytes += byte_count

    return total_dirs, total_objects, total_bytes

  def _MatchesExcludedPattern(self, blr):
    """Checks a bucket listing reference against the exclusion patterns.

    Args:
      blr: BucketListingRef to check.

    Returns:
      True if the reference's URL string matches any exclusion pattern and
      should therefore be skipped; False otherwise, including when no
      patterns are configured.
    """
    if not self.exclude_patterns:
      return False
    url_string = blr.url_string
    # NOTE(review): fnmatch.fnmatch normalizes both arguments with
    # os.path.normcase, so matching is platform-dependent (case-insensitive
    # on Windows) - confirm that is intended for cloud URLs.
    return any(fnmatch.fnmatch(url_string, pattern)
               for pattern in self.exclude_patterns)
OLD | NEW |