Index: third_party/gsutil/gslib/ls_helper.py
|
diff --git a/third_party/gsutil/gslib/ls_helper.py b/third_party/gsutil/gslib/ls_helper.py
|
new file mode 100644
|
index 0000000000000000000000000000000000000000..f424515d1d98e14d3905e4e670f966a44f9a4da6
|
--- /dev/null
|
+++ b/third_party/gsutil/gslib/ls_helper.py
|
@@ -0,0 +1,261 @@
|
+# -*- coding: utf-8 -*-
|
+# Copyright 2014 Google Inc. All Rights Reserved.
|
+#
|
+# Licensed under the Apache License, Version 2.0 (the "License");
|
+# you may not use this file except in compliance with the License.
|
+# You may obtain a copy of the License at
|
+#
|
+# http://www.apache.org/licenses/LICENSE-2.0
|
+#
|
+# Unless required by applicable law or agreed to in writing, software
|
+# distributed under the License is distributed on an "AS IS" BASIS,
|
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
+# See the License for the specific language governing permissions and
|
+# limitations under the License.
|
+"""Utility functions and class for listing commands such as ls and du."""
|
+
|
+from __future__ import absolute_import
|
+
|
+import fnmatch
|
+
|
+from gslib.exception import CommandException
|
+from gslib.plurality_checkable_iterator import PluralityCheckableIterator
|
+from gslib.util import UTF8
|
+from gslib.wildcard_iterator import StorageUrlFromString
|
+
|
+
|
+def PrintNewLine():
|
+ """Default function for printing new lines between directories."""
|
+ print
|
+
|
+
|
+def PrintDirHeader(bucket_listing_ref):
|
+ """Default function for printing headers for prefixes.
|
+
|
+ Header is printed prior to listing the contents of the prefix.
|
+
|
+ Args:
|
+ bucket_listing_ref: BucketListingRef of type PREFIX.
|
+ """
|
+ print '%s:' % bucket_listing_ref.url_string.encode(UTF8)
|
+
|
+
|
+def PrintBucketHeader(bucket_listing_ref): # pylint: disable=unused-argument
|
+ """Default function for printing headers for buckets.
|
+
|
+ Header is printed prior to listing the contents of the bucket.
|
+
|
+ Args:
|
+ bucket_listing_ref: BucketListingRef of type BUCKET.
|
+ """
|
+ pass
|
+
|
+
|
+def PrintDir(bucket_listing_ref):
|
+ """Default function for printing buckets or prefixes.
|
+
|
+ Args:
|
+ bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX.
|
+ """
|
+ print bucket_listing_ref.url_string.encode(UTF8)
|
+
|
+
|
+# pylint: disable=unused-argument
|
+def PrintDirSummary(num_bytes, bucket_listing_ref):
|
+ """Off-by-default function for printing buckets or prefix size summaries.
|
+
|
+ Args:
|
+ num_bytes: Number of bytes contained in the directory.
|
+ bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX.
|
+ """
|
+ pass
|
+
|
+
|
+def PrintObject(bucket_listing_ref):
|
+ """Default printing function for objects.
|
+
|
+ Args:
|
+ bucket_listing_ref: BucketListingRef of type OBJECT.
|
+
|
+ Returns:
|
+ (num_objects, num_bytes).
|
+ """
|
+ print bucket_listing_ref.url_string.encode(UTF8)
|
+ return (1, 0)
|
+
|
+
|
+class LsHelper(object):
|
+ """Helper class for ls and du."""
|
+
|
+ def __init__(self, iterator_func, logger,
|
+ print_object_func=PrintObject,
|
+ print_dir_func=PrintDir,
|
+ print_dir_header_func=PrintDirHeader,
|
+ print_bucket_header_func=PrintBucketHeader,
|
+ print_dir_summary_func=PrintDirSummary,
|
+ print_newline_func=PrintNewLine,
|
+ all_versions=False, should_recurse=False,
|
+ exclude_patterns=None, fields=('name',)):
|
+ """Initializes the helper class to prepare for listing.
|
+
|
+ Args:
|
+ iterator_func: Function for instantiating iterator.
|
+ Inputs-
|
+ url_string- Url string to iterate on. May include
|
+ wildcards.
|
+ all_versions=False- If true, iterate over all object
|
+ versions.
|
+ logger: Logger for outputting warnings / errors.
|
+ print_object_func: Function for printing objects.
|
+ print_dir_func: Function for printing buckets/prefixes.
|
+ print_dir_header_func: Function for printing header line for buckets
|
+ or prefixes.
|
+ print_bucket_header_func: Function for printing header line for buckets
|
+ or prefixes.
|
+ print_dir_summary_func: Function for printing size summaries about
|
+ buckets/prefixes.
|
+ print_newline_func: Function for printing new lines between dirs.
|
+ all_versions: If true, list all object versions.
|
+ should_recurse: If true, recursively listing buckets/prefixes.
|
+ exclude_patterns: Patterns to exclude when listing.
|
+ fields: Fields to request from bucket listings; this should
|
+ include all fields that need to be populated in
|
+ objects so they can be listed. Can be set to None
|
+ to retrieve all object fields. Defaults to short
|
+ listing fields.
|
+ """
|
+ self._iterator_func = iterator_func
|
+ self.logger = logger
|
+ self._print_object_func = print_object_func
|
+ self._print_dir_func = print_dir_func
|
+ self._print_dir_header_func = print_dir_header_func
|
+ self._print_bucket_header_func = print_bucket_header_func
|
+ self._print_dir_summary_func = print_dir_summary_func
|
+ self._print_newline_func = print_newline_func
|
+ self.all_versions = all_versions
|
+ self.should_recurse = should_recurse
|
+ self.exclude_patterns = exclude_patterns
|
+ self.bucket_listing_fields = fields
|
+
|
+ def ExpandUrlAndPrint(self, url):
|
+ """Iterates over the given URL and calls print functions.
|
+
|
+ Args:
|
+ url: StorageUrl to iterate over.
|
+
|
+ Returns:
|
+ (num_objects, num_bytes) total number of objects and bytes iterated.
|
+ """
|
+ num_objects = 0
|
+ num_dirs = 0
|
+ num_bytes = 0
|
+ print_newline = False
|
+
|
+ if url.IsBucket() or self.should_recurse:
|
+ # IsBucket() implies a top-level listing.
|
+ if url.IsBucket():
|
+ self._print_bucket_header_func(url)
|
+ return self._RecurseExpandUrlAndPrint(url.url_string,
|
+ print_initial_newline=False)
|
+ else:
|
+ # User provided a prefix or object URL, but it's impossible to tell
|
+ # which until we do a listing and see what matches.
|
+ top_level_iterator = PluralityCheckableIterator(self._iterator_func(
|
+ url.CreatePrefixUrl(wildcard_suffix=None),
|
+ all_versions=self.all_versions).IterAll(
|
+ expand_top_level_buckets=True,
|
+ bucket_listing_fields=self.bucket_listing_fields))
|
+ plurality = top_level_iterator.HasPlurality()
|
+
|
+ for blr in top_level_iterator:
|
+ if self._MatchesExcludedPattern(blr):
|
+ continue
|
+ if blr.IsObject():
|
+ nd = 0
|
+ no, nb = self._print_object_func(blr)
|
+ print_newline = True
|
+ elif blr.IsPrefix():
|
+ if print_newline:
|
+ self._print_newline_func()
|
+ else:
|
+ print_newline = True
|
+ if plurality:
|
+ self._print_dir_header_func(blr)
|
+ expansion_url_str = StorageUrlFromString(
|
+ blr.url_string).CreatePrefixUrl(wildcard_suffix='*')
|
+ nd, no, nb = self._RecurseExpandUrlAndPrint(expansion_url_str)
|
+ self._print_dir_summary_func(nb, blr)
|
+ else:
|
+ # We handle all buckets at the top level, so this should never happen.
|
+ raise CommandException(
|
+ 'Sub-level iterator returned a CsBucketListingRef of type Bucket')
|
+ num_objects += no
|
+ num_dirs += nd
|
+ num_bytes += nb
|
+ return num_dirs, num_objects, num_bytes
|
+
|
+ def _RecurseExpandUrlAndPrint(self, url_str, print_initial_newline=True):
|
+ """Iterates over the given URL string and calls print functions.
|
+
|
+ Args:
|
+ url_str: String describing StorageUrl to iterate over.
|
+ Must be of depth one or higher.
|
+ print_initial_newline: If true, print a newline before recursively
|
+ expanded prefixes.
|
+
|
+ Returns:
|
+ (num_objects, num_bytes) total number of objects and bytes iterated.
|
+ """
|
+ num_objects = 0
|
+ num_dirs = 0
|
+ num_bytes = 0
|
+ for blr in self._iterator_func(
|
+ '%s' % url_str, all_versions=self.all_versions).IterAll(
|
+ expand_top_level_buckets=True,
|
+ bucket_listing_fields=self.bucket_listing_fields):
|
+ if self._MatchesExcludedPattern(blr):
|
+ continue
|
+
|
+ if blr.IsObject():
|
+ nd = 0
|
+ no, nb = self._print_object_func(blr)
|
+ elif blr.IsPrefix():
|
+ if self.should_recurse:
|
+ if print_initial_newline:
|
+ self._print_newline_func()
|
+ else:
|
+ print_initial_newline = True
|
+ self._print_dir_header_func(blr)
|
+ expansion_url_str = StorageUrlFromString(
|
+ blr.url_string).CreatePrefixUrl(wildcard_suffix='*')
|
+
|
+ nd, no, nb = self._RecurseExpandUrlAndPrint(expansion_url_str)
|
+ self._print_dir_summary_func(nb, blr)
|
+ else:
|
+ nd, no, nb = 1, 0, 0
|
+ self._print_dir_func(blr)
|
+ else:
|
+ # We handle all buckets at the top level, so this should never happen.
|
+ raise CommandException(
|
+ 'Sub-level iterator returned a bucketListingRef of type Bucket')
|
+ num_dirs += nd
|
+ num_objects += no
|
+ num_bytes += nb
|
+
|
+ return num_dirs, num_objects, num_bytes
|
+
|
+ def _MatchesExcludedPattern(self, blr):
|
+ """Checks bucket listing reference against patterns to exclude.
|
+
|
+ Args:
|
+ blr: BucketListingRef to check.
|
+
|
+ Returns:
|
+ True if reference matches a pattern and should be excluded.
|
+ """
|
+ if self.exclude_patterns:
|
+ tomatch = blr.url_string
|
+ for pattern in self.exclude_patterns:
|
+ if fnmatch.fnmatch(tomatch, pattern):
|
+ return True
|
+ return False
|
|