third_party/gsutil/gslib/ls_helper.py - Issue 1377933002: [catapult] - Copy Telemetry's gsutilz over to third_party.

Unified Diff: third_party/gsutil/gslib/ls_helper.py

Issue 1377933002: [catapult] - Copy Telemetry's gsutilz over to third_party. (Closed) Base URL: https://github.com/catapult-project/catapult.git@master

Patch Set: Rename to gsutil. Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: third_party/gsutil/gslib/ls_helper.py

diff --git a/third_party/gsutil/gslib/ls_helper.py b/third_party/gsutil/gslib/ls_helper.py

new file mode 100644

index 0000000000000000000000000000000000000000..f424515d1d98e14d3905e4e670f966a44f9a4da6

--- /dev/null

+++ b/third_party/gsutil/gslib/ls_helper.py

@@ -0,0 +1,261 @@

+# -*- coding: utf-8 -*-

+# Licensed under the Apache License, Version 2.0 (the "License");

+# you may not use this file except in compliance with the License.

+# You may obtain a copy of the License at

+# http://www.apache.org/licenses/LICENSE-2.0

+# Unless required by applicable law or agreed to in writing, software

+# distributed under the License is distributed on an "AS IS" BASIS,

+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+# See the License for the specific language governing permissions and

+# limitations under the License.

+"""Utility functions and class for listing commands such as ls and du."""

+from __future__ import absolute_import

+import fnmatch

+from gslib.exception import CommandException

+from gslib.plurality_checkable_iterator import PluralityCheckableIterator

+from gslib.util import UTF8

+from gslib.wildcard_iterator import StorageUrlFromString

def PrintNewLine():
  """Default function for printing new lines between directories.

  Writes a single empty line to standard output.
  """
  # A bare `print` is a statement only on Python 2; on Python 3 it is a plain
  # expression that silently prints nothing. Printing an empty string emits
  # exactly one newline under either interpreter.
  print('')

def PrintDirHeader(bucket_listing_ref):
  """Prints the header line shown before a prefix's contents are listed.

  The header is the UTF8-encoded URL string of the prefix followed by a colon.

  Args:
    bucket_listing_ref: BucketListingRef of type PREFIX.
  """
  encoded_url = bucket_listing_ref.url_string.encode(UTF8)
  print('%s:' % encoded_url)

def PrintBucketHeader(bucket_listing_ref):  # pylint: disable=unused-argument
  """Default header printer for buckets; intentionally prints nothing.

  This is the hook invoked before the contents of a bucket are listed. The
  default implementation is a no-op.

  Args:
    bucket_listing_ref: BucketListingRef of type BUCKET (ignored here).
  """
  return None

def PrintDir(bucket_listing_ref):
  """Prints the URL string of a bucket or prefix on its own line.

  Args:
    bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX.
  """
  encoded_url = bucket_listing_ref.url_string.encode(UTF8)
  print(encoded_url)

+# pylint: disable=unused-argument

def PrintDirSummary(num_bytes, bucket_listing_ref):
  """Off-by-default size-summary printer for buckets or prefixes.

  The default implementation is a no-op; both arguments are ignored.

  Args:
    num_bytes: Number of bytes contained in the directory.
    bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX.
  """
  return None

def PrintObject(bucket_listing_ref):
  """Prints the URL string of an object on its own line.

  Args:
    bucket_listing_ref: BucketListingRef of type OBJECT.

  Returns:
    (num_objects, num_bytes). This default printer always reports one object
    and zero bytes.
  """
  encoded_url = bucket_listing_ref.url_string.encode(UTF8)
  print(encoded_url)
  return 1, 0

class LsHelper(object):
  """Helper class for ls and du.

  Expands a URL through the supplied iterator function and hands every result
  to a set of pluggable print callbacks, while accumulating directory, object
  and byte totals.
  """

  def __init__(self, iterator_func, logger,
               print_object_func=PrintObject,
               print_dir_func=PrintDir,
               print_dir_header_func=PrintDirHeader,
               print_bucket_header_func=PrintBucketHeader,
               print_dir_summary_func=PrintDirSummary,
               print_newline_func=PrintNewLine,
               all_versions=False, should_recurse=False,
               exclude_patterns=None, fields=('name',)):
    """Initializes the helper class to prepare for listing.

    Args:
      iterator_func: Function for instantiating iterator. It is called as
                     iterator_func(url_string, all_versions=...) and the
                     returned object must provide
                     IterAll(expand_top_level_buckets=...,
                             bucket_listing_fields=...).
                     Inputs-
                       url_string- Url string to iterate on. May include
                                   wildcards.
                       all_versions=False- If true, iterate over all object
                                   versions.
      logger: Logger for outputting warnings / errors.
      print_object_func: Function for printing objects. Must return a
                         (num_objects, num_bytes) tuple.
      print_dir_func: Function for printing buckets/prefixes.
      print_dir_header_func: Function for printing header line for prefixes.
      print_bucket_header_func: Function for printing header line for buckets.
      print_dir_summary_func: Function for printing size summaries about
                              buckets/prefixes. Called as
                              print_dir_summary_func(num_bytes, blr).
      print_newline_func: Function for printing new lines between dirs.
      all_versions: If true, list all object versions.
      should_recurse: If true, recursively listing buckets/prefixes.
      exclude_patterns: Patterns to exclude when listing. Matched against
                        each result's url_string with fnmatch.
      fields: Fields to request from bucket listings; this should
              include all fields that need to be populated in
              objects so they can be listed. Can be set to None
              to retrieve all object fields. Defaults to ('name',).
    """
    self._iterator_func = iterator_func
    self.logger = logger
    self._print_object_func = print_object_func
    self._print_dir_func = print_dir_func
    self._print_dir_header_func = print_dir_header_func
    self._print_bucket_header_func = print_bucket_header_func
    self._print_dir_summary_func = print_dir_summary_func
    self._print_newline_func = print_newline_func
    self.all_versions = all_versions
    self.should_recurse = should_recurse
    self.exclude_patterns = exclude_patterns
    self.bucket_listing_fields = fields

  def ExpandUrlAndPrint(self, url):
    """Iterates over the given URL and calls print functions.

    Args:
      url: StorageUrl to iterate over.

    Returns:
      (num_dirs, num_objects, num_bytes) totals accumulated over everything
      that was iterated.

    Raises:
      CommandException: if the iterator yields a listing reference that is
                        neither an object nor a prefix.
    """
    if url.IsBucket() or self.should_recurse:
      # IsBucket() implies a top-level listing.
      if url.IsBucket():
        self._print_bucket_header_func(url)
      return self._RecurseExpandUrlAndPrint(url.url_string,
                                            print_initial_newline=False)

    num_objects = 0
    num_dirs = 0
    num_bytes = 0
    print_newline = False

    # User provided a prefix or object URL, but it's impossible to tell
    # which until we do a listing and see what matches.
    top_level_iterator = PluralityCheckableIterator(self._iterator_func(
        url.CreatePrefixUrl(wildcard_suffix=None),
        all_versions=self.all_versions).IterAll(
            expand_top_level_buckets=True,
            bucket_listing_fields=self.bucket_listing_fields))
    # NOTE(review): presumably true when the listing yields more than one
    # result; prefix headers are only printed in that case.
    plurality = top_level_iterator.HasPlurality()

    for blr in top_level_iterator:
      if self._MatchesExcludedPattern(blr):
        continue
      if blr.IsObject():
        nd = 0
        no, nb = self._print_object_func(blr)
        print_newline = True
      elif blr.IsPrefix():
        # Separate this prefix from whatever was printed before it; nothing
        # is emitted ahead of the very first entry.
        if print_newline:
          self._print_newline_func()
        else:
          print_newline = True
        if plurality:
          self._print_dir_header_func(blr)
        expansion_url_str = StorageUrlFromString(
            blr.url_string).CreatePrefixUrl(wildcard_suffix='*')
        nd, no, nb = self._RecurseExpandUrlAndPrint(expansion_url_str)
        self._print_dir_summary_func(nb, blr)
      else:
        # We handle all buckets at the top level, so this should never happen.
        raise CommandException(
            'Sub-level iterator returned a CsBucketListingRef of type Bucket')
      num_objects += no
      num_dirs += nd
      num_bytes += nb
    return num_dirs, num_objects, num_bytes

  def _RecurseExpandUrlAndPrint(self, url_str, print_initial_newline=True):
    """Iterates over the given URL string and calls print functions.

    Args:
      url_str: String describing StorageUrl to iterate over.
               Must be of depth one or higher.
      print_initial_newline: If true, print a newline before recursively
                             expanded prefixes.

    Returns:
      (num_dirs, num_objects, num_bytes) totals accumulated over everything
      that was iterated.

    Raises:
      CommandException: if the iterator yields a listing reference that is
                        neither an object nor a prefix.
    """
    num_objects = 0
    num_dirs = 0
    num_bytes = 0
    for blr in self._iterator_func(
        '%s' % url_str, all_versions=self.all_versions).IterAll(
            expand_top_level_buckets=True,
            bucket_listing_fields=self.bucket_listing_fields):
      if self._MatchesExcludedPattern(blr):
        continue

      if blr.IsObject():
        nd = 0
        no, nb = self._print_object_func(blr)
      elif blr.IsPrefix():
        if self.should_recurse:
          # Only the first expanded prefix may skip its leading newline;
          # every later one is separated from the preceding output.
          if print_initial_newline:
            self._print_newline_func()
          else:
            print_initial_newline = True
          self._print_dir_header_func(blr)
          expansion_url_str = StorageUrlFromString(
              blr.url_string).CreatePrefixUrl(wildcard_suffix='*')
          nd, no, nb = self._RecurseExpandUrlAndPrint(expansion_url_str)
          self._print_dir_summary_func(nb, blr)
        else:
          # Not recursing: the prefix is listed as a single directory entry.
          nd, no, nb = 1, 0, 0
          self._print_dir_func(blr)
      else:
        # We handle all buckets at the top level, so this should never happen.
        raise CommandException(
            'Sub-level iterator returned a bucketListingRef of type Bucket')
      num_dirs += nd
      num_objects += no
      num_bytes += nb

    return num_dirs, num_objects, num_bytes

  def _MatchesExcludedPattern(self, blr):
    """Checks bucket listing reference against patterns to exclude.

    Args:
      blr: BucketListingRef to check.

    Returns:
      True if reference matches a pattern and should be excluded. False when
      no pattern matches or no exclude patterns were configured.
    """
    if not self.exclude_patterns:
      return False
    tomatch = blr.url_string
    return any(fnmatch.fnmatch(tomatch, pattern)
               for pattern in self.exclude_patterns)

« no previous file with comments | « third_party/gsutil/gslib/help_provider.py ('k') | third_party/gsutil/gslib/name_expansion.py » ('j') | no next file with comments »