| Index: gslib/ls_helper.py
|
| ===================================================================
|
| --- gslib/ls_helper.py (revision 0)
|
| +++ gslib/ls_helper.py (revision 0)
|
| @@ -0,0 +1,261 @@
|
| +# -*- coding: utf-8 -*-
|
| +# Copyright 2014 Google Inc. All Rights Reserved.
|
| +#
|
| +# Licensed under the Apache License, Version 2.0 (the "License");
|
| +# you may not use this file except in compliance with the License.
|
| +# You may obtain a copy of the License at
|
| +#
|
| +# http://www.apache.org/licenses/LICENSE-2.0
|
| +#
|
| +# Unless required by applicable law or agreed to in writing, software
|
| +# distributed under the License is distributed on an "AS IS" BASIS,
|
| +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| +# See the License for the specific language governing permissions and
|
| +# limitations under the License.
|
| +"""Utility functions and class for listing commands such as ls and du."""
|
| +
|
| +from __future__ import absolute_import
|
| +
|
| +import fnmatch
|
| +
|
| +from gslib.exception import CommandException
|
| +from gslib.plurality_checkable_iterator import PluralityCheckableIterator
|
| +from gslib.util import UTF8
|
| +from gslib.wildcard_iterator import StorageUrlFromString
|
| +
|
| +
|
| +def PrintNewLine():
|
| + """Default function for printing new lines between directories."""
|
| + print
|
| +
|
| +
|
| +def PrintDirHeader(bucket_listing_ref):
|
| + """Default function for printing headers for prefixes.
|
| +
|
| + Header is printed prior to listing the contents of the prefix.
|
| +
|
| + Args:
|
| + bucket_listing_ref: BucketListingRef of type PREFIX.
|
| + """
|
| + print '%s:' % bucket_listing_ref.url_string.encode(UTF8)
|
| +
|
| +
|
| +def PrintBucketHeader(bucket_listing_ref): # pylint: disable=unused-argument
|
| + """Default function for printing headers for buckets.
|
| +
|
| + Header is printed prior to listing the contents of the bucket.
|
| +
|
| + Args:
|
| + bucket_listing_ref: BucketListingRef of type BUCKET.
|
| + """
|
| + pass
|
| +
|
| +
|
| +def PrintDir(bucket_listing_ref):
|
| + """Default function for printing buckets or prefixes.
|
| +
|
| + Args:
|
| + bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX.
|
| + """
|
| + print bucket_listing_ref.url_string.encode(UTF8)
|
| +
|
| +
|
| +# pylint: disable=unused-argument
|
| +def PrintDirSummary(num_bytes, bucket_listing_ref):
|
| + """Off-by-default function for printing buckets or prefix size summaries.
|
| +
|
| + Args:
|
| + num_bytes: Number of bytes contained in the directory.
|
| + bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX.
|
| + """
|
| + pass
|
| +
|
| +
|
| +def PrintObject(bucket_listing_ref):
|
| + """Default printing function for objects.
|
| +
|
| + Args:
|
| + bucket_listing_ref: BucketListingRef of type OBJECT.
|
| +
|
| + Returns:
|
| + (num_objects, num_bytes).
|
| + """
|
| + print bucket_listing_ref.url_string.encode(UTF8)
|
| + return (1, 0)
|
| +
|
| +
|
| +class LsHelper(object):
|
| + """Helper class for ls and du."""
|
| +
|
| + def __init__(self, iterator_func, logger,
|
| + print_object_func=PrintObject,
|
| + print_dir_func=PrintDir,
|
| + print_dir_header_func=PrintDirHeader,
|
| + print_bucket_header_func=PrintBucketHeader,
|
| + print_dir_summary_func=PrintDirSummary,
|
| + print_newline_func=PrintNewLine,
|
| + all_versions=False, should_recurse=False,
|
| + exclude_patterns=None, fields=('name',)):
|
| + """Initializes the helper class to prepare for listing.
|
| +
|
| + Args:
|
| + iterator_func: Function for instantiating iterator.
|
| + Inputs-
|
| + url_string- Url string to iterate on. May include
|
| + wildcards.
|
| + all_versions=False- If true, iterate over all object
|
| + versions.
|
| + logger: Logger for outputting warnings / errors.
|
| + print_object_func: Function for printing objects.
|
| + print_dir_func: Function for printing buckets/prefixes.
|
| + print_dir_header_func: Function for printing header line for buckets
|
| + or prefixes.
|
| + print_bucket_header_func: Function for printing header line for buckets
|
| + or prefixes.
|
| + print_dir_summary_func: Function for printing size summaries about
|
| + buckets/prefixes.
|
| + print_newline_func: Function for printing new lines between dirs.
|
| + all_versions: If true, list all object versions.
|
| + should_recurse: If true, recursively listing buckets/prefixes.
|
| + exclude_patterns: Patterns to exclude when listing.
|
| + fields: Fields to request from bucket listings; this should
|
| + include all fields that need to be populated in
|
| + objects so they can be listed. Can be set to None
|
| + to retrieve all object fields. Defaults to short
|
| + listing fields.
|
| + """
|
| + self._iterator_func = iterator_func
|
| + self.logger = logger
|
| + self._print_object_func = print_object_func
|
| + self._print_dir_func = print_dir_func
|
| + self._print_dir_header_func = print_dir_header_func
|
| + self._print_bucket_header_func = print_bucket_header_func
|
| + self._print_dir_summary_func = print_dir_summary_func
|
| + self._print_newline_func = print_newline_func
|
| + self.all_versions = all_versions
|
| + self.should_recurse = should_recurse
|
| + self.exclude_patterns = exclude_patterns
|
| + self.bucket_listing_fields = fields
|
| +
|
| + def ExpandUrlAndPrint(self, url):
|
| + """Iterates over the given URL and calls print functions.
|
| +
|
| + Args:
|
| + url: StorageUrl to iterate over.
|
| +
|
| + Returns:
|
| + (num_objects, num_bytes) total number of objects and bytes iterated.
|
| + """
|
| + num_objects = 0
|
| + num_dirs = 0
|
| + num_bytes = 0
|
| + print_newline = False
|
| +
|
| + if url.IsBucket() or self.should_recurse:
|
| + # IsBucket() implies a top-level listing.
|
| + if url.IsBucket():
|
| + self._print_bucket_header_func(url)
|
| + return self._RecurseExpandUrlAndPrint(url.url_string,
|
| + print_initial_newline=False)
|
| + else:
|
| + # User provided a prefix or object URL, but it's impossible to tell
|
| + # which until we do a listing and see what matches.
|
| + top_level_iterator = PluralityCheckableIterator(self._iterator_func(
|
| + url.CreatePrefixUrl(wildcard_suffix=None),
|
| + all_versions=self.all_versions).IterAll(
|
| + expand_top_level_buckets=True,
|
| + bucket_listing_fields=self.bucket_listing_fields))
|
| + plurality = top_level_iterator.HasPlurality()
|
| +
|
| + for blr in top_level_iterator:
|
| + if self._MatchesExcludedPattern(blr):
|
| + continue
|
| + if blr.IsObject():
|
| + nd = 0
|
| + no, nb = self._print_object_func(blr)
|
| + print_newline = True
|
| + elif blr.IsPrefix():
|
| + if print_newline:
|
| + self._print_newline_func()
|
| + else:
|
| + print_newline = True
|
| + if plurality:
|
| + self._print_dir_header_func(blr)
|
| + expansion_url_str = StorageUrlFromString(
|
| + blr.url_string).CreatePrefixUrl(wildcard_suffix='*')
|
| + nd, no, nb = self._RecurseExpandUrlAndPrint(expansion_url_str)
|
| + self._print_dir_summary_func(nb, blr)
|
| + else:
|
| + # We handle all buckets at the top level, so this should never happen.
|
| + raise CommandException(
|
| + 'Sub-level iterator returned a CsBucketListingRef of type Bucket')
|
| + num_objects += no
|
| + num_dirs += nd
|
| + num_bytes += nb
|
| + return num_dirs, num_objects, num_bytes
|
| +
|
| + def _RecurseExpandUrlAndPrint(self, url_str, print_initial_newline=True):
|
| + """Iterates over the given URL string and calls print functions.
|
| +
|
| + Args:
|
| + url_str: String describing StorageUrl to iterate over.
|
| + Must be of depth one or higher.
|
| + print_initial_newline: If true, print a newline before recursively
|
| + expanded prefixes.
|
| +
|
| + Returns:
|
| + (num_objects, num_bytes) total number of objects and bytes iterated.
|
| + """
|
| + num_objects = 0
|
| + num_dirs = 0
|
| + num_bytes = 0
|
| + for blr in self._iterator_func(
|
| + '%s' % url_str, all_versions=self.all_versions).IterAll(
|
| + expand_top_level_buckets=True,
|
| + bucket_listing_fields=self.bucket_listing_fields):
|
| + if self._MatchesExcludedPattern(blr):
|
| + continue
|
| +
|
| + if blr.IsObject():
|
| + nd = 0
|
| + no, nb = self._print_object_func(blr)
|
| + elif blr.IsPrefix():
|
| + if self.should_recurse:
|
| + if print_initial_newline:
|
| + self._print_newline_func()
|
| + else:
|
| + print_initial_newline = True
|
| + self._print_dir_header_func(blr)
|
| + expansion_url_str = StorageUrlFromString(
|
| + blr.url_string).CreatePrefixUrl(wildcard_suffix='*')
|
| +
|
| + nd, no, nb = self._RecurseExpandUrlAndPrint(expansion_url_str)
|
| + self._print_dir_summary_func(nb, blr)
|
| + else:
|
| + nd, no, nb = 1, 0, 0
|
| + self._print_dir_func(blr)
|
| + else:
|
| + # We handle all buckets at the top level, so this should never happen.
|
| + raise CommandException(
|
| + 'Sub-level iterator returned a bucketListingRef of type Bucket')
|
| + num_dirs += nd
|
| + num_objects += no
|
| + num_bytes += nb
|
| +
|
| + return num_dirs, num_objects, num_bytes
|
| +
|
| + def _MatchesExcludedPattern(self, blr):
|
| + """Checks bucket listing reference against patterns to exclude.
|
| +
|
| + Args:
|
| + blr: BucketListingRef to check.
|
| +
|
| + Returns:
|
| + True if reference matches a pattern and should be excluded.
|
| + """
|
| + if self.exclude_patterns:
|
| + tomatch = blr.url_string
|
| + for pattern in self.exclude_patterns:
|
| + if fnmatch.fnmatch(tomatch, pattern):
|
| + return True
|
| + return False
|
|
|
| Property changes on: gslib/ls_helper.py
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|