Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1156)

Unified Diff: third_party/gsutil/gslib/commands/hash.py

Issue 1377933002: [catapult] - Copy Telemetry's gsutilz over to third_party. (Closed) Base URL: https://github.com/catapult-project/catapult.git@master
Patch Set: Rename to gsutil. Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/gsutil/gslib/commands/du.py ('k') | third_party/gsutil/gslib/commands/help.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/gsutil/gslib/commands/hash.py
diff --git a/third_party/gsutil/gslib/commands/hash.py b/third_party/gsutil/gslib/commands/hash.py
new file mode 100644
index 0000000000000000000000000000000000000000..cfe47778465fae34cc9e3ece738ad43a2c23f592
--- /dev/null
+++ b/third_party/gsutil/gslib/commands/hash.py
@@ -0,0 +1,185 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Implementation of hash command for calculating hashes of local files."""
+
+from hashlib import md5
+import os
+
+import crcmod
+
+from gslib.command import Command
+from gslib.command_argument import CommandArgument
+from gslib.cs_api_map import ApiSelector
+from gslib.exception import CommandException
+from gslib.hashing_helper import Base64EncodeHash
+from gslib.hashing_helper import CalculateHashesFromContents
+from gslib.hashing_helper import SLOW_CRCMOD_WARNING
+from gslib.progress_callback import ConstructAnnounceText
+from gslib.progress_callback import FileProgressCallbackHandler
+from gslib.progress_callback import ProgressCallbackWithBackoff
+from gslib.storage_url import StorageUrlFromString
+from gslib.util import NO_MAX
+from gslib.util import UsingCrcmodExtension
+
+# Usage line for the hash command; it is passed as usage_synopsis and embedded
+# in the detailed help text. The -c/-h/-m flags are sub-options of "hash" (see
+# supported_sub_args), so they are listed after the command name, not before.
+_SYNOPSIS = """
+ gsutil hash [-c] [-h] [-m] filename...
+"""
+
+# Full help text for the hash command, used as help_text in the help spec.
+# _SYNOPSIS is spliced in under the SYNOPSIS heading.
+_DETAILED_HELP_TEXT = ("""
+<B>SYNOPSIS</B>
+""" + _SYNOPSIS + """
+
+
+<B>DESCRIPTION</B>
+ The hash command calculates hashes on a local file that can be used to compare
+ with gsutil ls -L output. If a specific hash option is not provided, this
+ command calculates all gsutil-supported hashes for the file.
+
+ Note that gsutil automatically performs hash validation when uploading or
+ downloading files, so this command is only needed if you want to write a
+ script that separately checks the hash for some reason.
+
+ If you calculate a CRC32c hash for the file without a precompiled crcmod
+ installation, hashing will be very slow. See "gsutil help crcmod" for details.
+
+<B>OPTIONS</B>
+ -c Calculate a CRC32c hash for the file.
+
+ -h Output hashes in hex format. By default, gsutil uses base64.
+
+ -m Calculate an MD5 hash for the file.
+""")
+
+
+class HashCommand(Command):
+ """Implementation of gsutil hash command.
+
+ Computes CRC32c and/or MD5 hashes of local files and prints them in either
+ base64 (the default) or hex.
+ """
+
+ # Command specification. See base class for documentation.
+ command_spec = Command.CreateCommandSpec(
+ 'hash',
+ command_name_aliases=[],
+ usage_synopsis=_SYNOPSIS,
+ min_args=1,
+ max_args=NO_MAX,
+ # Sub-options -c, -h and -m; they are interpreted in _ParseOpts.
+ supported_sub_args='chm',
+ file_url_ok=True,
+ provider_url_ok=False,
+ urls_start_arg=0,
+ gs_api_support=[ApiSelector.JSON],
+ gs_default_api=ApiSelector.JSON,
+ argparse_arguments=[
+ CommandArgument.MakeZeroOrMoreFileURLsArgument()
+ ]
+ )
+ # Help specification. See help_provider.py for documentation.
+ help_spec = Command.HelpSpec(
+ help_name='hash',
+ help_name_aliases=['checksum'],
+ help_type='command_help',
+ help_one_line_summary='Calculate file hashes',
+ help_text=_DETAILED_HELP_TEXT,
+ subcommand_help_text={},
+ )
+
+ @classmethod
+ def _ParseOpts(cls, sub_opts, logger):
+ """Returns behavior variables based on input options.
+
+ If neither -c nor -m is given, both hashes are selected. The -h option only
+ switches the output format and does not count as selecting a hash.
+
+ Args:
+ sub_opts: getopt sub-arguments for the command.
+ logger: logging.Logger for the command.
+
+ Returns:
+ Tuple of
+ calc_crc32c: Boolean, if True, command should calculate a CRC32c checksum.
+ calc_md5: Boolean, if True, command should calculate an MD5 hash.
+ format_func: Function used for formatting the hash in the desired format.
+ output_format: String describing the hash output format.
+ """
+ calc_crc32c = False
+ calc_md5 = False
+ # Default formatter: the digest's hex string is passed to Base64EncodeHash.
+ format_func = lambda digest: Base64EncodeHash(digest.hexdigest())
+ # Tracks whether -c or -m was seen, so the "all hashes" default can apply.
+ found_hash_option = False
+ output_format = 'base64'
+
+ if sub_opts:
+ for o, unused_a in sub_opts:
+ if o == '-c':
+ calc_crc32c = True
+ found_hash_option = True
+ elif o == '-h':
+ # Hex output: use the digest's hex string directly.
+ output_format = 'hex'
+ format_func = lambda digest: digest.hexdigest()
+ elif o == '-m':
+ calc_md5 = True
+ found_hash_option = True
+
+ # No explicit hash selection: compute both CRC32c and MD5.
+ if not found_hash_option:
+ calc_crc32c = True
+ calc_md5 = True
+
+ # Log SLOW_CRCMOD_WARNING when CRC32c is requested and
+ # UsingCrcmodExtension(crcmod) reports false.
+ if calc_crc32c and not UsingCrcmodExtension(crcmod):
+ logger.warn(SLOW_CRCMOD_WARNING)
+
+ return calc_crc32c, calc_md5, format_func, output_format
+
+ def _GetHashClassesFromArgs(self, calc_crc32c, calc_md5):
+ """Constructs the dictionary of hashes to compute based on the arguments.
+
+ New digester objects are created on every call.
+
+ Args:
+ calc_crc32c: If True, CRC32c should be included.
+ calc_md5: If True, MD5 should be included.
+
+ Returns:
+ Dictionary of {string: hash digester}, where string is the name of the
+ digester algorithm ('crc32c' and/or 'md5').
+ """
+ hash_dict = {}
+ if calc_crc32c:
+ hash_dict['crc32c'] = crcmod.predefined.Crc('crc-32c')
+ if calc_md5:
+ hash_dict['md5'] = md5()
+ return hash_dict
+
+ def RunCommand(self):
+ """Command entry point for the hash command.
+
+ Prints the selected hashes for the files matched by the command arguments.
+
+ Returns:
+ 0 on completion.
+
+ Raises:
+ CommandException: if an argument is not a file URL, or if no files
+ matched.
+ """
+ (calc_crc32c, calc_md5, format_func, output_format) = (
+ self._ParseOpts(self.sub_opts, self.logger))
+
+ matched_one = False
+ for url_str in self.args:
+ # Reject any argument that is not a file URL before expanding it.
+ if not StorageUrlFromString(url_str).IsFileUrl():
+ raise CommandException('"hash" command requires a file URL')
+
+ # Hash every object yielded by the wildcard iterator for this argument.
+ for file_ref in self.WildcardIterator(url_str).IterObjects():
+ matched_one = True
+ file_name = file_ref.storage_url.object_name
+ # The file size is handed to the progress callback.
+ file_size = os.path.getsize(file_name)
+ callback_processor = ProgressCallbackWithBackoff(
+ file_size, FileProgressCallbackHandler(
+ ConstructAnnounceText('Hashing', file_name), self.logger).call)
+ # Build new digester objects for this file.
+ hash_dict = self._GetHashClassesFromArgs(calc_crc32c, calc_md5)
+ with open(file_name, 'rb') as fp:
+ CalculateHashesFromContents(fp, hash_dict,
+ callback_processor=callback_processor)
+ # One header line per file, then one line per computed hash.
+ # NOTE(review): the print statement and dict.iteritems() are Python 2 only.
+ print 'Hashes [%s] for %s:' % (output_format, file_name)
+ for name, digest in hash_dict.iteritems():
+ print '\tHash (%s):\t\t%s' % (name, format_func(digest))
+
+ if not matched_one:
+ raise CommandException('No files matched')
+
+ return 0
+
« no previous file with comments | « third_party/gsutil/gslib/commands/du.py ('k') | third_party/gsutil/gslib/commands/help.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698