OLD | NEW |
(Empty) | |
| 1 # -*- coding: utf-8 -*- |
| 2 # Copyright 2014 Google Inc. All Rights Reserved. |
| 3 # |
| 4 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 # you may not use this file except in compliance with the License. |
| 6 # You may obtain a copy of the License at |
| 7 # |
| 8 # http://www.apache.org/licenses/LICENSE-2.0 |
| 9 # |
| 10 # Unless required by applicable law or agreed to in writing, software |
| 11 # distributed under the License is distributed on an "AS IS" BASIS, |
| 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 # See the License for the specific language governing permissions and |
| 14 # limitations under the License. |
| 15 """Implementation of hash command for calculating hashes of local files.""" |
| 16 |
| 17 from hashlib import md5 |
| 18 import os |
| 19 |
| 20 import crcmod |
| 21 |
| 22 from gslib.command import Command |
| 23 from gslib.command_argument import CommandArgument |
| 24 from gslib.cs_api_map import ApiSelector |
| 25 from gslib.exception import CommandException |
| 26 from gslib.hashing_helper import Base64EncodeHash |
| 27 from gslib.hashing_helper import CalculateHashesFromContents |
| 28 from gslib.hashing_helper import SLOW_CRCMOD_WARNING |
| 29 from gslib.progress_callback import ConstructAnnounceText |
| 30 from gslib.progress_callback import FileProgressCallbackHandler |
| 31 from gslib.progress_callback import ProgressCallbackWithBackoff |
| 32 from gslib.storage_url import StorageUrlFromString |
| 33 from gslib.util import NO_MAX |
| 34 from gslib.util import UsingCrcmodExtension |
| 35 |
# One-line usage string. Passed to the command spec as the usage synopsis and
# embedded in the SYNOPSIS section of the detailed help text below.
_SYNOPSIS = """
gsutil [-c] [-h] [-m] hash filename...
"""

# Full help text for the hash command (passed to the help spec as help_text).
# The <B>...</B> markers delimit section headings.
_DETAILED_HELP_TEXT = ("""
<B>SYNOPSIS</B>
""" + _SYNOPSIS + """


<B>DESCRIPTION</B>
The hash command calculates hashes on a local file that can be used to compare
with gsutil ls -L output. If a specific hash option is not provided, this
command calculates all gsutil-supported hashes for the file.

Note that gsutil automatically performs hash validation when uploading or
downloading files, so this command is only needed if you want to write a
script that separately checks the hash for some reason.

If you calculate a CRC32c hash for the file without a precompiled crcmod
installation, hashing will be very slow. See "gsutil help crcmod" for details.

<B>OPTIONS</B>
-c Calculate a CRC32c hash for the file.

-h Output hashes in hex format. By default, gsutil uses base64.

-m Calculate an MD5 hash for the file.
""")
| 64 |
| 65 |
class HashCommand(Command):
  """Implementation of gsutil hash command.

  Calculates CRC32c and/or MD5 hashes of local files and prints them in
  base64 (default) or hex format.
  """

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'hash',
      command_name_aliases=[],
      usage_synopsis=_SYNOPSIS,
      min_args=1,
      max_args=NO_MAX,
      supported_sub_args='chm',
      file_url_ok=True,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreFileURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='hash',
      help_name_aliases=['checksum'],
      help_type='command_help',
      help_one_line_summary='Calculate file hashes',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  @classmethod
  def _ParseOpts(cls, sub_opts, logger):
    """Returns behavior variables based on input options.

    If neither -c nor -m is given, both hashes are calculated. A warning is
    logged when CRC32c is requested and UsingCrcmodExtension(crcmod) is false.

    Args:
      sub_opts: getopt sub-arguments for the command (iterable of
          (option, value) pairs); may be empty or None.
      logger: logging.Logger for the command.

    Returns:
      Tuple of
      calc_crc32c: Boolean, if True, command should calculate a CRC32c checksum.
      calc_md5: Boolean, if True, command should calculate an MD5 hash.
      format_func: Function used for formatting the hash in the desired format.
      output_format: String describing the hash output format.
    """
    calc_crc32c = False
    calc_md5 = False
    format_func = lambda digest: Base64EncodeHash(digest.hexdigest())
    found_hash_option = False
    output_format = 'base64'

    if sub_opts:
      for o, unused_a in sub_opts:
        if o == '-c':
          calc_crc32c = True
          found_hash_option = True
        elif o == '-h':
          output_format = 'hex'
          format_func = lambda digest: digest.hexdigest()
        elif o == '-m':
          calc_md5 = True
          found_hash_option = True

    # No explicit hash selection means "calculate every supported hash".
    if not found_hash_option:
      calc_crc32c = True
      calc_md5 = True

    if calc_crc32c and not UsingCrcmodExtension(crcmod):
      # Logger.warn is a deprecated alias; warning() is the supported name.
      logger.warning(SLOW_CRCMOD_WARNING)

    return calc_crc32c, calc_md5, format_func, output_format

  def _GetHashClassesFromArgs(self, calc_crc32c, calc_md5):
    """Constructs the dictionary of hashes to compute based on the arguments.

    Args:
      calc_crc32c: If True, CRC32c should be included.
      calc_md5: If True, MD5 should be included.

    Returns:
      Dictionary of {string: hash digester}, where string is the name of the
      digester algorithm ('crc32c' and/or 'md5'). A fresh digester is created
      on every call.
    """
    hash_dict = {}
    if calc_crc32c:
      hash_dict['crc32c'] = crcmod.predefined.Crc('crc-32c')
    if calc_md5:
      hash_dict['md5'] = md5()
    return hash_dict

  def RunCommand(self):
    """Command entry point for the hash command.

    Hashes every local file matched by the command arguments and prints the
    requested hashes for each one.

    Returns:
      0 on success.

    Raises:
      CommandException: if an argument is not a file URL, or if no files
          matched any of the arguments.
    """
    (calc_crc32c, calc_md5, format_func, output_format) = (
        self._ParseOpts(self.sub_opts, self.logger))

    matched_one = False
    for url_str in self.args:
      if not StorageUrlFromString(url_str).IsFileUrl():
        raise CommandException('"hash" command requires a file URL')

      for file_ref in self.WildcardIterator(url_str).IterObjects():
        matched_one = True
        file_name = file_ref.storage_url.object_name
        file_size = os.path.getsize(file_name)
        callback_processor = ProgressCallbackWithBackoff(
            file_size, FileProgressCallbackHandler(
                ConstructAnnounceText('Hashing', file_name), self.logger).call)
        # Fresh digesters per file so hashes do not accumulate across files.
        hash_dict = self._GetHashClassesFromArgs(calc_crc32c, calc_md5)
        with open(file_name, 'rb') as fp:
          CalculateHashesFromContents(fp, hash_dict,
                                      callback_processor=callback_processor)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Hashes [%s] for %s:' % (output_format, file_name))
        # Sort by algorithm name so the output order is deterministic rather
        # than dependent on dict iteration order.
        for name in sorted(hash_dict):
          print('\tHash (%s):\t\t%s' % (name, format_func(hash_dict[name])))

    if not matched_one:
      raise CommandException('No files matched')

    return 0
| 185 |
OLD | NEW |