OLD | NEW |
| (Empty) |
1 # -*- coding: utf-8 -*- | |
2 # Copyright 2014 Google Inc. All Rights Reserved. | |
3 # | |
4 # Licensed under the Apache License, Version 2.0 (the "License"); | |
5 # you may not use this file except in compliance with the License. | |
6 # You may obtain a copy of the License at | |
7 # | |
8 # http://www.apache.org/licenses/LICENSE-2.0 | |
9 # | |
10 # Unless required by applicable law or agreed to in writing, software | |
11 # distributed under the License is distributed on an "AS IS" BASIS, | |
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 # See the License for the specific language governing permissions and | |
14 # limitations under the License. | |
15 """Implementation of hash command for calculating hashes of local files.""" | |
16 | |
17 from hashlib import md5 | |
18 import os | |
19 | |
20 import crcmod | |
21 | |
22 from gslib.command import Command | |
23 from gslib.command_argument import CommandArgument | |
24 from gslib.cs_api_map import ApiSelector | |
25 from gslib.exception import CommandException | |
26 from gslib.hashing_helper import Base64EncodeHash | |
27 from gslib.hashing_helper import CalculateHashesFromContents | |
28 from gslib.hashing_helper import SLOW_CRCMOD_WARNING | |
29 from gslib.progress_callback import ConstructAnnounceText | |
30 from gslib.progress_callback import FileProgressCallbackHandler | |
31 from gslib.progress_callback import ProgressCallbackWithBackoff | |
32 from gslib.storage_url import StorageUrlFromString | |
33 from gslib.util import NO_MAX | |
34 from gslib.util import UsingCrcmodExtension | |
35 | |
# Usage line embedded in the detailed help text below and passed to the command
# spec as its usage synopsis. -c/-h/-m are sub-options of the hash command, so
# they are listed after the command name rather than before it.
_SYNOPSIS = """
  gsutil hash [-c] [-h] [-m] filename...
"""

# Full help page for the command: synopsis, description and per-option help.
_DETAILED_HELP_TEXT = ("""
<B>SYNOPSIS</B>
""" + _SYNOPSIS + """


<B>DESCRIPTION</B>
  The hash command calculates hashes on a local file that can be used to compare
  with gsutil ls -L output. If a specific hash option is not provided, this
  command calculates all gsutil-supported hashes for the file.

  Note that gsutil automatically performs hash validation when uploading or
  downloading files, so this command is only needed if you want to write a
  script that separately checks the hash for some reason.

  If you calculate a CRC32c hash for the file without a precompiled crcmod
  installation, hashing will be very slow. See "gsutil help crcmod" for details.

<B>OPTIONS</B>
  -c          Calculate a CRC32c hash for the file.

  -h          Output hashes in hex format. By default, gsutil uses base64.

  -m          Calculate an MD5 hash for the file.
""")
64 | |
65 | |
class HashCommand(Command):
  """Implementation of gsutil hash command.

  Computes CRC32c and/or MD5 digests of local files and prints them in base64
  (the default) or hex.
  """

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'hash',
      command_name_aliases=[],
      usage_synopsis=_SYNOPSIS,
      min_args=1,
      max_args=NO_MAX,
      supported_sub_args='chm',
      file_url_ok=True,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreFileURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='hash',
      help_name_aliases=['checksum'],
      help_type='command_help',
      help_one_line_summary='Calculate file hashes',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  @classmethod
  def _ParseOpts(cls, sub_opts, logger):
    """Returns behavior variables based on input options.

    Args:
      sub_opts: getopt sub-arguments for the command.
      logger: logging.Logger for the command.

    Returns:
      Tuple of
      calc_crc32c: Boolean, if True, command should calculate a CRC32c checksum.
      calc_md5: Boolean, if True, command should calculate an MD5 hash.
      format_func: Function used for formatting the hash in the desired format.
      output_format: String describing the hash output format.
    """
    calc_crc32c = False
    calc_md5 = False
    format_func = lambda digest: Base64EncodeHash(digest.hexdigest())
    found_hash_option = False
    output_format = 'base64'

    if sub_opts:
      for o, unused_a in sub_opts:
        if o == '-c':
          calc_crc32c = True
          found_hash_option = True
        elif o == '-h':
          output_format = 'hex'
          format_func = lambda digest: digest.hexdigest()
        elif o == '-m':
          calc_md5 = True
          found_hash_option = True

    # With no explicit hash selection, compute every supported hash.
    if not found_hash_option:
      calc_crc32c = True
      calc_md5 = True

    if calc_crc32c and not UsingCrcmodExtension(crcmod):
      # logger.warning is the supported spelling; logger.warn is a deprecated
      # alias.
      logger.warning(SLOW_CRCMOD_WARNING)

    return calc_crc32c, calc_md5, format_func, output_format

  def _GetHashClassesFromArgs(self, calc_crc32c, calc_md5):
    """Constructs the dictionary of hashes to compute based on the arguments.

    Args:
      calc_crc32c: If True, CRC32c should be included.
      calc_md5: If True, MD5 should be included.

    Returns:
      Dictionary of {string: hash digester}, where string is the name of the
          digester algorithm ('crc32c' and/or 'md5').
    """
    hash_dict = {}
    if calc_crc32c:
      hash_dict['crc32c'] = crcmod.predefined.Crc('crc-32c')
    if calc_md5:
      hash_dict['md5'] = md5()
    return hash_dict

  def RunCommand(self):
    """Command entry point for the hash command.

    Returns:
      0 on success.

    Raises:
      CommandException: if an argument is not a file URL, or if no file
          matched any of the arguments.
    """
    (calc_crc32c, calc_md5, format_func, output_format) = (
        self._ParseOpts(self.sub_opts, self.logger))

    matched_one = False
    for url_str in self.args:
      if not StorageUrlFromString(url_str).IsFileUrl():
        raise CommandException('"hash" command requires a file URL')

      for file_ref in self.WildcardIterator(url_str).IterObjects():
        matched_one = True
        file_name = file_ref.storage_url.object_name
        file_size = os.path.getsize(file_name)
        callback_processor = ProgressCallbackWithBackoff(
            file_size, FileProgressCallbackHandler(
                ConstructAnnounceText('Hashing', file_name), self.logger).call)
        # A new set of digesters is built for every file so that results from
        # different files are never mixed.
        hash_dict = self._GetHashClassesFromArgs(calc_crc32c, calc_md5)
        with open(file_name, 'rb') as fp:
          CalculateHashesFromContents(fp, hash_dict,
                                      callback_processor=callback_processor)
        # Single-argument print(...) and .items() behave identically under
        # Python 2 and also run under Python 3.
        print('Hashes [%s] for %s:' % (output_format, file_name))
        for name, digest in hash_dict.items():
          print('\tHash (%s):\t\t%s' % (name, format_func(digest)))

    if not matched_one:
      raise CommandException('No files matched')

    return 0
185 | |
OLD | NEW |