Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(143)

Side by Side Diff: third_party/gsutil/gslib/commands/hash.py

Issue 1377933002: [catapult] - Copy Telemetry's gsutilz over to third_party. (Closed) Base URL: https://github.com/catapult-project/catapult.git@master
Patch Set: Rename to gsutil. Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/gsutil/gslib/commands/du.py ('k') | third_party/gsutil/gslib/commands/help.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # -*- coding: utf-8 -*-
2 # Copyright 2014 Google Inc. All Rights Reserved.
3 #
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
7 #
8 # http://www.apache.org/licenses/LICENSE-2.0
9 #
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15 """Implementation of hash command for calculating hashes of local files."""
16
17 from hashlib import md5
18 import os
19
20 import crcmod
21
22 from gslib.command import Command
23 from gslib.command_argument import CommandArgument
24 from gslib.cs_api_map import ApiSelector
25 from gslib.exception import CommandException
26 from gslib.hashing_helper import Base64EncodeHash
27 from gslib.hashing_helper import CalculateHashesFromContents
28 from gslib.hashing_helper import SLOW_CRCMOD_WARNING
29 from gslib.progress_callback import ConstructAnnounceText
30 from gslib.progress_callback import FileProgressCallbackHandler
31 from gslib.progress_callback import ProgressCallbackWithBackoff
32 from gslib.storage_url import StorageUrlFromString
33 from gslib.util import NO_MAX
34 from gslib.util import UsingCrcmodExtension
35
36 _SYNOPSIS = """
37 gsutil [-c] [-h] [-m] hash filename...
38 """
39
40 _DETAILED_HELP_TEXT = ("""
41 <B>SYNOPSIS</B>
42 """ + _SYNOPSIS + """
43
44
45 <B>DESCRIPTION</B>
46 The hash command calculates hashes on a local file that can be used to compare
47 with gsutil ls -L output. If a specific hash option is not provided, this
48 command calculates all gsutil-supported hashes for the file.
49
50 Note that gsutil automatically performs hash validation when uploading or
51 downloading files, so this command is only needed if you want to write a
52 script that separately checks the hash for some reason.
53
54 If you calculate a CRC32c hash for the file without a precompiled crcmod
55 installation, hashing will be very slow. See "gsutil help crcmod" for details.
56
57 <B>OPTIONS</B>
58 -c Calculate a CRC32c hash for the file.
59
60 -h Output hashes in hex format. By default, gsutil uses base64.
61
62 -m Calculate a MD5 hash for the file.
63 """)
64
65
class HashCommand(Command):
  """Implementation of gsutil hash command.

  Hashes each local file named on the command line with CRC32c and/or MD5 and
  prints the digests in base64 (the default) or hex.
  """

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'hash',
      command_name_aliases=[],
      usage_synopsis=_SYNOPSIS,
      min_args=1,
      max_args=NO_MAX,
      supported_sub_args='chm',
      file_url_ok=True,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreFileURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='hash',
      help_name_aliases=['checksum'],
      help_type='command_help',
      help_one_line_summary='Calculate file hashes',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  @classmethod
  def _ParseOpts(cls, sub_opts, logger):
    """Returns behavior variables based on input options.

    Args:
      sub_opts: getopt sub-arguments for the command; may be empty or None.
      logger: logging.Logger for the command.

    Returns:
      Tuple of
      calc_crc32c: Boolean, if True, command should calculate a CRC32c
          checksum.
      calc_md5: Boolean, if True, command should calculate an MD5 hash.
      format_func: Function used for formatting the hash in the desired
          format.
      output_format: String describing the hash output format.
    """
    calc_crc32c = False
    calc_md5 = False
    # Default output is base64, produced from the digester's hex digest.
    format_func = lambda digest: Base64EncodeHash(digest.hexdigest())
    found_hash_option = False
    output_format = 'base64'

    if sub_opts:
      for o, unused_a in sub_opts:
        if o == '-c':
          calc_crc32c = True
          found_hash_option = True
        elif o == '-h':
          output_format = 'hex'
          format_func = lambda digest: digest.hexdigest()
        elif o == '-m':
          calc_md5 = True
          found_hash_option = True

    # With no explicit -c/-m, compute every supported hash.
    if not found_hash_option:
      calc_crc32c = True
      calc_md5 = True

    if calc_crc32c and not UsingCrcmodExtension(crcmod):
      # Logger.warn is a deprecated alias; warning() is the supported name.
      logger.warning(SLOW_CRCMOD_WARNING)

    return calc_crc32c, calc_md5, format_func, output_format

  def _GetHashClassesFromArgs(self, calc_crc32c, calc_md5):
    """Constructs the dictionary of hashes to compute based on the arguments.

    Args:
      calc_crc32c: If True, CRC32c should be included.
      calc_md5: If True, MD5 should be included.

    Returns:
      Dictionary of {string: hash digester}, where string is the name of the
      digester algorithm ('crc32c' and/or 'md5').
    """
    hash_dict = {}
    if calc_crc32c:
      hash_dict['crc32c'] = crcmod.predefined.Crc('crc-32c')
    if calc_md5:
      hash_dict['md5'] = md5()
    return hash_dict

  def RunCommand(self):
    """Command entry point for the hash command.

    Returns:
      0 once every argument has been processed.

    Raises:
      CommandException: if an argument is not a file URL, or if no file
          matched any of the arguments.
    """
    (calc_crc32c, calc_md5, format_func, output_format) = (
        self._ParseOpts(self.sub_opts, self.logger))

    matched_one = False
    for url_str in self.args:
      if not StorageUrlFromString(url_str).IsFileUrl():
        raise CommandException('"hash" command requires a file URL')

      for file_ref in self.WildcardIterator(url_str).IterObjects():
        matched_one = True
        file_name = file_ref.storage_url.object_name
        file_size = os.path.getsize(file_name)
        callback_processor = ProgressCallbackWithBackoff(
            file_size, FileProgressCallbackHandler(
                ConstructAnnounceText('Hashing', file_name), self.logger).call)
        # A new set of digesters is built for each file.
        hash_dict = self._GetHashClassesFromArgs(calc_crc32c, calc_md5)
        with open(file_name, 'rb') as fp:
          CalculateHashesFromContents(fp, hash_dict,
                                      callback_processor=callback_processor)
        # Single-argument print(...) emits the same text as the Python 2 print
        # statement and is also valid Python 3; items() likewise exists on
        # both, unlike iteritems().
        print('Hashes [%s] for %s:' % (output_format, file_name))
        for name, digest in hash_dict.items():
          print('\tHash (%s):\t\t%s' % (name, format_func(digest)))

    if not matched_one:
      raise CommandException('No files matched')

    return 0
185
OLDNEW
« no previous file with comments | « third_party/gsutil/gslib/commands/du.py ('k') | third_party/gsutil/gslib/commands/help.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698