Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(429)

Side by Side Diff: gslib/tests/test_cp.py

Issue 698893003: Update checked in version of gsutil to version 4.6 (Closed) Base URL: http://dart.googlecode.com/svn/third_party/gsutil/
Patch Set: Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « gslib/tests/test_cors.py ('k') | gslib/tests/test_cp_funcs.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # -*- coding: utf-8 -*- 1 # -*- coding: utf-8 -*-
2 #
3 # Copyright 2013 Google Inc. All Rights Reserved. 2 # Copyright 2013 Google Inc. All Rights Reserved.
4 # 3 #
5 # Licensed under the Apache License, Version 2.0 (the "License"); 4 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License. 5 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at 6 # You may obtain a copy of the License at
8 # 7 #
9 # http://www.apache.org/licenses/LICENSE-2.0 8 # http://www.apache.org/licenses/LICENSE-2.0
10 # 9 #
11 # Unless required by applicable law or agreed to in writing, software 10 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS, 11 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and 13 # See the License for the specific language governing permissions and
15 # limitations under the License. 14 # limitations under the License.
15 """Integration tests for cp command."""
16 16
17 import boto 17 from __future__ import absolute_import
18
19 import base64
20 import binascii
18 import datetime 21 import datetime
19 import os 22 import os
20 import pkgutil 23 import pkgutil
24 import random
21 import re 25 import re
26 import string
27
28 import boto
29 from boto import storage_uri
30
31 from gslib.copy_helper import GetTrackerFilePath
32 from gslib.copy_helper import TrackerFileType
33 from gslib.cs_api_map import ApiSelector
34 from gslib.hashing_helper import CalculateMd5FromContents
35 from gslib.storage_url import StorageUrlFromString
22 import gslib.tests.testcase as testcase 36 import gslib.tests.testcase as testcase
37 from gslib.tests.testcase.base import NotParallelizable
38 from gslib.tests.testcase.integration_testcase import SkipForS3
23 from gslib.tests.util import HAS_S3_CREDS 39 from gslib.tests.util import HAS_S3_CREDS
24 from gslib.tests.util import unittest
25
26 from boto import storage_uri
27 from boto.storage_uri import BucketStorageUri
28 from gslib.commands.config import DEFAULT_PARALLEL_COMPOSITE_UPLOAD_THRESHOLD
29 from gslib.commands.cp import FilterExistingComponents
30 from gslib.commands.cp import MakeGsUri
31 from gslib.commands.cp import ObjectFromTracker
32 from gslib.commands.cp import PerformResumableUploadIfAppliesArgs
33 from gslib.storage_uri_builder import StorageUriBuilder
34 from gslib.tests.util import ObjectToURI as suri 40 from gslib.tests.util import ObjectToURI as suri
35 from gslib.tests.util import PerformsFileToObjectUpload 41 from gslib.tests.util import PerformsFileToObjectUpload
36 from gslib.tests.util import SetBotoConfigForTest 42 from gslib.tests.util import SetBotoConfigForTest
37 from gslib.util import CreateLock 43 from gslib.tests.util import unittest
38 from gslib.util import IS_WINDOWS 44 from gslib.util import IS_WINDOWS
45 from gslib.util import ONE_KB
39 from gslib.util import Retry 46 from gslib.util import Retry
40 from gslib.util import TWO_MB 47 from gslib.util import START_CALLBACK_PER_BYTES
48 from gslib.util import UTF8
41 49
42 50
43 class TestCp(testcase.GsUtilIntegrationTestCase): 51 class TestCp(testcase.GsUtilIntegrationTestCase):
44 """Integration tests for cp command.""" 52 """Integration tests for cp command."""
45 53
54 # For tests that artificially halt, we need to ensure at least one callback
55 # occurs.
56 halt_size = START_CALLBACK_PER_BYTES * 2
57
46 def _get_test_file(self, name): 58 def _get_test_file(self, name):
47 contents = pkgutil.get_data('gslib', 'tests/test_data/%s' % name) 59 contents = pkgutil.get_data('gslib', 'tests/test_data/%s' % name)
48 return self.CreateTempFile(file_name=name, contents=contents) 60 return self.CreateTempFile(file_name=name, contents=contents)
49 61
50 @PerformsFileToObjectUpload 62 @PerformsFileToObjectUpload
51 def test_noclobber(self): 63 def test_noclobber(self):
52 key_uri = self.CreateObject(contents='foo') 64 key_uri = self.CreateObject(contents='foo')
53 fpath = self.CreateTempFile(contents='bar') 65 fpath = self.CreateTempFile(contents='bar')
54 stderr = self.RunGsUtil(['cp', '-n', fpath, suri(key_uri)], 66 stderr = self.RunGsUtil(['cp', '-n', fpath, suri(key_uri)],
55 return_stderr=True) 67 return_stderr=True)
56 self.assertIn('Skipping existing item: %s' % suri(key_uri), stderr) 68 self.assertIn('Skipping existing item: %s' % suri(key_uri), stderr)
57 self.assertEqual(key_uri.get_contents_as_string(), 'foo') 69 self.assertEqual(key_uri.get_contents_as_string(), 'foo')
58 stderr = self.RunGsUtil(['cp', '-n', suri(key_uri), fpath], 70 stderr = self.RunGsUtil(['cp', '-n', suri(key_uri), fpath],
59 return_stderr=True) 71 return_stderr=True)
60 with open(fpath, 'r') as f: 72 with open(fpath, 'r') as f:
61 self.assertIn('Skipping existing item: %s' % suri(f), stderr) 73 self.assertIn('Skipping existing item: %s' % suri(f), stderr)
62 self.assertEqual(f.read(), 'bar') 74 self.assertEqual(f.read(), 'bar')
63 75
76 def test_dest_bucket_not_exist(self):
77 fpath = self.CreateTempFile(contents='foo')
78 invalid_bucket_uri = (
79 '%s://%s' % (self.default_provider, self.nonexistent_bucket_name))
80 stderr = self.RunGsUtil(['cp', fpath, invalid_bucket_uri],
81 expected_status=1, return_stderr=True)
82 self.assertIn('does not exist.', stderr)
83
64 def test_copy_in_cloud_noclobber(self): 84 def test_copy_in_cloud_noclobber(self):
65 bucket1_uri = self.CreateBucket() 85 bucket1_uri = self.CreateBucket()
66 bucket2_uri = self.CreateBucket() 86 bucket2_uri = self.CreateBucket()
67 key_uri = self.CreateObject(bucket_uri=bucket1_uri, contents='foo') 87 key_uri = self.CreateObject(bucket_uri=bucket1_uri, contents='foo')
68 stderr = self.RunGsUtil(['cp', suri(key_uri), suri(bucket2_uri)], 88 stderr = self.RunGsUtil(['cp', suri(key_uri), suri(bucket2_uri)],
69 return_stderr=True) 89 return_stderr=True)
70 self.assertEqual(stderr.count('Copying'), 1) 90 self.assertEqual(stderr.count('Copying'), 1)
71 stderr = self.RunGsUtil(['cp', '-n', suri(key_uri), suri(bucket2_uri)], 91 stderr = self.RunGsUtil(['cp', '-n', suri(key_uri), suri(bucket2_uri)],
72 return_stderr=True) 92 return_stderr=True)
73 self.assertIn('Skipping existing item: %s' % suri(bucket2_uri, 93 self.assertIn('Skipping existing item: %s' %
74 key_uri.object_name), stderr) 94 suri(bucket2_uri, key_uri.object_name), stderr)
75 95
76 def _run_streaming_test(self, provider): 96 @PerformsFileToObjectUpload
77 bucket_uri = self.CreateBucket(provider=provider) 97 def test_streaming(self):
98 bucket_uri = self.CreateBucket()
78 stderr = self.RunGsUtil(['cp', '-', '%s' % suri(bucket_uri, 'foo')], 99 stderr = self.RunGsUtil(['cp', '-', '%s' % suri(bucket_uri, 'foo')],
79 stdin='bar', return_stderr=True) 100 stdin='bar', return_stderr=True)
80 self.assertIn('Copying from <STDIN>', stderr) 101 self.assertIn('Copying from <STDIN>', stderr)
81 key_uri = bucket_uri.clone_replace_name('foo') 102 key_uri = bucket_uri.clone_replace_name('foo')
82 self.assertEqual(key_uri.get_contents_as_string(), 'bar') 103 self.assertEqual(key_uri.get_contents_as_string(), 'bar')
83 104
84 @unittest.skipUnless(HAS_S3_CREDS, 'Test requires S3 credentials.') 105 def test_streaming_multiple_arguments(self):
85 def test_streaming_s3(self): 106 bucket_uri = self.CreateBucket()
86 self._run_streaming_test('s3') 107 stderr = self.RunGsUtil(['cp', '-', '-', suri(bucket_uri)],
87 108 stdin='bar', return_stderr=True, expected_status=1)
88 109 self.assertIn('Multiple URL strings are not supported with streaming',
89 @PerformsFileToObjectUpload 110 stderr)
90 def test_streaming_gs(self):
91 self._run_streaming_test('gs')
92 111
93 # TODO: Implement a way to test both with and without using magic file. 112 # TODO: Implement a way to test both with and without using magic file.
94 113
95 @PerformsFileToObjectUpload 114 @PerformsFileToObjectUpload
96 def test_detect_content_type(self): 115 def test_detect_content_type(self):
116 """Tests local detection of content type."""
97 bucket_uri = self.CreateBucket() 117 bucket_uri = self.CreateBucket()
98 dsturi = suri(bucket_uri, 'foo') 118 dsturi = suri(bucket_uri, 'foo')
99 119
100 self.RunGsUtil(['cp', self._get_test_file('test.mp3'), dsturi]) 120 self.RunGsUtil(['cp', self._get_test_file('test.mp3'), dsturi])
101 # Use @Retry as hedge against bucket listing eventual consistency. 121
102 @Retry(AssertionError, tries=3, timeout_secs=1) 122 # Use @Retry as hedge against bucket listing eventual consistency.
103 def _Check1(): 123 @Retry(AssertionError, tries=3, timeout_secs=1)
104 stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True) 124 def _Check1():
105 self.assertRegexpMatches(stdout, 'Content-Type:\s+audio/mpeg') 125 stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
126 if IS_WINDOWS:
127 self.assertTrue(
128 re.search(r'Content-Type:\s+audio/x-mpg', stdout) or
129 re.search(r'Content-Type:\s+audio/mpeg', stdout))
130 else:
131 self.assertRegexpMatches(stdout, r'Content-Type:\s+audio/mpeg')
106 _Check1() 132 _Check1()
107 133
108 self.RunGsUtil(['cp', self._get_test_file('test.gif'), dsturi]) 134 self.RunGsUtil(['cp', self._get_test_file('test.gif'), dsturi])
109 # Use @Retry as hedge against bucket listing eventual consistency. 135
110 @Retry(AssertionError, tries=3, timeout_secs=1) 136 # Use @Retry as hedge against bucket listing eventual consistency.
111 def _Check2(): 137 @Retry(AssertionError, tries=3, timeout_secs=1)
112 stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True) 138 def _Check2():
113 self.assertRegexpMatches(stdout, 'Content-Type:\s+image/gif') 139 stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
114 _Check2() 140 self.assertRegexpMatches(stdout, r'Content-Type:\s+image/gif')
115 141 _Check2()
116 def test_content_type_override(self): 142
143 def test_content_type_override_default(self):
144 """Tests overriding content type with the default value."""
117 bucket_uri = self.CreateBucket() 145 bucket_uri = self.CreateBucket()
118 dsturi = suri(bucket_uri, 'foo') 146 dsturi = suri(bucket_uri, 'foo')
119 147
120 self.RunGsUtil(['-h', 'Content-Type:', 'cp', 148 self.RunGsUtil(['-h', 'Content-Type:', 'cp',
121 self._get_test_file('test.mp3'), dsturi]) 149 self._get_test_file('test.mp3'), dsturi])
122 # Use @Retry as hedge against bucket listing eventual consistency. 150
123 @Retry(AssertionError, tries=3, timeout_secs=1) 151 # Use @Retry as hedge against bucket listing eventual consistency.
124 def _Check1(): 152 @Retry(AssertionError, tries=3, timeout_secs=1)
125 stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True) 153 def _Check1():
126 self.assertRegexpMatches(stdout, 'Content-Type:\s+binary/octet-stream') 154 stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
155 self.assertRegexpMatches(stdout,
156 r'Content-Type:\s+application/octet-stream')
127 _Check1() 157 _Check1()
128 158
129 self.RunGsUtil(['-h', 'Content-Type:', 'cp', 159 self.RunGsUtil(['-h', 'Content-Type:', 'cp',
130 self._get_test_file('test.gif'), dsturi]) 160 self._get_test_file('test.gif'), dsturi])
131 # Use @Retry as hedge against bucket listing eventual consistency. 161
132 @Retry(AssertionError, tries=3, timeout_secs=1) 162 # Use @Retry as hedge against bucket listing eventual consistency.
133 def _Check2(): 163 @Retry(AssertionError, tries=3, timeout_secs=1)
134 stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True) 164 def _Check2():
135 self.assertRegexpMatches(stdout, 'Content-Type:\s+binary/octet-stream') 165 stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
136 _Check2() 166 self.assertRegexpMatches(stdout,
137 167 r'Content-Type:\s+application/octet-stream')
138 @PerformsFileToObjectUpload 168 _Check2()
139 def test_foo_noct(self): 169
170 def test_content_type_override(self):
171 """Tests overriding content type with a value."""
172 bucket_uri = self.CreateBucket()
173 dsturi = suri(bucket_uri, 'foo')
174
175 self.RunGsUtil(['-h', 'Content-Type:text/plain', 'cp',
176 self._get_test_file('test.mp3'), dsturi])
177
178 # Use @Retry as hedge against bucket listing eventual consistency.
179 @Retry(AssertionError, tries=3, timeout_secs=1)
180 def _Check1():
181 stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
182 self.assertRegexpMatches(stdout, r'Content-Type:\s+text/plain')
183 _Check1()
184
185 self.RunGsUtil(['-h', 'Content-Type:text/plain', 'cp',
186 self._get_test_file('test.gif'), dsturi])
187
188 # Use @Retry as hedge against bucket listing eventual consistency.
189 @Retry(AssertionError, tries=3, timeout_secs=1)
190 def _Check2():
191 stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
192 self.assertRegexpMatches(stdout, r'Content-Type:\s+text/plain')
193 _Check2()
194
195 @unittest.skipIf(IS_WINDOWS, 'magicfile is not available on Windows.')
196 @PerformsFileToObjectUpload
197 def test_magicfile_override(self):
198 """Tests content type override with magicfile value."""
140 bucket_uri = self.CreateBucket() 199 bucket_uri = self.CreateBucket()
141 dsturi = suri(bucket_uri, 'foo') 200 dsturi = suri(bucket_uri, 'foo')
142 fpath = self.CreateTempFile(contents='foo/bar\n') 201 fpath = self.CreateTempFile(contents='foo/bar\n')
143 self.RunGsUtil(['cp', fpath, dsturi]) 202 self.RunGsUtil(['cp', fpath, dsturi])
144 # Use @Retry as hedge against bucket listing eventual consistency. 203
145 @Retry(AssertionError, tries=3, timeout_secs=1) 204 # Use @Retry as hedge against bucket listing eventual consistency.
146 def _Check1(): 205 @Retry(AssertionError, tries=3, timeout_secs=1)
147 stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True) 206 def _Check1():
148 USE_MAGICFILE = boto.config.getbool('GSUtil', 'use_magicfile', False) 207 stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
149 content_type = ('text/plain' if USE_MAGICFILE 208 use_magicfile = boto.config.getbool('GSUtil', 'use_magicfile', False)
209 content_type = ('text/plain' if use_magicfile
150 else 'application/octet-stream') 210 else 'application/octet-stream')
151 self.assertRegexpMatches(stdout, 'Content-Type:\s+%s' % content_type) 211 self.assertRegexpMatches(stdout, r'Content-Type:\s+%s' % content_type)
152 _Check1() 212 _Check1()
153 213
154 @PerformsFileToObjectUpload 214 @PerformsFileToObjectUpload
155 def test_content_type_mismatches(self): 215 def test_content_type_mismatches(self):
216 """Tests overriding content type when it does not match the file type."""
156 bucket_uri = self.CreateBucket() 217 bucket_uri = self.CreateBucket()
157 dsturi = suri(bucket_uri, 'foo') 218 dsturi = suri(bucket_uri, 'foo')
158 fpath = self.CreateTempFile(contents='foo/bar\n') 219 fpath = self.CreateTempFile(contents='foo/bar\n')
159 220
160 self.RunGsUtil(['-h', 'Content-Type:image/gif', 'cp', 221 self.RunGsUtil(['-h', 'Content-Type:image/gif', 'cp',
161 self._get_test_file('test.mp3'), dsturi]) 222 self._get_test_file('test.mp3'), dsturi])
162 # Use @Retry as hedge against bucket listing eventual consistency. 223
163 @Retry(AssertionError, tries=3, timeout_secs=1) 224 # Use @Retry as hedge against bucket listing eventual consistency.
164 def _Check1(): 225 @Retry(AssertionError, tries=3, timeout_secs=1)
165 stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True) 226 def _Check1():
166 self.assertRegexpMatches(stdout, 'Content-Type:\s+image/gif') 227 stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
228 self.assertRegexpMatches(stdout, r'Content-Type:\s+image/gif')
167 _Check1() 229 _Check1()
168 230
169 self.RunGsUtil(['-h', 'Content-Type:image/gif', 'cp', 231 self.RunGsUtil(['-h', 'Content-Type:image/gif', 'cp',
170 self._get_test_file('test.gif'), dsturi]) 232 self._get_test_file('test.gif'), dsturi])
171 # Use @Retry as hedge against bucket listing eventual consistency. 233
172 @Retry(AssertionError, tries=3, timeout_secs=1) 234 # Use @Retry as hedge against bucket listing eventual consistency.
173 def _Check2(): 235 @Retry(AssertionError, tries=3, timeout_secs=1)
174 stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True) 236 def _Check2():
175 self.assertRegexpMatches(stdout, 'Content-Type:\s+image/gif') 237 stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
238 self.assertRegexpMatches(stdout, r'Content-Type:\s+image/gif')
176 _Check2() 239 _Check2()
177 240
178 self.RunGsUtil(['-h', 'Content-Type:image/gif', 'cp', fpath, dsturi]) 241 self.RunGsUtil(['-h', 'Content-Type:image/gif', 'cp', fpath, dsturi])
242
179 # Use @Retry as hedge against bucket listing eventual consistency. 243 # Use @Retry as hedge against bucket listing eventual consistency.
180 @Retry(AssertionError, tries=3, timeout_secs=1) 244 @Retry(AssertionError, tries=3, timeout_secs=1)
181 def _Check3(): 245 def _Check3():
182 stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True) 246 stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
183 self.assertRegexpMatches(stdout, 'Content-Type:\s+image/gif') 247 self.assertRegexpMatches(stdout, r'Content-Type:\s+image/gif')
184 _Check3() 248 _Check3()
185 249
186 @PerformsFileToObjectUpload 250 @PerformsFileToObjectUpload
187 def test_content_type_header_case_insensitive(self): 251 def test_content_type_header_case_insensitive(self):
252 """Tests that content type header is treated with case insensitivity."""
188 bucket_uri = self.CreateBucket() 253 bucket_uri = self.CreateBucket()
189 dsturi = suri(bucket_uri, 'foo') 254 dsturi = suri(bucket_uri, 'foo')
190 fpath = self._get_test_file('test.gif') 255 fpath = self._get_test_file('test.gif')
191 256
192 self.RunGsUtil(['-h', 'content-Type:text/plain', 'cp', 257 self.RunGsUtil(['-h', 'content-Type:text/plain', 'cp',
193 fpath, dsturi]) 258 fpath, dsturi])
194 # Use @Retry as hedge against bucket listing eventual consistency. 259
195 @Retry(AssertionError, tries=3, timeout_secs=1) 260 # Use @Retry as hedge against bucket listing eventual consistency.
196 def _Check1(): 261 @Retry(AssertionError, tries=3, timeout_secs=1)
197 stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True) 262 def _Check1():
198 self.assertRegexpMatches(stdout, 'Content-Type:\s+text/plain') 263 stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
199 self.assertNotRegexpMatches(stdout, 'image/gif') 264 self.assertRegexpMatches(stdout, r'Content-Type:\s+text/plain')
265 self.assertNotRegexpMatches(stdout, r'image/gif')
200 _Check1() 266 _Check1()
201 267
202 self.RunGsUtil(['-h', 'CONTENT-TYPE:image/gif', 268 self.RunGsUtil(['-h', 'CONTENT-TYPE:image/gif',
203 '-h', 'content-type:image/gif', 269 '-h', 'content-type:image/gif',
204 'cp', fpath, dsturi]) 270 'cp', fpath, dsturi])
205 # Use @Retry as hedge against bucket listing eventual consistency. 271
206 @Retry(AssertionError, tries=3, timeout_secs=1) 272 # Use @Retry as hedge against bucket listing eventual consistency.
207 def _Check2(): 273 @Retry(AssertionError, tries=3, timeout_secs=1)
208 stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True) 274 def _Check2():
209 self.assertRegexpMatches(stdout, 'Content-Type:\s+image/gif') 275 stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
210 self.assertNotRegexpMatches(stdout, 'image/gif,\s*image/gif') 276 self.assertRegexpMatches(stdout, r'Content-Type:\s+image/gif')
211 _Check2() 277 self.assertNotRegexpMatches(stdout, r'image/gif,\s*image/gif')
278 _Check2()
279
280 @PerformsFileToObjectUpload
281 def test_other_headers(self):
282 """Tests that non-content-type headers are applied successfully on copy."""
283 bucket_uri = self.CreateBucket()
284 dst_uri = suri(bucket_uri, 'foo')
285 fpath = self._get_test_file('test.gif')
286
287 self.RunGsUtil(['-h', 'Cache-Control:public,max-age=12',
288 '-h', 'x-%s-meta-1:abcd' % self.provider_custom_meta, 'cp',
289 fpath, dst_uri])
290
291 stdout = self.RunGsUtil(['ls', '-L', dst_uri], return_stdout=True)
292 self.assertRegexpMatches(stdout, r'Cache-Control\s*:\s*public,max-age=12')
293 self.assertRegexpMatches(stdout, r'Metadata:\s*1:\s*abcd')
294
295 dst_uri2 = suri(bucket_uri, 'bar')
296 self.RunGsUtil(['cp', dst_uri, dst_uri2])
297 # Ensure metadata was preserved across copy.
298 stdout = self.RunGsUtil(['ls', '-L', dst_uri2], return_stdout=True)
299 self.assertRegexpMatches(stdout, r'Cache-Control\s*:\s*public,max-age=12')
300 self.assertRegexpMatches(stdout, r'Metadata:\s*1:\s*abcd')
212 301
213 @PerformsFileToObjectUpload 302 @PerformsFileToObjectUpload
214 def test_versioning(self): 303 def test_versioning(self):
304 """Tests copy with versioning."""
215 bucket_uri = self.CreateVersionedBucket() 305 bucket_uri = self.CreateVersionedBucket()
216 k1_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data2') 306 k1_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data2')
217 k2_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data1') 307 k2_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data1')
218 g1 = k2_uri.generation 308 g1 = k2_uri.generation or k2_uri.version_id
219 self.RunGsUtil(['cp', suri(k1_uri), suri(k2_uri)]) 309 self.RunGsUtil(['cp', suri(k1_uri), suri(k2_uri)])
220 k2_uri = bucket_uri.clone_replace_name(k2_uri.object_name) 310 k2_uri = bucket_uri.clone_replace_name(k2_uri.object_name)
221 k2_uri = bucket_uri.clone_replace_key(k2_uri.get_key()) 311 k2_uri = bucket_uri.clone_replace_key(k2_uri.get_key())
222 g2 = k2_uri.generation 312 g2 = k2_uri.generation or k2_uri.version_id
223 k2_uri.set_contents_from_string('data3') 313 k2_uri.set_contents_from_string('data3')
224 g3 = k2_uri.generation 314 g3 = k2_uri.generation or k2_uri.version_id
225 315
226 fpath = self.CreateTempFile() 316 fpath = self.CreateTempFile()
227 # Check to make sure current version is data3. 317 # Check to make sure current version is data3.
228 self.RunGsUtil(['cp', k2_uri.versionless_uri, fpath]) 318 self.RunGsUtil(['cp', k2_uri.versionless_uri, fpath])
229 with open(fpath, 'r') as f: 319 with open(fpath, 'r') as f:
230 self.assertEqual(f.read(), 'data3') 320 self.assertEqual(f.read(), 'data3')
231 321
232 # Check contents of all three versions 322 # Check contents of all three versions
233 self.RunGsUtil(['cp', '%s#%s' % (k2_uri.versionless_uri, g1), fpath]) 323 self.RunGsUtil(['cp', '%s#%s' % (k2_uri.versionless_uri, g1), fpath])
234 with open(fpath, 'r') as f: 324 with open(fpath, 'r') as f:
(...skipping 10 matching lines...) Expand all
245 k2_uri.versionless_uri]) 335 k2_uri.versionless_uri])
246 self.RunGsUtil(['cp', k2_uri.versionless_uri, fpath]) 336 self.RunGsUtil(['cp', k2_uri.versionless_uri, fpath])
247 with open(fpath, 'r') as f: 337 with open(fpath, 'r') as f:
248 self.assertEqual(f.read(), 'data1') 338 self.assertEqual(f.read(), 'data1')
249 339
250 # Attempt to specify a version-specific URI for destination. 340 # Attempt to specify a version-specific URI for destination.
251 stderr = self.RunGsUtil(['cp', fpath, k2_uri.uri], return_stderr=True, 341 stderr = self.RunGsUtil(['cp', fpath, k2_uri.uri], return_stderr=True,
252 expected_status=1) 342 expected_status=1)
253 self.assertIn('cannot be the destination for gsutil cp', stderr) 343 self.assertIn('cannot be the destination for gsutil cp', stderr)
254 344
345 @SkipForS3('S3 lists versioned objects in reverse timestamp order.')
255 def test_recursive_copying_versioned_bucket(self): 346 def test_recursive_copying_versioned_bucket(self):
256 # Tests that cp -R between versioned buckets copies all versions and 347 """Tests that cp -R with versioned buckets copies all versions in order."""
257 # preserves version order.
258 bucket1_uri = self.CreateVersionedBucket() 348 bucket1_uri = self.CreateVersionedBucket()
259 bucket2_uri = self.CreateVersionedBucket() 349 bucket2_uri = self.CreateVersionedBucket()
350
260 # Write two versions of an object to the bucket1. 351 # Write two versions of an object to the bucket1.
261 k_uri = self.CreateObject(bucket_uri=bucket1_uri, object_name='k', 352 self.CreateObject(bucket_uri=bucket1_uri, object_name='k', contents='data0')
262 contents='data0')
263 self.CreateObject(bucket_uri=bucket1_uri, object_name='k', 353 self.CreateObject(bucket_uri=bucket1_uri, object_name='k',
264 contents='longer_data1') 354 contents='longer_data1')
355
356 self.AssertNObjectsInBucket(bucket1_uri, 2, versioned=True)
357 self.AssertNObjectsInBucket(bucket2_uri, 0, versioned=True)
358
265 # Recursively copy to second versioned bucket. 359 # Recursively copy to second versioned bucket.
266 self.RunGsUtil(['cp', '-R', suri(bucket1_uri, '*'), suri(bucket2_uri)]) 360 self.RunGsUtil(['cp', '-R', suri(bucket1_uri, '*'), suri(bucket2_uri)])
361
267 # Use @Retry as hedge against bucket listing eventual consistency. 362 # Use @Retry as hedge against bucket listing eventual consistency.
268 @Retry(AssertionError, tries=3, timeout_secs=1) 363 @Retry(AssertionError, tries=3, timeout_secs=1)
269 def _Check1(): 364 def _Check2():
365 """Validates the results of the cp -R."""
270 listing1 = self.RunGsUtil(['ls', '-la', suri(bucket1_uri)], 366 listing1 = self.RunGsUtil(['ls', '-la', suri(bucket1_uri)],
271 return_stdout=True).split('\n') 367 return_stdout=True).split('\n')
272 listing2 = self.RunGsUtil(['ls', '-la', suri(bucket2_uri)], 368 listing2 = self.RunGsUtil(['ls', '-la', suri(bucket2_uri)],
273 return_stdout=True).split('\n') 369 return_stdout=True).split('\n')
274 # 2 lines of listing output, 1 summary line, 1 empty line from \n split. 370 # 2 lines of listing output, 1 summary line, 1 empty line from \n split.
275 self.assertEquals(len(listing1), 4) 371 self.assertEquals(len(listing1), 4)
276 self.assertEquals(len(listing2), 4) 372 self.assertEquals(len(listing2), 4)
277 373
278 # First object in each bucket should match in size and version-less name. 374 # First object in each bucket should match in size and version-less name.
279 size1, _, uri_str1, _ = listing1[0].split() 375 size1, _, uri_str1, _ = listing1[0].split()
280 self.assertEquals(size1, str(len('data0'))) 376 self.assertEquals(size1, str(len('data0')))
281 self.assertEquals(storage_uri(uri_str1).object_name, 'k') 377 self.assertEquals(storage_uri(uri_str1).object_name, 'k')
282 size2, _, uri_str2, _ = listing2[0].split() 378 size2, _, uri_str2, _ = listing2[0].split()
283 self.assertEquals(size2, str(len('data0'))) 379 self.assertEquals(size2, str(len('data0')))
284 self.assertEquals(storage_uri(uri_str2).object_name, 'k') 380 self.assertEquals(storage_uri(uri_str2).object_name, 'k')
285 381
286 # Similarly for second object in each bucket. 382 # Similarly for second object in each bucket.
287 size1, _, uri_str1, _ = listing1[1].split() 383 size1, _, uri_str1, _ = listing1[1].split()
288 self.assertEquals(size1, str(len('longer_data1'))) 384 self.assertEquals(size1, str(len('longer_data1')))
289 self.assertEquals(storage_uri(uri_str1).object_name, 'k') 385 self.assertEquals(storage_uri(uri_str1).object_name, 'k')
290 size2, _, uri_str2, _ = listing2[1].split() 386 size2, _, uri_str2, _ = listing2[1].split()
291 self.assertEquals(size2, str(len('longer_data1'))) 387 self.assertEquals(size2, str(len('longer_data1')))
292 self.assertEquals(storage_uri(uri_str2).object_name, 'k') 388 self.assertEquals(storage_uri(uri_str2).object_name, 'k')
293 _Check1() 389 _Check2()
294 390
295 @PerformsFileToObjectUpload 391 @PerformsFileToObjectUpload
392 @SkipForS3('Preconditions not supported for S3.')
393 def test_cp_v_generation_match(self):
394 """Tests that cp -v option handles the if-generation-match header."""
395 bucket_uri = self.CreateVersionedBucket()
396 k1_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data1')
397 g1 = k1_uri.generation
398
399 tmpdir = self.CreateTempDir()
400 fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents='data2')
401
402 gen_match_header = 'x-goog-if-generation-match:%s' % g1
403 # First copy should succeed.
404 self.RunGsUtil(['-h', gen_match_header, 'cp', fpath1, suri(k1_uri)])
405
406 # Second copy should fail the precondition.
407 stderr = self.RunGsUtil(['-h', gen_match_header, 'cp', fpath1,
408 suri(k1_uri)],
409 return_stderr=True, expected_status=1)
410
411 self.assertIn('PreconditionException', stderr)
412
413 # Specifiying a generation with -n should fail before the request hits the
414 # server.
415 stderr = self.RunGsUtil(['-h', gen_match_header, 'cp', '-n', fpath1,
416 suri(k1_uri)],
417 return_stderr=True, expected_status=1)
418
419 self.assertIn('ArgumentException', stderr)
420 self.assertIn('Specifying x-goog-if-generation-match is not supported '
421 'with cp -n', stderr)
422
423 @PerformsFileToObjectUpload
424 def test_cp_nv(self):
425 """Tests that cp -nv works when skipping existing file."""
426 bucket_uri = self.CreateVersionedBucket()
427 k1_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data1')
428
429 tmpdir = self.CreateTempDir()
430 fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents='data2')
431
432 # First copy should succeed.
433 self.RunGsUtil(['cp', '-nv', fpath1, suri(k1_uri)])
434
435 # Second copy should skip copying.
436 stderr = self.RunGsUtil(['cp', '-nv', fpath1, suri(k1_uri)],
437 return_stderr=True)
438 self.assertIn('Skipping existing item:', stderr)
439
440 @PerformsFileToObjectUpload
441 @SkipForS3('S3 lists versioned objects in reverse timestamp order.')
296 def test_cp_v_option(self): 442 def test_cp_v_option(self):
297 # Tests that cp -v option returns the created object's version-specific URI. 443 """"Tests that cp -v returns the created object's version-specific URI."""
298 bucket_uri = self.CreateVersionedBucket() 444 bucket_uri = self.CreateVersionedBucket()
299 k1_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data1') 445 k1_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data1')
300 k2_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data2') 446 k2_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data2')
301 g1 = k1_uri.generation
302 447
303 # Case 1: Upload file to object using one-shot PUT. 448 # Case 1: Upload file to object using one-shot PUT.
304 tmpdir = self.CreateTempDir() 449 tmpdir = self.CreateTempDir()
305 fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents='data1') 450 fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents='data1')
306 self._run_cp_minus_v_test('-v', fpath1, k2_uri.uri) 451 self._run_cp_minus_v_test('-v', fpath1, k2_uri.uri)
307 452
308 # Case 2: Upload file to object using resumable upload. 453 # Case 2: Upload file to object using resumable upload.
309 size_threshold = boto.config.get('GSUtil', 'resumable_threshold', TWO_MB) 454 size_threshold = ONE_KB
310 file_as_string = os.urandom(size_threshold) 455 boto_config_for_test = ('GSUtil', 'resumable_threshold',
311 tmpdir = self.CreateTempDir() 456 str(size_threshold))
312 fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents=file_as_string) 457 with SetBotoConfigForTest([boto_config_for_test]):
313 self._run_cp_minus_v_test('-v', fpath1, k2_uri.uri) 458 file_as_string = os.urandom(size_threshold)
459 tmpdir = self.CreateTempDir()
460 fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents=file_as_string)
461 self._run_cp_minus_v_test('-v', fpath1, k2_uri.uri)
314 462
315 # Case 3: Upload stream to object. 463 # Case 3: Upload stream to object.
316 self._run_cp_minus_v_test('-v', '-', k2_uri.uri) 464 self._run_cp_minus_v_test('-v', '-', k2_uri.uri)
317 465
318 # Case 4: Download object to file. For this case we just expect output of 466 # Case 4: Download object to file. For this case we just expect output of
319 # gsutil cp -v to be the URI of the file. 467 # gsutil cp -v to be the URI of the file.
320 tmpdir = self.CreateTempDir() 468 tmpdir = self.CreateTempDir()
321 fpath1 = self.CreateTempFile(tmpdir=tmpdir) 469 fpath1 = self.CreateTempFile(tmpdir=tmpdir)
322 dst_uri = storage_uri(fpath1) 470 dst_uri = storage_uri(fpath1)
323 stderr = self.RunGsUtil(['cp', '-v', suri(k1_uri), suri(dst_uri)], 471 stderr = self.RunGsUtil(['cp', '-v', suri(k1_uri), suri(dst_uri)],
324 return_stderr=True) 472 return_stderr=True)
325 self.assertIn('Created: %s' % dst_uri.uri, stderr.split('\n')[-2]) 473 self.assertIn('Created: %s' % dst_uri.uri, stderr.split('\n')[-2])
326 474
327 # Case 5: Daisy-chain from object to object. 475 # Case 5: Daisy-chain from object to object.
328 self._run_cp_minus_v_test('-Dv', k1_uri.uri, k2_uri.uri) 476 self._run_cp_minus_v_test('-Dv', k1_uri.uri, k2_uri.uri)
329 477
330 # Case 6: Copy object to object in-the-cloud. 478 # Case 6: Copy object to object in-the-cloud.
331 self._run_cp_minus_v_test('-v', k1_uri.uri, k2_uri.uri) 479 self._run_cp_minus_v_test('-v', k1_uri.uri, k2_uri.uri)
332 480
333 def _run_cp_minus_v_test(self, opt, src_str, dst_str): 481 def _run_cp_minus_v_test(self, opt, src_str, dst_str):
482 """Runs cp -v with the options and validates the results."""
334 stderr = self.RunGsUtil(['cp', opt, src_str, dst_str], return_stderr=True) 483 stderr = self.RunGsUtil(['cp', opt, src_str, dst_str], return_stderr=True)
335 match = re.search(r'Created: (.*)\n', stderr) 484 match = re.search(r'Created: (.*)\n', stderr)
336 self.assertIsNotNone(match) 485 self.assertIsNotNone(match)
337 created_uri = match.group(1) 486 created_uri = match.group(1)
487
338 # Use @Retry as hedge against bucket listing eventual consistency. 488 # Use @Retry as hedge against bucket listing eventual consistency.
339 @Retry(AssertionError, tries=3, timeout_secs=1) 489 @Retry(AssertionError, tries=3, timeout_secs=1)
340 def _Check1(): 490 def _Check1():
341 stdout = self.RunGsUtil(['ls', '-a', dst_str], return_stdout=True) 491 stdout = self.RunGsUtil(['ls', '-a', dst_str], return_stdout=True)
342 lines = stdout.split('\n') 492 lines = stdout.split('\n')
343 # Final (most recent) object should match the "Created:" URI. This is 493 # Final (most recent) object should match the "Created:" URI. This is
344 # in second-to-last line (last line is '\n'). 494 # in second-to-last line (last line is '\n').
495 self.assertGreater(len(lines), 2)
345 self.assertEqual(created_uri, lines[-2]) 496 self.assertEqual(created_uri, lines[-2])
346 _Check1() 497 _Check1()
347 498
348 @PerformsFileToObjectUpload 499 @PerformsFileToObjectUpload
349 def test_stdin_args(self): 500 def test_stdin_args(self):
501 """Tests cp with the -I option."""
350 tmpdir = self.CreateTempDir() 502 tmpdir = self.CreateTempDir()
351 fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents='data1') 503 fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents='data1')
352 fpath2 = self.CreateTempFile(tmpdir=tmpdir, contents='data2') 504 fpath2 = self.CreateTempFile(tmpdir=tmpdir, contents='data2')
353 bucket_uri = self.CreateBucket() 505 bucket_uri = self.CreateBucket()
354 self.RunGsUtil(['cp', '-I', suri(bucket_uri)], 506 self.RunGsUtil(['cp', '-I', suri(bucket_uri)],
355 stdin='\n'.join((fpath1, fpath2))) 507 stdin='\n'.join((fpath1, fpath2)))
508
356 # Use @Retry as hedge against bucket listing eventual consistency. 509 # Use @Retry as hedge against bucket listing eventual consistency.
357 @Retry(AssertionError, tries=3, timeout_secs=1) 510 @Retry(AssertionError, tries=3, timeout_secs=1)
358 def _Check1(): 511 def _Check1():
359 stdout = self.RunGsUtil(['ls', suri(bucket_uri)], return_stdout=True) 512 stdout = self.RunGsUtil(['ls', suri(bucket_uri)], return_stdout=True)
360 self.assertIn(os.path.basename(fpath1), stdout) 513 self.assertIn(os.path.basename(fpath1), stdout)
361 self.assertIn(os.path.basename(fpath2), stdout) 514 self.assertIn(os.path.basename(fpath2), stdout)
362 self.assertNumLines(stdout, 2) 515 self.assertNumLines(stdout, 2)
363 _Check1() 516 _Check1()
364 517
365 def test_cross_storage_class_cp(self): 518 def test_cross_storage_class_cloud_cp(self):
366 bucket1_uri = self.CreateBucket(storage_class='STANDARD') 519 bucket1_uri = self.CreateBucket(storage_class='STANDARD')
367 bucket2_uri = self.CreateBucket( 520 bucket2_uri = self.CreateBucket(
368 storage_class='DURABLE_REDUCED_AVAILABILITY') 521 storage_class='DURABLE_REDUCED_AVAILABILITY')
369 key_uri = self.CreateObject(bucket_uri=bucket1_uri, contents='foo') 522 key_uri = self.CreateObject(bucket_uri=bucket1_uri, contents='foo')
370 # Check that copy-in-the-cloud is allowed. 523 # Server now allows copy-in-the-cloud across storage classes.
371 stderr = self.RunGsUtil(['cp', suri(key_uri), suri(bucket2_uri)], 524 self.RunGsUtil(['cp', suri(key_uri), suri(bucket2_uri)])
372 return_stderr=True, expected_status=0) 525
373 self.assertIn('Copying ', stderr) 526 @unittest.skipUnless(HAS_S3_CREDS, 'Test requires both S3 and GS credentials')
527 def test_cross_provider_cp(self):
528 s3_bucket = self.CreateBucket(provider='s3')
529 gs_bucket = self.CreateBucket(provider='gs')
530 s3_key = self.CreateObject(bucket_uri=s3_bucket, contents='foo')
531 gs_key = self.CreateObject(bucket_uri=gs_bucket, contents='bar')
532 self.RunGsUtil(['cp', suri(s3_key), suri(gs_bucket)])
533 self.RunGsUtil(['cp', suri(gs_key), suri(s3_bucket)])
534
535 @unittest.skipUnless(HAS_S3_CREDS, 'Test requires both S3 and GS credentials')
536 @unittest.skip('This test performs a large copy but remains here for '
537 'debugging purposes.')
538 def test_cross_provider_large_cp(self):
539 s3_bucket = self.CreateBucket(provider='s3')
540 gs_bucket = self.CreateBucket(provider='gs')
541 s3_key = self.CreateObject(bucket_uri=s3_bucket, contents='f'*1024*1024)
542 gs_key = self.CreateObject(bucket_uri=gs_bucket, contents='b'*1024*1024)
543 self.RunGsUtil(['cp', suri(s3_key), suri(gs_bucket)])
544 self.RunGsUtil(['cp', suri(gs_key), suri(s3_bucket)])
545 with SetBotoConfigForTest([
546 ('GSUtil', 'resumable_threshold', str(ONE_KB)),
547 ('GSUtil', 'json_resumable_chunk_size', str(ONE_KB * 256))]):
548 # Ensure copy also works across json upload chunk boundaries.
549 self.RunGsUtil(['cp', suri(s3_key), suri(gs_bucket)])
550
551 @unittest.skip('This test is slow due to creating many objects, '
552 'but remains here for debugging purposes.')
553 def test_daisy_chain_cp_file_sizes(self):
554 """Ensure daisy chain cp works with a wide of file sizes."""
555 bucket_uri = self.CreateBucket()
556 bucket2_uri = self.CreateBucket()
557 exponent_cap = 22 # Up to 2MB in size.
558 for i in range(exponent_cap):
559 one_byte_smaller = 2**i - 1
560 normal = 2**i
561 one_byte_larger = 2**i + 1
562 self.CreateObject(bucket_uri=bucket_uri, contents='a'*one_byte_smaller)
563 self.CreateObject(bucket_uri=bucket_uri, contents='b'*normal)
564 self.CreateObject(bucket_uri=bucket_uri, contents='c'*one_byte_larger)
565
566 self.AssertNObjectsInBucket(bucket_uri, exponent_cap*3)
567 self.RunGsUtil(['-m', 'cp', '-D', suri(bucket_uri, '**'),
568 suri(bucket2_uri)])
569
570 self.AssertNObjectsInBucket(bucket2_uri, exponent_cap*3)
374 571
375 def test_daisy_chain_cp(self): 572 def test_daisy_chain_cp(self):
376 # Daisy chain mode is required for copying across storage classes, 573 """Tests cp with the -D option."""
377 # so create 2 buckets and attempt to copy without vs with daisy chain mode.
378 bucket1_uri = self.CreateBucket(storage_class='STANDARD') 574 bucket1_uri = self.CreateBucket(storage_class='STANDARD')
379 bucket2_uri = self.CreateBucket( 575 bucket2_uri = self.CreateBucket(
380 storage_class='DURABLE_REDUCED_AVAILABILITY') 576 storage_class='DURABLE_REDUCED_AVAILABILITY')
381 key_uri = self.CreateObject(bucket_uri=bucket1_uri, contents='foo') 577 key_uri = self.CreateObject(bucket_uri=bucket1_uri, contents='foo')
382
383 # Set some headers on source object so we can verify that headers are 578 # Set some headers on source object so we can verify that headers are
384 # presereved by daisy-chain copy. 579 # presereved by daisy-chain copy.
385 self.RunGsUtil(['setmeta', '-h', 'Cache-Control:public,max-age=12', 580 self.RunGsUtil(['setmeta', '-h', 'Cache-Control:public,max-age=12',
386 '-h', 'Content-Type:image/gif', 581 '-h', 'Content-Type:image/gif',
387 '-h', 'x-goog-meta-1:abcd', suri(key_uri)]) 582 '-h', 'x-%s-meta-1:abcd' % self.provider_custom_meta,
583 suri(key_uri)])
388 # Set public-read (non-default) ACL so we can verify that cp -D -p works. 584 # Set public-read (non-default) ACL so we can verify that cp -D -p works.
389 self.RunGsUtil(['acl', 'set', 'public-read', suri(key_uri)]) 585 self.RunGsUtil(['acl', 'set', 'public-read', suri(key_uri)])
390 acl_xml = self.RunGsUtil(['acl', 'get', suri(key_uri)], return_stdout=True) 586 acl_json = self.RunGsUtil(['acl', 'get', suri(key_uri)], return_stdout=True)
391 # Perform daisy-chain copy and verify that it wasn't disallowed and that 587 # Perform daisy-chain copy and verify that source object headers and ACL
392 # source object headers and ACL were preserved. Also specify -n option to 588 # were preserved. Also specify -n option to test that gsutil correctly
393 # test that gsutil correctly removes the x-goog-if-generation-match:0 header 589 # removes the x-goog-if-generation-match:0 header that was set at uploading
394 # that was set at uploading time when updating the ACL. 590 # time when updating the ACL.
395 stderr = self.RunGsUtil(['cp', '-Dpn', suri(key_uri), suri(bucket2_uri)], 591 stderr = self.RunGsUtil(['cp', '-Dpn', suri(key_uri), suri(bucket2_uri)],
396 return_stderr=True) 592 return_stderr=True)
397 self.assertIn('Copying ', stderr) 593 self.assertNotIn('Copy-in-the-cloud disallowed', stderr)
398 594
399 @Retry(AssertionError, tries=3, timeout_secs=1) 595 @Retry(AssertionError, tries=3, timeout_secs=1)
400 def _Check(): 596 def _Check():
401 uri = suri(bucket2_uri, key_uri.object_name) 597 uri = suri(bucket2_uri, key_uri.object_name)
402 stdout = self.RunGsUtil(['ls', '-L', uri], return_stdout=True) 598 stdout = self.RunGsUtil(['ls', '-L', uri], return_stdout=True)
403 self.assertRegexpMatches(stdout, 'Cache-Control:\s+public,max-age=12') 599 self.assertRegexpMatches(stdout, r'Cache-Control:\s+public,max-age=12')
404 self.assertRegexpMatches(stdout, 'Content-Type:\s+image/gif') 600 self.assertRegexpMatches(stdout, r'Content-Type:\s+image/gif')
405 self.assertRegexpMatches(stdout, 'x-goog-meta-1:\s+abcd') 601 self.assertRegexpMatches(stdout, r'Metadata:\s+1:\s+abcd')
406 new_acl_xml = self.RunGsUtil(['acl', 'get', uri], return_stdout=True) 602 new_acl_json = self.RunGsUtil(['acl', 'get', uri], return_stdout=True)
407 self.assertEqual(acl_xml, new_acl_xml) 603 self.assertEqual(acl_json, new_acl_json)
408 _Check() 604 _Check()
409 605
606 def test_canned_acl_cp(self):
607 """Tests copying with a canned ACL."""
608 bucket1_uri = self.CreateBucket()
609 bucket2_uri = self.CreateBucket()
610 key_uri = self.CreateObject(bucket_uri=bucket1_uri, contents='foo')
611 self.RunGsUtil(['cp', '-a', 'public-read', suri(key_uri),
612 suri(bucket2_uri)])
613 # Set public-read on the original key after the copy so we can compare
614 # the ACLs.
615 self.RunGsUtil(['acl', 'set', 'public-read', suri(key_uri)])
616 public_read_acl = self.RunGsUtil(['acl', 'get', suri(key_uri)],
617 return_stdout=True)
618
619 @Retry(AssertionError, tries=3, timeout_secs=1)
620 def _Check():
621 uri = suri(bucket2_uri, key_uri.object_name)
622 new_acl_json = self.RunGsUtil(['acl', 'get', uri], return_stdout=True)
623 self.assertEqual(public_read_acl, new_acl_json)
624 _Check()
625
626 @PerformsFileToObjectUpload
627 def test_canned_acl_upload(self):
628 """Tests uploading a file with a canned ACL."""
629 bucket1_uri = self.CreateBucket()
630 key_uri = self.CreateObject(bucket_uri=bucket1_uri, contents='foo')
631 # Set public-read on the object so we can compare the ACLs.
632 self.RunGsUtil(['acl', 'set', 'public-read', suri(key_uri)])
633 public_read_acl = self.RunGsUtil(['acl', 'get', suri(key_uri)],
634 return_stdout=True)
635
636 file_name = 'bar'
637 fpath = self.CreateTempFile(file_name=file_name, contents='foo')
638 self.RunGsUtil(['cp', '-a', 'public-read', fpath, suri(bucket1_uri)])
639 new_acl_json = self.RunGsUtil(['acl', 'get', suri(bucket1_uri, file_name)],
640 return_stdout=True)
641 self.assertEqual(public_read_acl, new_acl_json)
642
643 resumable_size = ONE_KB
644 boto_config_for_test = ('GSUtil', 'resumable_threshold',
645 str(resumable_size))
646 with SetBotoConfigForTest([boto_config_for_test]):
647 resumable_file_name = 'resumable_bar'
648 resumable_contents = os.urandom(resumable_size)
649 resumable_fpath = self.CreateTempFile(
650 file_name=resumable_file_name, contents=resumable_contents)
651 self.RunGsUtil(['cp', '-a', 'public-read', resumable_fpath,
652 suri(bucket1_uri)])
653 new_resumable_acl_json = self.RunGsUtil(
654 ['acl', 'get', suri(bucket1_uri, resumable_file_name)],
655 return_stdout=True)
656 self.assertEqual(public_read_acl, new_resumable_acl_json)
657
410 def test_cp_key_to_local_stream(self): 658 def test_cp_key_to_local_stream(self):
411 bucket_uri = self.CreateBucket() 659 bucket_uri = self.CreateBucket()
412 contents = 'foo' 660 contents = 'foo'
413 key_uri = self.CreateObject(bucket_uri=bucket_uri, contents=contents) 661 key_uri = self.CreateObject(bucket_uri=bucket_uri, contents=contents)
414 stdout = self.RunGsUtil(['cp', suri(key_uri), '-'], return_stdout=True) 662 stdout = self.RunGsUtil(['cp', suri(key_uri), '-'], return_stdout=True)
415 self.assertIn(contents, stdout) 663 self.assertIn(contents, stdout)
416 664
417 def test_cp_local_file_to_local_stream(self): 665 def test_cp_local_file_to_local_stream(self):
418 contents = 'content' 666 contents = 'content'
419 fpath = self.CreateTempFile(contents=contents) 667 fpath = self.CreateTempFile(contents=contents)
420 stdout = self.RunGsUtil(['cp', fpath, '-'], return_stdout=True) 668 stdout = self.RunGsUtil(['cp', fpath, '-'], return_stdout=True)
421 self.assertIn(contents, stdout) 669 self.assertIn(contents, stdout)
422 670
671 @PerformsFileToObjectUpload
672 def test_cp_zero_byte_file(self):
673 dst_bucket_uri = self.CreateBucket()
674 src_dir = self.CreateTempDir()
675 fpath = os.path.join(src_dir, 'zero_byte')
676 with open(fpath, 'w') as unused_out_file:
677 pass # Write a zero byte file
678 self.RunGsUtil(['cp', fpath, suri(dst_bucket_uri)])
679
680 @Retry(AssertionError, tries=3, timeout_secs=1)
681 def _Check1():
682 stdout = self.RunGsUtil(['ls', suri(dst_bucket_uri)], return_stdout=True)
683 self.assertIn(os.path.basename(fpath), stdout)
684 _Check1()
685
686 download_path = os.path.join(src_dir, 'zero_byte_download')
687 self.RunGsUtil(['cp', suri(dst_bucket_uri, 'zero_byte'), download_path])
688 self.assertTrue(os.stat(download_path))
689
423 def test_copy_bucket_to_bucket(self): 690 def test_copy_bucket_to_bucket(self):
424 # Tests that recursively copying from bucket to bucket produces identically 691 """Tests that recursively copying from bucket to bucket.
425 # named objects (and not, in particular, destination objects named by the 692
426 # version- specific URI from source objects). 693 This should produce identically named objects (and not, in particular,
694 destination objects named by the version-specific URI from source objects).
695 """
427 src_bucket_uri = self.CreateVersionedBucket() 696 src_bucket_uri = self.CreateVersionedBucket()
428 dst_bucket_uri = self.CreateVersionedBucket() 697 dst_bucket_uri = self.CreateVersionedBucket()
429 self.CreateObject(bucket_uri=src_bucket_uri, object_name='obj0', 698 self.CreateObject(bucket_uri=src_bucket_uri, object_name='obj0',
430 contents='abc') 699 contents='abc')
431 self.CreateObject(bucket_uri=src_bucket_uri, object_name='obj1', 700 self.CreateObject(bucket_uri=src_bucket_uri, object_name='obj1',
432 contents='def') 701 contents='def')
702
433 # Use @Retry as hedge against bucket listing eventual consistency. 703 # Use @Retry as hedge against bucket listing eventual consistency.
434 @Retry(AssertionError, tries=3, timeout_secs=1) 704 @Retry(AssertionError, tries=3, timeout_secs=1)
435 def _CopyAndCheck(): 705 def _CopyAndCheck():
436 self.RunGsUtil(['cp', '-R', suri(src_bucket_uri), 706 self.RunGsUtil(['cp', '-R', suri(src_bucket_uri),
437 suri(dst_bucket_uri)]) 707 suri(dst_bucket_uri)])
438 stdout = self.RunGsUtil(['ls', '-R', dst_bucket_uri.uri], 708 stdout = self.RunGsUtil(['ls', '-R', dst_bucket_uri.uri],
439 return_stdout=True) 709 return_stdout=True)
440 self.assertIn('%s%s/obj0\n' % (dst_bucket_uri, 710 self.assertIn('%s%s/obj0\n' % (dst_bucket_uri,
441 src_bucket_uri.bucket_name), stdout) 711 src_bucket_uri.bucket_name), stdout)
442 self.assertIn('%s%s/obj1\n' % (dst_bucket_uri, 712 self.assertIn('%s%s/obj1\n' % (dst_bucket_uri,
443 src_bucket_uri.bucket_name), stdout) 713 src_bucket_uri.bucket_name), stdout)
444 _CopyAndCheck() 714 _CopyAndCheck()
445 715
446 def test_copy_bucket_to_dir(self): 716 def test_copy_bucket_to_dir(self):
447 # Tests that recursively copying from bucket to dir produces identically 717 """Tests recursively copying from bucket to a directory.
448 # named objects (and not, in particular, destination objects named by the 718
449 # version- specific URI from source objects). 719 This should produce identically named objects (and not, in particular,
720 destination objects named by the version- specific URI from source objects).
721 """
450 src_bucket_uri = self.CreateBucket() 722 src_bucket_uri = self.CreateBucket()
451 dst_dir = self.CreateTempDir() 723 dst_dir = self.CreateTempDir()
452 self.CreateObject(bucket_uri=src_bucket_uri, object_name='obj0', 724 self.CreateObject(bucket_uri=src_bucket_uri, object_name='obj0',
453 contents='abc') 725 contents='abc')
454 self.CreateObject(bucket_uri=src_bucket_uri, object_name='obj1', 726 self.CreateObject(bucket_uri=src_bucket_uri, object_name='obj1',
455 contents='def') 727 contents='def')
728
456 # Use @Retry as hedge against bucket listing eventual consistency. 729 # Use @Retry as hedge against bucket listing eventual consistency.
457 @Retry(AssertionError, tries=3, timeout_secs=1) 730 @Retry(AssertionError, tries=3, timeout_secs=1)
458 def _CopyAndCheck(): 731 def _CopyAndCheck():
732 """Copies the bucket recursively and validates the results."""
459 self.RunGsUtil(['cp', '-R', suri(src_bucket_uri), dst_dir]) 733 self.RunGsUtil(['cp', '-R', suri(src_bucket_uri), dst_dir])
460 dir_list = [] 734 dir_list = []
461 for dirname, dirnames, filenames in os.walk(dst_dir): 735 for dirname, _, filenames in os.walk(dst_dir):
462 for filename in filenames: 736 for filename in filenames:
463 dir_list.append(os.path.join(dirname, filename)) 737 dir_list.append(os.path.join(dirname, filename))
464 dir_list = sorted(dir_list) 738 dir_list = sorted(dir_list)
465 self.assertEqual(len(dir_list), 2) 739 self.assertEqual(len(dir_list), 2)
466 self.assertEqual(os.path.join(dst_dir, src_bucket_uri.bucket_name, 740 self.assertEqual(os.path.join(dst_dir, src_bucket_uri.bucket_name,
467 "obj0"), dir_list[0]) 741 'obj0'), dir_list[0])
468 self.assertEqual(os.path.join(dst_dir, src_bucket_uri.bucket_name, 742 self.assertEqual(os.path.join(dst_dir, src_bucket_uri.bucket_name,
469 "obj1"), dir_list[1]) 743 'obj1'), dir_list[1])
470 _CopyAndCheck() 744 _CopyAndCheck()
471 745
746 def test_recursive_download_with_leftover_dir_placeholder(self):
747 """Tests that we correctly handle leftover dir placeholders."""
748 src_bucket_uri = self.CreateBucket()
749 dst_dir = self.CreateTempDir()
750 self.CreateObject(bucket_uri=src_bucket_uri, object_name='obj0',
751 contents='abc')
752 self.CreateObject(bucket_uri=src_bucket_uri, object_name='obj1',
753 contents='def')
754
755 # Create a placeholder like what can be left over by web GUI tools.
756 key_uri = src_bucket_uri.clone_replace_name('/')
757 key_uri.set_contents_from_string('')
758 self.AssertNObjectsInBucket(src_bucket_uri, 3)
759
760 stderr = self.RunGsUtil(['cp', '-R', suri(src_bucket_uri), dst_dir],
761 return_stderr=True)
762 self.assertIn('Skipping cloud sub-directory placeholder object', stderr)
763 dir_list = []
764 for dirname, _, filenames in os.walk(dst_dir):
765 for filename in filenames:
766 dir_list.append(os.path.join(dirname, filename))
767 dir_list = sorted(dir_list)
768 self.assertEqual(len(dir_list), 2)
769 self.assertEqual(os.path.join(dst_dir, src_bucket_uri.bucket_name,
770 'obj0'), dir_list[0])
771 self.assertEqual(os.path.join(dst_dir, src_bucket_uri.bucket_name,
772 'obj1'), dir_list[1])
773
472 def test_copy_quiet(self): 774 def test_copy_quiet(self):
473 bucket_uri = self.CreateBucket() 775 bucket_uri = self.CreateBucket()
474 key_uri = self.CreateObject(bucket_uri=bucket_uri, contents='foo') 776 key_uri = self.CreateObject(bucket_uri=bucket_uri, contents='foo')
475 stderr = self.RunGsUtil(['-q', 'cp', suri(key_uri), 777 stderr = self.RunGsUtil(['-q', 'cp', suri(key_uri),
476 suri(bucket_uri.clone_replace_name('o2'))], 778 suri(bucket_uri.clone_replace_name('o2'))],
477 return_stderr=True) 779 return_stderr=True)
478 self.assertEqual(stderr.count('Copying '), 0) 780 self.assertEqual(stderr.count('Copying '), 0)
479 781
782 def test_cp_md5_match(self):
783 """Tests that the uploaded object has the expected MD5.
784
785 Note that while this does perform a file to object upload, MD5's are
786 not supported for composite objects so we don't use the decorator in this
787 case.
788 """
789 bucket_uri = self.CreateBucket()
790 fpath = self.CreateTempFile(contents='bar')
791 with open(fpath, 'r') as f_in:
792 file_md5 = base64.encodestring(binascii.unhexlify(
793 CalculateMd5FromContents(f_in))).rstrip('\n')
794 self.RunGsUtil(['cp', fpath, suri(bucket_uri)])
795
796 # Use @Retry as hedge against bucket listing eventual consistency.
797 @Retry(AssertionError, tries=3, timeout_secs=1)
798 def _Check1():
799 stdout = self.RunGsUtil(['ls', '-L', suri(bucket_uri)],
800 return_stdout=True)
801 self.assertRegexpMatches(stdout,
802 r'Hash\s+\(md5\):\s+%s' % re.escape(file_md5))
803 _Check1()
804
805 @unittest.skipIf(IS_WINDOWS,
806 'Unicode handling on Windows requires mods to site-packages')
807 @PerformsFileToObjectUpload
808 def test_cp_manifest_upload_unicode(self):
809 return self._ManifestUpload('foo-unicöde', 'bar-unicöde',
810 'manifest-unicöde')
811
480 @PerformsFileToObjectUpload 812 @PerformsFileToObjectUpload
481 def test_cp_manifest_upload(self): 813 def test_cp_manifest_upload(self):
814 """Tests uploading with a mnifest file."""
815 return self._ManifestUpload('foo', 'bar', 'manifest')
816
817 def _ManifestUpload(self, file_name, object_name, manifest_name):
818 """Tests uploading with a manifest file."""
482 bucket_uri = self.CreateBucket() 819 bucket_uri = self.CreateBucket()
483 dsturi = suri(bucket_uri, 'foo') 820 dsturi = suri(bucket_uri, object_name)
484 821
485 fpath = self.CreateTempFile(contents='bar') 822 fpath = self.CreateTempFile(file_name=file_name, contents='bar')
486 logpath = self.CreateTempFile(contents='') 823 logpath = self.CreateTempFile(file_name=manifest_name, contents='')
487 # Ensure the file is empty. 824 # Ensure the file is empty.
488 open(logpath, 'w').close() 825 open(logpath, 'w').close()
489 stdout = self.RunGsUtil(['cp', '-L', logpath, fpath, dsturi], 826 self.RunGsUtil(['cp', '-L', logpath, fpath, dsturi])
490 return_stdout=True)
491 with open(logpath, 'r') as f: 827 with open(logpath, 'r') as f:
492 lines = f.readlines() 828 lines = f.readlines()
493 self.assertEqual(len(lines), 2) 829 self.assertEqual(len(lines), 2)
494 830
495 expected_headers = ['Source', 'Destination', 'Start', 'End', 'Md5', 831 expected_headers = ['Source', 'Destination', 'Start', 'End', 'Md5',
496 'UploadId', 'Source Size', 'Bytes Transferred', 832 'UploadId', 'Source Size', 'Bytes Transferred',
497 'Result', 'Description'] 833 'Result', 'Description']
498 self.assertEqual(expected_headers, lines[0].strip().split(',')) 834 self.assertEqual(expected_headers, lines[0].strip().split(','))
499 results = lines[1].strip().split(',') 835 results = lines[1].strip().split(',')
500 self.assertEqual(results[0][:7], 'file://') # source 836 self.assertEqual(results[0][:7], 'file://') # source
501 self.assertEqual(results[1][:5], 'gs://') # destination 837 self.assertEqual(results[1][:5], '%s://' %
838 self.default_provider) # destination
502 date_format = '%Y-%m-%dT%H:%M:%S.%fZ' 839 date_format = '%Y-%m-%dT%H:%M:%S.%fZ'
503 start_date = datetime.datetime.strptime(results[2], date_format) 840 start_date = datetime.datetime.strptime(results[2], date_format)
504 end_date = datetime.datetime.strptime(results[3], date_format) 841 end_date = datetime.datetime.strptime(results[3], date_format)
505 self.assertEqual(end_date > start_date, True) 842 self.assertEqual(end_date > start_date, True)
506 if self.RunGsUtil == testcase.GsUtilIntegrationTestCase.RunGsUtil: 843 if self.RunGsUtil == testcase.GsUtilIntegrationTestCase.RunGsUtil:
507 # Check that we didn't do automatic parallel uploads - compose doesn't 844 # Check that we didn't do automatic parallel uploads - compose doesn't
508 # calculate the MD5 hash. Since RunGsUtil is overriden in 845 # calculate the MD5 hash. Since RunGsUtil is overriden in
509 # TestCpParallelUploads to force parallel uploads, we can check which 846 # TestCpParallelUploads to force parallel uploads, we can check which
510 # method was used. 847 # method was used.
511 self.assertEqual(results[4], '37b51d194a7513e45b56f6524f2d51f2') # md5 848 self.assertEqual(results[4], 'rL0Y20zC+Fzt72VPzMSk2A==') # md5
512 self.assertEqual(int(results[6]), 3) # Source Size 849 self.assertEqual(int(results[6]), 3) # Source Size
513 self.assertEqual(int(results[7]), 3) # Bytes Transferred 850 self.assertEqual(int(results[7]), 3) # Bytes Transferred
514 self.assertEqual(results[8], 'OK') # Result 851 self.assertEqual(results[8], 'OK') # Result
515 852
516 @PerformsFileToObjectUpload 853 @PerformsFileToObjectUpload
517 def test_cp_manifest_download(self): 854 def test_cp_manifest_download(self):
855 """Tests downloading with a manifest file."""
518 key_uri = self.CreateObject(contents='foo') 856 key_uri = self.CreateObject(contents='foo')
519 fpath = self.CreateTempFile(contents='') 857 fpath = self.CreateTempFile(contents='')
520 logpath = self.CreateTempFile(contents='') 858 logpath = self.CreateTempFile(contents='')
521 # Ensure the file is empty. 859 # Ensure the file is empty.
522 open(logpath, 'w').close() 860 open(logpath, 'w').close()
523 stdout = self.RunGsUtil(['cp', '-L', logpath, suri(key_uri), fpath], 861 self.RunGsUtil(['cp', '-L', logpath, suri(key_uri), fpath],
524 return_stdout=True) 862 return_stdout=True)
525 with open(logpath, 'r') as f: 863 with open(logpath, 'r') as f:
526 lines = f.readlines() 864 lines = f.readlines()
527 self.assertEqual(len(lines), 2) 865 self.assertEqual(len(lines), 2)
528 866
529 expected_headers = ['Source', 'Destination', 'Start', 'End', 'Md5', 867 expected_headers = ['Source', 'Destination', 'Start', 'End', 'Md5',
530 'UploadId', 'Source Size', 'Bytes Transferred', 868 'UploadId', 'Source Size', 'Bytes Transferred',
531 'Result', 'Description'] 869 'Result', 'Description']
532 self.assertEqual(expected_headers, lines[0].strip().split(',')) 870 self.assertEqual(expected_headers, lines[0].strip().split(','))
533 results = lines[1].strip().split(',') 871 results = lines[1].strip().split(',')
534 self.assertEqual(results[0][:5], 'gs://') # source 872 self.assertEqual(results[0][:5], '%s://' %
873 self.default_provider) # source
535 self.assertEqual(results[1][:7], 'file://') # destination 874 self.assertEqual(results[1][:7], 'file://') # destination
536 date_format = '%Y-%m-%dT%H:%M:%S.%fZ' 875 date_format = '%Y-%m-%dT%H:%M:%S.%fZ'
537 start_date = datetime.datetime.strptime(results[2], date_format) 876 start_date = datetime.datetime.strptime(results[2], date_format)
538 end_date = datetime.datetime.strptime(results[3], date_format) 877 end_date = datetime.datetime.strptime(results[3], date_format)
539 self.assertEqual(end_date > start_date, True) 878 self.assertEqual(end_date > start_date, True)
540 # TODO: fix this when CRC32C's are added to the manifest. 879 self.assertEqual(results[4], 'rL0Y20zC+Fzt72VPzMSk2A==') # md5
541 # self.assertEqual(results[4], '37b51d194a7513e45b56f6524f2d51f2') # md5
542 self.assertEqual(int(results[6]), 3) # Source Size 880 self.assertEqual(int(results[6]), 3) # Source Size
543 # Bytes transferred might be more than 3 if the file was gzipped, since 881 # Bytes transferred might be more than 3 if the file was gzipped, since
544 # the minimum gzip header is 10 bytes. 882 # the minimum gzip header is 10 bytes.
545 self.assertGreaterEqual(int(results[7]), 3) # Bytes Transferred 883 self.assertGreaterEqual(int(results[7]), 3) # Bytes Transferred
546 self.assertEqual(results[8], 'OK') # Result 884 self.assertEqual(results[8], 'OK') # Result
547 885
548 @PerformsFileToObjectUpload 886 @PerformsFileToObjectUpload
549 def test_copy_unicode_non_ascii_filename(self): 887 def test_copy_unicode_non_ascii_filename(self):
550 key_uri = self.CreateObject(contents='foo') 888 key_uri = self.CreateObject(contents='foo')
551 # Make file large enough to cause a resumable upload (which hashes filename 889 # Make file large enough to cause a resumable upload (which hashes filename
552 # to construct tracker filename). 890 # to construct tracker filename).
553 fpath = self.CreateTempFile(file_name=u'Аудиоархив', 891 fpath = self.CreateTempFile(file_name=u'Аудиоархив',
554 contents='x' * 3 * 1024 * 1024) 892 contents='x' * 3 * 1024 * 1024)
555 fpath_bytes = fpath.encode('utf-8') 893 fpath_bytes = fpath.encode(UTF8)
556 stderr = self.RunGsUtil(['cp', fpath_bytes, suri(key_uri)], 894 stderr = self.RunGsUtil(['cp', fpath_bytes, suri(key_uri)],
557 return_stderr=True) 895 return_stderr=True)
558 self.assertIn('Copying file:', stderr) 896 self.assertIn('Copying file:', stderr)
559 897
898 # Note: We originally one time implemented a test
899 # (test_copy_invalid_unicode_filename) that invalid unicode filenames were
900 # skipped, but it turns out os.walk() on MacOS doesn't have problems with
901 # such files (so, failed that test). Given that, we decided to remove the
902 # test.
903
560 def test_gzip_upload_and_download(self): 904 def test_gzip_upload_and_download(self):
561 key_uri = self.CreateObject() 905 bucket_uri = self.CreateBucket()
562 contents = 'x' * 10000 906 contents = 'x' * 10000
563 fpath1 = self.CreateTempFile(file_name='test.html', contents=contents) 907 tmpdir = self.CreateTempDir()
564 self.RunGsUtil(['cp', '-z', 'html', suri(fpath1), suri(key_uri)]) 908 self.CreateTempFile(file_name='test.html', tmpdir=tmpdir, contents=contents)
565 fpath2 = self.CreateTempFile() 909 self.CreateTempFile(file_name='test.js', tmpdir=tmpdir, contents=contents)
566 self.RunGsUtil(['cp', suri(key_uri), suri(fpath2)]) 910 self.CreateTempFile(file_name='test.txt', tmpdir=tmpdir, contents=contents)
567 with open(fpath2, 'r') as f: 911 # Test that copying specifying only 2 of the 3 prefixes gzips the correct
568 self.assertEqual(f.read(), contents) 912 # files, and test that including whitespace in the extension list works.
913 self.RunGsUtil(['cp', '-z', 'js, html',
914 os.path.join(tmpdir, 'test.*'), suri(bucket_uri)])
915 self.AssertNObjectsInBucket(bucket_uri, 3)
916 uri1 = suri(bucket_uri, 'test.html')
917 uri2 = suri(bucket_uri, 'test.js')
918 uri3 = suri(bucket_uri, 'test.txt')
919 stdout = self.RunGsUtil(['stat', uri1], return_stdout=True)
920 self.assertRegexpMatches(stdout, r'Content-Encoding:\s+gzip')
921 stdout = self.RunGsUtil(['stat', uri2], return_stdout=True)
922 self.assertRegexpMatches(stdout, r'Content-Encoding:\s+gzip')
923 stdout = self.RunGsUtil(['stat', uri3], return_stdout=True)
924 self.assertNotRegexpMatches(stdout, r'Content-Encoding:\s+gzip')
925 fpath4 = self.CreateTempFile()
926 for uri in (uri1, uri2, uri3):
927 self.RunGsUtil(['cp', uri, suri(fpath4)])
928 with open(fpath4, 'r') as f:
929 self.assertEqual(f.read(), contents)
569 930
570 def test_upload_with_subdir_and_unexpanded_wildcard(self): 931 def test_upload_with_subdir_and_unexpanded_wildcard(self):
571 fpath1 = self.CreateTempFile(file_name=('tmp', 'x', 'y', 'z')) 932 fpath1 = self.CreateTempFile(file_name=('tmp', 'x', 'y', 'z'))
572 bucket_uri = self.CreateBucket() 933 bucket_uri = self.CreateBucket()
573 wildcard_uri = '%s*' % fpath1[:-5] 934 wildcard_uri = '%s*' % fpath1[:-5]
574 stderr = self.RunGsUtil(['cp', '-R', wildcard_uri, suri(bucket_uri)], 935 stderr = self.RunGsUtil(['cp', '-R', wildcard_uri, suri(bucket_uri)],
575 return_stderr=True) 936 return_stderr=True)
576 self.assertIn('Copying file:', stderr) 937 self.assertIn('Copying file:', stderr)
577 938
939 def test_cp_object_ending_with_slash(self):
940 """Tests that cp works with object names ending with slash."""
941 tmpdir = self.CreateTempDir()
942 bucket_uri = self.CreateBucket()
943 self.CreateObject(bucket_uri=bucket_uri,
944 object_name='abc/',
945 contents='dir')
946 self.CreateObject(bucket_uri=bucket_uri,
947 object_name='abc/def',
948 contents='def')
949 self.RunGsUtil(['cp', '-R', suri(bucket_uri), tmpdir])
950 # Check that files in the subdir got copied even though subdir object
951 # download was skipped.
952 with open(os.path.join(tmpdir, bucket_uri.bucket_name, 'abc', 'def')) as f:
953 self.assertEquals('def', '\n'.join(f.readlines()))
954
578 def test_cp_without_read_access(self): 955 def test_cp_without_read_access(self):
579 object_uri = self.CreateObject(contents='foo') 956 """Tests that cp fails without read access to the object."""
957 # TODO: With 401's triggering retries in apitools, this test will take
958 # a long time. Ideally, make apitools accept a num_retries config for this
959 # until we stop retrying the 401's.
960 bucket_uri = self.CreateBucket()
961 object_uri = self.CreateObject(bucket_uri=bucket_uri, contents='foo')
962
963 # Use @Retry as hedge against bucket listing eventual consistency.
964 self.AssertNObjectsInBucket(bucket_uri, 1)
965
580 with self.SetAnonymousBotoCreds(): 966 with self.SetAnonymousBotoCreds():
581 stderr = self.RunGsUtil(['cp', suri(object_uri), 'foo'], 967 stderr = self.RunGsUtil(['cp', suri(object_uri), 'foo'],
582 return_stderr = True, expected_status=1) 968 return_stderr=True, expected_status=1)
583 self.assertIn('Access denied to', stderr) 969 self.assertIn('AccessDenied', stderr)
584 970
585 @unittest.skipIf(IS_WINDOWS, 'os.symlink() is not available on Windows.') 971 @unittest.skipIf(IS_WINDOWS, 'os.symlink() is not available on Windows.')
586 def test_cp_minus_e(self): 972 def test_cp_minus_e(self):
587 fpath_dir = self.CreateTempDir() 973 fpath_dir = self.CreateTempDir()
588 fpath1 = self.CreateTempFile(tmpdir=fpath_dir) 974 fpath1 = self.CreateTempFile(tmpdir=fpath_dir)
589 fpath2 = os.path.join(fpath_dir, 'cp_minus_e') 975 fpath2 = os.path.join(fpath_dir, 'cp_minus_e')
590 bucket_uri = self.CreateBucket() 976 bucket_uri = self.CreateBucket()
591 os.symlink(fpath1, fpath2) 977 os.symlink(fpath1, fpath2)
592 stderr = self.RunGsUtil( 978 stderr = self.RunGsUtil(
593 ['cp', '-e', '%s%s*' % (fpath_dir, os.path.sep), 979 ['cp', '-e', '%s%s*' % (fpath_dir, os.path.sep),
594 suri(bucket_uri, 'files')], 980 suri(bucket_uri, 'files')],
595 return_stderr=True) 981 return_stderr=True)
596 self.assertIn('Copying file', stderr) 982 self.assertIn('Copying file', stderr)
597 self.assertIn('Skipping symbolic link file', stderr) 983 self.assertIn('Skipping symbolic link file', stderr)
598 984
599 def test_filter_existing_components_non_versioned(self): 985 def test_cp_multithreaded_wildcard(self):
600 bucket_uri = self.CreateBucket() 986 """Tests that cp -m works with a wildcard."""
601 tracker_file = self.CreateTempFile(file_name='foo', contents='asdf') 987 num_test_files = 5
602 tracker_file_lock = CreateLock() 988 tmp_dir = self.CreateTempDir(test_files=num_test_files)
603 989 bucket_uri = self.CreateBucket()
604 # Already uploaded, contents still match, component still used. 990 wildcard_uri = '%s%s*' % (tmp_dir, os.sep)
605 fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1', 991 self.RunGsUtil(['-m', 'cp', wildcard_uri, suri(bucket_uri)])
606 contents='1') 992 self.AssertNObjectsInBucket(bucket_uri, num_test_files)
607 key_uploaded_correctly = self.CreateObject(object_name='foo1', contents='1', 993
608 bucket_uri=bucket_uri) 994 @SkipForS3('No resumable upload support for S3.')
609 args_uploaded_correctly = PerformResumableUploadIfAppliesArgs( 995 def test_cp_resumable_upload_break(self):
610 fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly, 996 """Tests that an upload can be resumed after a connection break."""
611 key_uploaded_correctly, '', {}, tracker_file, tracker_file_lock) 997 bucket_uri = self.CreateBucket()
612 998 fpath = self.CreateTempFile(contents='a' * self.halt_size)
613 # Not yet uploaded, but needed. 999 boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KB))
614 fpath_not_uploaded = self.CreateTempFile(file_name='foo2', contents='2') 1000 with SetBotoConfigForTest([boto_config_for_test]):
615 key_not_uploaded = self.CreateObject(object_name='foo2', contents='2', 1001 stderr = self.RunGsUtil(['cp', '--haltatbyte', '5', fpath,
616 bucket_uri=bucket_uri) 1002 suri(bucket_uri)],
617 args_not_uploaded = PerformResumableUploadIfAppliesArgs( 1003 expected_status=1, return_stderr=True)
618 fpath_not_uploaded, 0, 1, fpath_not_uploaded, key_not_uploaded, '', {}, 1004 self.assertIn('Artifically halting upload', stderr)
619 tracker_file, tracker_file_lock) 1005 stderr = self.RunGsUtil(['cp', fpath, suri(bucket_uri)],
620 1006 return_stderr=True)
621 # Already uploaded, but contents no longer match. Even though the contents 1007 self.assertIn('Resuming upload', stderr)
622 # differ, we don't delete this since the bucket is not versioned and it 1008
623 # will be overwritten anyway. 1009 @SkipForS3('No resumable upload support for S3.')
624 fpath_wrong_contents = self.CreateTempFile(file_name='foo4', contents='4') 1010 def test_cp_resumable_upload(self):
625 key_wrong_contents = self.CreateObject(object_name='foo4', contents='_', 1011 """Tests that a basic resumable upload completes successfully."""
626 bucket_uri=bucket_uri) 1012 bucket_uri = self.CreateBucket()
627 args_wrong_contents = PerformResumableUploadIfAppliesArgs( 1013 fpath = self.CreateTempFile(contents='a' * self.halt_size)
628 fpath_wrong_contents, 0, 1, fpath_wrong_contents, key_wrong_contents, 1014 boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KB))
629 '', {}, tracker_file, tracker_file_lock) 1015 with SetBotoConfigForTest([boto_config_for_test]):
630 1016 self.RunGsUtil(['cp', fpath, suri(bucket_uri)])
631 # Exists in tracker file, but component object no longer exists. 1017
632 fpath_remote_deleted = self.CreateTempFile(file_name='foo5', contents='5') 1018 @SkipForS3('No resumable upload support for S3.')
633 args_remote_deleted = PerformResumableUploadIfAppliesArgs( 1019 def test_resumable_upload_break_leaves_tracker(self):
634 fpath_remote_deleted, 0, 1, fpath_remote_deleted, '', '', {}, 1020 """Tests that a tracker file is created with a resumable upload."""
635 tracker_file, tracker_file_lock) 1021 bucket_uri = self.CreateBucket()
636 1022 fpath = self.CreateTempFile(file_name='foo',
637 # Exists in tracker file and already uploaded, but no longer needed. 1023 contents='a' * self.halt_size)
638 fpath_no_longer_used = self.CreateTempFile(file_name='foo6', contents='6') 1024 boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KB))
639 key_no_longer_used = self.CreateObject(object_name='foo6', contents='6', 1025 with SetBotoConfigForTest([boto_config_for_test]):
640 bucket_uri=bucket_uri) 1026 tracker_filename = GetTrackerFilePath(
641 1027 StorageUrlFromString(suri(bucket_uri, 'foo')),
642 dst_args = {fpath_uploaded_correctly:args_uploaded_correctly, 1028 TrackerFileType.UPLOAD, self.test_api)
643 fpath_not_uploaded:args_not_uploaded, 1029 try:
644 fpath_wrong_contents:args_wrong_contents, 1030 stderr = self.RunGsUtil(['cp', '--haltatbyte', '5', fpath,
645 fpath_remote_deleted:args_remote_deleted} 1031 suri(bucket_uri, 'foo')],
646 1032 expected_status=1, return_stderr=True)
647 existing_components = [ObjectFromTracker(fpath_uploaded_correctly, ''), 1033 self.assertIn('Artifically halting upload', stderr)
648 ObjectFromTracker(fpath_wrong_contents, ''), 1034 self.assertTrue(os.path.exists(tracker_filename),
649 ObjectFromTracker(fpath_remote_deleted, ''), 1035 'Tracker file %s not present.' % tracker_filename)
650 ObjectFromTracker(fpath_no_longer_used, '')] 1036 finally:
651 1037 if os.path.exists(tracker_filename):
652 suri_builder = StorageUriBuilder(0, BucketStorageUri) 1038 os.unlink(tracker_filename)
653 1039
654 (components_to_upload, uploaded_components, existing_objects_to_delete) = ( 1040 @SkipForS3('No resumable upload support for S3.')
655 FilterExistingComponents(dst_args, existing_components, 1041 def test_cp_resumable_upload_break_file_size_change(self):
656 bucket_uri.bucket_name, suri_builder)) 1042 """Tests a resumable upload where the uploaded file changes size.
657 1043
658 for arg in [args_not_uploaded, args_wrong_contents, args_remote_deleted]: 1044 This should fail when we read the tracker data.
659 self.assertTrue(arg in components_to_upload) 1045 """
660 self.assertEqual(str([args_uploaded_correctly.dst_uri]), 1046 bucket_uri = self.CreateBucket()
661 str(uploaded_components)) 1047 tmp_dir = self.CreateTempDir()
662 self.assertEqual( 1048 fpath = self.CreateTempFile(file_name='foo', tmpdir=tmp_dir,
663 str([MakeGsUri(bucket_uri.bucket_name, fpath_no_longer_used, 1049 contents='a' * self.halt_size)
664 suri_builder)]), 1050 boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KB))
665 str(existing_objects_to_delete)) 1051 with SetBotoConfigForTest([boto_config_for_test]):
666 1052 stderr = self.RunGsUtil(['cp', '--haltatbyte', '5', fpath,
667 def test_filter_existing_components_versioned(self): 1053 suri(bucket_uri)],
668 suri_builder = StorageUriBuilder(0, BucketStorageUri) 1054 expected_status=1, return_stderr=True)
669 bucket_uri = self.CreateVersionedBucket() 1055 self.assertIn('Artifically halting upload', stderr)
670 tracker_file = self.CreateTempFile(file_name='foo', contents='asdf') 1056 fpath = self.CreateTempFile(file_name='foo', tmpdir=tmp_dir,
671 tracker_file_lock = CreateLock() 1057 contents='a' * self.halt_size * 2)
672 1058 stderr = self.RunGsUtil(['cp', fpath, suri(bucket_uri)],
673 # Already uploaded, contents still match, component still used. 1059 expected_status=1, return_stderr=True)
674 fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1', 1060 self.assertIn('ResumableUploadAbortException', stderr)
675 contents='1') 1061
676 key_uploaded_correctly = self.CreateObject(object_name='foo1', contents='1', 1062 @SkipForS3('No resumable upload support for S3.')
677 bucket_uri=bucket_uri) 1063 def test_cp_resumable_upload_break_file_content_change(self):
678 args_uploaded_correctly = PerformResumableUploadIfAppliesArgs( 1064 """Tests a resumable upload where the uploaded file changes content."""
679 fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly, 1065 if self.test_api == ApiSelector.XML:
680 key_uploaded_correctly, key_uploaded_correctly.generation, {}, 1066 return unittest.skip(
681 tracker_file, tracker_file_lock) 1067 'XML doesn\'t make separate HTTP calls at fixed-size boundaries for '
682 1068 'resumable uploads, so we can\'t guarantee that the server saves a '
683 # Duplicate object name in tracker file, but uploaded correctly. 1069 'specific part of the upload.')
684 fpath_duplicate = fpath_uploaded_correctly 1070 bucket_uri = self.CreateBucket()
685 key_duplicate = self.CreateObject(object_name='foo1', contents='1', 1071 tmp_dir = self.CreateTempDir()
686 bucket_uri=bucket_uri) 1072 fpath = self.CreateTempFile(file_name='foo', tmpdir=tmp_dir,
687 args_duplicate = PerformResumableUploadIfAppliesArgs( 1073 contents='a' * ONE_KB * 512)
688 fpath_duplicate, 0, 1, fpath_duplicate, key_duplicate, 1074 resumable_threshold_for_test = (
689 key_duplicate.generation, {}, tracker_file, tracker_file_lock) 1075 'GSUtil', 'resumable_threshold', str(ONE_KB))
690 object_name_duplicate = ObjectFromTracker(fpath_duplicate, 1076 resumable_chunk_size_for_test = (
691 key_duplicate.generation).object_name 1077 'GSUtil', 'json_resumable_chunk_size', str(ONE_KB * 256))
692 uri_duplicate = MakeGsUri(bucket_uri.bucket_name, object_name_duplicate, 1078 with SetBotoConfigForTest([resumable_threshold_for_test,
693 suri_builder) 1079 resumable_chunk_size_for_test]):
694 uri_duplicate.generation = args_duplicate.dst_uri.generation 1080 stderr = self.RunGsUtil(['cp', '--haltatbyte', str(ONE_KB * 384), fpath,
695 1081 suri(bucket_uri)],
696 # Already uploaded, but contents no longer match. 1082 expected_status=1, return_stderr=True)
697 fpath_wrong_contents = self.CreateTempFile(file_name='foo4', contents='4') 1083 self.assertIn('Artifically halting upload', stderr)
698 key_wrong_contents = self.CreateObject(object_name='foo4', contents='_', 1084 fpath = self.CreateTempFile(file_name='foo', tmpdir=tmp_dir,
699 bucket_uri=bucket_uri) 1085 contents='b' * ONE_KB * 512)
700 args_wrong_contents = PerformResumableUploadIfAppliesArgs( 1086 stderr = self.RunGsUtil(['cp', fpath, suri(bucket_uri)],
701 fpath_wrong_contents, 0, 1, fpath_wrong_contents, key_wrong_contents, 1087 expected_status=1, return_stderr=True)
702 key_wrong_contents.generation, {}, tracker_file, tracker_file_lock) 1088 self.assertIn('doesn\'t match cloud-supplied digest', stderr)
703 1089
704 dst_args = {fpath_uploaded_correctly:args_uploaded_correctly, 1090 @SkipForS3('No resumable upload support for S3.')
705 fpath_wrong_contents:args_wrong_contents} 1091 def test_cp_resumable_upload_break_file_smaller_size(self):
706 1092 """Tests a resumable upload where the uploaded file changes content.
707 existing_components = [ObjectFromTracker(fpath_uploaded_correctly, 1093
708 key_uploaded_correctly.generation), 1094 This should fail hash validation.
709 ObjectFromTracker(fpath_duplicate, 1095 """
710 key_duplicate.generation), 1096 bucket_uri = self.CreateBucket()
711 ObjectFromTracker(fpath_wrong_contents, 1097 tmp_dir = self.CreateTempDir()
712 key_wrong_contents.generation)] 1098 fpath = self.CreateTempFile(file_name='foo', tmpdir=tmp_dir,
713 1099 contents='a' * ONE_KB * 512)
714 (components_to_upload, uploaded_components, existing_objects_to_delete) = ( 1100 resumable_threshold_for_test = (
715 FilterExistingComponents(dst_args, existing_components, 1101 'GSUtil', 'resumable_threshold', str(ONE_KB))
716 bucket_uri.bucket_name, suri_builder)) 1102 resumable_chunk_size_for_test = (
717 1103 'GSUtil', 'json_resumable_chunk_size', str(ONE_KB * 256))
718 self.assertEqual([args_wrong_contents], components_to_upload) 1104 with SetBotoConfigForTest([resumable_threshold_for_test,
719 self.assertEqual(str([args_uploaded_correctly.dst_uri]), 1105 resumable_chunk_size_for_test]):
720 str(uploaded_components)) 1106 stderr = self.RunGsUtil(['cp', '--haltatbyte', str(ONE_KB * 384), fpath,
721 expected_to_delete = [(args_wrong_contents.dst_uri.object_name, 1107 suri(bucket_uri)],
722 args_wrong_contents.dst_uri.generation), 1108 expected_status=1, return_stderr=True)
723 (uri_duplicate.object_name, 1109 self.assertIn('Artifically halting upload', stderr)
724 args_duplicate.dst_uri.generation)] 1110 fpath = self.CreateTempFile(file_name='foo', tmpdir=tmp_dir,
725 for uri in existing_objects_to_delete: 1111 contents='a' * ONE_KB)
726 self.assertTrue((uri.object_name, uri.generation) in expected_to_delete) 1112 stderr = self.RunGsUtil(['cp', fpath, suri(bucket_uri)],
727 self.assertEqual(len(expected_to_delete), len(existing_objects_to_delete)) 1113 expected_status=1, return_stderr=True)
1114 self.assertIn('ResumableUploadAbortException', stderr)
1115
1116 # This temporarily changes the tracker directory to unwritable which
1117 # interferes with any parallel running tests that use the tracker directory.
1118 @NotParallelizable
1119 @SkipForS3('No resumable upload support for S3.')
1120 @unittest.skipIf(IS_WINDOWS, 'chmod on dir unsupported on Windows.')
1121 @PerformsFileToObjectUpload
1122 def test_cp_unwritable_tracker_file(self):
1123 """Tests a resumable upload with an unwritable tracker file."""
1124 bucket_uri = self.CreateBucket()
1125 tracker_filename = GetTrackerFilePath(
1126 StorageUrlFromString(suri(bucket_uri, 'foo')),
1127 TrackerFileType.UPLOAD, self.test_api)
1128 tracker_dir = os.path.dirname(tracker_filename)
1129 fpath = self.CreateTempFile(file_name='foo', contents='a' * ONE_KB)
1130 boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KB))
1131 save_mod = os.stat(tracker_dir).st_mode
1132
1133 try:
1134 os.chmod(tracker_dir, 0)
1135 with SetBotoConfigForTest([boto_config_for_test]):
1136 stderr = self.RunGsUtil(['cp', fpath, suri(bucket_uri)],
1137 expected_status=1, return_stderr=True)
1138 self.assertIn('Couldn\'t write tracker file', stderr)
1139 finally:
1140 os.chmod(tracker_dir, save_mod)
1141 if os.path.exists(tracker_filename):
1142 os.unlink(tracker_filename)
1143
1144 def test_cp_resumable_download_break(self):
1145 """Tests that a download can be resumed after a connection break."""
1146 bucket_uri = self.CreateBucket()
1147 object_uri = self.CreateObject(bucket_uri=bucket_uri, object_name='foo',
1148 contents='a' * self.halt_size)
1149 fpath = self.CreateTempFile()
1150 boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KB))
1151 with SetBotoConfigForTest([boto_config_for_test]):
1152 stderr = self.RunGsUtil(['cp', '--haltatbyte', '5', suri(object_uri),
1153 fpath], expected_status=1, return_stderr=True)
1154 self.assertIn('Artifically halting download.', stderr)
1155 tracker_filename = GetTrackerFilePath(
1156 StorageUrlFromString(fpath), TrackerFileType.DOWNLOAD, self.test_api)
1157 self.assertTrue(os.path.isfile(tracker_filename))
1158 stderr = self.RunGsUtil(['cp', suri(object_uri), fpath],
1159 return_stderr=True)
1160 self.assertIn('Resuming download', stderr)
1161
1162 def test_cp_resumable_download_etag_differs(self):
1163 """Tests that download restarts the file when the source object changes.
1164
1165 This causes the etag not to match.
1166 """
1167 bucket_uri = self.CreateBucket()
1168 object_uri = self.CreateObject(bucket_uri=bucket_uri, object_name='foo',
1169 contents='a' * self.halt_size)
1170 fpath = self.CreateTempFile()
1171 boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KB))
1172 with SetBotoConfigForTest([boto_config_for_test]):
1173 # This will create a tracker file with an ETag.
1174 stderr = self.RunGsUtil(['cp', '--haltatbyte', '5', suri(object_uri),
1175 fpath], expected_status=1, return_stderr=True)
1176 self.assertIn('Artifically halting download.', stderr)
1177 # Create a new object with different contents - it should have a
1178 # different ETag since the content has changed.
1179 object_uri = self.CreateObject(bucket_uri=bucket_uri, object_name='foo',
1180 contents='b' * self.halt_size)
1181 stderr = self.RunGsUtil(['cp', suri(object_uri), fpath],
1182 return_stderr=True)
1183 self.assertNotIn('Resuming download', stderr)
1184
1185 def test_cp_resumable_download_file_larger(self):
1186 """Tests download deletes the tracker file when existing file is larger."""
1187 bucket_uri = self.CreateBucket()
1188 fpath = self.CreateTempFile()
1189 object_uri = self.CreateObject(bucket_uri=bucket_uri, object_name='foo',
1190 contents='a' * self.halt_size)
1191 boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KB))
1192 with SetBotoConfigForTest([boto_config_for_test]):
1193 stderr = self.RunGsUtil(['cp', '--haltatbyte', '5', suri(object_uri),
1194 fpath],
1195 expected_status=1, return_stderr=True)
1196 self.assertIn('Artifically halting download.', stderr)
1197 with open(fpath, 'w') as larger_file:
1198 for _ in range(self.halt_size * 2):
1199 larger_file.write('a')
1200 stderr = self.RunGsUtil(['cp', suri(object_uri), fpath],
1201 expected_status=1, return_stderr=True)
1202 self.assertNotIn('Resuming download', stderr)
1203 self.assertIn('is larger', stderr)
1204 self.assertIn('Deleting tracker file', stderr)
1205
1206 def test_cp_resumable_download_content_differs(self):
1207 """Tests that we do not re-download when tracker file matches existing file.
1208
1209 We only compare size, not contents, so re-download should not occur even
1210 though the contents are technically different. However, hash validation on
1211 the file should still occur and we will delete the file then because
1212 the hashes differ.
1213 """
1214 bucket_uri = self.CreateBucket()
1215 tmp_dir = self.CreateTempDir()
1216 fpath = self.CreateTempFile(tmpdir=tmp_dir, contents='abcd' * ONE_KB)
1217 object_uri = self.CreateObject(bucket_uri=bucket_uri, object_name='foo',
1218 contents='efgh' * ONE_KB)
1219 stdout = self.RunGsUtil(['ls', '-L', suri(object_uri)], return_stdout=True)
1220 etag_match = re.search(r'\s*ETag:\s*(.*)', stdout)
1221 self.assertIsNotNone(etag_match, 'Could not get object ETag')
1222 self.assertEqual(len(etag_match.groups()), 1,
1223 'Did not match expected single ETag')
1224 etag = etag_match.group(1)
1225
1226 tracker_filename = GetTrackerFilePath(
1227 StorageUrlFromString(fpath), TrackerFileType.DOWNLOAD, self.test_api)
1228 try:
1229 with open(tracker_filename, 'w') as tracker_fp:
1230 tracker_fp.write(etag)
1231 boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KB))
1232 with SetBotoConfigForTest([boto_config_for_test]):
1233 stderr = self.RunGsUtil(['cp', suri(object_uri), fpath],
1234 return_stderr=True, expected_status=1)
1235 self.assertIn('Download already complete for file', stderr)
1236 self.assertIn('doesn\'t match cloud-supplied digest', stderr)
1237 # File and tracker file should be deleted.
1238 self.assertFalse(os.path.isfile(fpath))
1239 self.assertFalse(os.path.isfile(tracker_filename))
1240 finally:
1241 if os.path.exists(tracker_filename):
1242 os.unlink(tracker_filename)
1243
1244 def test_cp_resumable_download_content_matches(self):
1245 """Tests download no-ops when tracker file matches existing file."""
1246 bucket_uri = self.CreateBucket()
1247 tmp_dir = self.CreateTempDir()
1248 matching_contents = 'abcd' * ONE_KB
1249 fpath = self.CreateTempFile(tmpdir=tmp_dir, contents=matching_contents)
1250 object_uri = self.CreateObject(bucket_uri=bucket_uri, object_name='foo',
1251 contents=matching_contents)
1252 stdout = self.RunGsUtil(['ls', '-L', suri(object_uri)], return_stdout=True)
1253 etag_match = re.search(r'\s*ETag:\s*(.*)', stdout)
1254 self.assertIsNotNone(etag_match, 'Could not get object ETag')
1255 self.assertEqual(len(etag_match.groups()), 1,
1256 'Did not match expected single ETag')
1257 etag = etag_match.group(1)
1258 tracker_filename = GetTrackerFilePath(
1259 StorageUrlFromString(fpath), TrackerFileType.DOWNLOAD, self.test_api)
1260 with open(tracker_filename, 'w') as tracker_fp:
1261 tracker_fp.write(etag)
1262 try:
1263 boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KB))
1264 with SetBotoConfigForTest([boto_config_for_test]):
1265 stderr = self.RunGsUtil(['cp', suri(object_uri), fpath],
1266 return_stderr=True)
1267 self.assertIn('Download already complete for file', stderr)
1268 # Tracker file should be removed after successful hash validation.
1269 self.assertFalse(os.path.isfile(tracker_filename))
1270 finally:
1271 if os.path.exists(tracker_filename):
1272 os.unlink(tracker_filename)
1273
1274 def test_cp_resumable_download_tracker_file_not_matches(self):
1275 """Tests that download overwrites when tracker file etag does not match."""
1276 bucket_uri = self.CreateBucket()
1277 tmp_dir = self.CreateTempDir()
1278 fpath = self.CreateTempFile(tmpdir=tmp_dir, contents='abcd' * ONE_KB)
1279 object_uri = self.CreateObject(bucket_uri=bucket_uri, object_name='foo',
1280 contents='efgh' * ONE_KB)
1281 stdout = self.RunGsUtil(['ls', '-L', suri(object_uri)], return_stdout=True)
1282 etag_match = re.search(r'\s*ETag:\s*(.*)', stdout)
1283 self.assertIsNotNone(etag_match, 'Could not get object ETag')
1284 self.assertEqual(len(etag_match.groups()), 1,
1285 'Did not match regex for exactly one object ETag')
1286 etag = etag_match.group(1)
1287 etag += 'nonmatching'
1288 tracker_filename = GetTrackerFilePath(
1289 StorageUrlFromString(fpath), TrackerFileType.DOWNLOAD, self.test_api)
1290 with open(tracker_filename, 'w') as tracker_fp:
1291 tracker_fp.write(etag)
1292 try:
1293 boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KB))
1294 with SetBotoConfigForTest([boto_config_for_test]):
1295 stderr = self.RunGsUtil(['cp', suri(object_uri), fpath],
1296 return_stderr=True)
1297 self.assertNotIn('Resuming download', stderr)
1298 # Ensure the file was overwritten.
1299 with open(fpath, 'r') as in_fp:
1300 contents = in_fp.read()
1301 self.assertEqual(contents, 'efgh' * ONE_KB,
1302 'File not overwritten when it should have been '
1303 'due to a non-matching tracker file.')
1304 self.assertFalse(os.path.isfile(tracker_filename))
1305 finally:
1306 if os.path.exists(tracker_filename):
1307 os.unlink(tracker_filename)
1308
1309 def test_cp_resumable_download_gzip(self):
1310 """Tests that download can be resumed successfully with a gzipped file."""
1311 # Generate some reasonably incompressible data. This compresses to a bit
1312 # around 128K in practice, but we assert specifically below that it is
1313 # larger than self.halt_size to guarantee that we can halt the download
1314 # partway through.
1315 object_uri = self.CreateObject()
1316 random.seed(0)
1317 contents = str([random.choice(string.ascii_letters)
1318 for _ in xrange(ONE_KB * 128)])
1319 random.seed() # Reset the seed for any other tests.
1320 fpath1 = self.CreateTempFile(file_name='unzipped.txt', contents=contents)
1321 self.RunGsUtil(['cp', '-z', 'txt', suri(fpath1), suri(object_uri)])
1322
1323 # Use @Retry as hedge against bucket listing eventual consistency.
1324 @Retry(AssertionError, tries=3, timeout_secs=1)
1325 def _GetObjectSize():
1326 stdout = self.RunGsUtil(['du', suri(object_uri)], return_stdout=True)
1327 size_match = re.search(r'(\d+)\s+.*', stdout)
1328 self.assertIsNotNone(size_match, 'Could not get object size')
1329 self.assertEqual(len(size_match.groups()), 1,
1330 'Did not match regex for exactly one object size.')
1331 return long(size_match.group(1))
1332
1333 object_size = _GetObjectSize()
1334 self.assertGreaterEqual(object_size, self.halt_size,
1335 'Compresed object size was not large enough to '
1336 'allow for a halted download, so the test results '
1337 'would be invalid. Please increase the compressed '
1338 'object size in the test.')
1339 fpath2 = self.CreateTempFile()
1340 boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KB))
1341 with SetBotoConfigForTest([boto_config_for_test]):
1342 stderr = self.RunGsUtil(['cp', '--haltatbyte', '5', suri(object_uri),
1343 suri(fpath2)],
1344 return_stderr=True, expected_status=1)
1345 self.assertIn('Artifically halting download.', stderr)
1346 tracker_filename = GetTrackerFilePath(
1347 StorageUrlFromString(fpath2), TrackerFileType.DOWNLOAD, self.test_api)
1348 self.assertTrue(os.path.isfile(tracker_filename))
1349 self.assertIn('Downloading to temp gzip filename', stderr)
1350 # We should have a temporary gzipped file, a tracker file, and no
1351 # final file yet.
1352 self.assertTrue(os.path.isfile('%s_.gztmp' % fpath2))
1353 stderr = self.RunGsUtil(['cp', suri(object_uri), suri(fpath2)],
1354 return_stderr=True)
1355 self.assertIn('Resuming download', stderr)
1356 with open(fpath2, 'r') as f:
1357 self.assertEqual(f.read(), contents, 'File contents did not match.')
1358 self.assertFalse(os.path.isfile(tracker_filename))
1359 self.assertFalse(os.path.isfile('%s_.gztmp' % fpath2))
1360
1361 def test_cp_minus_c(self):
1362 bucket_uri = self.CreateBucket()
1363 object_uri = self.CreateObject(bucket_uri=bucket_uri, object_name='foo',
1364 contents='foo')
1365 self.RunGsUtil(
1366 ['cp', '-c', suri(bucket_uri) + '/foo2', suri(object_uri),
1367 suri(bucket_uri) + '/dir/'],
1368 expected_status=1)
1369 self.RunGsUtil(['stat', '%s/dir/foo' % suri(bucket_uri)])
1370
1371
1372 class TestCpUnitTests(testcase.GsUtilUnitTestCase):
1373 """Unit tests for gsutil cp."""
1374
1375 def testDownloadWithNoHashAvailable(self):
1376 """Tests a download with no valid server-supplied hash."""
1377 # S3 should have a special message for non-MD5 etags.
1378 bucket_uri = self.CreateBucket(provider='s3')
1379 object_uri = self.CreateObject(bucket_uri=bucket_uri, contents='foo')
1380 object_uri.get_key().etag = '12345' # Not an MD5
1381 dst_dir = self.CreateTempDir()
1382
1383 log_handler = self.RunCommand(
1384 'cp', [suri(object_uri), dst_dir], return_log_handler=True)
1385 warning_messages = log_handler.messages['warning']
1386 self.assertEquals(2, len(warning_messages))
1387 self.assertRegexpMatches(
1388 warning_messages[0],
1389 r'Non-MD5 etag \(12345\) present for key .*, '
1390 r'data integrity checks are not possible')
1391 self.assertIn('Integrity cannot be assured', warning_messages[1])
1392
1393 def test_object_and_prefix_same_name(self):
1394 bucket_uri = self.CreateBucket()
1395 object_uri = self.CreateObject(bucket_uri=bucket_uri, object_name='foo',
1396 contents='foo')
1397 self.CreateObject(bucket_uri=bucket_uri,
1398 object_name='foo/bar', contents='bar')
1399 fpath = self.CreateTempFile()
1400 # MockKey doesn't support hash_algs, so the MD5 will not match.
1401 with SetBotoConfigForTest([('GSUtil', 'check_hashes', 'never')]):
1402 self.RunCommand('cp', [suri(object_uri), fpath])
1403 with open(fpath, 'r') as f:
1404 self.assertEqual(f.read(), 'foo')
1405
1406 def test_cp_upload_respects_no_hashes(self):
1407 bucket_uri = self.CreateBucket()
1408 fpath = self.CreateTempFile(contents='abcd')
1409 with SetBotoConfigForTest([('GSUtil', 'check_hashes', 'never')]):
1410 log_handler = self.RunCommand('cp', [fpath, suri(bucket_uri)],
1411 return_log_handler=True)
1412 warning_messages = log_handler.messages['warning']
1413 self.assertEquals(1, len(warning_messages))
1414 self.assertIn('Found no hashes to validate object upload',
1415 warning_messages[0])
OLDNEW
« no previous file with comments | « gslib/tests/test_cors.py ('k') | gslib/tests/test_cp_funcs.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698