OLD | NEW |
(Empty) | |
| 1 # Copyright 2013 Google Inc. All Rights Reserved. |
| 2 # |
| 3 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 # you may not use this file except in compliance with the License. |
| 5 # You may obtain a copy of the License at |
| 6 # |
| 7 # http://www.apache.org/licenses/LICENSE-2.0 |
| 8 # |
| 9 # Unless required by applicable law or agreed to in writing, software |
| 10 # distributed under the License is distributed on an "AS IS" BASIS, |
| 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 # See the License for the specific language governing permissions and |
| 13 # limitations under the License. |
| 14 |
| 15 import boto |
| 16 import os |
| 17 import re |
| 18 import gslib.tests.testcase as testcase |
| 19 from gslib.util import Retry |
| 20 from gslib.util import TWO_MB |
| 21 from boto import storage_uri |
| 22 from gslib.tests.util import ObjectToURI as suri |
| 23 |
| 24 |
# Absolute path of the directory that holds this test module.
CURDIR = os.path.dirname(os.path.abspath(__file__))
# Fixture files referenced by the tests (e.g. test.mp3, test.gif) live here.
TEST_DATA_DIR = os.path.join(CURDIR, 'test_data')
| 27 |
| 28 |
| 29 class TestCp(testcase.GsUtilIntegrationTestCase): |
| 30 """Integration tests for cp command.""" |
| 31 |
def _get_test_file(self, name):
  """Returns the path of the fixture file `name` inside TEST_DATA_DIR."""
  fixture_path = os.path.join(TEST_DATA_DIR, name)
  return fixture_path
| 34 |
def test_noclobber(self):
  """Runs cp -n in both directions and checks the destination is kept."""
  skip_fmt = 'Skipping existing item: %s'
  object_uri = self.CreateObject(contents='foo')
  local_path = self.CreateTempFile(contents='bar')

  # Local file -> existing object: the object must still contain 'foo'.
  upload_cmd = ['cp', '-n', local_path, suri(object_uri)]
  upload_stderr = self.RunGsUtil(upload_cmd, return_stderr=True)
  self.assertIn(skip_fmt % suri(object_uri), upload_stderr)
  self.assertEqual(object_uri.get_contents_as_string(), 'foo')

  # Object -> existing local file: the file must still contain 'bar'.
  download_cmd = ['cp', '-n', suri(object_uri), local_path]
  download_stderr = self.RunGsUtil(download_cmd, return_stderr=True)
  with open(local_path, 'r') as local_file:
    # suri() is handed the open file object here, as in the original test.
    self.assertIn(skip_fmt % suri(local_file), download_stderr)
    self.assertEqual(local_file.read(), 'bar')
| 47 |
def test_copy_in_cloud_noclobber(self):
  """Copies an object between buckets, then repeats the copy with -n."""
  src_bucket = self.CreateBucket()
  dst_bucket = self.CreateBucket()
  src_object = self.CreateObject(bucket_uri=src_bucket, contents='foo')

  # First copy: exactly one 'Copying' message is expected on stderr.
  first_cmd = ['cp', suri(src_object), suri(dst_bucket)]
  first_stderr = self.RunGsUtil(first_cmd, return_stderr=True)
  self.assertEqual(first_stderr.count('Copying'), 1)

  # Second copy with -n: the already-present destination must be skipped.
  second_cmd = ['cp', '-n', suri(src_object), suri(dst_bucket)]
  second_stderr = self.RunGsUtil(second_cmd, return_stderr=True)
  skipped_uri = suri(dst_bucket, src_object.object_name)
  self.assertIn('Skipping existing item: %s' % skipped_uri, second_stderr)
| 59 |
def test_streaming(self):
  """Uploads 'bar' from stdin via `cp -` and reads the object back."""
  bucket_uri = self.CreateBucket()
  destination = '%s' % suri(bucket_uri, 'foo')
  cp_stderr = self.RunGsUtil(['cp', '-', destination],
                             stdin='bar', return_stderr=True)
  self.assertIn('Copying from <STDIN>', cp_stderr)

  uploaded_uri = bucket_uri.clone_replace_name('foo')
  self.assertEqual(uploaded_uri.get_contents_as_string(), 'bar')
| 67 |
| 68 # TODO: Implement a way to test both with and without using magic file. |
| 69 |
def test_detect_content_type(self):
  """Uploads test.mp3 then test.gif and checks the listed Content-Type."""
  bucket_uri = self.CreateBucket()
  dsturi = suri(bucket_uri, 'foo')

  # (fixture file, line expected in the `ls -L` output after uploading it).
  expectations = (
      ('test.mp3', 'Content-Type:\taudio/mpeg'),
      ('test.gif', 'Content-Type:\timage/gif'),
  )
  for fixture_name, expected_line in expectations:
    self.RunGsUtil(['cp', self._get_test_file(fixture_name), dsturi])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, delay=1, backoff=1)
    def _CheckListing():
      listing = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
      self.assertIn(expected_line, listing)
    _CheckListing()
| 89 |
def test_content_type_override_default(self):
  """Uploads with an empty Content-Type header and checks the listing.

  Both fixtures are copied with `-h Content-Type:` (no value) and the
  listing is expected to report binary/octet-stream each time.
  """
  bucket_uri = self.CreateBucket()
  dsturi = suri(bucket_uri, 'foo')
  expected_line = 'Content-Type:\tbinary/octet-stream'

  for fixture_name in ('test.mp3', 'test.gif'):
    source_path = self._get_test_file(fixture_name)
    self.RunGsUtil(['-h', 'Content-Type:', 'cp', source_path, dsturi])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, delay=1, backoff=1)
    def _CheckListing():
      listing = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
      self.assertIn(expected_line, listing)
    _CheckListing()
| 111 |
def test_content_type_override(self):
  """Uploads with a `-h Content-Type:` header and checks the listing.

  NOTE(review): this body performs the same steps as
  test_content_type_override_default (empty Content-Type value, expecting
  binary/octet-stream) -- confirm whether a non-empty override value was
  intended here.
  """
  bucket_uri = self.CreateBucket()
  dsturi = suri(bucket_uri, 'foo')

  def _UploadWithHeader(fixture_name):
    header_args = ['-h', 'Content-Type:']
    cp_args = ['cp', self._get_test_file(fixture_name), dsturi]
    self.RunGsUtil(header_args + cp_args)

  # Use @Retry as hedge against bucket listing eventual consistency.
  @Retry(AssertionError, tries=3, delay=1, backoff=1)
  def _CheckOctetStream():
    listing = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
    self.assertIn('Content-Type:\tbinary/octet-stream', listing)

  _UploadWithHeader('test.mp3')
  _CheckOctetStream()

  _UploadWithHeader('test.gif')
  _CheckOctetStream()
| 133 |
def test_foo_noct(self):
  """Uploads a small text file with no Content-Type header given.

  The expected Content-Type depends on the boto config option
  GSUtil/use_magicfile: text/plain when it is enabled, otherwise
  application/octet-stream.
  """
  bucket_uri = self.CreateBucket()
  dsturi = suri(bucket_uri, 'foo')
  local_path = self.CreateTempFile(contents='foo/bar\n')
  self.RunGsUtil(['cp', local_path, dsturi])

  # Use @Retry as hedge against bucket listing eventual consistency.
  @Retry(AssertionError, tries=3, delay=1, backoff=1)
  def _CheckListing():
    listing = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
    if boto.config.getbool('GSUtil', 'use_magicfile', False):
      expected_type = 'text/plain'
    else:
      expected_type = 'application/octet-stream'
    self.assertIn('Content-Type:\t%s' % expected_type, listing)
  _CheckListing()
| 148 |
def test_content_type_mismatches(self):
  """Forces Content-Type:image/gif on uploads of differing file types.

  An mp3 fixture, a gif fixture and a plain temp file are each copied with
  `-h Content-Type:image/gif`; the listing must report image/gif each time.
  """
  bucket_uri = self.CreateBucket()
  dsturi = suri(bucket_uri, 'foo')
  text_path = self.CreateTempFile(contents='foo/bar\n')

  # Use @Retry as hedge against bucket listing eventual consistency.
  @Retry(AssertionError, tries=3, delay=1, backoff=1)
  def _CheckIsGif():
    listing = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
    self.assertIn('Content-Type:\timage/gif', listing)

  sources = (
      lambda: self._get_test_file('test.mp3'),
      lambda: self._get_test_file('test.gif'),
      lambda: text_path,
  )
  for resolve_source in sources:
    self.RunGsUtil(['-h', 'Content-Type:image/gif', 'cp',
                    resolve_source(), dsturi])
    _CheckIsGif()
| 179 |
def test_versioning(self):
  """Exercises cp with generation-specific URIs in a versioned bucket."""
  bucket_uri = self.CreateVersionedBucket()
  k1_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data2')
  k2_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data1')
  g1 = k2_uri.generation

  # Overwrite k2 with k1 ('data2'), then re-fetch k2 to learn generation g2.
  self.RunGsUtil(['cp', suri(k1_uri), suri(k2_uri)])
  k2_uri = bucket_uri.clone_replace_name(k2_uri.object_name)
  k2_uri = bucket_uri.clone_replace_key(k2_uri.get_key())
  g2 = k2_uri.generation

  # A third write ('data3') yields generation g3.
  k2_uri.set_contents_from_string('data3')
  g3 = k2_uri.generation

  fpath = self.CreateTempFile()

  def _DownloadAndCompare(source, expected_contents):
    # Copies `source` over the temp file and compares what was written.
    self.RunGsUtil(['cp', source, fpath])
    with open(fpath, 'r') as downloaded:
      self.assertEqual(downloaded.read(), expected_contents)

  # Check to make sure current version is data3.
  _DownloadAndCompare(k2_uri.versionless_uri, 'data3')

  # Check contents of all three versions.
  for generation, expected in ((g1, 'data1'), (g2, 'data2'), (g3, 'data3')):
    versioned_src = '%s#%s' % (k2_uri.versionless_uri, generation)
    _DownloadAndCompare(versioned_src, expected)

  # Copy first version to current and verify.
  first_version = '%s#%s' % (k2_uri.versionless_uri, g1)
  self.RunGsUtil(['cp', first_version, k2_uri.versionless_uri])
  _DownloadAndCompare(k2_uri.versionless_uri, 'data1')

  # Attempt to specify a version-specific URI for destination.
  failure_stderr = self.RunGsUtil(['cp', fpath, k2_uri.uri],
                                  return_stderr=True, expected_status=1)
  self.assertIn('cannot be the destination for gsutil cp', failure_stderr)
| 220 |
def test_cp_v_option(self):
  """Tests that cp -v reports the URI of the item it created.

  For uploads, streams and daisy-chain copies the check is delegated to
  _run_cp_minus_v_test; for a download the last stderr line is expected to
  name the local file URI.
  """
  bucket_uri = self.CreateVersionedBucket()
  k1_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data1')
  k2_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data2')

  # Case 1: Upload file to object using one-shot PUT.
  tmpdir = self.CreateTempDir()
  fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents='data1')
  self._run_cp_minus_v_test('-v', fpath1, k2_uri.uri)

  # Case 2: Upload file to object using resumable upload.
  # getint() is required here: a value configured in the boto config comes
  # back from get() as a string, which os.urandom() rejects.
  size_threshold = boto.config.getint('GSUtil', 'resumable_threshold',
                                      TWO_MB)
  file_as_string = os.urandom(size_threshold)
  tmpdir = self.CreateTempDir()
  fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents=file_as_string)
  self._run_cp_minus_v_test('-v', fpath1, k2_uri.uri)

  # Case 3: Upload stream to object.
  self._run_cp_minus_v_test('-v', '-', k2_uri.uri)

  # Case 4: Download object to file. For this case we just expect output of
  # gsutil cp -v to be the URI of the file.
  tmpdir = self.CreateTempDir()
  fpath1 = self.CreateTempFile(tmpdir=tmpdir)
  dst_uri = storage_uri(fpath1)
  stderr = self.RunGsUtil(['cp', '-v', suri(k1_uri), suri(dst_uri)],
                          return_stderr=True)
  # stderr ends with a newline, so the final message is the second-to-last
  # element after splitting.
  self.assertIn('Created: %s' % dst_uri.uri, stderr.split('\n')[-2])

  # Case 5: Daisy-chain from object to object.
  self._run_cp_minus_v_test('-Dv', k1_uri.uri, k2_uri.uri)

  # Case 6: Copy object to object in-the-cloud.
  # TODO: Uncomment this test once copy-in-the-cloud returns version-specific
  # URI.
  #self._run_cp_minus_v_test('-v', k1_uri.uri, k2_uri.uri)
| 259 |
def _run_cp_minus_v_test(self, opt, src_str, dst_str):
  """Runs `cp opt src_str dst_str` and validates the reported 'Created:' URI.

  Args:
    opt: Option string passed to cp (callers in this file use '-v' or '-Dv').
    src_str: Source argument for cp.
    dst_str: Destination argument for cp; also listed with `ls -a`.
  """
  cp_stderr = self.RunGsUtil(['cp', opt, src_str, dst_str],
                             return_stderr=True)
  created_match = re.search(r'Created: (.*)\n', cp_stderr)
  self.assertIsNotNone(created_match)
  reported_uri = created_match.group(1)

  # Use @Retry as hedge against bucket listing eventual consistency.
  @Retry(AssertionError, tries=3, delay=1, backoff=1)
  def _CheckNewestListing():
    listing = self.RunGsUtil(['ls', '-a', dst_str], return_stdout=True)
    # The most recent object is compared against the "Created:" URI. It sits
    # in the second-to-last element because the listing ends with a newline.
    newest_entry = listing.split('\n')[-2]
    self.assertEqual(reported_uri, newest_entry)
  _CheckNewestListing()
| 274 |
def test_stdin_args(self):
  """Feeds two file paths to `cp -I` on stdin and checks both get listed."""
  tmpdir = self.CreateTempDir()
  first_path = self.CreateTempFile(tmpdir=tmpdir, contents='data1')
  second_path = self.CreateTempFile(tmpdir=tmpdir, contents='data2')
  bucket_uri = self.CreateBucket()

  # One source path per line on stdin.
  path_list = '\n'.join((first_path, second_path))
  self.RunGsUtil(['cp', '-I', suri(bucket_uri)], stdin=path_list)

  # Use @Retry as hedge against bucket listing eventual consistency.
  @Retry(AssertionError, tries=3, delay=1, backoff=1)
  def _CheckBucketListing():
    listing = self.RunGsUtil(['ls', suri(bucket_uri)], return_stdout=True)
    for uploaded_path in (first_path, second_path):
      self.assertIn(os.path.basename(uploaded_path), listing)
    self.assertNumLines(listing, 2)
  _CheckBucketListing()
| 290 |
def test_daisy_chain_cp(self):
  """Copies across storage classes without and then with -D."""
  # Daisy chain mode is required for copying across storage classes,
  # so create 2 buckets and attempt to copy without vs with daisy chain mode.
  disallowed_msg = 'Copy-in-the-cloud disallowed'
  standard_bucket = self.CreateBucket(storage_class='STANDARD')
  dra_bucket = self.CreateBucket(
      storage_class='DURABLE_REDUCED_AVAILABILITY')

  # Without -D the command is expected to exit with status 1.
  plain_src = self.CreateObject(bucket_uri=standard_bucket, contents='foo')
  plain_stderr = self.RunGsUtil(['cp', suri(plain_src), suri(dra_bucket)],
                                return_stderr=True, expected_status=1)
  self.assertIn(disallowed_msg, plain_stderr)

  # With -D the same kind of copy must not report the restriction.
  daisy_src = self.CreateObject(bucket_uri=standard_bucket, contents='foo')
  daisy_stderr = self.RunGsUtil(
      ['cp', '-D', suri(daisy_src), suri(dra_bucket)], return_stderr=True)
  self.assertNotIn(disallowed_msg, daisy_stderr)
| 305 |
def test_cp_key_to_local_stream(self):
  """Copies an object to '-' and expects its contents on stdout."""
  payload = 'foo'
  bucket_uri = self.CreateBucket()
  object_uri = self.CreateObject(bucket_uri=bucket_uri, contents=payload)
  cp_stdout = self.RunGsUtil(['cp', suri(object_uri), '-'],
                             return_stdout=True)
  self.assertIn(payload, cp_stdout)
| 312 |
def test_cp_local_file_to_local_stream(self):
  """Copies a local file to '-' and expects its contents on stdout."""
  payload = 'content'
  local_path = self.CreateTempFile(contents=payload)
  cp_stdout = self.RunGsUtil(['cp', local_path, '-'], return_stdout=True)
  self.assertIn(payload, cp_stdout)
| 318 |
def test_copy_bucket_to_bucket(self):
  """Recursively copies one versioned bucket into another."""
  # Tests that recursively copying from bucket to bucket produces identically
  # named objects (and not, in particular, destination objects named by the
  # version-specific URI from source objects).
  src_bucket_uri = self.CreateVersionedBucket()
  dst_bucket_uri = self.CreateVersionedBucket()
  for object_name, contents in (('obj0', 'abc'), ('obj1', 'def')):
    self.CreateObject(bucket_uri=src_bucket_uri, object_name=object_name,
                      contents=contents)

  # Use @Retry as hedge against bucket listing eventual consistency.
  @Retry(AssertionError, tries=3, delay=1, backoff=1)
  def _CopyAndCheck():
    self.RunGsUtil(['cp', '-R', suri(src_bucket_uri),
                    suri(dst_bucket_uri)])
    listing = self.RunGsUtil(['ls', '-R', dst_bucket_uri.uri],
                             return_stdout=True)
    src_name = src_bucket_uri.bucket_name
    expected_obj0 = '%s%s/obj0\n' % (dst_bucket_uri, src_name)
    expected_obj1 = '%s%s/obj1\n' % (dst_bucket_uri, src_name)
    self.assertIn(expected_obj0, listing)
    self.assertIn(expected_obj1, listing)
  _CopyAndCheck()
| 341 |
def test_copy_bucket_to_dir(self):
  """Recursively copies a bucket into a local temp directory."""
  # Tests that recursively copying from bucket to dir produces identically
  # named objects (and not, in particular, destination objects named by the
  # version-specific URI from source objects).
  src_bucket_uri = self.CreateBucket()
  dst_dir = self.CreateTempDir()
  for object_name, contents in (('obj0', 'abc'), ('obj1', 'def')):
    self.CreateObject(bucket_uri=src_bucket_uri, object_name=object_name,
                      contents=contents)

  # Use @Retry as hedge against bucket listing eventual consistency.
  @Retry(AssertionError, tries=3, delay=1, backoff=1)
  def _CopyAndCheck():
    self.RunGsUtil(['cp', '-R', suri(src_bucket_uri), dst_dir])
    # Collect every file found anywhere below dst_dir, in sorted order.
    found_files = sorted(
        os.path.join(parent, filename)
        for parent, _, filenames in os.walk(dst_dir)
        for filename in filenames)
    self.assertEqual(len(found_files), 2)
    copied_root = os.path.join(dst_dir, src_bucket_uri.bucket_name)
    self.assertEqual(os.path.join(copied_root, 'obj0'), found_files[0])
    self.assertEqual(os.path.join(copied_root, 'obj1'), found_files[1])
  _CopyAndCheck()
OLD | NEW |