Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(375)

Side by Side Diff: third_party/gsutil/gslib/tests/test_cp.py

Issue 12042069: Scripts to download files from google storage based on sha1 sums (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master
Patch Set: Review fixes, updated gsutil Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
(Empty)
1 # Copyright 2013 Google Inc. All Rights Reserved.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 import boto
16 import os
17 import re
18 import gslib.tests.testcase as testcase
19 from gslib.util import Retry
20 from gslib.util import TWO_MB
21 from boto import storage_uri
22 from gslib.tests.util import ObjectToURI as suri
23
24
# Absolute path of the directory that contains this test module.
CURDIR = os.path.dirname(os.path.abspath(__file__))
# Directory holding the fixture files (e.g. test.mp3, test.gif) that the
# cp tests upload.
TEST_DATA_DIR = os.path.join(CURDIR, 'test_data')
27
28
class TestCp(testcase.GsUtilIntegrationTestCase):
    """Integration tests for cp command."""

    def _get_test_file(self, name):
        """Returns the path of fixture file `name` inside TEST_DATA_DIR."""
        return os.path.join(TEST_DATA_DIR, name)

    def _check_ls_content_type(self, uri_str, content_type):
        """Asserts that `ls -L uri_str` lists the given Content-Type.

        Args:
          uri_str: URI string of the object to list.
          content_type: Expected value, e.g. 'image/gif'.
        """
        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, delay=1, backoff=1)
        def _Check():
            stdout = self.RunGsUtil(['ls', '-L', uri_str], return_stdout=True)
            self.assertIn('Content-Type:\t%s' % content_type, stdout)
        _Check()

    def test_noclobber(self):
        """Tests that cp -n leaves existing cloud and local targets alone."""
        key_uri = self.CreateObject(contents='foo')
        fpath = self.CreateTempFile(contents='bar')

        # Local -> cloud: the existing object must keep its contents.
        stderr = self.RunGsUtil(['cp', '-n', fpath, suri(key_uri)],
                                return_stderr=True)
        self.assertIn('Skipping existing item: %s' % suri(key_uri), stderr)
        self.assertEqual(key_uri.get_contents_as_string(), 'foo')

        # Cloud -> local: the existing file must keep its contents.
        stderr = self.RunGsUtil(['cp', '-n', suri(key_uri), fpath],
                                return_stderr=True)
        with open(fpath, 'r') as f:
            self.assertIn('Skipping existing item: %s' % suri(f), stderr)
            self.assertEqual(f.read(), 'bar')

    def test_copy_in_cloud_noclobber(self):
        """Tests that cp -n skips an object already copied between buckets."""
        bucket1_uri = self.CreateBucket()
        bucket2_uri = self.CreateBucket()
        key_uri = self.CreateObject(bucket_uri=bucket1_uri, contents='foo')
        stderr = self.RunGsUtil(['cp', suri(key_uri), suri(bucket2_uri)],
                                return_stderr=True)
        self.assertEqual(stderr.count('Copying'), 1)
        stderr = self.RunGsUtil(
            ['cp', '-n', suri(key_uri), suri(bucket2_uri)],
            return_stderr=True)
        self.assertIn(
            'Skipping existing item: %s' % suri(bucket2_uri,
                                                key_uri.object_name),
            stderr)

    def test_streaming(self):
        """Tests uploading an object whose contents are read from stdin."""
        bucket_uri = self.CreateBucket()
        stderr = self.RunGsUtil(['cp', '-', '%s' % suri(bucket_uri, 'foo')],
                                stdin='bar', return_stderr=True)
        self.assertIn('Copying from <STDIN>', stderr)
        key_uri = bucket_uri.clone_replace_name('foo')
        self.assertEqual(key_uri.get_contents_as_string(), 'bar')

    # TODO: Implement a way to test both with and without using magic file.

    def test_detect_content_type(self):
        """Tests that uploaded mp3 and gif files get matching content types."""
        bucket_uri = self.CreateBucket()
        dsturi = suri(bucket_uri, 'foo')

        self.RunGsUtil(['cp', self._get_test_file('test.mp3'), dsturi])
        self._check_ls_content_type(dsturi, 'audio/mpeg')

        self.RunGsUtil(['cp', self._get_test_file('test.gif'), dsturi])
        self._check_ls_content_type(dsturi, 'image/gif')

    def test_content_type_override_default(self):
        """Tests that an empty Content-Type header yields the default type."""
        bucket_uri = self.CreateBucket()
        dsturi = suri(bucket_uri, 'foo')

        self.RunGsUtil(['-h', 'Content-Type:', 'cp',
                        self._get_test_file('test.mp3'), dsturi])
        self._check_ls_content_type(dsturi, 'binary/octet-stream')

        self.RunGsUtil(['-h', 'Content-Type:', 'cp',
                        self._get_test_file('test.gif'), dsturi])
        self._check_ls_content_type(dsturi, 'binary/octet-stream')

    def test_content_type_override(self):
        """Tests cp with a Content-Type header override."""
        # NOTE(review): this body is identical to
        # test_content_type_override_default (empty 'Content-Type:' header).
        # Presumably it was meant to pass an explicit value - confirm.
        bucket_uri = self.CreateBucket()
        dsturi = suri(bucket_uri, 'foo')

        self.RunGsUtil(['-h', 'Content-Type:', 'cp',
                        self._get_test_file('test.mp3'), dsturi])
        self._check_ls_content_type(dsturi, 'binary/octet-stream')

        self.RunGsUtil(['-h', 'Content-Type:', 'cp',
                        self._get_test_file('test.gif'), dsturi])
        self._check_ls_content_type(dsturi, 'binary/octet-stream')

    def test_foo_noct(self):
        """Tests the content type assigned to a small plain-text upload."""
        bucket_uri = self.CreateBucket()
        dsturi = suri(bucket_uri, 'foo')
        fpath = self.CreateTempFile(contents='foo/bar\n')
        self.RunGsUtil(['cp', fpath, dsturi])
        # The expected type depends on the use_magicfile boto config option.
        use_magicfile = boto.config.getbool('GSUtil', 'use_magicfile', False)
        content_type = ('text/plain' if use_magicfile
                        else 'application/octet-stream')
        self._check_ls_content_type(dsturi, content_type)

    def test_content_type_mismatches(self):
        """Tests that an explicit Content-Type wins over the file contents."""
        bucket_uri = self.CreateBucket()
        dsturi = suri(bucket_uri, 'foo')
        fpath = self.CreateTempFile(contents='foo/bar\n')

        self.RunGsUtil(['-h', 'Content-Type:image/gif', 'cp',
                        self._get_test_file('test.mp3'), dsturi])
        self._check_ls_content_type(dsturi, 'image/gif')

        self.RunGsUtil(['-h', 'Content-Type:image/gif', 'cp',
                        self._get_test_file('test.gif'), dsturi])
        self._check_ls_content_type(dsturi, 'image/gif')

        self.RunGsUtil(['-h', 'Content-Type:image/gif', 'cp', fpath, dsturi])
        self._check_ls_content_type(dsturi, 'image/gif')

    def test_versioning(self):
        """Tests copying to and from specific generations of an object."""
        bucket_uri = self.CreateVersionedBucket()
        k1_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data2')
        k2_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data1')
        g1 = k2_uri.generation
        self.RunGsUtil(['cp', suri(k1_uri), suri(k2_uri)])
        # Re-fetch the key so k2_uri carries the generation made by the copy.
        k2_uri = bucket_uri.clone_replace_name(k2_uri.object_name)
        k2_uri = bucket_uri.clone_replace_key(k2_uri.get_key())
        g2 = k2_uri.generation
        k2_uri.set_contents_from_string('data3')
        g3 = k2_uri.generation

        fpath = self.CreateTempFile()
        # Check to make sure current version is data3.
        self.RunGsUtil(['cp', k2_uri.versionless_uri, fpath])
        with open(fpath, 'r') as f:
            self.assertEqual(f.read(), 'data3')

        # Check contents of all three versions.
        self.RunGsUtil(['cp', '%s#%s' % (k2_uri.versionless_uri, g1), fpath])
        with open(fpath, 'r') as f:
            self.assertEqual(f.read(), 'data1')
        self.RunGsUtil(['cp', '%s#%s' % (k2_uri.versionless_uri, g2), fpath])
        with open(fpath, 'r') as f:
            self.assertEqual(f.read(), 'data2')
        self.RunGsUtil(['cp', '%s#%s' % (k2_uri.versionless_uri, g3), fpath])
        with open(fpath, 'r') as f:
            self.assertEqual(f.read(), 'data3')

        # Copy first version to current and verify.
        self.RunGsUtil(['cp', '%s#%s' % (k2_uri.versionless_uri, g1),
                        k2_uri.versionless_uri])
        self.RunGsUtil(['cp', k2_uri.versionless_uri, fpath])
        with open(fpath, 'r') as f:
            self.assertEqual(f.read(), 'data1')

        # Attempt to specify a version-specific URI for destination.
        stderr = self.RunGsUtil(['cp', fpath, k2_uri.uri],
                                return_stderr=True, expected_status=1)
        self.assertIn('cannot be the destination for gsutil cp', stderr)

    def test_cp_v_option(self):
        """Tests that cp -v reports the created version-specific URI."""
        bucket_uri = self.CreateVersionedBucket()
        k1_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data1')
        k2_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data2')

        # Case 1: Upload file to object using one-shot PUT.
        tmpdir = self.CreateTempDir()
        fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents='data1')
        self._run_cp_minus_v_test('-v', fpath1, k2_uri.uri)

        # Case 2: Upload file to object using resumable upload.
        # getint (not get) so a threshold set in the boto config file comes
        # back as an integer, which os.urandom() requires.
        size_threshold = boto.config.getint('GSUtil', 'resumable_threshold',
                                            TWO_MB)
        file_as_string = os.urandom(size_threshold)
        tmpdir = self.CreateTempDir()
        fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents=file_as_string)
        self._run_cp_minus_v_test('-v', fpath1, k2_uri.uri)

        # Case 3: Upload stream to object.
        self._run_cp_minus_v_test('-v', '-', k2_uri.uri)

        # Case 4: Download object to file. For this case we just expect
        # output of gsutil cp -v to be the URI of the file.
        tmpdir = self.CreateTempDir()
        fpath1 = self.CreateTempFile(tmpdir=tmpdir)
        dst_uri = storage_uri(fpath1)
        stderr = self.RunGsUtil(['cp', '-v', suri(k1_uri), suri(dst_uri)],
                                return_stderr=True)
        self.assertIn('Created: %s' % dst_uri.uri, stderr.split('\n')[-2])

        # Case 5: Daisy-chain from object to object.
        self._run_cp_minus_v_test('-Dv', k1_uri.uri, k2_uri.uri)

        # Case 6: Copy object to object in-the-cloud.
        # TODO: Uncomment this test once copy-in-the-cloud returns
        # version-specific URI.
        #self._run_cp_minus_v_test('-v', k1_uri.uri, k2_uri.uri)

    def _run_cp_minus_v_test(self, opt, src_str, dst_str):
        """Runs cp with `opt` and checks the reported "Created:" URI.

        Args:
          opt: Option string passed to cp, e.g. '-v' or '-Dv'.
          src_str: Source argument for cp.
          dst_str: Destination URI string; also listed with `ls -a`.
        """
        stderr = self.RunGsUtil(['cp', opt, src_str, dst_str],
                                return_stderr=True)
        match = re.search(r'Created: (.*)\n', stderr)
        self.assertIsNotNone(match)
        created_uri = match.group(1)

        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, delay=1, backoff=1)
        def _Check1():
            stdout = self.RunGsUtil(['ls', '-a', dst_str], return_stdout=True)
            lines = stdout.split('\n')
            # Final (most recent) object should match the "Created:" URI.
            # This is in second-to-last line (last line is '\n').
            self.assertEqual(created_uri, lines[-2])
        _Check1()

    def test_stdin_args(self):
        """Tests cp -I, which reads the list of source paths from stdin."""
        tmpdir = self.CreateTempDir()
        fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents='data1')
        fpath2 = self.CreateTempFile(tmpdir=tmpdir, contents='data2')
        bucket_uri = self.CreateBucket()
        self.RunGsUtil(['cp', '-I', suri(bucket_uri)],
                       stdin='\n'.join((fpath1, fpath2)))

        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, delay=1, backoff=1)
        def _Check1():
            stdout = self.RunGsUtil(['ls', suri(bucket_uri)],
                                    return_stdout=True)
            self.assertIn(os.path.basename(fpath1), stdout)
            self.assertIn(os.path.basename(fpath2), stdout)
            self.assertNumLines(stdout, 2)
        _Check1()

    def test_daisy_chain_cp(self):
        """Tests copying across storage classes without and with -D."""
        # Daisy chain mode is required for copying across storage classes,
        # so create 2 buckets and attempt to copy without vs with daisy
        # chain mode.
        bucket1_uri = self.CreateBucket(storage_class='STANDARD')
        bucket2_uri = self.CreateBucket(
            storage_class='DURABLE_REDUCED_AVAILABILITY')
        key_uri = self.CreateObject(bucket_uri=bucket1_uri, contents='foo')
        stderr = self.RunGsUtil(['cp', suri(key_uri), suri(bucket2_uri)],
                                return_stderr=True, expected_status=1)
        self.assertIn('Copy-in-the-cloud disallowed', stderr)
        key_uri = self.CreateObject(bucket_uri=bucket1_uri, contents='foo')
        stderr = self.RunGsUtil(
            ['cp', '-D', suri(key_uri), suri(bucket2_uri)],
            return_stderr=True)
        self.assertNotIn('Copy-in-the-cloud disallowed', stderr)

    def test_cp_key_to_local_stream(self):
        """Tests copying a cloud object to stdout ('-' destination)."""
        bucket_uri = self.CreateBucket()
        contents = 'foo'
        key_uri = self.CreateObject(bucket_uri=bucket_uri, contents=contents)
        stdout = self.RunGsUtil(['cp', suri(key_uri), '-'],
                                return_stdout=True)
        self.assertIn(contents, stdout)

    def test_cp_local_file_to_local_stream(self):
        """Tests copying a local file to stdout ('-' destination)."""
        contents = 'content'
        fpath = self.CreateTempFile(contents=contents)
        stdout = self.RunGsUtil(['cp', fpath, '-'], return_stdout=True)
        self.assertIn(contents, stdout)

    def test_copy_bucket_to_bucket(self):
        """Tests that cp -R bucket-to-bucket keeps the object names."""
        # Tests that recursively copying from bucket to bucket produces
        # identically named objects (and not, in particular, destination
        # objects named by the version-specific URI from source objects).
        src_bucket_uri = self.CreateVersionedBucket()
        dst_bucket_uri = self.CreateVersionedBucket()
        self.CreateObject(bucket_uri=src_bucket_uri, object_name='obj0',
                          contents='abc')
        self.CreateObject(bucket_uri=src_bucket_uri, object_name='obj1',
                          contents='def')

        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, delay=1, backoff=1)
        def _CopyAndCheck():
            self.RunGsUtil(['cp', '-R', suri(src_bucket_uri),
                            suri(dst_bucket_uri)])
            stdout = self.RunGsUtil(['ls', '-R', dst_bucket_uri.uri],
                                    return_stdout=True)
            self.assertIn(
                '%s%s/obj0\n' % (dst_bucket_uri, src_bucket_uri.bucket_name),
                stdout)
            self.assertIn(
                '%s%s/obj1\n' % (dst_bucket_uri, src_bucket_uri.bucket_name),
                stdout)
        _CopyAndCheck()

    def test_copy_bucket_to_dir(self):
        """Tests that cp -R bucket-to-directory keeps the object names."""
        # Tests that recursively copying from bucket to dir produces
        # identically named objects (and not, in particular, destination
        # objects named by the version-specific URI from source objects).
        src_bucket_uri = self.CreateBucket()
        dst_dir = self.CreateTempDir()
        self.CreateObject(bucket_uri=src_bucket_uri, object_name='obj0',
                          contents='abc')
        self.CreateObject(bucket_uri=src_bucket_uri, object_name='obj1',
                          contents='def')

        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, delay=1, backoff=1)
        def _CopyAndCheck():
            self.RunGsUtil(['cp', '-R', suri(src_bucket_uri), dst_dir])
            # Collect every file under dst_dir, in sorted order.
            dir_list = sorted(
                os.path.join(dirname, filename)
                for dirname, _, filenames in os.walk(dst_dir)
                for filename in filenames)
            self.assertEqual(len(dir_list), 2)
            self.assertEqual(
                os.path.join(dst_dir, src_bucket_uri.bucket_name, 'obj0'),
                dir_list[0])
            self.assertEqual(
                os.path.join(dst_dir, src_bucket_uri.bucket_name, 'obj1'),
                dir_list[1])
        _CopyAndCheck()
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698