Index: third_party/gsutil/gslib/addlhelp/wildcards.py |
diff --git a/third_party/gsutil/gslib/addlhelp/wildcards.py b/third_party/gsutil/gslib/addlhelp/wildcards.py |
deleted file mode 100644 |
index 0a5e26071898e759ba99ac3978df64c138fda361..0000000000000000000000000000000000000000 |
--- a/third_party/gsutil/gslib/addlhelp/wildcards.py |
+++ /dev/null |
@@ -1,170 +0,0 @@ |
-# Copyright 2012 Google Inc. All Rights Reserved. |
-# |
-# Licensed under the Apache License, Version 2.0 (the "License"); |
-# you may not use this file except in compliance with the License. |
-# You may obtain a copy of the License at |
-# |
-# http://www.apache.org/licenses/LICENSE-2.0 |
-# |
-# Unless required by applicable law or agreed to in writing, software |
-# distributed under the License is distributed on an "AS IS" BASIS, |
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
-# See the License for the specific language governing permissions and |
-# limitations under the License. |
- |
-from gslib.help_provider import HELP_NAME |
-from gslib.help_provider import HELP_NAME_ALIASES |
-from gslib.help_provider import HELP_ONE_LINE_SUMMARY |
-from gslib.help_provider import HelpProvider |
-from gslib.help_provider import HELP_TEXT |
-from gslib.help_provider import HelpType |
-from gslib.help_provider import HELP_TYPE |
- |
-_detailed_help_text = (""" |
-<B>DESCRIPTION</B> |
- gsutil supports URI wildcards. For example, the command: |
- |
- gsutil cp gs://bucket/data/abc* . |
- |
- will copy all objects that start with gs://bucket/data/abc followed by any |
- number of characters within that subdirectory. |
- |
- |
-<B>DIRECTORY BY DIRECTORY VS RECURSIVE WILDCARDS</B> |
- The "*" wildcard only matches up to the end of a path within |
- a subdirectory. For example, if bucket contains objects |
- named gs://bucket/data/abcd, gs://bucket/data/abcdef, |
- and gs://bucket/data/abcxyx, as well as an object in a sub-directory |
- (gs://bucket/data/abc/def) the above gsutil cp command would match the |
- first 3 object names but not the last one. |
- |
- If you want matches to span directory boundaries, use a '**' wildcard: |
- |
- gsutil cp gs://bucket/data/abc** . |
- |
- will match all four objects above. |
- |
- Note that gsutil supports the same wildcards for both objects and file names. |
- Thus, for example: |
- |
- gsutil cp data/abc* gs://bucket |
- |
- will match all names in the local file system. Most command shells also |
- support wildcarding, so if you run the above command probably your shell |
- is expanding the matches before running gsutil. However, most shells do not |
- support recursive wildcards ('**'), and you can cause gsutil's wildcarding |
- support to work for such shells by single-quoting the arguments so they |
- don't get interpreted by the shell before being passed to gsutil: |
- |
- gsutil cp 'data/abc**' gs://bucket |
- |
- |
-<B>BUCKET WILDCARDS</B> |
- You can specify wildcards for bucket names. For example: |
- |
- gsutil ls gs://data*.example.com |
- |
- will list the contents of all buckets whose name starts with "data" and |
- ends with ".example.com". |
- |
- You can also combine bucket and object name wildcards. For example this |
- command will remove all ".txt" files in any of your Google Cloud Storage |
- buckets: |
- |
- gsutil rm gs://*/**.txt |
- |
- |
-<B>OTHER WILDCARD CHARACTERS</B> |
- In addition to '*', you can use these wildcards: |
- |
- ? Matches a single character. For example "gs://bucket/??.txt" |
- only matches objects with two characters followed by .txt. |
- |
- [chars] Match any of the specified characters. For example |
- "gs://bucket/[aeiou].txt" matches objects that contain a single vowel |
- character followed by .txt |
- |
- [char range] Match any of the range of characters. For example |
- "gs://bucket/[a-m].txt" matches objects that contain letters |
- a, b, c, ... or m, and end with .txt. |
- |
- You can combine wildcards to provide more powerful matches, for example: |
- gs://bucket/[a-m]??.j*g |
- |
- |
-<B>EFFICIENCY CONSIDERATION: USING WILDCARDS OVER MANY OBJECTS</B> |
- It is more efficient, faster, and less network traffic-intensive |
- to use wildcards that have a non-wildcard object-name prefix, like: |
- |
- gs://bucket/abc*.txt |
- |
- than it is to use wildcards as the first part of the object name, like: |
- |
- gs://bucket/*abc.txt |
- |
- This is because the request for "gs://bucket/abc*.txt" asks the server |
- to send back the subset of results whose object names start with "abc", |
- and then gsutil filters the result list for objects whose name ends with |
- ".txt". In contrast, "gs://bucket/*abc.txt" asks the server for the complete |
- list of objects in the bucket and then filters for those objects whose name |
- ends with "abc.txt". This efficiency consideration becomes increasingly |
- noticeable when you use buckets containing thousands or more objects. It is |
- sometimes possible to set up the names of your objects to fit with expected |
- wildcard matching patterns, to take advantage of the efficiency of doing |
- server-side prefix requests. See, for example "gsutil help prod" for a |
- concrete use case example. |
- |
- |
-<B>EFFICIENCY CONSIDERATION: USING MID-PATH WILDCARDS</B> |
- Suppose you have a bucket with these objects: |
- gs://bucket/obj1 |
- gs://bucket/obj2 |
- gs://bucket/obj3 |
- gs://bucket/obj4 |
- gs://bucket/dir1/obj5 |
- gs://bucket/dir2/obj6 |
- |
- If you run the command: |
- gsutil ls gs://bucket/*/obj5 |
- gsutil will perform a /-delimited top-level bucket listing and then one bucket |
- listing for each subdirectory, for a total of 3 bucket listings: |
- GET /bucket/?delimiter=/ |
- GET /bucket/?prefix=dir1/obj5&delimiter=/ |
- GET /bucket/?prefix=dir2/obj5&delimiter=/ |
- |
- The more bucket listings your wildcard requires, the slower and more expensive |
- it will be. The number of bucket listings required grows as: |
- - the number of wildcard components (e.g., "gs://bucket/a??b/c*/*/d" |
- has 3 wildcard components); |
- - the number of subdirectories that match each component; and |
- - the number of results (pagination is implemented using one GET |
- request per 1000 results, specifying markers for each). |
- |
- If you want to use a mid-path wildcard, you might try instead using a |
- recursive wildcard, for example: |
- |
- gsutil ls gs://bucket/**/obj5 |
- |
- This will match more objects than gs://bucket/*/obj5 (since it spans |
- directories), but is implemented using a delimiter-less bucket listing |
- request (which means fewer bucket requests, though it will list the entire |
- bucket and filter locally, so that could require a non-trivial amount of |
- network traffic). |
-""") |
- |
- |
-class CommandOptions(HelpProvider): |
- """Additional help about wildcards.""" |
- |
- help_spec = { |
- # Name of command or auxiliary help info for which this help applies. |
- HELP_NAME : 'wildcards', |
- # List of help name aliases. |
- HELP_NAME_ALIASES : ['wildcard', '*', '**'], |
- # Type of help: |
- HELP_TYPE : HelpType.ADDITIONAL_HELP, |
- # One line summary of this help. |
- HELP_ONE_LINE_SUMMARY : 'Wildcard support', |
- # The full help text. |
- HELP_TEXT : _detailed_help_text, |
- } |