| Index: third_party/gsutil/boto/cloudsearch/document.py
|
| diff --git a/third_party/gsutil/boto/cloudsearch/document.py b/third_party/gsutil/boto/cloudsearch/document.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..9b37b269cc43c79102ab1a03510606efb0928b6c
|
| --- /dev/null
|
| +++ b/third_party/gsutil/boto/cloudsearch/document.py
|
| @@ -0,0 +1,147 @@
|
| +# Copyright (c) 2012 Mitch Garnaat http://garnaat.org/
|
| +# Copyright (c) 2012 Amazon.com, Inc. or its affiliates.
|
| +# All Rights Reserved
|
| +#
|
| +# Permission is hereby granted, free of charge, to any person obtaining a
|
| +# copy of this software and associated documentation files (the
|
| +# "Software"), to deal in the Software without restriction, including
|
| +# without limitation the rights to use, copy, modify, merge, publish, dis-
|
| +# tribute, sublicense, and/or sell copies of the Software, and to permit
|
| +# persons to whom the Software is furnished to do so, subject to the fol-
|
| +# lowing conditions:
|
| +#
|
| +# The above copyright notice and this permission notice shall be included
|
| +# in all copies or substantial portions of the Software.
|
| +#
|
| +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
| +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
|
| +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
| +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
| +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
| +# IN THE SOFTWARE.
|
| +#
|
| +
|
| +import boto.exception
|
| +from boto.compat import json
|
| +import requests
|
| +import boto
|
| +
|
| +class SearchServiceException(Exception):
|
| + pass
|
| +
|
| +
|
| +class CommitMismatchError(Exception):
|
| + pass
|
| +
|
| +
|
| +class DocumentServiceConnection(object):
|
| +
|
| + def __init__(self, domain=None, endpoint=None):
|
| + self.domain = domain
|
| + self.endpoint = endpoint
|
| + if not self.endpoint:
|
| + self.endpoint = domain.doc_service_endpoint
|
| + self.documents_batch = []
|
| + self._sdf = None
|
| +
|
| + def add(self, _id, version, fields, lang='en'):
|
| + d = {'type': 'add', 'id': _id, 'version': version, 'lang': lang,
|
| + 'fields': fields}
|
| + self.documents_batch.append(d)
|
| +
|
| + def delete(self, _id, version):
|
| + d = {'type': 'delete', 'id': _id, 'version': version}
|
| + self.documents_batch.append(d)
|
| +
|
| + def get_sdf(self):
|
| + return self._sdf if self._sdf else json.dumps(self.documents_batch)
|
| +
|
| + def clear_sdf(self):
|
| + self._sdf = None
|
| + self.documents_batch = []
|
| +
|
| + def add_sdf_from_s3(self, key_obj):
|
| + """@todo (lucas) would be nice if this could just take an s3://uri..."""
|
| + self._sdf = key_obj.get_contents_as_string()
|
| +
|
| + def commit(self):
|
| + sdf = self.get_sdf()
|
| +
|
| + if ': null' in sdf:
|
| + boto.log.error('null value in sdf detected. This will probably raise '
|
| + '500 error.')
|
| + index = sdf.index(': null')
|
| + boto.log.error(sdf[index - 100:index + 100])
|
| +
|
| + url = "http://%s/2011-02-01/documents/batch" % (self.endpoint)
|
| +
|
| + request_config = {
|
| + 'pool_connections': 20,
|
| + 'keep_alive': True,
|
| + 'max_retries': 5,
|
| + 'pool_maxsize': 50
|
| + }
|
| +
|
| + r = requests.post(url, data=sdf, config=request_config,
|
| + headers={'Content-Type': 'application/json'})
|
| +
|
| + return CommitResponse(r, self, sdf)
|
| +
|
| +
|
| +class CommitResponse(object):
|
| + """Wrapper for response to Cloudsearch document batch commit.
|
| +
|
| + :type response: :class:`requests.models.Response`
|
| + :param response: Response from Cloudsearch /documents/batch API
|
| +
|
| + :type doc_service: :class:`exfm.cloudsearch.DocumentServiceConnection`
|
| + :param doc_service: Object containing the documents posted and methods to
|
| + retry
|
| +
|
| + :raises: :class:`boto.exception.BotoServerError`
|
| + :raises: :class:`exfm.cloudsearch.SearchServiceException`
|
| + """
|
| + def __init__(self, response, doc_service, sdf):
|
| + self.response = response
|
| + self.doc_service = doc_service
|
| + self.sdf = sdf
|
| +
|
| + try:
|
| + self.content = json.loads(response.content)
|
| + except:
|
| + boto.log.error('Error indexing documents.\nResponse Content:\n{}\n\n'
|
| + 'SDF:\n{}'.format(response.content, self.sdf))
|
| + raise boto.exception.BotoServerError(self.response.status_code, '',
|
| + body=response.content)
|
| +
|
| + self.status = self.content['status']
|
| + if self.status == 'error':
|
| + self.errors = [e.get('message') for e in self.content.get('errors',
|
| + [])]
|
| + else:
|
| + self.errors = []
|
| +
|
| + self.adds = self.content['adds']
|
| + self.deletes = self.content['deletes']
|
| + self._check_num_ops('add', self.adds)
|
| + self._check_num_ops('delete', self.deletes)
|
| +
|
| + def _check_num_ops(self, type_, response_num):
|
| + """Raise exception if number of ops in response doesn't match commit
|
| +
|
| + :type type_: str
|
| + :param type_: Type of commit operation: 'add' or 'delete'
|
| +
|
| + :type response_num: int
|
| + :param response_num: Number of adds or deletes in the response.
|
| +
|
| + :raises: :class:`exfm.cloudsearch.SearchServiceException`
|
| + """
|
| + commit_num = len([d for d in self.doc_service.documents_batch
|
| + if d['type'] == type_])
|
| +
|
| + if response_num != commit_num:
|
| + raise CommitMismatchError(
|
| + 'Incorrect number of {}s returned. Commit: {} Respose: {}'\
|
| + .format(type_, commit_num, response_num))
|
|
|