Index: chrome/common/extensions/docs/server2/datastore_util.py
diff --git a/chrome/common/extensions/docs/server2/datastore_util.py b/chrome/common/extensions/docs/server2/datastore_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..2deeb938a26c5e414710f0f370c7c7232d7c042e
--- /dev/null
+++ b/chrome/common/extensions/docs/server2/datastore_util.py
@@ -0,0 +1,156 @@
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import cPickle
+import googledatastore as datastore
+import logging
+
+from future import Future
+
+# N.B.: To use this module you need a working Google Cloud development
+# environment with the googledatastore module installed.
+#
+# See https://cloud.google.com/datastore/docs/getstarted/start_python/
+
+
+_DATASET_NAME = 'chrome-apps-doc'
+_PERSISTENT_OBJECT_KIND = 'PersistentObjectStoreItem'
+_VALUE_PROPERTY_NAME = 'pickled_value'
+
+# The max number of entities to include in a single request. This is capped at
+# 500 by the service. In practice we may send fewer due to _MAX_REQUEST_SIZE.
+_MAX_BATCH_SIZE = 500
+
+
+# The maximum entity size allowed by Datastore.
+_MAX_ENTITY_SIZE = 1024*1024
+
+
+# The maximum request size (in bytes) to send to Datastore. This is an
+# approximate limit based on the sum of entity blob_value sizes.
+_MAX_REQUEST_SIZE = 5*1024*1024
+
+
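+# Builds a single datastore entity of kind _PERSISTENT_OBJECT_KIND, keyed by
+# |name|, with |value| stored as a single unindexed blob property.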
+def _CreateEntity(name, value):
+  entity = datastore.Entity()
+  path = entity.key.path_element.add()
+  path.kind = _PERSISTENT_OBJECT_KIND
+  path.name = name
+  pickled_value_property = entity.property.add()
+  pickled_value_property.name = _VALUE_PROPERTY_NAME
+  pickled_value_property.value.indexed = False
+  pickled_value_property.value.blob_value = value
+  return entity
+
+
+def _CreateBatches(data):
+  '''Constructs batches of at most _MAX_BATCH_SIZE entities to cover all
+  entities defined in |data| without exceeding the request size limit. This is
+  a generator emitting (batch, entities_processed, total_entities) tuples.
+  '''
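+  # Illustrative example: with _MAX_REQUEST_SIZE = 10 and entities of sizes
+  # [6, 3, 4, 2], this yields the batches [6, 3] and [4, 2].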
+  def get_size(entity):
+    return len(entity.property[0].value.blob_value)
+
+  entities = [_CreateEntity(name, value) for name, value in data.iteritems()]
+  if not entities:
+    return
+  batch_start = 0
+  batch_end = 1
+  batch_size = get_size(entities[0])
+  while batch_end < len(entities):
+    next_size = get_size(entities[batch_end])
+    if (batch_size + next_size > _MAX_REQUEST_SIZE or
+        batch_end - batch_start >= _MAX_BATCH_SIZE):
+      yield entities[batch_start:batch_end], batch_end, len(entities)
+      batch_start = batch_end
+      # The triggering entity starts the next batch, so count its size.
+      batch_size = next_size
+    else:
+      batch_size += next_size
+    batch_end = batch_end + 1
+  if batch_end > batch_start and batch_start < len(entities):
+    yield entities[batch_start:batch_end], batch_end, len(entities)
+
+
+def PushData(data, original_data={}):
+  '''Pushes a bunch of data into the datastore. The data should be a dict. Each
+  key is treated as a namespace, and each value is also a dict. A new datastore
+  entry is upserted for every inner key, with the value pickled into the
+  |pickled_value| field.
+
+  For example, if given the dictionary:
+
+  {
+    'fruit': {
+      'apple': 1234,
+      'banana': 'yellow',
+      'trolling carrot': { 'arbitrarily complex': ['value', 'goes', 'here'] }
+    },
+    'animal': {
+      'sheep': 'baaah',
+      'dog': 'woof',
+      'trolling cat': 'moo'
+    }
+  }
+
+  this would result in a push of 6 keys in total, with the following IDs:
+
+  Key('PersistentObjectStoreItem', 'fruit/apple')
+  Key('PersistentObjectStoreItem', 'fruit/banana')
+  Key('PersistentObjectStoreItem', 'fruit/trolling carrot')
+  Key('PersistentObjectStoreItem', 'animal/sheep')
+  Key('PersistentObjectStoreItem', 'animal/dog')
+  Key('PersistentObjectStoreItem', 'animal/trolling cat')
+
+  If given |original_data|, this will only push key-value pairs for entries that
+  are either new or have changed from their original (pickled) value.
+
+  Caveat: Pickling and unpickling a dictionary can (but does not always) change
+  its key order. This means that objects will often be seen as changed even when
+  they haven't changed.
+  '''
+  datastore.set_options(dataset=_DATASET_NAME)
+
+  def flatten(dataset):
+    flat = {}
+    for namespace, items in dataset.iteritems():
+      for k, v in items.iteritems():
+        flat['%s/%s' % (namespace, k)] = cPickle.dumps(v)
+    return flat
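+  # For example, flatten({'fruit': {'apple': 1234}}) returns
+  # {'fruit/apple': cPickle.dumps(1234)}.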
+
+  logging.info('Flattening data sets...')
+  data = flatten(data)
+  original_data = flatten(original_data)
+
+  logging.info('Culling new data...')
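+  # Drop entries that are unchanged from |original_data|, as well as entries
+  # too large to store in a single entity (these are silently skipped).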
+  for k in data.keys():
+    if ((k in original_data and original_data[k] == data[k]) or
+        (len(data[k]) > _MAX_ENTITY_SIZE)):
+      del data[k]
+
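+  # Batches are committed independently (NON_TRANSACTIONAL), so a failure
+  # partway through can leave earlier batches already applied.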
+  for batch, n, total in _CreateBatches(data):
+    commit_request = datastore.CommitRequest()
+    commit_request.mode = datastore.CommitRequest.NON_TRANSACTIONAL
+    commit_request.mutation.upsert.extend(list(batch))
+
+    logging.info('Committing %s/%s entities...' % (n, total))
+    datastore.commit(commit_request)
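+
+
+# Example (hypothetical) usage, assuming a configured googledatastore
+# environment and an |old_data| snapshot in the same two-level
+# {namespace: {key: value}} shape:
+#
+#   new_data = {'fruit': {'apple': 1234, 'banana': 'yellow'}}
+#   PushData(new_data, original_data=old_data)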