Index: chrome/common/extensions/docs/server2/datastore_util.py
diff --git a/chrome/common/extensions/docs/server2/datastore_util.py b/chrome/common/extensions/docs/server2/datastore_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..2deeb938a26c5e414710f0f370c7c7232d7c042e
--- /dev/null
+++ b/chrome/common/extensions/docs/server2/datastore_util.py
@@ -0,0 +1,135 @@
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import cPickle
+import googledatastore as datastore
+import logging
+
+from future import Future
+
+# N.B.: In order to use this module you should have a working cloud development
+# environment configured with the googledatastore module installed.
+#
+# Please see https://cloud.google.com/datastore/docs/getstarted/start_python/
+
+
+_DATASET_NAME = 'chrome-apps-doc'
+_PERSISTENT_OBJECT_KIND = 'PersistentObjectStoreItem'
+_VALUE_PROPERTY_NAME = 'pickled_value'
+
+# The max number of entities to include in a single request. This is capped at
+# 500 by the service. In practice we may send fewer due to _MAX_REQUEST_SIZE.
+_MAX_BATCH_SIZE = 500
+
+
+# The maximum entity size allowed by Datastore.
+_MAX_ENTITY_SIZE = 1024*1024
+
+
+# The maximum request size (in bytes) to send to Datastore. This is an
+# approximate size based on the sum of entity blob_value sizes.
+_MAX_REQUEST_SIZE = 5*1024*1024
+
+
+def _CreateEntity(name, value):
+  entity = datastore.Entity()
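+  # The entity key is a single path element of kind PersistentObjectStoreItem,
+  # named by |name| (PushData passes the flattened 'namespace/key' string).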
+  path = entity.key.path_element.add()
+  path.kind = _PERSISTENT_OBJECT_KIND
+  path.name = name
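+  # Store the pickled value as a single unindexed blob property.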
+  pickled_value_property = entity.property.add()
+  pickled_value_property.name = _VALUE_PROPERTY_NAME
+  pickled_value_property.value.indexed = False
+  pickled_value_property.value.blob_value = value
+  return entity
+
+
+def _CreateBatches(data):
+  '''Constructs batches of at most _MAX_BATCH_SIZE entities to cover all
+  entities defined in |data| without exceeding the request size limit.
+  This is a generator emitting (entities, progress, total) tuples, where
+  |entities| is the list of entities to commit in one batch.
+  '''
+  def get_size(entity):
+    return len(entity.property[0].value.blob_value)
+
+  entities = [_CreateEntity(name, value) for name, value in data.iteritems()]
+  # Nothing to do if every entry was culled; avoids indexing an empty list.
+  if not entities:
+    return
+  batch_start = 0
+  batch_end = 1
+  batch_size = get_size(entities[0])
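+  # Grow the window [batch_start, batch_end) one entity at a time and emit it
+  # as a batch whenever adding the next entity would exceed the size or count
+  # limits.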
+  while batch_end < len(entities):
+    next_size = get_size(entities[batch_end])
+    if (batch_size + next_size > _MAX_REQUEST_SIZE or
+        batch_end - batch_start >= _MAX_BATCH_SIZE):
+      yield entities[batch_start:batch_end], batch_end, len(entities)
+      batch_start = batch_end
+      batch_size = 0
+    else:
+      batch_size += next_size
+      batch_end = batch_end + 1
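+  # Emit whatever is left over as the final (possibly undersized) batch.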
+  if batch_end > batch_start and batch_start < len(entities):
+    yield entities[batch_start:batch_end], batch_end, len(entities)
+
+
+def PushData(data, original_data={}):
+  '''Pushes a bunch of data into the datastore. The data should be a dict. Each
+  key is treated as a namespace, and each value is also a dict. A new datastore
+  entry is upserted for every inner key, with the value pickled into the
+  |pickled_value| field.
+
+  For example, if given the dictionary:
+
+  {
+    'fruit': {
+      'apple': 1234,
+      'banana': 'yellow',
+      'trolling carrot': { 'arbitrarily complex': ['value', 'goes', 'here'] }
+    },
+    'animal': {
+      'sheep': 'baaah',
+      'dog': 'woof',
+      'trolling cat': 'moo'
+    }
+  }
+
+  this would result in a push of 6 keys in total, with the following IDs:
+
+  Key('PersistentObjectStoreItem', 'fruit/apple')
+  Key('PersistentObjectStoreItem', 'fruit/banana')
+  Key('PersistentObjectStoreItem', 'fruit/trolling carrot')
+  Key('PersistentObjectStoreItem', 'animal/sheep')
+  Key('PersistentObjectStoreItem', 'animal/dog')
+  Key('PersistentObjectStoreItem', 'animal/trolling cat')
+
+  If given |original_data|, this will only push key-value pairs for entries that
+  are either new or have changed from their original (pickled) value.
+
+  Caveat: Pickling and unpickling a dictionary can (but does not always) change
+  its key order, so two pickles of the same logical value may not be
+  byte-identical. This means that objects will often be seen as changed even
+  when they haven't changed.
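+
+  A typical call (|new_data| and |previous_data| are illustrative names):
+
+    PushData(new_data, original_data=previous_data)
+
+  pushes only the entries of |new_data| that are new or changed relative to
+  |previous_data|.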
+  '''
+  datastore.set_options(dataset=_DATASET_NAME)
+
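+  # Flatten {namespace: {key: value}} into {'namespace/key': pickled value} so
+  # each entry maps directly onto one datastore entity.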
+  def flatten(dataset):
+    flat = {}
+    for namespace, items in dataset.iteritems():
+      for k, v in items.iteritems():
+        flat['%s/%s' % (namespace, k)] = cPickle.dumps(v)
+    return flat
+
+  logging.info('Flattening data sets...')
+  data = flatten(data)
+  original_data = flatten(original_data)
+
+  logging.info('Culling new data...')
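+  # Drop entries that are unchanged from |original_data| and entries whose
+  # pickled value exceeds the single-entity size limit.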
+  for k in data.keys():
+    if ((k in original_data and original_data[k] == data[k]) or
+        (len(data[k]) > _MAX_ENTITY_SIZE)):
+      del data[k]
+
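+  # Upsert each batch of entities with a single non-transactional commit.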
+  for batch, n, total in _CreateBatches(data):
+    commit_request = datastore.CommitRequest()
+    commit_request.mode = datastore.CommitRequest.NON_TRANSACTIONAL
+    commit_request.mutation.upsert.extend(list(batch))
+
+    logging.info('Committing %s/%s entities...' % (n, total))
+    datastore.commit(commit_request)