Chromium Code Reviews

Unified Diff: chrome/common/extensions/docs/server2/datastore_util.py

Issue 1151283007: Docserver overhaul: Gitiles away from me. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 7 months ago
Index: chrome/common/extensions/docs/server2/datastore_util.py
diff --git a/chrome/common/extensions/docs/server2/datastore_util.py b/chrome/common/extensions/docs/server2/datastore_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..2deeb938a26c5e414710f0f370c7c7232d7c042e
--- /dev/null
+++ b/chrome/common/extensions/docs/server2/datastore_util.py
@@ -0,0 +1,135 @@
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
Ken Rockot(use gerrit already) 2015/05/26 00:26:24 This provides the PushData function used exclusively…
+
+import cPickle
+import googledatastore as datastore
+import logging
+
+
+# N.B.: To use this module you need a working Google Cloud development
+# environment with the googledatastore module installed.
+#
+# See https://cloud.google.com/datastore/docs/getstarted/start_python/
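For reference, a minimal connectivity check with the same googledatastore client (a sketch, assuming the environment from the getting-started guide above is already configured; the dataset name matches _DATASET_NAME below):

  import googledatastore as datastore

  datastore.set_options(dataset='chrome-apps-doc')
  # begin_transaction is a cheap round trip that fails fast if credentials or
  # the dataset are misconfigured.
  response = datastore.begin_transaction(datastore.BeginTransactionRequest())
  print 'Connected; got transaction handle %r' % response.transaction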
+
+
+_DATASET_NAME = 'chrome-apps-doc'
+_PERSISTENT_OBJECT_KIND = 'PersistentObjectStoreItem'
+_VALUE_PROPERTY_NAME = 'pickled_value'
+
+# The max number of entities to include in a single request. This is capped at
+# 500 by the service. In practice we may send fewer due to _MAX_REQUEST_SIZE.
+_MAX_BATCH_SIZE = 500
+
+
+# The maximum entity size allowed by Datastore.
+_MAX_ENTITY_SIZE = 1024*1024
+
+
+# The maximum request size (in bytes) to send to Datastore. This is an
+# approximate size based on the sum of entity blob_value sizes.
+_MAX_REQUEST_SIZE = 5*1024*1024
Ken Rockot(use gerrit already) 2015/05/26 00:26:24 Couldn't find any documentation for the max request size.
+
+
+def _CreateEntity(name, value):
+  entity = datastore.Entity()
+  path = entity.key.path_element.add()
+  path.kind = _PERSISTENT_OBJECT_KIND
+  path.name = name
+  pickled_value_property = entity.property.add()
+  pickled_value_property.name = _VALUE_PROPERTY_NAME
+  pickled_value_property.value.indexed = False
+  pickled_value_property.value.blob_value = value
+  return entity
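For context, reading one of these items back just inverts _CreateEntity: look the key up and unpickle the single blob property. A sketch (the helper name _LookupValue is hypothetical and not part of this patch):

  def _LookupValue(name):
    request = datastore.LookupRequest()
    key = datastore.Key()
    path = key.path_element.add()
    path.kind = _PERSISTENT_OBJECT_KIND
    path.name = name
    request.key.extend([key])
    response = datastore.lookup(request)
    if not response.found:
      return None
    # The one property is the pickled blob written by _CreateEntity.
    entity = response.found[0].entity
    return cPickle.loads(entity.property[0].value.blob_value)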
+
+
+def _CreateBatches(data):
+  '''Constructs batches of at most _MAX_BATCH_SIZE entities, covering all
+  entities defined in |data| without exceeding the request size limit. This is
+  a generator yielding (batch, batch_end, total) tuples, where |batch| is a
+  list of entities and |batch_end| and |total| report progress.
+  '''
+  def get_size(entity):
+    return len(entity.property[0].value.blob_value)
+
+  entities = [_CreateEntity(name, value) for name, value in data.iteritems()]
+  if not entities:
+    return
+  batch_start = 0
+  batch_end = 1
+  batch_size = get_size(entities[0])
+  while batch_end < len(entities):
+    next_size = get_size(entities[batch_end])
+    if (batch_size + next_size > _MAX_REQUEST_SIZE or
+        batch_end - batch_start >= _MAX_BATCH_SIZE):
+      # Note: a single entity can never exceed _MAX_REQUEST_SIZE on its own,
+      # since PushData culls anything larger than _MAX_ENTITY_SIZE.
+      yield entities[batch_start:batch_end], batch_end, len(entities)
+      batch_start = batch_end
+      batch_size = 0
+    else:
+      batch_size += next_size
+      batch_end += 1
+  if batch_end > batch_start and batch_start < len(entities):
+    yield entities[batch_start:batch_end], batch_end, len(entities)
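To illustrate the batching contract, a sketch with hypothetical uniform 1 KB values; both limits are asserted per batch:

  data = dict(('key-%d' % i, 'x' * 1024) for i in xrange(1200))
  for batch, done, total in _CreateBatches(data):
    assert len(batch) <= _MAX_BATCH_SIZE
    assert sum(len(e.property[0].value.blob_value)
               for e in batch) <= _MAX_REQUEST_SIZE
    print 'Batch of %d entities (%d/%d emitted)' % (len(batch), done, total)

With 1200 uniform entities this yields batches of 500, 500 and 200, capped by _MAX_BATCH_SIZE rather than _MAX_REQUEST_SIZE.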
+
+
+def PushData(data, original_data=None):
+  '''Pushes a bunch of data into the datastore. The data should be a dict.
+  Each key is treated as a namespace, and each value is also a dict. A new
+  datastore entry is upserted for every inner key, with the value pickled into
+  the |pickled_value| field.
+
+  For example, if given the dictionary:
+
+    {
+      'fruit': {
+        'apple': 1234,
+        'banana': 'yellow',
+        'trolling carrot': { 'arbitrarily complex': ['value', 'goes', 'here'] }
+      },
+      'animal': {
+        'sheep': 'baaah',
+        'dog': 'woof',
+        'trolling cat': 'moo'
+      }
+    }
+
+  this would result in a push of 6 keys in total, with the following IDs:
+
+    Key('PersistentObjectStoreItem', 'fruit/apple')
+    Key('PersistentObjectStoreItem', 'fruit/banana')
+    Key('PersistentObjectStoreItem', 'fruit/trolling carrot')
+    Key('PersistentObjectStoreItem', 'animal/sheep')
+    Key('PersistentObjectStoreItem', 'animal/dog')
+    Key('PersistentObjectStoreItem', 'animal/trolling cat')
+
+  If given |original_data|, this will only push key-value pairs for entries
+  that are either new or have changed from their original (pickled) value.
+
+  Caveat: Pickling and unpickling a dictionary can (but does not always)
+  change its key order, so objects will often be seen as changed even when
+  they haven't changed.
+  '''
+  if original_data is None:
+    original_data = {}
+
+  datastore.set_options(dataset=_DATASET_NAME)
+
+  def flatten(dataset):
+    flat = {}
+    for namespace, items in dataset.iteritems():
+      for k, v in items.iteritems():
+        flat['%s/%s' % (namespace, k)] = cPickle.dumps(v)
+    return flat
+
+  logging.info('Flattening data sets...')
+  data = flatten(data)
+  original_data = flatten(original_data)
+
+  logging.info('Culling new data...')
+  # Drop values which are unchanged from |original_data|, and values too large
+  # to fit in a single datastore entity.
+  for k in data.keys():
+    if ((k in original_data and original_data[k] == data[k]) or
+        (len(data[k]) > _MAX_ENTITY_SIZE)):
+      del data[k]
Ken Rockot(use gerrit already) 2015/05/26 00:26:24 This should be super awesome and get us very tiny…
not at google - send to devlin 2015/06/04 22:40:45 What about an OrderedDict? That should pickle, and unpickle, with a stable key order.
Ken Rockot(use gerrit already) 2015/06/05 00:21:50 No, I think that's the right approach. I didn't do…
+
+  for batch, n, total in _CreateBatches(data):
+    commit_request = datastore.CommitRequest()
+    commit_request.mode = datastore.CommitRequest.NON_TRANSACTIONAL
+    commit_request.mutation.upsert.extend(batch)
+
+    logging.info('Committing %s/%s entities...', n, total)
+    datastore.commit(commit_request)
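A usage example (hypothetical data; the real callers are outside this diff):

  new_data = {
    'fruit': {'apple': 1234, 'banana': 'yellow'},
    'animal': {'dog': 'woof'},
  }
  old_data = {
    'fruit': {'apple': 1234, 'banana': 'green'},
    'animal': {'dog': 'woof'},
  }
  # Only 'fruit/banana' differs from its original pickled value, so exactly
  # one entity, Key('PersistentObjectStoreItem', 'fruit/banana'), is upserted.
  PushData(new_data, original_data=old_data)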
