Chromium Code Reviews

Side by Side Diff: chrome/common/extensions/docs/server2/datastore_util.py

Issue 1151283007: Docserver overhaul: Gitiles away from me. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 7 months ago
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
Ken Rockot(use gerrit already) 2015/05/26 00:26:24 This provides the PushData function used exclusive

import cPickle
import googledatastore as datastore
import logging

from future import Future

# N.B.: To use this module you need a working cloud development environment
# with the googledatastore module installed.
#
# Please see https://cloud.google.com/datastore/docs/getstarted/start_python/

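# A minimal usage sketch (hypothetical data; assumes the environment described
# above is configured and credentials are available to googledatastore):
#
#   import datastore_util
#   datastore_util.PushData({'fruit': {'apple': 1234}})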

_DATASET_NAME = 'chrome-apps-doc'
_PERSISTENT_OBJECT_KIND = 'PersistentObjectStoreItem'
_VALUE_PROPERTY_NAME = 'pickled_value'

# The max number of entities to include in a single request. This is capped at
# 500 by the service. In practice we may send fewer due to _MAX_REQUEST_SIZE.
_MAX_BATCH_SIZE = 500


# The maximum entity size allowed by Datastore.
_MAX_ENTITY_SIZE = 1024*1024


# The maximum request size (in bytes) to send to Datastore. This is an
# approximate size based on the sum of entity blob_value sizes.
_MAX_REQUEST_SIZE = 5*1024*1024
Ken Rockot(use gerrit already) 2015/05/26 00:26:24 Couldn't find any documentation for the max reques


def _CreateEntity(name, value):
  '''Creates a single PersistentObjectStoreItem entity with key name |name|
  and |value| stored, unindexed, in its pickled_value blob property.
  '''
  entity = datastore.Entity()
  path = entity.key.path_element.add()
  path.kind = _PERSISTENT_OBJECT_KIND
  path.name = name
  pickled_value_property = entity.property.add()
  pickled_value_property.name = _VALUE_PROPERTY_NAME
  pickled_value_property.value.indexed = False
  pickled_value_property.value.blob_value = value
  return entity
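
# For example (hypothetical value), _CreateEntity('fruit/apple', pickled)
# produces an entity whose key path is (PersistentObjectStoreItem,
# 'fruit/apple') and whose only property is an unindexed blob named
# 'pickled_value' holding |pickled|.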


def _CreateBatches(data):
  '''Constructs batches of at most _MAX_BATCH_SIZE entities to cover all
  entities defined in |data| without exceeding the request size limit. This is
  a generator yielding (batch, progress, total) tuples, where |batch| is a
  list of entities, |progress| is the number of entities batched so far, and
  |total| is the total number of entities.
  '''
  def get_size(entity):
    return len(entity.property[0].value.blob_value)

  entities = [_CreateEntity(name, value) for name, value in data.iteritems()]
  if not entities:
    # Nothing to batch; avoid indexing into an empty list below.
    return
  batch_start = 0
  batch_end = 1
  batch_size = get_size(entities[0])
  while batch_end < len(entities):
    next_size = get_size(entities[batch_end])
    if (batch_size + next_size > _MAX_REQUEST_SIZE or
        batch_end - batch_start >= _MAX_BATCH_SIZE):
      yield entities[batch_start:batch_end], batch_end, len(entities)
      batch_start = batch_end
      batch_size = 0
    else:
      batch_size += next_size
      batch_end += 1
  if batch_end > batch_start and batch_start < len(entities):
    yield entities[batch_start:batch_end], batch_end, len(entities)
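
# Usage sketch (hypothetical data, small enough to fit in one batch):
#
#   for batch, progress, total in _CreateBatches({'a/x': 'abc', 'a/y': 'de'}):
#     logging.info('Batched %s/%s entities', progress, total)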


def PushData(data, original_data={}):
  '''Pushes a bunch of data into the datastore. The data should be a dict. Each
  key is treated as a namespace, and each value is also a dict. A new datastore
  entry is upserted for every inner key, with the value pickled into the
  |pickled_value| field.

  For example, if given the dictionary:

  {
    'fruit': {
      'apple': 1234,
      'banana': 'yellow',
      'trolling carrot': { 'arbitrarily complex': ['value', 'goes', 'here'] }
    },
    'animal': {
      'sheep': 'baaah',
      'dog': 'woof',
      'trolling cat': 'moo'
    }
  }

  this would result in a push of 6 keys in total, with the following IDs:

  Key('PersistentObjectStoreItem', 'fruit/apple')
  Key('PersistentObjectStoreItem', 'fruit/banana')
  Key('PersistentObjectStoreItem', 'fruit/trolling carrot')
  Key('PersistentObjectStoreItem', 'animal/sheep')
  Key('PersistentObjectStoreItem', 'animal/dog')
  Key('PersistentObjectStoreItem', 'animal/trolling cat')

  If given |original_data|, this will only push key-value pairs for entries
  that are either new or have changed from their original (pickled) value.

  Caveat: pickling and unpickling a dictionary can (but does not always) change
  its key order, so the same logical dict may pickle to different bytes. This
  means that objects will often be seen as changed even when they haven't
  changed.
  '''
  datastore.set_options(dataset=_DATASET_NAME)

  def flatten(dataset):
    flat = {}
    for namespace, items in dataset.iteritems():
      for k, v in items.iteritems():
        flat['%s/%s' % (namespace, k)] = cPickle.dumps(v)
    return flat

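  # e.g. (hypothetical input) flatten({'fruit': {'apple': 1234}}) returns
  # {'fruit/apple': cPickle.dumps(1234)}.
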
  logging.info('Flattening data sets...')
  data = flatten(data)
  original_data = flatten(original_data)

  logging.info('Culling new data...')
  # Drop values that are unchanged from |original_data|, as well as values too
  # large to fit in a single entity.
  for k in data.keys():
    if ((k in original_data and original_data[k] == data[k]) or
        (len(data[k]) > _MAX_ENTITY_SIZE)):
      del data[k]
Ken Rockot(use gerrit already) 2015/05/26 00:26:24 This should be super awesome and get us very tiny
not at google - send to devlin 2015/06/04 22:40:45 What about an OrderedDict? That should pickle, and
Ken Rockot(use gerrit already) 2015/06/05 00:21:50 No, I think that's the right approach. I didn't do
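
  # A sketch of the OrderedDict idea from the review thread above (a
  # hypothetical helper, not part of this patch): pickling a canonically
  # ordered mapping makes equal top-level dicts pickle to identical bytes,
  # avoiding false "changed" detections. Nested dicts would need the same
  # treatment.
  #
  #   import collections
  #   def _CanonicalPickle(value):
  #     if isinstance(value, dict):
  #       value = collections.OrderedDict(sorted(value.iteritems()))
  #     return cPickle.dumps(value)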

  for batch, n, total in _CreateBatches(data):
    commit_request = datastore.CommitRequest()
    commit_request.mode = datastore.CommitRequest.NON_TRANSACTIONAL
    commit_request.mutation.upsert.extend(list(batch))

    logging.info('Committing %s/%s entities...', n, total)
    datastore.commit(commit_request)
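
# Incremental-push usage sketch (hypothetical data; assumes a configured cloud
# environment as described at the top of this file):
#
#   old = {'fruit': {'apple': 1234}}
#   new = {'fruit': {'apple': 1234, 'banana': 'yellow'}}
#   PushData(new, original_data=old)  # only 'fruit/banana' is upserted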