Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(32)

Side by Side Diff: chrome/common/extensions/docs/server2/datastore_util.py

Issue 2352813003: [Extensions DocServer]: More datastore API updates (Closed)
Patch Set: Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2015 The Chromium Authors. All rights reserved. 1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import cPickle 5 import cPickle
6 import googledatastore as datastore
7 import logging 6 import logging
8 7
9 from future import Future 8 from future import Future
9 from gcloud import datastore
10 10
11 # N.B.: In order to use this module you should have a working cloud development 11 # N.B.: In order to use this module you should have a working cloud development
12 # environment configured with the googledatastore module installed. 12 # environment configured with the gcloud module installed.
13 # 13 #
14 # Please see https://cloud.google.com/datastore/docs/getstarted/start_python/ 14 # Please see https://cloud.google.com/datastore/docs/getstarted/start_python/
15 15
16 16
17 _DATASET_NAME = 'chrome-apps-doc' 17 _PROJECT_NAME = 'chrome-apps-doc'
18 _PERSISTENT_OBJECT_KIND = 'PersistentObjectStoreItem' 18 _PERSISTENT_OBJECT_KIND = 'PersistentObjectStoreItem'
19 _VALUE_PROPERTY_NAME = 'pickled_value' 19 _VALUE_PROPERTY_NAME = 'pickled_value'
20 20
21 # The max number of entities to include in a single request. This is capped at 21 # The max number of entities to include in a single request. This is capped at
22 # 500 by the service. In practice we may send fewer due to _MAX_REQUEST_SIZE 22 # 500 by the service. In practice we may send fewer due to _MAX_REQUEST_SIZE
23 _MAX_BATCH_SIZE = 500 23 _MAX_BATCH_SIZE = 500
24 24
25 25
26 # The maximum entity size allowed by Datastore. 26 # The maximum entity size allowed by Datastore.
27 _MAX_ENTITY_SIZE = 1024*1024 27 _MAX_ENTITY_SIZE = 1024*1024
28 28
29 29
30 # The maximum request size (in bytes) to send Datastore. This is an approximate 30 # The maximum request size (in bytes) to send Datastore. This is an approximate
31 # size based on the sum of entity blob_value sizes. 31 # size based on the sum of entity blob_value sizes.
32 _MAX_REQUEST_SIZE = 5*1024*1024 32 _MAX_REQUEST_SIZE = 5*1024*1024
33 33
34 34
def _CreateEntity(client, name, value):
  # Build the Datastore entity that stores |value| (a pickled blob) under the
  # given |name|. The payload property is excluded from indexing because
  # Datastore refuses to index properties larger than a few KB, and the
  # pickled values here can approach _MAX_ENTITY_SIZE.
  entity_key = client.key(_PERSISTENT_OBJECT_KIND, name)
  entity = datastore.Entity(key=entity_key,
                            exclude_from_indexes=[_VALUE_PROPERTY_NAME])
  entity[_VALUE_PROPERTY_NAME] = value
  return entity
43 42
44 def _CreateBatches(data): 43 def _CreateBatches(client, data):
45 '''Constructs batches of at most _MAX_BATCH_SIZE entities to cover all 44 '''Constructs batches of at most _MAX_BATCH_SIZE entities to cover all
46 entities defined in |data| without exceeding the transaction size limit. 45 entities defined in |data| without exceeding the transaction size limit.
47 This is a generator emitting lists of entities. 46 This is a generator emitting lists of entities.
48 ''' 47 '''
49 def get_size(entity): 48 def get_size(entity):
50 return len(entity.properties[_VALUE_PROPERTY_NAME].value.blob_value) 49 return len(entity[_VALUE_PROPERTY_NAME])
51 50
52 entities = [_CreateEntity(name, value) for name, value in data.iteritems()] 51 entities = [_CreateEntity(client, name, value)
52 for name, value in data.iteritems()]
53 batch_start = 0 53 batch_start = 0
54 batch_end = 1 54 batch_end = 1
55 batch_size = get_size(entities[0]) 55 batch_size = get_size(entities[0])
56 while batch_end < len(entities): 56 while batch_end < len(entities):
57 next_size = get_size(entities[batch_end]) 57 next_size = get_size(entities[batch_end])
58 if (batch_size + next_size > _MAX_REQUEST_SIZE or 58 if (batch_size + next_size > _MAX_REQUEST_SIZE or
59 batch_end - batch_start >= _MAX_BATCH_SIZE): 59 batch_end - batch_start >= _MAX_BATCH_SIZE):
60 yield entities[batch_start:batch_end], batch_end, len(entities) 60 yield entities[batch_start:batch_end], batch_end, len(entities)
61 batch_start = batch_end 61 batch_start = batch_end
62 batch_size = 0 62 batch_size = 0
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
97 Key('PersistentObjectStoreItem', 'animal/dog') 97 Key('PersistentObjectStoreItem', 'animal/dog')
98 Key('PersistentObjectStoreItem', 'animal/trolling cat') 98 Key('PersistentObjectStoreItem', 'animal/trolling cat')
99 99
100 If given |original_data|, this will only push key-value pairs for entries that 100 If given |original_data|, this will only push key-value pairs for entries that
101 are either new or have changed from their original (pickled) value. 101 are either new or have changed from their original (pickled) value.
102 102
103 Caveat: Pickling and unpickling a dictionary can (but does not always) change 103 Caveat: Pickling and unpickling a dictionary can (but does not always) change
104 its key order. This means that objects will often be seen as changed even when 104 its key order. This means that objects will often be seen as changed even when
105 they haven't changed. 105 they haven't changed.
106 ''' 106 '''
107 datastore.set_options(dataset=_DATASET_NAME) 107 client = datastore.Client(_PROJECT_NAME)
108 108
109 def flatten(dataset): 109 def flatten(dataset):
110 flat = {} 110 flat = {}
111 for namespace, items in dataset.iteritems(): 111 for namespace, items in dataset.iteritems():
112 for k, v in items.iteritems(): 112 for k, v in items.iteritems():
113 flat['%s/%s' % (namespace, k)] = cPickle.dumps(v) 113 flat['%s/%s' % (namespace, k)] = cPickle.dumps(v)
114 return flat 114 return flat
115 115
116 logging.info('Flattening data sets...') 116 logging.info('Flattening data sets...')
117 data = flatten(data) 117 data = flatten(data)
118 original_data = flatten(original_data) 118 original_data = flatten(original_data)
119 119
120 logging.info('Culling new data...') 120 logging.info('Culling new data...')
121 for k in data.keys(): 121 for k in data.keys():
122 if ((k in original_data and original_data[k] == data[k]) or 122 if ((k in original_data and original_data[k] == data[k]) or
123 (len(data[k]) > _MAX_ENTITY_SIZE)): 123 (len(data[k]) > _MAX_ENTITY_SIZE)):
124 del data[k] 124 del data[k]
125 125
126 for batch, n, total in _CreateBatches(data): 126 for entities, n, total in _CreateBatches(client, data):
127 commit_request = datastore.CommitRequest() 127 batch = client.batch()
128 commit_request.mode = datastore.CommitRequest.NON_TRANSACTIONAL 128 for e in entities:
129 commit_request.mutations.upsert.extend(list(batch)) 129 batch.put(e)
130
131 logging.info('Committing %s/%s entities...' % (n, total)) 130 logging.info('Committing %s/%s entities...' % (n, total))
132 datastore.commit(commit_request) 131 batch.commit()
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698