Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import cPickle | 5 import cPickle |
| 6 import googledatastore as datastore | 6 import googledatastore as datastore |
| 7 import logging | 7 import logging |
| 8 | 8 |
| 9 from future import Future | 9 from future import Future |
| 10 | 10 |
| 11 # N.B.: In order to use this module you should have a working cloud development | 11 # N.B.: In order to use this module you should have a working cloud development |
| 12 # environment configured with the googledatastore module installed. | 12 # environment configured with the googledatastore module installed. |
| 13 # | 13 # |
| 14 # Please see https://cloud.google.com/datastore/docs/getstarted/start_python/ | 14 # Please see https://cloud.google.com/datastore/docs/getstarted/start_python/ |
| 15 | 15 |
| 16 | 16 |
| 17 _DATASET_NAME = 'chrome-apps-doc' | 17 _DATASET_NAME = 'chrome-apps-doc' |
| 18 _PERSISTENT_OBJECT_KIND = 'PersistentObjectStoreItem' | 18 _PERSISTENT_OBJECT_KIND = 'PersistentObjectStoreItem' |
| 19 _VALUE_PROPERTY_NAME = 'pickled_value' | 19 _VALUE_PROPERTY_NAME = 'pickled_value' |
|
asargent_no_longer_on_chrome
2016/09/14 04:57:57
random aside: this style of having variables for t
Devlin
2016/09/14 15:09:24
For this particular case (property name), the reas
| |
| 20 | 20 |
| 21 # The max number of entities to include in a single request. This is capped at | 21 # The max number of entities to include in a single request. This is capped at |
| 22 # 500 by the service. In practice we may send fewer due to _MAX_REQUEST_SIZE | 22 # 500 by the service. In practice we may send fewer due to _MAX_REQUEST_SIZE |
| 23 _MAX_BATCH_SIZE = 500 | 23 _MAX_BATCH_SIZE = 500 |
| 24 | 24 |
| 25 | 25 |
| 26 # The maximum entity size allowed by Datastore. | 26 # The maximum entity size allowed by Datastore. |
| 27 _MAX_ENTITY_SIZE = 1024*1024 | 27 _MAX_ENTITY_SIZE = 1024*1024 |
| 28 | 28 |
| 29 | 29 |
| 30 # The maximum request size (in bytes) to send Datastore. This is an approximate | 30 # The maximum request size (in bytes) to send Datastore. This is an approximate |
| 31 # size based on the sum of entity blob_value sizes. | 31 # size based on the sum of entity blob_value sizes. |
| 32 _MAX_REQUEST_SIZE = 5*1024*1024 | 32 _MAX_REQUEST_SIZE = 5*1024*1024 |
| 33 | 33 |
| 34 | 34 |
def _CreateEntity(name, value):
  '''Builds a PersistentObjectStoreItem entity keyed by |name| whose pickled
  payload |value| is stored unindexed under _VALUE_PROPERTY_NAME.
  '''
  # Exclude the blob property from indexing up front; indexed properties have
  # a much smaller size limit than the 1MB entity cap.
  entity = datastore.Entity(exclude_from_indexes=[_VALUE_PROPERTY_NAME])
  key_path = entity.key.path.add()
  key_path.kind = _PERSISTENT_OBJECT_KIND
  key_path.name = name
  entity.update({_VALUE_PROPERTY_NAME: value})
  return entity
| 45 | 42 |
| 46 | 43 |
| 47 def _CreateBatches(data): | 44 def _CreateBatches(data): |
| 48 '''Constructs batches of at most _MAX_BATCH_SIZE entities to cover all | 45 '''Constructs batches of at most _MAX_BATCH_SIZE entities to cover all |
| 49 entities defined in |data| without exceeding the transaction size limit. | 46 entities defined in |data| without exceeding the transaction size limit. |
| 50 This is a generator emitting lists of entities. | 47 This is a generator emitting lists of entities. |
| 51 ''' | 48 ''' |
| 52 def get_size(entity): | 49 def get_size(entity): |
| 53 return len(entity.property[0].value.blob_value) | 50 return len(entity.properties[_VALUE_PROPERTY_NAME].value.blob_value) |
| 54 | 51 |
| 55 entities = [_CreateEntity(name, value) for name, value in data.iteritems()] | 52 entities = [_CreateEntity(name, value) for name, value in data.iteritems()] |
| 56 batch_start = 0 | 53 batch_start = 0 |
| 57 batch_end = 1 | 54 batch_end = 1 |
| 58 batch_size = get_size(entities[0]) | 55 batch_size = get_size(entities[0]) |
| 59 while batch_end < len(entities): | 56 while batch_end < len(entities): |
| 60 next_size = get_size(entities[batch_end]) | 57 next_size = get_size(entities[batch_end]) |
| 61 if (batch_size + next_size > _MAX_REQUEST_SIZE or | 58 if (batch_size + next_size > _MAX_REQUEST_SIZE or |
| 62 batch_end - batch_start >= _MAX_BATCH_SIZE): | 59 batch_end - batch_start >= _MAX_BATCH_SIZE): |
| 63 yield entities[batch_start:batch_end], batch_end, len(entities) | 60 yield entities[batch_start:batch_end], batch_end, len(entities) |
| (...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 122 | 119 |
| 123 logging.info('Culling new data...') | 120 logging.info('Culling new data...') |
| 124 for k in data.keys(): | 121 for k in data.keys(): |
| 125 if ((k in original_data and original_data[k] == data[k]) or | 122 if ((k in original_data and original_data[k] == data[k]) or |
| 126 (len(data[k]) > _MAX_ENTITY_SIZE)): | 123 (len(data[k]) > _MAX_ENTITY_SIZE)): |
| 127 del data[k] | 124 del data[k] |
| 128 | 125 |
| 129 for batch, n, total in _CreateBatches(data): | 126 for batch, n, total in _CreateBatches(data): |
| 130 commit_request = datastore.CommitRequest() | 127 commit_request = datastore.CommitRequest() |
| 131 commit_request.mode = datastore.CommitRequest.NON_TRANSACTIONAL | 128 commit_request.mode = datastore.CommitRequest.NON_TRANSACTIONAL |
| 132 commit_request.mutation.upsert.extend(list(batch)) | 129 commit_request.mutations.upsert.extend(list(batch)) |
| 133 | 130 |
| 134 logging.info('Committing %s/%s entities...' % (n, total)) | 131 logging.info('Committing %s/%s entities...' % (n, total)) |
| 135 datastore.commit(commit_request) | 132 datastore.commit(commit_request) |
| OLD | NEW |