Chromium Code Reviews| Index: appengine/findit/util_scripts/iterator.py |
| diff --git a/appengine/findit/util_scripts/iterator.py b/appengine/findit/util_scripts/iterator.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..cc56fcba7e7ff40da3a25ddf9f35dc6869796318 |
| --- /dev/null |
| +++ b/appengine/findit/util_scripts/iterator.py |
| @@ -0,0 +1,66 @@ |
| +# Copyright 2016 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +"""Fetches entities, then iterates over and processes them.""" |
| + |
| +import os |
| + |
| +import remote_api |
| + |
| +_DEFAULT_BATCH_SIZE = 1000 |
| + |
| + |
def ProjectEntity(entity, fields):
  """Projects the given fields of an entity into a dict.

  Args:
    entity: Object to read attributes from. It must expose ``key.id()``.
    fields (iterable of str): Attribute names to copy from the entity.

  Returns:
    A dict mapping each name in fields to the entity's attribute value, or to
    None when the entity has no such attribute, plus an 'id' key holding
    ``entity.key.id()``. The 'id' entry is written last, so it overrides a
    requested field that is itself named 'id'.
  """
  # getattr with a default replaces the hasattr/getattr pair: one attribute
  # lookup per field instead of two.
  entity_info = {field: getattr(entity, field, None) for field in fields}
  entity_info['id'] = entity.key.id()
  return entity_info
| + |
| + |
def Iterate(query,
            fields,
            filter_func=None,
            batch_size=_DEFAULT_BATCH_SIZE,
            batch_run=False):
  """Iterates entities queried by query, one page at a time.

  Args:
    query (ndb.Query): The query to fetch entities.
    fields (list): Field names of an entity to be projected to a dict.
      If a given field name is not available, it is set to None.
      An 'id' key is always added by ProjectEntity.
    filter_func (function): A function that does in memory filtering. It is
      called with the list of entities of one page and its return value is
      iterated in place of that list.
    batch_size (int): The number of entities to query at one time.
    batch_run (bool): If True, iterate batches of entities, if
      False, iterate each entity.

  Yields:
    If batch_run is True, one list of projected dicts per fetched page
    (the list may be empty when filter_func rejects the whole page);
    otherwise one projected dict per entity.

  An example is available in crash_printer/print_crash.py.
  """
  # NOTE(review): reviewers suggested letting the calling code configure the
  # remote API (and its app id) itself. The call is kept here so that existing
  # callers keep working unchanged.
  remote_api.EnableRemoteApi(app_id=os.getenv('APP_ID'))

  cursor = None
  while True:
    entities, next_cursor, more = query.fetch_page(batch_size,
                                                   start_cursor=cursor)

    # Nothing was fetched and nothing is left: done. This is the only case in
    # which a page may be skipped without being processed.
    if not more and not entities:
      break

    if filter_func:
      entities = filter_func(entities)

    projected = [ProjectEntity(entity, fields) for entity in entities]

    if batch_run:
      yield projected
    else:
      for entity_info in projected:
        yield entity_info

    # Stop only after the current page was yielded; checking `more` before
    # processing would silently drop the last page of results.
    if not more:
      break

    cursor = next_cursor