| Index: chrome/common/extensions/docs/server2/cron_servlet.py
|
| diff --git a/chrome/common/extensions/docs/server2/cron_servlet.py b/chrome/common/extensions/docs/server2/cron_servlet.py
|
| index dfd766f0a2497061bd239f7d82a327204eca1a15..1c9bb93f261ed81770b2d59ec23bf9a482bc94a0 100644
|
| --- a/chrome/common/extensions/docs/server2/cron_servlet.py
|
| +++ b/chrome/common/extensions/docs/server2/cron_servlet.py
|
| @@ -2,84 +2,28 @@
|
| # Use of this source code is governed by a BSD-style license that can be
|
| # found in the LICENSE file.
|
|
|
| -import logging
|
| -import posixpath
|
| +import time
|
| import traceback
|
|
|
| from app_yaml_helper import AppYamlHelper
|
| -from appengine_wrappers import IsDeadlineExceededError, logservice
|
| +from appengine_wrappers import IsDeadlineExceededError, logservice, taskqueue
|
| from branch_utility import BranchUtility
|
| from compiled_file_system import CompiledFileSystem
|
| +from custom_logger import CustomLogger
|
| from data_source_registry import CreateDataSources
|
| -from environment import GetAppVersion, IsDevServer
|
| -from extensions_paths import EXAMPLES, PUBLIC_TEMPLATES, STATIC_DOCS
|
| -from file_system_util import CreateURLsFromPaths
|
| -from future import Future
|
| +from environment import GetAppVersion
|
| from gcs_file_system_provider import CloudStorageFileSystemProvider
|
| from github_file_system_provider import GithubFileSystemProvider
|
| from host_file_system_provider import HostFileSystemProvider
|
| from object_store_creator import ObjectStoreCreator
|
| -from render_servlet import RenderServlet
|
| +from render_refresher import RenderRefresher
|
| from server_instance import ServerInstance
|
| from servlet import Servlet, Request, Response
|
| -from special_paths import SITE_VERIFICATION_FILE
|
| -from timer import Timer, TimerClosure
|
| +from timer import Timer
|
|
|
|
|
| -class _SingletonRenderServletDelegate(RenderServlet.Delegate):
|
| - def __init__(self, server_instance):
|
| - self._server_instance = server_instance
|
| +_log = CustomLogger('cron')
|
|
|
| - def CreateServerInstance(self):
|
| - return self._server_instance
|
| -
|
| -class _CronLogger(object):
|
| - '''Wraps the logging.* methods to prefix them with 'cron' and flush
|
| - immediately. The flushing is important because often these cron runs time
|
| - out and we lose the logs.
|
| - '''
|
| - def info(self, msg, *args): self._log(logging.info, msg, args)
|
| - def warning(self, msg, *args): self._log(logging.warning, msg, args)
|
| - def error(self, msg, *args): self._log(logging.error, msg, args)
|
| -
|
| - def _log(self, logfn, msg, args):
|
| - try:
|
| - logfn('cron: %s' % msg, *args)
|
| - finally:
|
| - logservice.flush()
|
| -
|
| -_cronlog = _CronLogger()
|
| -
|
| -def _RequestEachItem(title, items, request_callback):
|
| - '''Runs a task |request_callback| named |title| for each item in |items|.
|
| - |request_callback| must take an item and return a servlet response.
|
| - Returns true if every item was successfully run, false if any return a
|
| - non-200 response or raise an exception.
|
| - '''
|
| - _cronlog.info('%s: starting', title)
|
| - success_count, failure_count = 0, 0
|
| - timer = Timer()
|
| - try:
|
| - for i, item in enumerate(items):
|
| - def error_message(detail):
|
| - return '%s: error rendering %s (%s of %s): %s' % (
|
| - title, item, i + 1, len(items), detail)
|
| - try:
|
| - response = request_callback(item)
|
| - if response.status == 200:
|
| - success_count += 1
|
| - else:
|
| - _cronlog.error(error_message('response status %s' % response.status))
|
| - failure_count += 1
|
| - except Exception as e:
|
| - _cronlog.error(error_message(traceback.format_exc()))
|
| - failure_count += 1
|
| - if IsDeadlineExceededError(e): raise
|
| - finally:
|
| - _cronlog.info('%s: rendered %s of %s with %s failures in %s',
|
| - title, success_count, len(items), failure_count,
|
| - timer.Stop().FormatElapsed())
|
| - return success_count == len(items)
|
|
|
| class CronServlet(Servlet):
|
| '''Servlet which runs a cron job.
|
| @@ -110,125 +54,75 @@ class CronServlet(Servlet):
|
| return GetAppVersion()
|
|
|
| def Get(self):
|
| - # Crons often time out, and if they do we need to make sure to flush the
|
| + # Refreshes may time out, and if they do we need to make sure to flush the
|
| # logs before the process gets killed (Python gives us a couple of
|
| # seconds).
|
| #
|
| # So, manually flush logs at the end of the cron run. However, sometimes
|
| - # even that isn't enough, which is why in this file we use _cronlog and
|
| + # even that isn't enough, which is why in this file we use _log and
|
| # make it flush the log every time its used.
|
| logservice.AUTOFLUSH_ENABLED = False
|
| try:
|
| return self._GetImpl()
|
| except BaseException:
|
| - _cronlog.error('Caught top-level exception! %s', traceback.format_exc())
|
| + _log.error('Caught top-level exception! %s', traceback.format_exc())
|
| finally:
|
| logservice.flush()
|
|
|
| def _GetImpl(self):
|
| # Cron strategy:
|
| #
|
| - # Find all public template files and static files, and render them. Most of
|
| - # the time these won't have changed since the last cron run, so it's a
|
| - # little wasteful, but hopefully rendering is really fast (if it isn't we
|
| - # have a problem).
|
| - _cronlog.info('starting')
|
| -
|
| - # This is returned every time RenderServlet wants to create a new
|
| - # ServerInstance.
|
| + # Collect all DataSources, the PlatformBundle, the ContentProviders, and
|
| + # any other statically renderered contents (e.g. examples content),
|
| + # and spin up taskqueue tasks which will refresh any cached data relevant
|
| + # to these assets.
|
| #
|
| - # TODO(kalman): IMPORTANT. This sometimes throws an exception, breaking
|
| - # everything. Need retry logic at the fetcher level.
|
| + # TODO(rockot/kalman): At the moment examples are not actually refreshed
|
| + # because they're too slow.
|
| +
|
| + _log.info('starting')
|
| +
|
| server_instance = self._GetSafeServerInstance()
|
| master_fs = server_instance.host_file_system_provider.GetMaster()
|
| + master_commit = master_fs.GetCommitID().Get()
|
|
|
| - def render(path):
|
| - request = Request(path, self._request.host, self._request.headers)
|
| - delegate = _SingletonRenderServletDelegate(server_instance)
|
| - return RenderServlet(request, delegate).Get()
|
| + # This is the guy that would be responsible for refreshing the cache of
|
| + # examples. Here for posterity, hopefully it will be added to the targets
|
| + # below someday.
|
| + render_refresher = RenderRefresher(server_instance, self._request)
|
|
|
| - def request_files_in_dir(path, prefix='', strip_ext=None):
|
| - '''Requests every file found under |path| in this host file system, with
|
| - a request prefix of |prefix|. |strip_ext| is an optional list of file
|
| - extensions that should be stripped from paths before requesting.
|
| - '''
|
| - def maybe_strip_ext(name):
|
| - if name == SITE_VERIFICATION_FILE or not strip_ext:
|
| - return name
|
| - base, ext = posixpath.splitext(name)
|
| - return base if ext in strip_ext else name
|
| - files = [maybe_strip_ext(name)
|
| - for name, _ in CreateURLsFromPaths(master_fs, path, prefix)]
|
| - return _RequestEachItem(path, files, render)
|
| + # Get the default taskqueue
|
| + queue = taskqueue.Queue()
|
|
|
| - results = []
|
| + # GAE documentation specifies that it's bad to add tasks to a queue
|
| + # within one second of purging. We wait 2 seconds, because we like
|
| + # to go the extra mile.
|
| + queue.purge()
|
| + time.sleep(2)
|
|
|
| + success = True
|
| try:
|
| - # Start running the hand-written Cron methods first; they can be run in
|
| - # parallel. They are resolved at the end.
|
| - def run_cron_for_future(target):
|
| - title = target.__class__.__name__
|
| - future, init_timer = TimerClosure(target.Cron)
|
| - assert isinstance(future, Future), (
|
| - '%s.Cron() did not return a Future' % title)
|
| - def resolve():
|
| - resolve_timer = Timer()
|
| - try:
|
| - future.Get()
|
| - except Exception as e:
|
| - _cronlog.error('%s: error %s' % (title, traceback.format_exc()))
|
| - results.append(False)
|
| - if IsDeadlineExceededError(e): raise
|
| - finally:
|
| - resolve_timer.Stop()
|
| - _cronlog.info('%s took %s: %s to initialize and %s to resolve' %
|
| - (title,
|
| - init_timer.With(resolve_timer).FormatElapsed(),
|
| - init_timer.FormatElapsed(),
|
| - resolve_timer.FormatElapsed()))
|
| - return Future(callback=resolve)
|
| -
|
| - targets = (CreateDataSources(server_instance).values() +
|
| - [server_instance.content_providers,
|
| - server_instance.platform_bundle])
|
| - title = 'initializing %s parallel Cron targets' % len(targets)
|
| - _cronlog.info(title)
|
| + data_sources = CreateDataSources(server_instance)
|
| + targets = (data_sources.items() +
|
| + [('content_providers', server_instance.content_providers),
|
| + ('platform_bundle', server_instance.platform_bundle)])
|
| + title = 'initializing %s parallel targets' % len(targets)
|
| + _log.info(title)
|
| timer = Timer()
|
| - try:
|
| - cron_futures = [run_cron_for_future(target) for target in targets]
|
| - finally:
|
| - _cronlog.info('%s took %s' % (title, timer.Stop().FormatElapsed()))
|
| -
|
| - # Samples are too expensive to run on the dev server, where there is no
|
| - # parallel fetch.
|
| - #
|
| - # XXX(kalman): Currently samples are *always* too expensive to fetch, so
|
| - # disabling them for now. It won't break anything so long as we're still
|
| - # not enforcing that everything gets cached for normal instances.
|
| - if False: # should be "not IsDevServer()":
|
| - # Fetch each individual sample file.
|
| - results.append(request_files_in_dir(EXAMPLES,
|
| - prefix='extensions/examples'))
|
| -
|
| - # Resolve the hand-written Cron method futures.
|
| - title = 'resolving %s parallel Cron targets' % len(targets)
|
| - _cronlog.info(title)
|
| - timer = Timer()
|
| - try:
|
| - for future in cron_futures:
|
| - future.Get()
|
| - finally:
|
| - _cronlog.info('%s took %s' % (title, timer.Stop().FormatElapsed()))
|
| -
|
| + for name, target in targets:
|
| + refresh_paths = target.GetRefreshPaths()
|
| + for path in refresh_paths:
|
| + queue.add(taskqueue.Task(url='/_refresh/%s/%s' % (name, path),
|
| + params={'commit': master_commit}))
|
| + _log.info('%s took %s' % (title, timer.Stop().FormatElapsed()))
|
| except:
|
| - results.append(False)
|
| # This should never actually happen (each cron step does its own
|
| # conservative error checking), so re-raise no matter what it is.
|
| - _cronlog.error('uncaught error: %s' % traceback.format_exc())
|
| + _log.error('uncaught error: %s' % traceback.format_exc())
|
| + success = False
|
| raise
|
| finally:
|
| - success = all(results)
|
| - _cronlog.info('finished (%s)', 'success' if success else 'FAILED')
|
| + _log.info('finished (%s)', 'success' if success else 'FAILED')
|
| return (Response.Ok('Success') if success else
|
| Response.InternalError('Failure'))
|
|
|
| @@ -259,7 +153,7 @@ class CronServlet(Servlet):
|
| safe_revision = app_yaml_handler.GetFirstRevisionGreaterThan(
|
| delegate.GetAppVersion()) - 1
|
|
|
| - _cronlog.info('app version %s is out of date, safe is %s',
|
| + _log.info('app version %s is out of date, safe is %s',
|
| delegate.GetAppVersion(), safe_revision)
|
|
|
| return self._CreateServerInstance(safe_revision)
|
|
|