Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(8655)

Unified Diff: chrome/common/extensions/docs/server2/cron_servlet.py

Issue 575613003: Docserver: Gitiles auth and cron refactoring. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/common/extensions/docs/server2/cron_servlet.py
diff --git a/chrome/common/extensions/docs/server2/cron_servlet.py b/chrome/common/extensions/docs/server2/cron_servlet.py
index dfd766f0a2497061bd239f7d82a327204eca1a15..1c9bb93f261ed81770b2d59ec23bf9a482bc94a0 100644
--- a/chrome/common/extensions/docs/server2/cron_servlet.py
+++ b/chrome/common/extensions/docs/server2/cron_servlet.py
@@ -2,84 +2,28 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
-import logging
-import posixpath
+import time
import traceback
from app_yaml_helper import AppYamlHelper
-from appengine_wrappers import IsDeadlineExceededError, logservice
+from appengine_wrappers import IsDeadlineExceededError, logservice, taskqueue
from branch_utility import BranchUtility
from compiled_file_system import CompiledFileSystem
+from custom_logger import CustomLogger
from data_source_registry import CreateDataSources
-from environment import GetAppVersion, IsDevServer
-from extensions_paths import EXAMPLES, PUBLIC_TEMPLATES, STATIC_DOCS
-from file_system_util import CreateURLsFromPaths
-from future import Future
+from environment import GetAppVersion
from gcs_file_system_provider import CloudStorageFileSystemProvider
from github_file_system_provider import GithubFileSystemProvider
from host_file_system_provider import HostFileSystemProvider
from object_store_creator import ObjectStoreCreator
-from render_servlet import RenderServlet
+from render_refresher import RenderRefresher
from server_instance import ServerInstance
from servlet import Servlet, Request, Response
-from special_paths import SITE_VERIFICATION_FILE
-from timer import Timer, TimerClosure
+from timer import Timer
-class _SingletonRenderServletDelegate(RenderServlet.Delegate):
- def __init__(self, server_instance):
- self._server_instance = server_instance
+_log = CustomLogger('cron')
- def CreateServerInstance(self):
- return self._server_instance
-
-class _CronLogger(object):
- '''Wraps the logging.* methods to prefix them with 'cron' and flush
- immediately. The flushing is important because often these cron runs time
- out and we lose the logs.
- '''
- def info(self, msg, *args): self._log(logging.info, msg, args)
- def warning(self, msg, *args): self._log(logging.warning, msg, args)
- def error(self, msg, *args): self._log(logging.error, msg, args)
-
- def _log(self, logfn, msg, args):
- try:
- logfn('cron: %s' % msg, *args)
- finally:
- logservice.flush()
-
-_cronlog = _CronLogger()
-
-def _RequestEachItem(title, items, request_callback):
- '''Runs a task |request_callback| named |title| for each item in |items|.
- |request_callback| must take an item and return a servlet response.
- Returns true if every item was successfully run, false if any return a
- non-200 response or raise an exception.
- '''
- _cronlog.info('%s: starting', title)
- success_count, failure_count = 0, 0
- timer = Timer()
- try:
- for i, item in enumerate(items):
- def error_message(detail):
- return '%s: error rendering %s (%s of %s): %s' % (
- title, item, i + 1, len(items), detail)
- try:
- response = request_callback(item)
- if response.status == 200:
- success_count += 1
- else:
- _cronlog.error(error_message('response status %s' % response.status))
- failure_count += 1
- except Exception as e:
- _cronlog.error(error_message(traceback.format_exc()))
- failure_count += 1
- if IsDeadlineExceededError(e): raise
- finally:
- _cronlog.info('%s: rendered %s of %s with %s failures in %s',
- title, success_count, len(items), failure_count,
- timer.Stop().FormatElapsed())
- return success_count == len(items)
class CronServlet(Servlet):
'''Servlet which runs a cron job.
@@ -110,125 +54,75 @@ class CronServlet(Servlet):
return GetAppVersion()
def Get(self):
- # Crons often time out, and if they do we need to make sure to flush the
+ # Refreshes may time out, and if they do we need to make sure to flush the
# logs before the process gets killed (Python gives us a couple of
# seconds).
#
# So, manually flush logs at the end of the cron run. However, sometimes
- # even that isn't enough, which is why in this file we use _cronlog and
+ # even that isn't enough, which is why in this file we use _log and
# make it flush the log every time its used.
logservice.AUTOFLUSH_ENABLED = False
try:
return self._GetImpl()
except BaseException:
- _cronlog.error('Caught top-level exception! %s', traceback.format_exc())
+ _log.error('Caught top-level exception! %s', traceback.format_exc())
finally:
logservice.flush()
def _GetImpl(self):
# Cron strategy:
#
- # Find all public template files and static files, and render them. Most of
- # the time these won't have changed since the last cron run, so it's a
- # little wasteful, but hopefully rendering is really fast (if it isn't we
- # have a problem).
- _cronlog.info('starting')
-
- # This is returned every time RenderServlet wants to create a new
- # ServerInstance.
+ # Collect all DataSources, the PlatformBundle, the ContentProviders, and
+ # any other statically renderered contents (e.g. examples content),
+ # and spin up taskqueue tasks which will refresh any cached data relevant
+ # to these assets.
#
- # TODO(kalman): IMPORTANT. This sometimes throws an exception, breaking
- # everything. Need retry logic at the fetcher level.
+ # TODO(rockot/kalman): At the moment examples are not actually refreshed
+ # because they're too slow.
+
+ _log.info('starting')
+
server_instance = self._GetSafeServerInstance()
master_fs = server_instance.host_file_system_provider.GetMaster()
+ master_commit = master_fs.GetCommitID().Get()
- def render(path):
- request = Request(path, self._request.host, self._request.headers)
- delegate = _SingletonRenderServletDelegate(server_instance)
- return RenderServlet(request, delegate).Get()
+ # This is the guy that would be responsible for refreshing the cache of
+ # examples. Here for posterity, hopefully it will be added to the targets
+ # below someday.
+ render_refresher = RenderRefresher(server_instance, self._request)
- def request_files_in_dir(path, prefix='', strip_ext=None):
- '''Requests every file found under |path| in this host file system, with
- a request prefix of |prefix|. |strip_ext| is an optional list of file
- extensions that should be stripped from paths before requesting.
- '''
- def maybe_strip_ext(name):
- if name == SITE_VERIFICATION_FILE or not strip_ext:
- return name
- base, ext = posixpath.splitext(name)
- return base if ext in strip_ext else name
- files = [maybe_strip_ext(name)
- for name, _ in CreateURLsFromPaths(master_fs, path, prefix)]
- return _RequestEachItem(path, files, render)
+ # Get the default taskqueue
+ queue = taskqueue.Queue()
- results = []
+ # GAE documentation specifies that it's bad to add tasks to a queue
+ # within one second of purging. We wait 2 seconds, because we like
+ # to go the extra mile.
+ queue.purge()
+ time.sleep(2)
+ success = True
try:
- # Start running the hand-written Cron methods first; they can be run in
- # parallel. They are resolved at the end.
- def run_cron_for_future(target):
- title = target.__class__.__name__
- future, init_timer = TimerClosure(target.Cron)
- assert isinstance(future, Future), (
- '%s.Cron() did not return a Future' % title)
- def resolve():
- resolve_timer = Timer()
- try:
- future.Get()
- except Exception as e:
- _cronlog.error('%s: error %s' % (title, traceback.format_exc()))
- results.append(False)
- if IsDeadlineExceededError(e): raise
- finally:
- resolve_timer.Stop()
- _cronlog.info('%s took %s: %s to initialize and %s to resolve' %
- (title,
- init_timer.With(resolve_timer).FormatElapsed(),
- init_timer.FormatElapsed(),
- resolve_timer.FormatElapsed()))
- return Future(callback=resolve)
-
- targets = (CreateDataSources(server_instance).values() +
- [server_instance.content_providers,
- server_instance.platform_bundle])
- title = 'initializing %s parallel Cron targets' % len(targets)
- _cronlog.info(title)
+ data_sources = CreateDataSources(server_instance)
+ targets = (data_sources.items() +
+ [('content_providers', server_instance.content_providers),
+ ('platform_bundle', server_instance.platform_bundle)])
+ title = 'initializing %s parallel targets' % len(targets)
+ _log.info(title)
timer = Timer()
- try:
- cron_futures = [run_cron_for_future(target) for target in targets]
- finally:
- _cronlog.info('%s took %s' % (title, timer.Stop().FormatElapsed()))
-
- # Samples are too expensive to run on the dev server, where there is no
- # parallel fetch.
- #
- # XXX(kalman): Currently samples are *always* too expensive to fetch, so
- # disabling them for now. It won't break anything so long as we're still
- # not enforcing that everything gets cached for normal instances.
- if False: # should be "not IsDevServer()":
- # Fetch each individual sample file.
- results.append(request_files_in_dir(EXAMPLES,
- prefix='extensions/examples'))
-
- # Resolve the hand-written Cron method futures.
- title = 'resolving %s parallel Cron targets' % len(targets)
- _cronlog.info(title)
- timer = Timer()
- try:
- for future in cron_futures:
- future.Get()
- finally:
- _cronlog.info('%s took %s' % (title, timer.Stop().FormatElapsed()))
-
+ for name, target in targets:
+ refresh_paths = target.GetRefreshPaths()
+ for path in refresh_paths:
+ queue.add(taskqueue.Task(url='/_refresh/%s/%s' % (name, path),
+ params={'commit': master_commit}))
+ _log.info('%s took %s' % (title, timer.Stop().FormatElapsed()))
except:
- results.append(False)
# This should never actually happen (each cron step does its own
# conservative error checking), so re-raise no matter what it is.
- _cronlog.error('uncaught error: %s' % traceback.format_exc())
+ _log.error('uncaught error: %s' % traceback.format_exc())
+ success = False
raise
finally:
- success = all(results)
- _cronlog.info('finished (%s)', 'success' if success else 'FAILED')
+ _log.info('finished (%s)', 'success' if success else 'FAILED')
return (Response.Ok('Success') if success else
Response.InternalError('Failure'))
@@ -259,7 +153,7 @@ class CronServlet(Servlet):
safe_revision = app_yaml_handler.GetFirstRevisionGreaterThan(
delegate.GetAppVersion()) - 1
- _cronlog.info('app version %s is out of date, safe is %s',
+ _log.info('app version %s is out of date, safe is %s',
delegate.GetAppVersion(), safe_revision)
return self._CreateServerInstance(safe_revision)
« no previous file with comments | « chrome/common/extensions/docs/server2/cron.yaml ('k') | chrome/common/extensions/docs/server2/custom_logger.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698