Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1692)

Unified Diff: appengine/findit/waterfall/swarming_util.py

Issue 2547713002: [Findit] Using ts_mon to track swarming/isolated server outages (Closed)
Patch Set: Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: appengine/findit/waterfall/swarming_util.py
diff --git a/appengine/findit/waterfall/swarming_util.py b/appengine/findit/waterfall/swarming_util.py
index 44e2e8fe0068d013ce02e52a771a642447c65480..d449e281d526251579ce539dab70dd912dc4ad95 100644
--- a/appengine/findit/waterfall/swarming_util.py
+++ b/appengine/findit/waterfall/swarming_util.py
@@ -17,6 +17,7 @@ from google.appengine.ext import ndb
from common import auth_util
from model.wf_step import WfStep
+from waterfall import monitoring
from waterfall import waterfall_config
from waterfall.swarming_task_request import SwarmingTaskRequest
@@ -82,6 +83,18 @@ def _GetBackoffSeconds(retry_backoff, tries, maximum_retry_interval):
return min(retry_backoff * (2 ** (tries - 1)), maximum_retry_interval)
+def _OnConnectionFailed(url, exception):
+ swarming_settings = waterfall_config.GetSwarmingSettings()
+ swarming_server_host = swarming_settings.get('server_host')
+ isolated_server_host = swarming_settings.get('isolated_server')
+ exception_type_name = type(exception).__name__
stgao 2016/12/01 22:25:15 Why not pass over the exception name from code below…
lijeffrey 2016/12/01 22:54:34 Done.
+
+ if isolated_server_host in url:
stgao 2016/12/01 22:25:15 Extract the host from the url, and make it a field…
lijeffrey 2016/12/01 22:54:34 Done.
+ monitoring.isolated_server_failures.increment({'type': exception_type_name})
+ elif swarming_server_host in url:
+ monitoring.swarming_server_failures.increment({'type': exception_type_name})
+
+
def _SendRequestToServer(url, http_client, post_data=None):
"""Sends GET/POST request to arbitrary url and returns response content.
@@ -96,8 +109,6 @@ def _SendRequestToServer(url, http_client, post_data=None):
http_client (HttpClient): The httpclient object with which to make the
server calls.
post_data (dict): Data/params to send with the request, if any.
- swarming_task (WfSwarmingTask, FlakeSwarmingTask): An optional swarming
- task with which to capture errors.
Returns:
content (dict), error (dict): The content from the server and the last error
@@ -131,21 +142,25 @@ def _SendRequestToServer(url, http_client, post_data=None):
'code': URLFETCH_CONNECTION_CLOSED_ERROR,
'message': e.message
}
+ _OnConnectionFailed(url, e)
except DeadlineExceededError as e:
error = {
'code': URLFETCH_DEADLINE_EXCEEDED_ERROR,
'message': e.message
}
+ _OnConnectionFailed(url, e)
except DownloadError as e:
error = {
'code': URLFETCH_DOWNLOAD_ERROR,
'message': e.message
}
+ _OnConnectionFailed(url, e)
except Exception as e: # pragma: no cover
error = {
'code': UNKNOWN,
'message': e.message
}
+ _OnConnectionFailed(url, e)
if error or status_code != 200:
# The retry upon 50x (501 excluded) is automatically handled in the

Powered by Google App Engine
This is Rietveld 408576698