Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1743)

Unified Diff: appengine/findit/waterfall/swarming_util.py

Issue 2547713002: [Findit] Using ts_mon to track swarming/isolated server outages (Closed)
Patch Set: Fixing nit Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « appengine/findit/waterfall/monitoring.py ('k') | appengine/findit/waterfall/test/swarming_util_test.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: appengine/findit/waterfall/swarming_util.py
diff --git a/appengine/findit/waterfall/swarming_util.py b/appengine/findit/waterfall/swarming_util.py
index 44e2e8fe0068d013ce02e52a771a642447c65480..776f8bd529785954e32a3373efd7df5255eeee02 100644
--- a/appengine/findit/waterfall/swarming_util.py
+++ b/appengine/findit/waterfall/swarming_util.py
@@ -8,6 +8,7 @@ import json
import logging
import time
import urllib
+from urlparse import urlparse
import zlib
from google.appengine.api.urlfetch_errors import DeadlineExceededError
@@ -17,6 +18,7 @@ from google.appengine.ext import ndb
from common import auth_util
from model.wf_step import WfStep
+from waterfall import monitoring
from waterfall import waterfall_config
from waterfall.swarming_task_request import SwarmingTaskRequest
@@ -82,6 +84,13 @@ def _GetBackoffSeconds(retry_backoff, tries, maximum_retry_interval):
return min(retry_backoff * (2 ** (tries - 1)), maximum_retry_interval)
+def _OnConnectionFailed(url, exception_type):
+ host = urlparse(url).hostname
+ assert host
+ monitoring.outgoing_http_errors.increment(
+ {'host': host, 'exception': exception_type})
+
+
def _SendRequestToServer(url, http_client, post_data=None):
"""Sends GET/POST request to arbitrary url and returns response content.
@@ -96,8 +105,6 @@ def _SendRequestToServer(url, http_client, post_data=None):
http_client (HttpClient): The httpclient object with which to make the
server calls.
post_data (dict): Data/params to send with the request, if any.
- swarming_task (WfSwarmingTask, FlakeSwarmingTask): An optional swarming
- task with which to capture errors.
Returns:
content (dict), error (dict): The content from the server and the last error
@@ -131,21 +138,28 @@ def _SendRequestToServer(url, http_client, post_data=None):
'code': URLFETCH_CONNECTION_CLOSED_ERROR,
'message': e.message
}
+ _OnConnectionFailed(url, 'ConnectionClosedError')
except DeadlineExceededError as e:
error = {
'code': URLFETCH_DEADLINE_EXCEEDED_ERROR,
'message': e.message
}
+ _OnConnectionFailed(url, 'DeadlineExceededError')
except DownloadError as e:
error = {
'code': URLFETCH_DOWNLOAD_ERROR,
'message': e.message
}
+ _OnConnectionFailed(url, 'DownloadError')
except Exception as e: # pragma: no cover
+ logging.error(
+ 'An unknown exception occurred that need to be monitored: %s',
+ e.message)
error = {
'code': UNKNOWN,
'message': e.message
}
+ _OnConnectionFailed(url, 'Unknown Exception')
if error or status_code != 200:
# The retry upon 50x (501 excluded) is automatically handled in the
« no previous file with comments | « appengine/findit/waterfall/monitoring.py ('k') | appengine/findit/waterfall/test/swarming_util_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698