Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import copy | 5 import copy |
| 6 import json | 6 import json |
| 7 import logging | 7 import logging |
| 8 | 8 |
| 9 from common import appengine_util | 9 from common import appengine_util |
| 10 from common import pubsub_util | 10 from common import pubsub_util |
| 11 from common import time_util | 11 from common import time_util |
| 12 from common.http_client_appengine import HttpClientAppengine | 12 from common.http_client_appengine import HttpClientAppengine |
| 13 from common.pipeline_wrapper import BasePipeline | 13 from common.pipeline_wrapper import BasePipeline |
| 14 from common.pipeline_wrapper import pipeline | 14 from common.pipeline_wrapper import pipeline |
| 15 from crash import findit_for_chromecrash | 15 from crash import findit_for_chromecrash |
| 16 from crash import findit_for_clusterfuzz | 16 from crash import findit_for_clusterfuzz |
| 17 from crash.type_enums import CrashClient | 17 from crash.type_enums import CrashClient |
| 18 from lib.gitiles import gitiles_repository | 18 from lib.gitiles import gitiles_repository |
| 19 from model import analysis_status | 19 from model import analysis_status |
| 20 | 20 |
| 21 | 21 |
| 22 # TODO(http://crbug.com/659346): write complete coverage tests for this. | 22 # TODO(http://crbug.com/659346): this needs complete coverage tests. |
| 23 def FinditForClientID(client_id): # pragma: no cover | 23 def FinditForClientID(client_id): |
| 24 """Construct a Findit object from a client id string specifying the class. | 24 """Construct a Findit object from a client id string specifying the class. |
| 25 | 25 |
| 26 We cannot pass Findit objects to the various methods in | 26 We cannot pass Findit objects to the various methods in |
| 27 ``crash.crash_pipeline``, because they are not JSON serializable. For | 27 ``crash.crash_pipeline``, because they are not JSON serializable. For |
| 28 now, we just serialize Findit objects as their ``client_id``, and then | 28 now, we just serialize Findit objects as their ``client_id``, and then |
| 29 use this function to reconstruct them. Alas, this means we will lose | 29 use this function to reconstruct them. Alas, this means we will lose |
| 30 various other information stored in the Findit object (i.e., stuff that | 30 various other information stored in the Findit object (i.e., stuff that |
| 31 comes from CrashConfig); which could lead to some hard-to-diagnose | 31 comes from CrashConfig); which could lead to some hard-to-diagnose |
| 32 coherency bugs, since the new Findit object will be based on the | 32 coherency bugs, since the new Findit object will be based on the |
| 33 CrashConfig at the time it's constructed, which may be different | 33 CrashConfig at the time it's constructed, which may be different |
| 34 than the CrashConfig at the time the previous Findit object was | 34 than the CrashConfig at the time the previous Findit object was |
| 35 constructed. In the future we should fix all this to serialize Findit | 35 constructed. In the future we should fix all this to serialize Findit |
| 36 objects in a more robust way. | 36 objects in a more robust way. |
| 37 """ | 37 """ |
| 38 assert isinstance(client_id, (str, unicode)), ( | 38 assert isinstance(client_id, (str, unicode)), ( |
| 39 'FinditForClientID: expected string or unicode, but got %s' | 39 'FinditForClientID: expected string or unicode, but got %s' |
| 40 % client_id.__class__.__name__) | 40 % client_id.__class__.__name__) |
| 41 # TODO(wrengr): it'd be nice to replace this with a single lookup in | 41 # TODO(wrengr): it'd be nice to replace this with a single lookup in |
| 42 # a dict; but that's buggy for some unknown reason. | 42 # a dict; but that's buggy for some unknown reason. |
| 43 if client_id == CrashClient.FRACAS: | 43 if client_id == CrashClient.FRACAS: |
| 44 cls = findit_for_chromecrash.FinditForFracas | 44 cls = findit_for_chromecrash.FinditForFracas |
| 45 elif client_id == CrashClient.CRACAS: | 45 elif client_id == CrashClient.CRACAS: # pragma: no cover |
| 46 cls = findit_for_chromecrash.FinditForCracas | 46 cls = findit_for_chromecrash.FinditForCracas |
| 47 elif client_id == CrashClient.CLUSTERFUZZ: | 47 elif client_id == CrashClient.CLUSTERFUZZ: # pragma: no cover |
| 48 cls = findit_for_clusterfuzz.FinditForClusterfuzz | 48 cls = findit_for_clusterfuzz.FinditForClusterfuzz |
| 49 else: | 49 else: # pragma: no cover |
| 50 raise ValueError('FinditForClientID: ' | 50 raise ValueError('FinditForClientID: ' |
| 51 'unknown or unsupported client %s' % client_id) | 51 'unknown or unsupported client %s' % client_id) |
| 52 | 52 |
| 53 return cls( | 53 return cls(gitiles_repository.GitilesRepository( |
| 54 gitiles_repository.GitilesRepository(http_client=HttpClientAppengine()), | 54 http_client=HttpClientAppengine())) |
| 55 CrashWrapperPipeline) | |
| 56 | 55 |
| 57 | 56 |
| 58 # Some notes about the classes below, for people who are not | 57 # Some notes about the classes below, for people who are not |
| 59 # familiar with AppEngine. The thing that really kicks everything off | 58 # familiar with AppEngine. The thing that really kicks everything off |
| 60 # is ``CrashWrapperPipeline.run``. However, an important thing to bear in | 59 # is ``CrashWrapperPipeline.run``. However, an important thing to bear in |
| 61 # mind is that whatever arguments are passed to that method will also | 60 # mind is that whatever arguments are passed to that method will also |
| 62 # be passed to the ``run`` method on whatever objects it yields. Thus, | 61 # be passed to the ``run`` method on whatever objects it yields. Thus, |
| 63 # all the ``run`` methods across these different classes must have | 62 # all the ``run`` methods across these different classes must have |
| 64 # the same type. In practice, we end up passing all the arguments to the | 63 # the same type. In practice, we end up passing all the arguments to the |
| 65 # constructors, because we need to have the fields around for logging | 64 # constructors, because we need to have the fields around for logging |
| 66 # (e.g., in ``finalized``); thus, there's nothing that needs to be passed | 65 # (e.g., in ``finalized``); thus, there's nothing that needs to be passed |
| 67 # to ``run``. Another thing to bear in mind is that whatever objects | 66 # to ``run``. However, for some inscrutable reason, whatever arguments |
| 67 # are passed to the constructors will also be passed to ``run`` (it's | |
| 68 # unclear what sort of terrible introspection AppEngine does in order to | |
|
stgao
2016/11/02 01:28:27
It is not AppEngine, but the AppEngine Pipeline fr
wrengr
2016/11/02 23:52:26
Acknowledged.
| |
| 69 # figure this out, but no matter); thus, even though the ``run`` methods | |
| 70 # don't need them (and don't want them) they must still accept a bunch | |
| 71 # of arguments. Another thing to bear in mind is that whatever objects | |
| 68 # ``CrashWrapperPipeline.run`` yields must be JSON-serializable. The base | 72 # ``CrashWrapperPipeline.run`` yields must be JSON-serializable. The base |
| 69 # class handles most of that for us, so the force of this constraint is | 73 # class handles most of that for us, so the force of this constraint is |
| 70 # that all the arguments to the constructors for those classes must be | 74 # that all the arguments to the constructors for those classes must be |
| 71 # JSON-serializable. Thus, we cannot actually pass a Findit object to | 75 # JSON-serializable. Thus, we cannot actually pass a Findit object to the |
| 72 # the constructor, but rather must pass only the ``client_id`` (or whatever | 76 # constructor, but rather must pass only the ``client_id`` (or whatever |
| 73 # JSON dict) and then reconstruct the Findit object from that. Moreover, | 77 # JSON dict) and then reconstruct the Findit object from that. Moreover, |
| 74 # the ``run`` method and the ``finalized`` method will be run in different | 78 # the ``run`` method and the ``finalized`` method will be run in different |
| 75 # processes, so we will actually end up reconstructing the Findit object | 79 # processes, so we will actually end up reconstructing the Findit object |
| 76 # twice. Thus, we shouldn't store anything in the pipeline objects outside | 80 # twice. Thus, we shouldn't store anything in the pipeline objects outside |
| 77 # of what their constructors store. | 81 # of what their constructors store. |
| 78 | 82 |
| 79 class CrashBasePipeline(BasePipeline): | 83 class CrashBasePipeline(BasePipeline): |
| 80 def __init__(self, client_id, crash_identifiers): | 84 def __init__(self, client_id, crash_identifiers): |
| 81 super(CrashBasePipeline, self).__init__(client_id, crash_identifiers) | 85 super(CrashBasePipeline, self).__init__(client_id, crash_identifiers) |
| 82 self._crash_identifiers = crash_identifiers | 86 self._crash_identifiers = crash_identifiers |
| 83 self._findit = FinditForClientID(client_id) | 87 self._findit = FinditForClientID(client_id) |
| 84 | 88 |
| 85 @property | 89 @property |
| 86 def client_id(self): # pragma: no cover | 90 def client_id(self): # pragma: no cover |
| 87 return self._findit.client_id | 91 return self._findit.client_id |
| 88 | 92 |
| 89 def run(self, *args, **kwargs): | 93 def run(self, *args, **kwargs): |
| 90 raise NotImplementedError() | 94 raise NotImplementedError() |
| 91 | 95 |
| 92 | 96 |
| 93 class CrashAnalysisPipeline(CrashBasePipeline): | 97 class CrashAnalysisPipeline(CrashBasePipeline): |
| 94 def finalized(self): # pragma: no cover | 98 def finalized(self): |
| 95 if self.was_aborted: | 99 if self.was_aborted: # pragma: no cover |
| 96 self._PutAbortedError() | 100 self._PutAbortedError() |
| 97 | 101 |
| 98 # N.B., this method must be factored out for unittest reasons; since | 102 # N.B., this method must be factored out for unittest reasons; since |
| 99 # ``finalized`` takes no arguments (by AppEngine's spec) and | 103 # ``finalized`` takes no arguments (by AppEngine's spec) and |
| 100 # ``was_aborted`` can't be altered directly. | 104 # ``was_aborted`` can't be altered directly. |
| 101 def _PutAbortedError(self): | 105 def _PutAbortedError(self): |
| 102 """Update the ndb.Model to indicate that this pipeline was aborted.""" | 106 """Update the ndb.Model to indicate that this pipeline was aborted.""" |
| 103 logging.error('Aborted analysis for %s', repr(self._crash_identifiers)) | 107 logging.error('Aborted analysis for %s', repr(self._crash_identifiers)) |
| 104 analysis = self._findit.GetAnalysis(self._crash_identifiers) | 108 analysis = self._findit.GetAnalysis(self._crash_identifiers) |
| 105 analysis.status = analysis_status.ERROR | 109 analysis.status = analysis_status.ERROR |
| 106 analysis.put() | 110 analysis.put() |
| 107 | 111 |
| 108 # TODO(http://crbug.com/659346): we misplaced the coverage test; find it! | 112 def run(self, *_args, **_kwargs): |
| 109 # Arguments number differs from overridden method - pylint: disable=W0221 | |
| 110 def run(self): | |
| 111 # TODO(wrengr): shouldn't this method somehow call _NeedsNewAnalysis | 113 # TODO(wrengr): shouldn't this method somehow call _NeedsNewAnalysis |
| 112 # to guard against race conditions? | 114 # to guard against race conditions? |
| 113 analysis = self._findit.GetAnalysis(self._crash_identifiers) | 115 analysis = self._findit.GetAnalysis(self._crash_identifiers) |
| 114 | 116 |
| 115 # Update the model's status to say we're in the process of doing analysis. | 117 # Update the model's status to say we're in the process of doing analysis. |
| 116 analysis.pipeline_status_path = self.pipeline_status_path() | 118 analysis.pipeline_status_path = self.pipeline_status_path() |
| 117 analysis.status = analysis_status.RUNNING | 119 analysis.status = analysis_status.RUNNING |
| 118 analysis.started_time = time_util.GetUTCNow() | 120 analysis.started_time = time_util.GetUTCNow() |
| 119 analysis.findit_version = appengine_util.GetCurrentVersion() | 121 analysis.findit_version = appengine_util.GetCurrentVersion() |
| 120 analysis.put() | 122 analysis.put() |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 131 'found_components': False, | 133 'found_components': False, |
| 132 'has_regression_range': False, | 134 'has_regression_range': False, |
| 133 'solution': None, | 135 'solution': None, |
| 134 } | 136 } |
| 135 | 137 |
| 136 # Update model's status to say we're done, and save the results. | 138 # Update model's status to say we're done, and save the results. |
| 137 analysis.completed_time = time_util.GetUTCNow() | 139 analysis.completed_time = time_util.GetUTCNow() |
| 138 analysis.result = result | 140 analysis.result = result |
| 139 for tag_name, tag_value in tags.iteritems(): | 141 for tag_name, tag_value in tags.iteritems(): |
| 140 # TODO(http://crbug.com/602702): make it possible to add arbitrary tags. | 142 # TODO(http://crbug.com/602702): make it possible to add arbitrary tags. |
| 141 if hasattr(analysis, tag_name): | 143 # TODO(http://crbug.com/659346): we misplaced the coverage test; find it! |
| 144 if hasattr(analysis, tag_name): # pragma: no cover | |
| 142 setattr(analysis, tag_name, tag_value) | 145 setattr(analysis, tag_name, tag_value) |
| 143 analysis.status = analysis_status.COMPLETED | 146 analysis.status = analysis_status.COMPLETED |
| 144 analysis.put() | 147 analysis.put() |
| 145 | 148 |
| 146 | 149 |
| 147 class PublishResultPipeline(CrashBasePipeline): | 150 class PublishResultPipeline(CrashBasePipeline): |
| 148 # TODO(http://crbug.com/659346): we misplaced the coverage test; find it! | |
| 149 def finalized(self): | 151 def finalized(self): |
| 150 if self.was_aborted: # pragma: no cover. | 152 if self.was_aborted: # pragma: no cover. |
| 151 logging.error('Failed to publish %s analysis result for %s', | 153 logging.error('Failed to publish %s analysis result for %s', |
| 152 repr(self._crash_identifiers), self.client_id) | 154 repr(self._crash_identifiers), self.client_id) |
| 153 | 155 |
| 154 # Arguments number differs from overridden method - pylint: disable=W0221 | 156 # TODO(http://crbug.com/659346): we misplaced the coverage test; find it! |
| 155 def run(self): | 157 def run(self, *_args, **_kwargs): # pragma: no cover |
|
Sharu Jiang
2016/11/01 23:03:24
This is a little hacky, we should at lease add doc
stgao
2016/11/02 01:28:27
I don't understand this.
Sharu, do you mind elabor
Sharu Jiang
2016/11/02 17:01:35
like the `yield` in #190 and #193
What `yield` doe
stgao
2016/11/02 21:22:09
Would you mind pointing me to the doc for the usag
wrengr
2016/11/02 23:52:26
I agree it's a terrible hack, but it's forced on u
Sharu Jiang
2016/11/03 23:03:25
Sorry for the late response, was busy working on m
Sharu Jiang
2016/11/04 00:19:00
Calling ``run`` directly is not a correct usage fo
| |
| 156 analysis = self._findit.GetAnalysis(self._crash_identifiers) | 158 analysis = self._findit.GetAnalysis(self._crash_identifiers) |
| 157 result = analysis.ToPublishableResult(self._crash_identifiers) | 159 result = analysis.ToPublishableResult(self._crash_identifiers) |
| 158 messages_data = [json.dumps(result, sort_keys=True)] | 160 messages_data = [json.dumps(result, sort_keys=True)] |
| 159 | 161 |
| 160 # TODO(http://crbug.com/659354): remove Findit's dependency on CrashConfig. | 162 # TODO(http://crbug.com/659354): remove Findit's dependency on CrashConfig. |
| 161 client_config = self._findit.config | 163 client_config = self._findit.config |
| 162 # TODO(katesonia): Clean string uses in config. | 164 # TODO(katesonia): Clean string uses in config. |
| 163 topic = client_config['analysis_result_pubsub_topic'] | 165 topic = client_config['analysis_result_pubsub_topic'] |
| 164 pubsub_util.PublishMessagesToTopic(messages_data, topic) | 166 pubsub_util.PublishMessagesToTopic(messages_data, topic) |
| 165 logging.info('Published %s analysis result for %s', self.client_id, | 167 logging.info('Published %s analysis result for %s', self.client_id, |
| 166 repr(self._crash_identifiers)) | 168 repr(self._crash_identifiers)) |
| 167 | 169 |
| 168 | 170 |
| 169 class CrashWrapperPipeline(BasePipeline): | 171 # TODO(http://crbug.com/659346): we misplaced the coverage test; find it! |
| 172 class CrashWrapperPipeline(BasePipeline): # pragma: no cover | |
| 170 """Fire off pipelines to (1) do the analysis and (2) publish results. | 173 """Fire off pipelines to (1) do the analysis and (2) publish results. |
| 171 | 174 |
| 172 The reason we have analysis and publishing as separate pipelines is | 175 The reason we have analysis and publishing as separate pipelines is |
| 173 because each of them can fail for independent reasons. E.g., if we | 176 because each of them can fail for independent reasons. E.g., if we |
| 174 successfully finish the analysis, but then the publishing fails due to | 177 successfully finish the analysis, but then the publishing fails due to |
| 175 network errors, we don't want to have to redo the analysis in order | 178 network errors, we don't want to have to redo the analysis in order |
| 176 to redo the publishing. We could try to cache the fact that analysis | 179 to redo the publishing. We could try to cache the fact that analysis |
| 177 succeeded in the pipeline object itself, but we'd have to be careful | 180 succeeded in the pipeline object itself, but we'd have to be careful |
| 178 because the ``run`` and ``finalized`` methods are executed in different | 181 because the ``run`` and ``finalized`` methods are executed in different |
| 179 processes. | 182 processes. |
| 180 """ | 183 """ |
| 181 def __init__(self, client_id, crash_identifiers): | 184 def __init__(self, client_id, crash_identifiers): |
| 182 super(CrashWrapperPipeline, self).__init__(client_id, crash_identifiers) | 185 super(CrashWrapperPipeline, self).__init__(client_id, crash_identifiers) |
| 183 self._crash_identifiers = crash_identifiers | 186 self._crash_identifiers = crash_identifiers |
| 184 self._client_id = client_id | 187 self._client_id = client_id |
| 185 | 188 |
| 186 # TODO(http://crbug.com/659346): write coverage tests. | 189 def run(self, *_args, **_kwargs): |
| 187 # Arguments number differs from overridden method - pylint: disable=W0221 | |
| 188 def run(self): # pragma: no cover | |
| 189 run_analysis = yield CrashAnalysisPipeline( | 190 run_analysis = yield CrashAnalysisPipeline( |
| 190 self._client_id, self._crash_identifiers) | 191 self._client_id, self._crash_identifiers) |
| 191 with pipeline.After(run_analysis): | 192 with pipeline.After(run_analysis): |
| 192 yield PublishResultPipeline(self._client_id, self._crash_identifiers) | 193 yield PublishResultPipeline(self._client_id, self._crash_identifiers) |
| OLD | NEW |