Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import copy | 5 import copy |
| 6 import json | 6 import json |
| 7 import logging | 7 import logging |
| 8 | 8 |
| 9 from common import appengine_util | 9 from common import appengine_util |
| 10 from common import pubsub_util | 10 from common import pubsub_util |
| 11 from common import time_util | 11 from common import time_util |
| 12 from common.http_client_appengine import HttpClientAppengine | 12 from common.http_client_appengine import HttpClientAppengine |
| 13 from common.pipeline_wrapper import BasePipeline | 13 from common.pipeline_wrapper import BasePipeline |
| 14 from common.pipeline_wrapper import pipeline | 14 from common.pipeline_wrapper import pipeline |
| 15 from crash import findit_for_chromecrash | 15 from crash import findit_for_chromecrash |
| 16 from crash import findit_for_clusterfuzz | 16 from crash import findit_for_clusterfuzz |
| 17 from crash.type_enums import CrashClient | 17 from crash.type_enums import CrashClient |
| 18 from lib.gitiles import gitiles_repository | 18 from lib.gitiles import gitiles_repository |
| 19 from model import analysis_status | 19 from model import analysis_status |
| 20 | 20 |
| 21 | 21 |
| 22 # TODO(http://crbug.com/659346): write complete coverage tests for this. | 22 # TODO(http://crbug.com/659346): this needs complete coverage tests. |
| 23 def FinditForClientID(client_id): # pragma: no cover | 23 def FinditForClientID(client_id): |
| 24 """Construct a Findit object from a client id string specifying the class. | 24 """Construct a Findit object from a client id string specifying the class. |
| 25 | 25 |
| 26 We cannot pass Findit objects to the various methods in | 26 We cannot pass Findit objects to the various methods in |
| 27 ``crash.crash_pipeline``, because they are not JSON serializable. For | 27 ``crash.crash_pipeline``, because they are not JSON serializable. For |
| 28 now, we just serialize Findit objects as their ``client_id``, and then | 28 now, we just serialize Findit objects as their ``client_id``, and then |
| 29 use this function to reconstruct them. Alas, this means we will lose | 29 use this function to reconstruct them. Alas, this means we will lose |
| 30 various other information stored in the Findit object (i.e., stuff that | 30 various other information stored in the Findit object (i.e., stuff that |
| 31 comes from CrashConfig); which could lead to some hard-to-diagnose | 31 comes from CrashConfig); which could lead to some hard-to-diagnose |
| 32 coherency bugs, since the new Findit object will be based on the | 32 coherency bugs, since the new Findit object will be based on the |
| 33 CrashConfig at the time it's constructed, which may be different | 33 CrashConfig at the time it's constructed, which may be different |
| 34 than the CrashConfig at the time the previous Findit object was | 34 than the CrashConfig at the time the previous Findit object was |
| 35 constructed. In the future we should fix all this to serialize Findit | 35 constructed. In the future we should fix all this to serialize Findit |
| 36 objects in a more robust way. | 36 objects in a more robust way. |
| 37 """ | 37 """ |
| 38 assert isinstance(client_id, (str, unicode)), ( | 38 assert isinstance(client_id, (str, unicode)), ( |
| 39 'FinditForClientID: expected string or unicode, but got %s' | 39 'FinditForClientID: expected string or unicode, but got %s' |
| 40 % client_id.__class__.__name__) | 40 % client_id.__class__.__name__) |
| 41 # TODO(wrengr): it'd be nice to replace this with a single lookup in | 41 # TODO(wrengr): it'd be nice to replace this with a single lookup in |
| 42 # a dict; but that's buggy for some unknown reason. | 42 # a dict; but that's buggy for some unknown reason. |
| 43 if client_id == CrashClient.FRACAS: | 43 if client_id == CrashClient.FRACAS: |
| 44 cls = findit_for_chromecrash.FinditForFracas | 44 cls = findit_for_chromecrash.FinditForFracas |
| 45 elif client_id == CrashClient.CRACAS: | 45 elif client_id == CrashClient.CRACAS: # pragma: no cover |
| 46 cls = findit_for_chromecrash.FinditForCracas | 46 cls = findit_for_chromecrash.FinditForCracas |
| 47 elif client_id == CrashClient.CLUSTERFUZZ: | 47 elif client_id == CrashClient.CLUSTERFUZZ: # pragma: no cover |
| 48 cls = findit_for_clusterfuzz.FinditForClusterfuzz | 48 cls = findit_for_clusterfuzz.FinditForClusterfuzz |
| 49 else: | 49 else: # pragma: no cover |
| 50 raise ValueError('FinditForClientID: ' | 50 raise ValueError('FinditForClientID: ' |
| 51 'unknown or unsupported client %s' % client_id) | 51 'unknown or unsupported client %s' % client_id) |
| 52 | 52 |
| 53 return cls( | 53 return cls(gitiles_repository.GitilesRepository( |
| 54 gitiles_repository.GitilesRepository(http_client=HttpClientAppengine()), | 54 http_client=HttpClientAppengine())) |
| 55 CrashWrapperPipeline) | |
| 56 | 55 |
| 57 | 56 |
| 58 # Some notes about the classes below, for people who are not | 57 # Some notes about the classes below, for people who are not familiar |
| 59 # familiar with AppEngine. The thing that really kicks everything off | 58 # with AppEngine pipelines: |
| 60 # is ``CrashWrapperPipeline.run``. However, an important thing to bear in | 59 # |
| 61 # mind is that whatever arguments are passed to that method will also | 60 # The pipeline library is designed in a strange way which requires that |
|
stgao
2016/11/03 04:48:10
It might be just me, but "strange" seems a little
wrengr
2016/11/03 17:40:34
I've done a lot of software engineering on a lot o
stgao
2016/11/03 18:48:30
I would say "special" instead. But it's fine as is
wrengr
2016/11/03 20:04:21
filed http://crbug.com/662145
| |
| 62 # be passed to the ``run`` method on whatever objects it yields. Thus, | 61 # all the ``__init__`` and ``run`` methods in this file take the exact |
| 63 # all the ``run`` methods across these different classes must have | 62 # same arguments. This arises from the interaction between a few |
| 64 # the same type. In practice, we end up passing all the arguments to the | 63 # different constraints. First, for any given pipeline, its ``__init__`` |
| 65 # constructors, because we need to have the fields around for logging | 64 # and ``run`` must take the same arguments. Second, all the objects that |
| 66 # (e.g., in ``finalized``); thus, there's nothing that needs to be passed | 65 # ``CrashWrapperPipeline.run`` yields must take the same arguments as that |
| 67 # to ``run``. Another thing to bear in mind is that whatever objects | 66 # ``run`` method itself. For more about all this, see: |
|
stgao
2016/11/03 04:48:10
I don't understand why the yielded objects must ta
wrengr
2016/11/03 17:40:34
I have no idea. This is what katesonia has said re
stgao
2016/11/03 18:17:37
Sharu, would you mind clarifying here? I'm confuse
Sharu Jiang
2016/11/04 00:19:00
The comment "``CrashWrapperPipeline.run`` yields m
| |
| 68 # ``CrashWrapperPipeline.run`` yields must be JSON-serializable. The base | 67 # https://github.com/GoogleCloudPlatform/appengine-pipelines/wiki/Python |
| 69 # class handles most of that for us, so the force of this constraint is | 68 # For our use case, we pass all the important data to ``__init__``, |
| 70 # that all the arguments to the constructors for those classes must be | 69 # since we need it to be available in ``finalized``; thus we ignore the |
| 71 # JSON-serializable. Thus, we cannot actually pass a Findit object to | 70 # arguments passed to ``run`` (which will be the same values that were |
| 72 # the constructor, but rather must pass only the ``client_id`` (or whatever | 71 # passed to ``__init__``). |
| 73 # JSON dict) and then reconstruct the Findit object from that. Moreover, | 72 # |
| 74 # the ``run`` method and the ``finalized`` method will be run in different | 73 # In addition, whatever objects ``CrashWrapperPipeline.run`` yields must |
| 75 # processes, so we will actually end up reconstructing the Findit object | 74 # be JSON-serializable. The pipeline class handles most of the details, |
|
stgao
2016/11/03 04:48:10
For the yielded objects by ``CrashWrapperPipeline.
wrengr
2016/11/03 17:40:34
The bug I encountered previously about JSON-serial
stgao
2016/11/03 18:48:30
This is true.
But my comment above is for "In add
wrengr
2016/11/03 20:04:21
The issue I encountered was that ``yield SomePipel
stgao
2016/11/03 21:07:07
The problem most likely is due to passing ``stuff`
| |
| 76 # twice. Thus, we shouldn't store anything in the pipeline objects outside | 75 # so the force of this constraint is that whatever arguments we pass |
|
stgao
2016/11/03 04:48:10
It is true that parameters passed over to the cons
wrengr
2016/11/03 17:40:34
Which agrees with what I wrote...
stgao
2016/11/03 18:48:30
Sorry for the typo again. I meant to say __yielded
| |
| 77 # of what their constructors store. | 76 # to their ``__init__`` must themselves be JSON-serializable. Alas, |
| 77 # in Python, JSON-serializability isn't a property of classes themselves, | |
| 78 # but rather a property of the JSON-encoder object used to do the | |
| 79 # serialization. Thus, we cannot pass a ``Findit`` object directly to | |
| 80 # any of the methods here, but rather must instead pass the ``client_id`` | |
| 81 # (or whatever JSON dict), and then reconstruct the ``Findit`` object | |
| 82 # from that data. | |
| 83 # | |
| 84 # Moreover, the ``run`` and ``finalized`` methods are executed in separate | |
|
stgao
2016/11/03 04:48:10
Actually, this is one of the goodies from appengin
wrengr
2016/11/03 17:40:34
Does it not? How do changes from inside ``run`` ge
stgao
2016/11/03 18:17:37
Sorry for the typo. I meant to say it __does__ int
wrengr
2016/11/03 18:20:20
Cool, I understood it right :)
| |
| 85 # processes, so we'll actually end up reconstructing the ``Findit`` object | |
| 86 # twice. This also means ``run`` can't store anything in the pipeline | |
| 87 # object and expect it to still be available in the ``finalized`` method. | |
| 78 | 88 |
| 79 class CrashBasePipeline(BasePipeline): | 89 class CrashBasePipeline(BasePipeline): |
| 80 def __init__(self, client_id, crash_identifiers): | 90 def __init__(self, client_id, crash_identifiers): |
| 81 super(CrashBasePipeline, self).__init__(client_id, crash_identifiers) | 91 super(CrashBasePipeline, self).__init__(client_id, crash_identifiers) |
| 82 self._crash_identifiers = crash_identifiers | 92 self._crash_identifiers = crash_identifiers |
| 83 self._findit = FinditForClientID(client_id) | 93 self._findit = FinditForClientID(client_id) |
| 84 | 94 |
| 85 @property | 95 @property |
| 86 def client_id(self): # pragma: no cover | 96 def client_id(self): # pragma: no cover |
| 87 return self._findit.client_id | 97 return self._findit.client_id |
| 88 | 98 |
| 89 def run(self, *args, **kwargs): | 99 def run(self, *args, **kwargs): |
| 90 raise NotImplementedError() | 100 raise NotImplementedError() |
| 91 | 101 |
| 92 | 102 |
| 93 class CrashAnalysisPipeline(CrashBasePipeline): | 103 class CrashAnalysisPipeline(CrashBasePipeline): |
| 94 def finalized(self): # pragma: no cover | 104 def finalized(self): |
| 95 if self.was_aborted: | 105 if self.was_aborted: # pragma: no cover |
| 96 self._PutAbortedError() | 106 self._PutAbortedError() |
| 97 | 107 |
| 98 # N.B., this method must be factored out for unittest reasons; since | 108 # N.B., this method must be factored out for unittest reasons; since |
| 99 # ``finalized`` takes no arguments (by AppEngine's spec) and | 109 # ``finalized`` takes no arguments (by AppEngine's spec) and |
| 100 # ``was_aborted`` can't be altered directly. | 110 # ``was_aborted`` can't be altered directly. |
| 101 def _PutAbortedError(self): | 111 def _PutAbortedError(self): |
| 102 """Update the ndb.Model to indicate that this pipeline was aborted.""" | 112 """Update the ndb.Model to indicate that this pipeline was aborted.""" |
| 103 logging.error('Aborted analysis for %s', repr(self._crash_identifiers)) | 113 logging.error('Aborted analysis for %s', repr(self._crash_identifiers)) |
| 104 analysis = self._findit.GetAnalysis(self._crash_identifiers) | 114 analysis = self._findit.GetAnalysis(self._crash_identifiers) |
| 105 analysis.status = analysis_status.ERROR | 115 analysis.status = analysis_status.ERROR |
| 106 analysis.put() | 116 analysis.put() |
| 107 | 117 |
| 108 # TODO(http://crbug.com/659346): we misplaced the coverage test; find it! | 118 def run(self, *_args, **_kwargs): |
| 109 # Arguments number differs from overridden method - pylint: disable=W0221 | 119 """Call predator to do the analysis of the given crash. |
| 110 def run(self): | 120 |
| 121 N.B., due to the structure of AppEngine pipelines, this method must | |
| 122 accept the same arguments as are passed to ``__init__``; however, | |
| 123 because they were already passed to ``__init__`` there's no use in | |
| 124 recieving them here. Thus, we discard all the arguments to this method | |
| 125 (except for ``self``, naturally). | |
| 126 """ | |
| 111 # TODO(wrengr): shouldn't this method somehow call _NeedsNewAnalysis | 127 # TODO(wrengr): shouldn't this method somehow call _NeedsNewAnalysis |
| 112 # to guard against race conditions? | 128 # to guard against race conditions? |
| 113 analysis = self._findit.GetAnalysis(self._crash_identifiers) | 129 analysis = self._findit.GetAnalysis(self._crash_identifiers) |
| 114 | 130 |
| 115 # Update the model's status to say we're in the process of doing analysis. | 131 # Update the model's status to say we're in the process of doing analysis. |
| 116 analysis.pipeline_status_path = self.pipeline_status_path() | 132 analysis.pipeline_status_path = self.pipeline_status_path() |
| 117 analysis.status = analysis_status.RUNNING | 133 analysis.status = analysis_status.RUNNING |
| 118 analysis.started_time = time_util.GetUTCNow() | 134 analysis.started_time = time_util.GetUTCNow() |
| 119 analysis.findit_version = appengine_util.GetCurrentVersion() | 135 analysis.findit_version = appengine_util.GetCurrentVersion() |
| 120 analysis.put() | 136 analysis.put() |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 131 'found_components': False, | 147 'found_components': False, |
| 132 'has_regression_range': False, | 148 'has_regression_range': False, |
| 133 'solution': None, | 149 'solution': None, |
| 134 } | 150 } |
| 135 | 151 |
| 136 # Update model's status to say we're done, and save the results. | 152 # Update model's status to say we're done, and save the results. |
| 137 analysis.completed_time = time_util.GetUTCNow() | 153 analysis.completed_time = time_util.GetUTCNow() |
| 138 analysis.result = result | 154 analysis.result = result |
| 139 for tag_name, tag_value in tags.iteritems(): | 155 for tag_name, tag_value in tags.iteritems(): |
| 140 # TODO(http://crbug.com/602702): make it possible to add arbitrary tags. | 156 # TODO(http://crbug.com/602702): make it possible to add arbitrary tags. |
| 141 if hasattr(analysis, tag_name): | 157 # TODO(http://crbug.com/659346): we misplaced the coverage test; find it! |
| 158 if hasattr(analysis, tag_name): # pragma: no cover | |
| 142 setattr(analysis, tag_name, tag_value) | 159 setattr(analysis, tag_name, tag_value) |
| 143 analysis.status = analysis_status.COMPLETED | 160 analysis.status = analysis_status.COMPLETED |
| 144 analysis.put() | 161 analysis.put() |
| 145 | 162 |
| 146 | 163 |
| 147 class PublishResultPipeline(CrashBasePipeline): | 164 class PublishResultPipeline(CrashBasePipeline): |
| 148 # TODO(http://crbug.com/659346): we misplaced the coverage test; find it! | |
| 149 def finalized(self): | 165 def finalized(self): |
| 150 if self.was_aborted: # pragma: no cover. | 166 if self.was_aborted: # pragma: no cover. |
| 151 logging.error('Failed to publish %s analysis result for %s', | 167 logging.error('Failed to publish %s analysis result for %s', |
| 152 repr(self._crash_identifiers), self.client_id) | 168 repr(self._crash_identifiers), self.client_id) |
| 153 | 169 |
| 154 # Arguments number differs from overridden method - pylint: disable=W0221 | 170 # TODO(http://crbug.com/659346): we misplaced the coverage test; find it! |
| 155 def run(self): | 171 def run(self, *_args, **_kwargs): # pragma: no cover |
| 172 """Publish the results of our analysis back into the ndb.Model. | |
| 173 | |
| 174 N.B., due to the structure of AppEngine pipelines, this method must | |
| 175 accept the same arguments as are passed to ``__init__``; however, | |
| 176 because they were already passed to ``__init__`` there's no use in | |
| 177 recieving them here. Thus, we discard all the arguments to this method | |
| 178 (except for ``self``, naturally). | |
| 179 """ | |
| 156 analysis = self._findit.GetAnalysis(self._crash_identifiers) | 180 analysis = self._findit.GetAnalysis(self._crash_identifiers) |
| 157 result = analysis.ToPublishableResult(self._crash_identifiers) | 181 result = analysis.ToPublishableResult(self._crash_identifiers) |
| 158 messages_data = [json.dumps(result, sort_keys=True)] | 182 messages_data = [json.dumps(result, sort_keys=True)] |
| 159 | 183 |
| 160 # TODO(http://crbug.com/659354): remove Findit's dependency on CrashConfig. | 184 # TODO(http://crbug.com/659354): remove Findit's dependency on CrashConfig. |
| 161 client_config = self._findit.config | 185 client_config = self._findit.config |
| 162 # TODO(katesonia): Clean string uses in config. | 186 # TODO(katesonia): Clean string uses in config. |
| 163 topic = client_config['analysis_result_pubsub_topic'] | 187 topic = client_config['analysis_result_pubsub_topic'] |
| 164 pubsub_util.PublishMessagesToTopic(messages_data, topic) | 188 pubsub_util.PublishMessagesToTopic(messages_data, topic) |
| 165 logging.info('Published %s analysis result for %s', self.client_id, | 189 logging.info('Published %s analysis result for %s', self.client_id, |
| 166 repr(self._crash_identifiers)) | 190 repr(self._crash_identifiers)) |
| 167 | 191 |
| 168 | 192 |
| 169 class CrashWrapperPipeline(BasePipeline): | 193 # TODO(http://crbug.com/659346): we misplaced the coverage test; find it! |
| 194 class CrashWrapperPipeline(BasePipeline): # pragma: no cover | |
| 170 """Fire off pipelines to (1) do the analysis and (2) publish results. | 195 """Fire off pipelines to (1) do the analysis and (2) publish results. |
| 171 | 196 |
| 172 The reason we have analysis and publishing as separate pipelines is | 197 The reason we have analysis and publishing as separate pipelines is |
| 173 because each of them can fail for independent reasons. E.g., if we | 198 because each of them can fail for independent reasons. E.g., if we |
| 174 successfully finish the analysis, but then the publishing fails due to | 199 successfully finish the analysis, but then the publishing fails due to |
| 175 network errors, we don't want to have to redo the analysis in order | 200 network errors, we don't want to have to redo the analysis in order |
| 176 to redo the publishing. We could try to cache the fact that analysis | 201 to redo the publishing. We could try to cache the fact that analysis |
| 177 succeeded in the pipeline object itself, but we'd have to be careful | 202 succeeded in the pipeline object itself, but we'd have to be careful |
| 178 because the ``run`` and ``finalized`` methods are executed in different | 203 because the ``run`` and ``finalized`` methods are executed in different |
| 179 processes. | 204 processes. |
| 180 """ | 205 """ |
| 181 def __init__(self, client_id, crash_identifiers): | 206 def __init__(self, client_id, crash_identifiers): |
| 182 super(CrashWrapperPipeline, self).__init__(client_id, crash_identifiers) | 207 super(CrashWrapperPipeline, self).__init__(client_id, crash_identifiers) |
| 183 self._crash_identifiers = crash_identifiers | 208 self._crash_identifiers = crash_identifiers |
| 184 self._client_id = client_id | 209 self._client_id = client_id |
| 185 | 210 |
| 186 # TODO(http://crbug.com/659346): write coverage tests. | 211 def run(self, *_args, **_kwargs): |
| 187 # Arguments number differs from overridden method - pylint: disable=W0221 | 212 """Fire off pipelines to run the analysis and publish its results. |
| 188 def run(self): # pragma: no cover | 213 |
| 214 N.B., due to the structure of AppEngine pipelines, this method must | |
| 215 accept the same arguments as are passed to ``__init__``; however, | |
| 216 because they were already passed to ``__init__`` there's no use in | |
| 217 recieving them here. Thus, we discard all the arguments to this method | |
| 218 (except for ``self``, naturally). | |
| 219 """ | |
| 189 run_analysis = yield CrashAnalysisPipeline( | 220 run_analysis = yield CrashAnalysisPipeline( |
| 190 self._client_id, self._crash_identifiers) | 221 self._client_id, self._crash_identifiers) |
| 191 with pipeline.After(run_analysis): | 222 with pipeline.After(run_analysis): |
| 192 yield PublishResultPipeline(self._client_id, self._crash_identifiers) | 223 yield PublishResultPipeline(self._client_id, self._crash_identifiers) |
| OLD | NEW |