OLD | NEW |
---|---|
1 # Copyright (c) 2014 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2014 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import cgi | 5 import cgi |
6 import ConfigParser | 6 import ConfigParser |
7 import json | 7 import json |
8 import logging | 8 import logging |
9 import os | 9 import os |
10 import time | 10 import time |
11 import urllib2 | 11 import urllib2 |
12 | 12 |
13 from common import utils | |
14 from result import Result | 13 from result import Result |
15 | 14 |
16 | 15 |
17 INFINITY = float('inf') | 16 INFINITY = float('inf') |
18 | 17 |
19 | 18 |
20 def ParseURLsFromConfig(file_name): | 19 def ParseURLsFromConfig(file_name): |
21 """Parses URLs from the config file. | 20 """Parses URLs from the config file. |
22 | 21 |
23 The file should be in python config format, where svn section is in the | 22 The file should be in python config format, where svn section is in the |
(...skipping 14 matching lines...) | |
38 # Get the absolute path of the config file, and read the file. If it fails, | 37 # Get the absolute path of the config file, and read the file. If it fails, |
39 # return none. | 38 # return none. |
40 config_file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), | 39 config_file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), |
41 file_name) | 40 file_name) |
42 config.read(config_file_path) | 41 config.read(config_file_path) |
43 if not config: | 42 if not config: |
44 logging.error('Config file with URLs does not exist.') | 43 logging.error('Config file with URLs does not exist.') |
45 return None | 44 return None |
46 | 45 |
47 # Iterate through the config file, check for sections. | 46 # Iterate through the config file, check for sections. |
48 parsed_config = {} | 47 config_dict = {} |
49 for section in config.sections(): | 48 for section in config.sections(): |
50 # These two do not need another layer of dictionary, so add it and go | 49 # These two do not need another layer of dictionary, so add it and go |
51 # to next section. | 50 # to next section. |
52 if ':' not in section: | 51 if ':' not in section: |
53 for option in config.options(section): | 52 for option in config.options(section): |
54 if section not in parsed_config: | 53 if section not in config_dict: |
55 parsed_config[section] = {} | 54 config_dict[section] = {} |
56 | 55 |
57 url = config.get(section, option) | 56 url = config.get(section, option) |
58 parsed_config[section][option] = url | 57 config_dict[section][option] = url |
59 | 58 |
60 continue | 59 continue |
61 | 60 |
62 # Get repository type and component name from the section name. | 61 # Get repository type and component name from the section name. |
63 repository_type_and_component = section.split(':') | 62 repository_type_and_component = section.split(':') |
64 repository_type = repository_type_and_component[0] | 63 repository_type = repository_type_and_component[0] |
65 component_path = repository_type_and_component[1] | 64 component_path = repository_type_and_component[1] |
66 | 65 |
67 # Add 'svn' as the key, if it is not already there. | 66 # Add 'svn' as the key, if it is not already there. |
68 if repository_type not in parsed_config: | 67 if repository_type not in config_dict: |
69 parsed_config[repository_type] = {} | 68 config_dict[repository_type] = {} |
70 url_map_for_repository = parsed_config[repository_type] | 69 url_map_for_repository = config_dict[repository_type] |
71 | 70 |
72 # Add the path to the 'svn', if it is not already there. | 71 # Add the path to the 'svn', if it is not already there. |
73 if component_path not in url_map_for_repository: | 72 if component_path not in url_map_for_repository: |
74 url_map_for_repository[component_path] = {} | 73 url_map_for_repository[component_path] = {} |
75 type_to_url = url_map_for_repository[component_path] | 74 type_to_url = url_map_for_repository[component_path] |
76 | 75 |
77 # Add all URLs to this map. | 76 # Add all URLs to this map. |
78 for option in config.options(section): | 77 for option in config.options(section): |
79 url = config.get(section, option) | 78 url = config.get(section, option) |
80 type_to_url[option] = url | 79 type_to_url[option] = url |
81 | 80 |
82 return parsed_config | 81 return config_dict |
83 | 82 |
84 | 83 |
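
For reference, a minimal sketch of the nested mapping the new config_dict holds, assuming a hypothetical config file with one plain section and one 'svn:<component path>' section (the file name, section names and URLs below are illustrative, not taken from the real config):

# Hypothetical input file (urls.ini):
#
#   [codereview]
#   review_url = https://codereview.chromium.org
#
#   [svn:src/third_party/skia/]
#   revision_url = https://skia.googlesource.com/skia/+/%s
#
# Shape of the dict ParseURLsFromConfig('urls.ini') would return: plain
# sections stay one level deep, 'repo:path' sections are nested under
# repository type and then component path.
expected_config_dict = {
  'codereview': {
    'review_url': 'https://codereview.chromium.org',
  },
  'svn': {
    'src/third_party/skia/': {
      'revision_url': 'https://skia.googlesource.com/skia/+/%s',
    },
  },
}
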
85 def NormalizePathLinux(path, parsed_deps): | 84 def NormalizePath(path, parsed_deps): |
86 """Normalizes linux path. | 85 """Normalizes the path. |
87 | 86 |
88 Args: | 87 Args: |
89 path: A string representing a path. | 88 path: A string representing a path. |
90 parsed_deps: A map from component path to its component name, repository, | 89 parsed_deps: A map from component path to its component name, repository, |
91 etc. | 90 etc. |
92 | 91 |
93 Returns: | 92 Returns: |
94 A tuple containing a component this path is in (e.g. blink, skia, etc.) | 93 A tuple containing a component this path is in (e.g. blink, skia, etc.) |
95 and a path in that component's repository. | 94 and a path in that component's repository. Returns None if the component |
stgao (2014/08/22 06:50:53): It seems googlecode is not checked.
jeun (2014/08/22 22:58:43): Even if we don't check it here, it will fail later
95 repository is not supported, i.e. from googlecode. | |
96 """ | 96 """ |
97 # First normalize the path by retrieving the absolute path. | 97 # First normalize the path by retrieving the absolute path. |
98 normalized_path = os.path.abspath(path) | 98 normalized_path = os.path.normpath(path.replace('\\','/')) |
stgao (2014/08/22 06:50:53): Please also update the comment.
jeun (2014/08/22 22:58:43): Done.
99 | 99 |
100 # Iterate through all component paths in the parsed DEPS, in the decreasing | 100 # Iterate through all component paths in the parsed DEPS, in the decreasing |
101 # order of the length of the file path. | 101 # order of the length of the file path. |
102 for component_path in sorted(parsed_deps, | 102 for component_path in sorted(parsed_deps, |
103 key=(lambda path: -len(path))): | 103 key=(lambda path: -len(path))): |
104 # New_path is the component path with 'src/' removed. | 104 # new_component_path is the component path with 'src/' removed. |
105 new_path = component_path | 105 new_component_path = component_path |
106 if new_path.startswith('src/') and new_path != 'src/': | 106 if new_component_path.startswith('src/') and new_component_path != 'src/': |
107 new_path = new_path[len('src/'):] | 107 new_component_path = new_component_path[len('src/'):] |
108 | |
109 # We need to consider when the lowercased component path is in the path, | |
110 # because the syzyasan build returns lowercased file paths. | |
111 lower_component_path = new_component_path.lower() | |
108 | 112 |
109 # If this path is the part of file path, this file must be from this | 113 # If this path is the part of file path, this file must be from this |
110 # component. | 114 # component. |
111 if new_path in normalized_path: | 115 if new_component_path in normalized_path or \ |
116 lower_component_path in normalized_path: | |
112 | 117 |
113 # Currently does not support googlecode. | 118 # Case when the retrieved path is in lowercase. |
114 if 'googlecode' in parsed_deps[component_path]['repository']: | 119 if lower_component_path in normalized_path: |
115 return (None, '', '') | 120 current_component_path = lower_component_path |
121 else: | |
122 current_component_path = new_component_path | |
116 | 123 |
117 # Normalize the path by stripping everything off the component's relative | 124 # Normalize the path by stripping everything off the component's relative |
118 # path. | 125 # path. |
119 normalized_path = normalized_path.split(new_path,1)[1] | 126 normalized_path = normalized_path.split(current_component_path, 1)[1] |
127 lower_normalized_path = normalized_path.lower() | |
120 | 128 |
121 # Add 'src/' or 'Source/' at the front of the normalized path, depending | 129 # Add 'src/' or 'Source/' at the front of the normalized path, depending |
122 # on what prefix the component path uses. For example, blink uses | 130 # on what prefix the component path uses. For example, blink uses |
123 # 'Source' but chromium uses 'src/', and blink component path is | 131 # 'Source' but chromium uses 'src/', and blink component path is |
124 # 'src/third_party/WebKit/Source', so add 'Source/' in front of the | 132 # 'src/third_party/WebKit/Source', so add 'Source/' in front of the |
125 # normalized path. | 133 # normalized path. |
126 if not (normalized_path.startswith('src/') or | 134 if not (lower_normalized_path.startswith('src/') or |
127 normalized_path.startswith('Source/')): | 135 lower_normalized_path.startswith('source/')): |
128 | 136 |
129 if (new_path.lower().endswith('src/') or | 137 if (lower_component_path.endswith('src/') or |
130 new_path.lower().endswith('source/')): | 138 lower_component_path.endswith('source/')): |
131 normalized_path = new_path.split('/')[-2] + '/' + normalized_path | 139 normalized_path = (current_component_path.split('/')[-2] + '/' + |
140 normalized_path) | |
132 | 141 |
133 else: | 142 else: |
134 normalized_path = 'src/' + normalized_path | 143 normalized_path = 'src/' + normalized_path |
135 | 144 |
136 component_name = parsed_deps[component_path]['name'] | 145 component_name = parsed_deps[component_path]['name'] |
137 | 146 |
138 return (component_path, component_name, normalized_path) | 147 return (component_path, component_name, normalized_path) |
139 | 148 |
140 # If the path does not match any component, default to chromium. | 149 # If the path does not match any component, default to chromium. |
141 return ('src/', 'chromium', normalized_path) | 150 return ('src/', 'chromium', normalized_path) |
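
A hedged usage sketch of the renamed NormalizePath, assuming the function above is in scope and that component paths in parsed_deps carry a trailing '/' (the entries below are made up); it shows why the lowercase comparison matters for syzyasan-reported paths:

# Illustrative parsed_deps entries; real ones come from the DEPS parser.
parsed_deps = {
  'src/third_party/WebKit/Source/': {'name': 'blink', 'repository': '...'},
  'src/': {'name': 'chromium', 'repository': '...'},
}

# A syzyasan build reports a lowercased, backslash-separated path.
path = 'c:\\build\\src\\third_party\\webkit\\source\\wtf\\vector.h'

component_path, component_name, normalized = NormalizePath(path, parsed_deps)
# Expected (on a POSIX host): component_name == 'blink' rather than the
# 'src/' fallback, with normalized == 'source/wtf/vector.h' -- the casing
# follows the input because only the comparison is lowercased.
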
(...skipping 11 matching lines...) | |
153 """ | 162 """ |
154 if not regression: | 163 if not regression: |
155 return None | 164 return None |
156 | 165 |
157 revisions = regression.split(':') | 166 revisions = regression.split(':') |
158 | 167 |
159 # If regression information is not available, return none. | 168 # If regression information is not available, return none. |
160 if len(revisions) != 2: | 169 if len(revisions) != 2: |
161 return None | 170 return None |
162 | 171 |
163 # Strip 'r' from both start and end range. | 172 range_start = revisions[0] |
164 range_start = revisions[0].lstrip('r') | 173 range_end = revisions[1] |
165 range_end = revisions[1].lstrip('r') | 174 |
175 # Strip 'r' off the range start/end. Not using lstrip to avoid the case where | |
Martin Barbella (2014/08/22 02:24:14): Maybe it would be best for us not to include the '
jeun (2014/08/22 22:58:43): Done.
176 # the range is a git hash that starts with 'r'. | |
177 if range_start.startswith('r'): | |
178 range_start = range_start[1:] | |
179 | |
180 if range_end.startswith('r'): | |
181 range_end = range_end[1:] | |
166 | 182 |
167 return [range_start, range_end] | 183 return [range_start, range_end] |
168 | 184 |
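
A small sketch of why the patch replaces lstrip('r') with an explicit startswith() check: str.lstrip strips every leading character in the given set, not just one occurrence (the revision strings below are illustrative):

'r180000'.lstrip('r')    # -> '180000', as intended
'rrev12ab'.lstrip('r')   # -> 'ev12ab' -- every leading 'r' is dropped

# The explicit check only removes a single leading 'r':
revision = 'rrev12ab'
if revision.startswith('r'):
  revision = revision[1:]
# revision == 'rev12ab'
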
169 | 185 |
170 def LoadJSON(json_string): | 186 def LoadJSON(json_string): |
171 """Loads json object from string, or None. | 187 """Loads json object from string, or None. |
172 | 188 |
173 Args: | 189 Args: |
174 json_string: A string to get object from. | 190 json_string: A string to get object from. |
175 | 191 |
(...skipping 13 matching lines...) | |
189 | 205 |
190 Args: | 206 Args: |
191 url: URL to get data from. | 207 url: URL to get data from. |
192 retries: Number of times to retry connection. | 208 retries: Number of times to retry connection. |
193 sleep_time: Time in seconds to wait before retrying connection. | 209 sleep_time: Time in seconds to wait before retrying connection. |
194 timeout: Time in seconds to wait before time out. | 210 timeout: Time in seconds to wait before time out. |
195 | 211 |
196 Returns: | 212 Returns: |
197 None if the data retrieval fails, or the raw data. | 213 None if the data retrieval fails, or the raw data. |
198 """ | 214 """ |
199 count = 0 | 215 data = None |
200 while True: | 216 for i in range(retries): |
201 count += 1 | |
202 # Retrieves data from URL. | 217 # Retrieves data from URL. |
203 try: | 218 try: |
204 _, data = utils.GetHttpClient().Get(url) | 219 data = urllib2.urlopen(url, timeout=timeout) |
stgao (2014/08/22 06:50:53): Is there a specific reason to revert my change to
jeun (2014/08/22 22:58:43): Done.
205 return data | 220 |
221 # If retrieval is successful, return the data. | |
222 if data: | |
223 return data.read() | |
224 | |
225 # If retrieval fails, try again after sleep_time seconds. | |
226 except urllib2.URLError: | |
227 time.sleep(sleep_time) | |
228 continue | |
206 except IOError: | 229 except IOError: |
207 if count < retries: | 230 time.sleep(sleep_time) |
208 # If retrieval fails, try after sleep_time second. | 231 continue |
209 time.sleep(sleep_time) | |
210 else: | |
211 break | |
212 | 232 |
213 # Return None if it fails to read data from URL 'retries' times. | 233 # Return None if it fails to read data from URL 'retries' times. |
214 return None | 234 return None |
215 | 235 |
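
A hedged usage sketch of GetDataFromURL; the URL is illustrative, and the retry/sleep/timeout arguments are passed explicitly because the function signature (and any defaults) falls in the elided part of this diff:

# Assumes GetDataFromURL above is in scope.
url = 'https://src.chromium.org/viewvc/chrome?view=revision&revision=180000'
data = GetDataFromURL(url, retries=3, sleep_time=0.1, timeout=5)
if data is None:
  logging.error('Failed to fetch %s after 3 attempts.', url)
# Otherwise data is the raw response body returned by urlopen().read().
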
216 | 236 |
217 def FindMinLineDistance(crashed_line_list, changed_line_numbers): | 237 def FindMinLineDistance(crashed_line_list, changed_line_numbers): |
218 """Calculates how far the changed line is from one of the crashes. | 238 """Calculates how far the changed line is from one of the crashes. |
219 | 239 |
220 Finds the minimum distance between the lines that the file crashed on | 240 Finds the minimum distance between the lines that the file crashed on |
221 and the lines that the file changed. For example, if the file crashed on | 241 and the lines that the file changed. For example, if the file crashed on |
222 line 200 and the CL changes lines 203, 204 and 205, the function returns 3. | 242 line 200 and the CL changes lines 203, 204 and 205, the function returns 3. |
223 | 243 |
224 Args: | 244 Args: |
225 crashed_line_list: A list of lines that the file crashed on. | 245 crashed_line_list: A list of lines that the file crashed on. |
226 changed_line_numbers: A list of lines that the file changed. | 246 changed_line_numbers: A list of lines that the file changed. |
227 | 247 |
228 Returns: | 248 Returns: |
229 The minimum distance. If either of the input lists is empty, | 249 The minimum distance. If either of the input lists is empty, |
230 it returns inf. | 250 it returns inf. |
231 | 251 |
232 """ | 252 """ |
233 min_distance = INFINITY | 253 min_distance = INFINITY |
234 | 254 |
235 for line in crashed_line_list: | 255 crashed_line_numbers = [crashed_line |
256 for crashed_line_range in crashed_line_list | |
257 for crashed_line in crashed_line_range] | |
258 for line in crashed_line_numbers: | |
236 for distance in changed_line_numbers: | 259 for distance in changed_line_numbers: |
237 # Find the current distance and update the min if current distance is | 260 # Find the current distance and update the min if current distance is |
238 # less than current min. | 261 # less than current min. |
239 current_distance = abs(line - distance) | 262 current_distance = abs(line - distance) |
240 if current_distance < min_distance: | 263 if current_distance < min_distance: |
241 min_distance = current_distance | 264 min_distance = current_distance |
242 | 265 |
243 return min_distance | 266 return min_distance |
244 | 267 |
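
A quick sketch of the new per-frame shape of crashed_line_list, which FindMinLineDistance now flattens before comparing (all numbers are made up):

# One list of crashed line numbers per stack frame.
crashed_line_list = [[200], [310, 311]]
changed_line_numbers = [203, 204, 205]

# Flattened crashed lines are [200, 310, 311]; the closest pair is
# (200, 203), so this returns 3.
FindMinLineDistance(crashed_line_list, changed_line_numbers)
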
245 | 268 |
(...skipping 96 matching lines...) | |
342 lines that the file changes. The intersection looks within 3 lines | 365 lines that the file changes. The intersection looks within 3 lines |
343 of the line that caused the crash. | 366 of the line that caused the crash. |
344 | 367 |
345 Args: | 368 Args: |
346 crashed_line_list: A list of lines that the file crashed on. | 369 crashed_line_list: A list of lines that the file crashed on. |
347 stack_frame_index: A list of positions in stack for each of the lines. | 370 stack_frame_index: A list of positions in stack for each of the lines. |
348 changed_line_numbers: A list of lines that the file changed. | 371 changed_line_numbers: A list of lines that the file changed. |
349 line_range: Number of lines to look backwards from crashed lines. | 372 line_range: Number of lines to look backwards from crashed lines. |
350 | 373 |
351 Returns: | 374 Returns: |
352 line_intersection: Intersection between crashed_line_list and | 375 line_number_intersection: Intersection between crashed_line_list and |
353 changed_line_numbers. | 376 changed_line_numbers. |
354 stack_frame_index_intersection: Stack number for each of the intersections. | 377 stack_frame_index_intersection: Stack number for each of the intersections. |
355 """ | 378 """ |
356 line_intersection = [] | 379 line_number_intersection = [] |
357 stack_frame_index_intersection = [] | 380 stack_frame_index_intersection = [] |
358 | 381 |
359 # Iterate through the crashed lines, and their occurrences in the stack. | 382 # Iterate through the crashed lines, and their occurrences in the stack. |
360 for (line, stack_frame_index) in zip(crashed_line_list, stack_frame_index): | 383 for (lines, stack_frame_index) in zip(crashed_line_list, stack_frame_index): |
361 # Also check previous 'line_range' lines. | 384 # Also check previous 'line_range' lines. Create a set of all crashed lines |
362 line_minus_n = range(line - line_range, line + 1) | 385 # and the lines within 'line_range' before each crashed line. |
386 line_minus_n = set() | |
387 for line in lines: | |
388 for line_in_range in range(line - line_range, line + 1): | |
389 line_minus_n.add(line_in_range) | |
363 | 390 |
364 for changed_line in changed_line_numbers: | 391 for changed_line in changed_line_numbers: |
365 # If a CL does not change the crashed line, check next line. | 392 # If a CL does not change the crashed line, check next line. |
366 if changed_line not in line_minus_n: | 393 if changed_line not in line_minus_n: |
367 continue | 394 continue |
368 | 395 |
396 intersected_line = set() | |
369 # If the changed line is exactly the crashed line, add that line. | 397 # If the changed line is exactly the crashed line, add that line. |
370 if line in changed_line_numbers: | 398 for line in lines: |
371 intersected_line = line | 399 if line in changed_line_numbers: |
400 intersected_line.add(line) | |
372 | 401 |
373 # If the changed line is in 3 lines of the crashed line, add the line. | 402 # If the changed line is in 3 lines of the crashed line, add the line. |
374 else: | 403 else: |
375 intersected_line = changed_line | 404 intersected_line.add(changed_line) |
376 | 405 |
377 # Avoid adding the same line twice. | 406 # Avoid adding the same line twice. |
378 if intersected_line not in line_intersection: | 407 if intersected_line not in line_number_intersection: |
379 line_intersection.append(intersected_line) | 408 line_number_intersection.append(list(intersected_line)) |
380 stack_frame_index_intersection.append(stack_frame_index) | 409 stack_frame_index_intersection.append(stack_frame_index) |
381 | 410 |
382 break | 411 break |
383 | 412 |
384 return (line_intersection, stack_frame_index_intersection) | 413 return (line_number_intersection, stack_frame_index_intersection) |
385 | 414 |
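
A hedged example of FindIntersection with the new nested crashed_line_list; arguments are passed positionally in the order given by the docstring, and all numbers are made up:

crashed_line_list = [[200], [450]]   # crashed lines, grouped per stack frame
stack_frame_indices = [0, 3]         # frame position of each group
changed_line_numbers = [198, 449]    # lines touched by the CL

FindIntersection(crashed_line_list, stack_frame_indices,
                 changed_line_numbers, 3)
# Roughly ([[198], [449]], [0, 3]): 198 falls within 3 lines before crashed
# line 200, and 449 within 3 lines before crashed line 450.
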
386 | 415 |
387 def MatchListToResultList(matches): | 416 def MatchListToResultList(matches): |
388 """Converts a list of matches to a list of result objects. | 417 """Converts a list of matches to a list of result objects. |
389 | 418 |
390 Args: | 419 Args: |
391 matches: A list of match objects along with its stack priority and revision | 420 matches: A list of match objects along with its stack priority and revision |
392 number/git hash | 421 number/git hash |
393 Returns: | 422 Returns: |
394 A list of result object. | 423 A list of result object. |
395 | 424 |
396 """ | 425 """ |
397 result_list = [] | 426 result_list = [] |
398 | 427 |
399 for _, cl, match in matches: | 428 for _, cl, match in matches: |
400 suspected_cl = cl | 429 suspected_cl = cl |
401 revision_url = match.revision_url | 430 revision_url = match.revision_url |
402 component_name = match.component_name | 431 component_name = match.component_name |
403 author = match.author | 432 author = match.author |
404 reason = match.reason | 433 reason = match.reason |
405 review_url = match.review_url | 434 review_url = match.review_url |
406 reviewers = match.reviewers | 435 reviewers = match.reviewers |
407 # For matches, line content does not exist. | 436 # For matches, line content does not exist. |
408 line_content = None | 437 line_content = None |
438 message = match.message | |
409 | 439 |
410 result = Result(suspected_cl, revision_url, component_name, author, reason, | 440 result = Result(suspected_cl, revision_url, component_name, author, reason, |
411 review_url, reviewers, line_content) | 441 review_url, reviewers, line_content, message) |
412 result_list.append(result) | 442 result_list.append(result) |
413 | 443 |
414 return result_list | 444 return result_list |
415 | 445 |
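
A hedged sketch of the input MatchListToResultList expects: each entry is a 3-tuple whose first element is unused here, followed by the suspected CL and a match object. The stand-in class below carries only the attributes this function reads; the real Match class lives elsewhere in this package, and every value is made up:

class FakeMatch(object):
  """Stand-in with just the fields MatchListToResultList touches."""

  def __init__(self):
    self.revision_url = 'https://chromium.googlesource.com/chromium/src/+/aaaa'
    self.component_name = 'chromium'
    self.author = 'author@chromium.org'
    self.reason = 'Changed crashed lines 200-203 of foo.cc.'
    self.review_url = 'https://codereview.chromium.org/123450'
    self.reviewers = ['reviewer@chromium.org']
    self.message = 'Fix use-after-free in Foo.'

matches = [(1, 'aaaa', FakeMatch())]  # First element is unused by this function.
results = MatchListToResultList(matches)  # One Result, now carrying message.
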
416 | 446 |
417 def BlameListToResultList(blame_list): | 447 def BlameListToResultList(blame_list): |
418 """Converts a blame list to a list of result objects. | 447 """Converts a blame list to a list of result objects. |
419 | 449 |
420 Args: | 450 Args: |
421 blame_list: A list of blame objects. | 451 blame_list: A list of blame objects. |
422 | 452 |
423 Returns: | 453 Returns: |
424 A list of result objects. | 454 A list of result objects. |
425 """ | 455 """ |
426 result_list = [] | 456 result_list = [] |
427 | 457 |
428 for blame in blame_list: | 458 for blame in blame_list: |
429 suspected_cl = blame.revision | 459 suspected_cl = blame.revision |
430 revision_url = blame.url | 460 revision_url = blame.url |
431 component_name = blame.component_name | 461 component_name = blame.component_name |
432 author = blame.author | 462 author = blame.author |
433 reason = ( | 463 reason = ( |
434 'The CL changes line %s of file %s from stack %d.' % | 464 'The CL changes line %s of file %s, which is stack frame index %d.' % |
Martin Barbella (2014/08/22 02:24:14): Nit: remove "index".
jeun (2014/08/22 22:58:43): Done.
435 (blame.line_number, blame.file, blame.stack_frame_index)) | 465 (blame.line_number, blame.file, blame.stack_frame_index)) |
436 # Blame object does not have review url and reviewers. | 466 # Blame object does not have review url and reviewers. |
437 review_url = None | 467 review_url = None |
438 reviewers = None | 468 reviewers = None |
439 line_content = blame.line_content | 469 line_content = blame.line_content |
470 message = blame.message | |
440 | 471 |
441 result = Result(suspected_cl, revision_url, component_name, author, reason, | 472 result = Result(suspected_cl, revision_url, component_name, author, reason, |
442 review_url, reviewers, line_content) | 473 review_url, reviewers, line_content, message) |
443 result_list.append(result) | 474 result_list.append(result) |
444 | 475 |
445 return result_list | 476 return result_list |