OLD | NEW |
---|---|
1 # Copyright (c) 2014 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2014 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import cgi | 5 import cgi |
6 import ConfigParser | 6 import ConfigParser |
7 import json | 7 import json |
8 import logging | |
9 import os | 8 import os |
10 import time | 9 import time |
11 import urllib2 | |
12 | 10 |
13 from common import utils | 11 from common import utils |
14 from result import Result | 12 from result import Result |
15 | 13 |
16 | 14 |
17 INFINITY = float('inf') | 15 INFINITY = float('inf') |
18 | 16 |
19 | 17 |
18 def GetRepositoryType(revision_number): | |
19 """Returns the repository type of this revision number. | |
20 | |
21 Args: | |
22 revision_number: A revision number or git hash. | |
23 | |
24 Returns: | |
25 'git' or 'svn', depending on the revision_number. | |
26 """ | |
27 if utils.IsGitHash(revision_number): | |
28 return 'git' | |
29 else: | |
30 return 'svn' | |
31 | |
32 | |
20 def ParseURLsFromConfig(file_name): | 33 def ParseURLsFromConfig(file_name): |
21 """Parses URLS from the config file. | 34 """Parses URLS from the config file. |
22 | 35 |
23 The file should be in python config format, where svn section is in the | 36 The file should be in python config format, where svn section is in the |
24 format "svn:component_path". | 37 format "svn:component_path". |
25 Each of the section for svn should contain changelog_url, revision_url, | 38 Each of the section for svn should contain changelog_url, revision_url, |
26 diff_url and blame_url. | 39 diff_url and blame_url. |
27 | 40 |
28 Args: | 41 Args: |
29 file_name: The name of the file that contains URL information. | 42 file_name: The name of the file that contains URL information. |
30 | 43 |
31 Returns: | 44 Returns: |
32 A dictionary that maps repository type to list of URLs. For svn, it maps | 45 A dictionary that maps repository type to list of URLs. For svn, it maps |
33 key 'svn' to another dictionary, which maps component path to the URLs | 46 key 'svn' to another dictionary, which maps component path to the URLs |
34 as explained above. For git, it maps to the URLs as explained above. | 47 as explained above. For git, it maps to the URLs as explained above. |
35 """ | 48 """ |
36 config = ConfigParser.ConfigParser() | 49 config = ConfigParser.ConfigParser() |
37 | 50 |
38 # Get the absolute path of the config file, and read the file. If it fails, | 51 # Get the absolute path of the config file, and read the file. If it fails, |
39 # return none. | 52 # return none. |
40 config_file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), | 53 config_file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), |
41 file_name) | 54 file_name) |
42 config.read(config_file_path) | 55 config.read(config_file_path) |
43 if not config: | 56 if not config: |
44 logging.error('Config file with URLs does not exist.') | |
45 return None | 57 return None |
46 | 58 |
47 # Iterate through the config file, check for sections. | 59 # Iterate through the config file, check for sections. |
48 parsed_config = {} | 60 config_dict = {} |
49 for section in config.sections(): | 61 for section in config.sections(): |
50 # These two do not need another layer of dictionary, so add it and go | 62 # These two do not need another layer of dictionary, so add it and go |
51 # to next section. | 63 # to next section. |
52 if ':' not in section: | 64 if ':' not in section: |
53 for option in config.options(section): | 65 for option in config.options(section): |
54 if section not in parsed_config: | 66 if section not in config_dict: |
55 parsed_config[section] = {} | 67 config_dict[section] = {} |
56 | 68 |
57 url = config.get(section, option) | 69 url = config.get(section, option) |
58 parsed_config[section][option] = url | 70 config_dict[section][option] = url |
59 | 71 |
60 continue | 72 continue |
61 | 73 |
62 # Get repository type and component name from the section name. | 74 # Get repository type and component name from the section name. |
63 repository_type_and_component = section.split(':') | 75 repository_type_and_component = section.split(':') |
64 repository_type = repository_type_and_component[0] | 76 repository_type = repository_type_and_component[0] |
65 component_path = repository_type_and_component[1] | 77 component_path = repository_type_and_component[1] |
66 | 78 |
67 # Add 'svn' as the key, if it is not already there. | 79 # Add 'svn' as the key, if it is not already there. |
68 if repository_type not in parsed_config: | 80 if repository_type not in config_dict: |
69 parsed_config[repository_type] = {} | 81 config_dict[repository_type] = {} |
70 url_map_for_repository = parsed_config[repository_type] | 82 url_map_for_repository = config_dict[repository_type] |
71 | 83 |
72 # Add the path to the 'svn', if it is not already there. | 84 # Add the path to the 'svn', if it is not already there. |
73 if component_path not in url_map_for_repository: | 85 if component_path not in url_map_for_repository: |
74 url_map_for_repository[component_path] = {} | 86 url_map_for_repository[component_path] = {} |
75 type_to_url = url_map_for_repository[component_path] | 87 type_to_url = url_map_for_repository[component_path] |
76 | 88 |
77 # Add all URLs to this map. | 89 # Add all URLs to this map. |
78 for option in config.options(section): | 90 for option in config.options(section): |
79 url = config.get(section, option) | 91 url = config.get(section, option) |
80 type_to_url[option] = url | 92 type_to_url[option] = url |
81 | 93 |
82 return parsed_config | 94 return config_dict |
83 | 95 |
84 | 96 |
85 def NormalizePathLinux(path, parsed_deps): | 97 def NormalizePath(path, parsed_deps): |
86 """Normalizes linux path. | 98 """Normalizes the path. |
87 | 99 |
88 Args: | 100 Args: |
89 path: A string representing a path. | 101 path: A string representing a path. |
90 parsed_deps: A map from component path to its component name, repository, | 102 parsed_deps: A map from component path to its component name, repository, |
91 etc. | 103 etc. |
92 | 104 |
93 Returns: | 105 Returns: |
94 A tuple containing a component this path is in (e.g blink, skia, etc) | 106 A tuple containing a component this path is in (e.g blink, skia, etc) |
95 and a path in that component's repository. | 107 and a path in that component's repository. Returns None if the component |
108 repository is not supported, i.e from googlecode. | |
96 """ | 109 """ |
97 # First normalize the path by retrieving the absolute path. | 110 # First normalize the path by retrieving the normalized path. |
98 normalized_path = os.path.abspath(path) | 111 normalized_path = os.path.normpath(path.replace('\\', '/')) |
99 | 112 |
100 # Iterate through all component paths in the parsed DEPS, in the decreasing | 113 # Iterate through all component paths in the parsed DEPS, in the decreasing |
101 # order of the length of the file path. | 114 # order of the length of the file path. |
102 for component_path in sorted(parsed_deps, | 115 for component_path in sorted(parsed_deps, |
103 key=(lambda path: -len(path))): | 116 key=(lambda path: -len(path))): |
104 # New_path is the component path with 'src/' removed. | 117 # new_component_path is the component path with 'src/' removed. |
105 new_path = component_path | 118 new_component_path = component_path |
106 if new_path.startswith('src/') and new_path != 'src/': | 119 if new_component_path.startswith('src/') and new_component_path != 'src/': |
107 new_path = new_path[len('src/'):] | 120 new_component_path = new_component_path[len('src/'):] |
121 | |
122 # We need to consider when the lowercased component path is in the path, | |
123 # because syzyasan build returns lowercased file path. | |
124 lower_component_path = new_component_path.lower() | |
108 | 125 |
109 # If this path is the part of file path, this file must be from this | 126 # If this path is the part of file path, this file must be from this |
110 # component. | 127 # component. |
111 if new_path in normalized_path: | 128 if new_component_path in normalized_path or \ |
129 lower_component_path in normalized_path: | |
112 | 130 |
113 # Currently does not support googlecode. | 131 # Case when the retrieved path is in lowercase. |
114 if 'googlecode' in parsed_deps[component_path]['repository']: | 132 if lower_component_path in normalized_path: |
115 return (None, '', '') | 133 current_component_path = lower_component_path |
134 else: | |
135 current_component_path = new_component_path | |
116 | 136 |
117 # Normalize the path by stripping everything off the component's relative | 137 # Normalize the path by stripping everything off the component's relative |
118 # path. | 138 # path. |
119 normalized_path = normalized_path.split(new_path,1)[1] | 139 normalized_path = normalized_path.split(current_component_path, 1)[1] |
140 lower_normalized_path = normalized_path.lower() | |
120 | 141 |
121 # Add 'src/' or 'Source/' at the front of the normalized path, depending | 142 # Add 'src/' or 'Source/' at the front of the normalized path, depending |
122 # on what prefix the component path uses. For example, blink uses | 143 # on what prefix the component path uses. For example, blink uses |
123 # 'Source' but chromium uses 'src/', and blink component path is | 144 # 'Source' but chromium uses 'src/', and blink component path is |
124 # 'src/third_party/WebKit/Source', so add 'Source/' in front of the | 145 # 'src/third_party/WebKit/Source', so add 'Source/' in front of the |
125 # normalized path. | 146 # normalized path. |
126 if not (normalized_path.startswith('src/') or | 147 if not (lower_normalized_path.startswith('src/') or |
127 normalized_path.startswith('Source/')): | 148 lower_normalized_path.startswith('source/')): |
128 | 149 |
129 if (new_path.lower().endswith('src/') or | 150 if (lower_component_path.endswith('src/') or |
130 new_path.lower().endswith('source/')): | 151 lower_component_path.endswith('source/')): |
131 normalized_path = new_path.split('/')[-2] + '/' + normalized_path | 152 normalized_path = (current_component_path.split('/')[-2] + '/' + |
153 normalized_path) | |
132 | 154 |
133 else: | 155 else: |
134 normalized_path = 'src/' + normalized_path | 156 normalized_path = 'src/' + normalized_path |
135 | 157 |
136 component_name = parsed_deps[component_path]['name'] | 158 component_name = parsed_deps[component_path]['name'] |
137 | 159 |
138 return (component_path, component_name, normalized_path) | 160 return (component_path, component_name, normalized_path) |
139 | 161 |
140 # If the path does not match any component, default to chromium. | 162 # If the path does not match any component, default to chromium. |
141 return ('src/', 'chromium', normalized_path) | 163 return ('src/', 'chromium', normalized_path) |
(...skipping 11 matching lines...) Expand all Loading... | |
153 """ | 175 """ |
154 if not regression: | 176 if not regression: |
155 return None | 177 return None |
156 | 178 |
157 revisions = regression.split(':') | 179 revisions = regression.split(':') |
158 | 180 |
159 # If regression information is not available, return none. | 181 # If regression information is not available, return none. |
160 if len(revisions) != 2: | 182 if len(revisions) != 2: |
161 return None | 183 return None |
162 | 184 |
163 # Strip 'r' from both start and end range. | 185 range_start = revisions[0] |
164 range_start = revisions[0].lstrip('r') | 186 range_end = revisions[1] |
165 range_end = revisions[1].lstrip('r') | 187 |
188 # Strip 'r' off the range start/end. Not using lstrip to avoid the case when | |
189 # the range is in git hash and it starts with 'r'. | |
190 if range_start.startswith('r'): | |
191 range_start = range_start[1:] | |
192 | |
193 if range_end.startswith('r'): | |
194 range_end = range_end[1:] | |
166 | 195 |
167 return [range_start, range_end] | 196 return [range_start, range_end] |
168 | 197 |
169 | 198 |
170 def LoadJSON(json_string): | 199 def LoadJSON(json_string): |
171 """Loads json object from string, or None. | 200 """Loads json object from string, or None. |
172 | 201 |
173 Args: | 202 Args: |
174 json_string: A string to get object from. | 203 json_string: A string to get object from. |
175 | 204 |
(...skipping 18 matching lines...) Expand all Loading... | |
194 timeout: Time in seconds to wait before time out. | 223 timeout: Time in seconds to wait before time out. |
195 | 224 |
196 Returns: | 225 Returns: |
197 None if the data retrieval fails, or the raw data. | 226 None if the data retrieval fails, or the raw data. |
198 """ | 227 """ |
199 count = 0 | 228 count = 0 |
200 while True: | 229 while True: |
201 count += 1 | 230 count += 1 |
202 # Retrieves data from URL. | 231 # Retrieves data from URL. |
203 try: | 232 try: |
204 _, data = utils.GetHttpClient().Get(url) | 233 _, data = utils.GetHttpClient().Get(url, timeout=timeout) |
205 return data | 234 return data |
206 except IOError: | 235 except IOError: |
207 if count < retries: | 236 if count < retries: |
208 # If retrieval fails, try after sleep_time seconds. | 237 # If retrieval fails, try after sleep_time seconds. |
209 time.sleep(sleep_time) | 238 time.sleep(sleep_time) |
210 else: | 239 else: |
211 break | 240 break |
212 | 241 |
213 # Return None if it fails to read data from URL 'retries' times. | 242 # Return None if it fails to read data from URL 'retries' times. |
214 return None | 243 return None |
215 | 244 |
216 | 245 |
217 def FindMinLineDistance(crashed_line_list, changed_line_numbers): | 246 def FindMinLineDistance(crashed_line_list, changed_line_numbers, |
247 line_range=3): | |
218 """Calculates how far the changed line is from one of the crashes. | 248 """Calculates how far the changed line is from one of the crashes. |
219 | 249 |
220 Finds the minimum distance between the lines that the file crashed on | 250 Finds the minimum distance between the lines that the file crashed on |
221 and the lines that the file changed. For example, if the file crashed on | 251 and the lines that the file changed. For example, if the file crashed on |
222 line 200 and the CL changes line 203,204 and 205, the function returns 3. | 252 line 200 and the CL changes line 203,204 and 205, the function returns 3. |
223 | 253 |
224 Args: | 254 Args: |
225 crashed_line_list: A list of lines that the file crashed on. | 255 crashed_line_list: A list of lines that the file crashed on. |
226 changed_line_numbers: A list of lines that the file changed. | 256 changed_line_numbers: A list of lines that the file changed. |
257 line_range: Number of lines to look back for. | |
227 | 258 |
228 Returns: | 259 Returns: |
229 The minimum distance. If either of the input lists is empty, | 260 The minimum distance. If either of the input lists is empty, |
230 it returns inf. | 261 it returns inf. |
231 | 262 |
232 """ | 263 """ |
233 min_distance = INFINITY | 264 min_distance = INFINITY |
265 crashed_line = -1 | |
266 changed_line = -1 | |
234 | 267 |
235 for line in crashed_line_list: | 268 crashed_line_numbers = set() |
269 for crashed_line_range in crashed_line_list: | |
270 for crashed_line in crashed_line_range: | |
271 for line in range(crashed_line - line_range, crashed_line + 1): | |
272 crashed_line_numbers.add(line) | |
273 | |
274 for line in crashed_line_numbers: | |
236 for distance in changed_line_numbers: | 275 for distance in changed_line_numbers: |
237 # Find the current distance and update the min if current distance is | 276 # Find the current distance and update the min if current distance is |
238 # less than current min. | 277 # less than current min. |
239 current_distance = abs(line - distance) | 278 current_distance = abs(line - distance) |
240 if current_distance < min_distance: | 279 if current_distance < min_distance: |
241 min_distance = current_distance | 280 min_distance = current_distance |
281 crashed_line = line | |
282 changed_line = distance | |
242 | 283 |
243 return min_distance | 284 return (min_distance, crashed_line, changed_line) |
244 | 285 |
245 | 286 |
246 def GuessIfSameSubPath(path1, path2): | 287 def GuessIfSameSubPath(path1, path2): |
247 """Guesses if two paths represent same path. | 288 """Guesses if two paths represent same path. |
248 | 289 |
249 Compares the name of the folders in the path (by split('/')), and checks | 290 Compares the name of the folders in the path (by split('/')), and checks |
250 if they match either more than 3 or min of path lengths. | 291 if they match either more than 3 or min of path lengths. |
251 | 292 |
252 Args: | 293 Args: |
253 path1: First path. | 294 path1: First path. |
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
328 Returns: | 369 Returns: |
329 A string representation of file names with their urls. | 370 A string representation of file names with their urls. |
330 """ | 371 """ |
331 ret = ['\n'] | 372 ret = ['\n'] |
332 for file_name, file_url in file_list: | 373 for file_name, file_url in file_list: |
333 ret.append(' %s\n' % AddHyperlink(file_name, file_url)) | 374 ret.append(' %s\n' % AddHyperlink(file_name, file_url)) |
334 return ''.join(ret) | 375 return ''.join(ret) |
335 | 376 |
336 | 377 |
337 def Intersection(crashed_line_list, stack_frame_index, changed_line_numbers, | 378 def Intersection(crashed_line_list, stack_frame_index, changed_line_numbers, |
338 line_range=3): | 379 function, line_range=3): |
339 """Finds the overlap between changed lines and crashed lines. | 380 """Finds the overlap between changed lines and crashed lines. |
340 | 381 |
341 Finds the intersection of the lines that caused the crash and | 382 Finds the intersection of the lines that caused the crash and |
342 lines that the file changes. The intersection looks within 3 lines | 383 lines that the file changes. The intersection looks within 3 lines |
343 of the line that caused the crash. | 384 of the line that caused the crash. |
344 | 385 |
345 Args: | 386 Args: |
346 crashed_line_list: A list of lines that the file crashed on. | 387 crashed_line_list: A list of lines that the file crashed on. |
347 stack_frame_index: A list of positions in stack for each of the lines. | 388 stack_frame_index: A list of positions in stack for each of the lines. |
348 changed_line_numbers: A list of lines that the file changed. | 389 changed_line_numbers: A list of lines that the file changed. |
390 function: A list of functions that the file crashed on. | |
stgao
2014/08/23 00:44:13
function -> functions
| |
349 line_range: Number of lines to look backwards from crashed lines. | 391 line_range: Number of lines to look backwards from crashed lines. |
350 | 392 |
351 Returns: | 393 Returns: |
352 line_intersection: Intersection between crashed_line_list and | 394 line_number_intersection: Intersection between crashed_line_list and |
353 changed_line_numbers. | 395 changed_line_numbers. |
354 stack_frame_index_intersection: Stack number for each of the intersections. | 396 stack_frame_index_intersection: Stack number for each of the intersections. |
355 """ | 397 """ |
356 line_intersection = [] | 398 line_number_intersection = [] |
357 stack_frame_index_intersection = [] | 399 stack_frame_index_intersection = [] |
400 function_intersection = [] | |
358 | 401 |
359 # Iterate through the crashed lines, and its occurrence in stack. | 402 # Iterate through the crashed lines, and its occurrence in stack. |
360 for (line, stack_frame_index) in zip(crashed_line_list, stack_frame_index): | 403 for (lines, stack_frame_index, function_name) in zip( |
361 # Also check previous 'line_range' lines. | 404 crashed_line_list, stack_frame_index, function): |
362 line_minus_n = range(line - line_range, line + 1) | 405 # Also check previous 'line_range' lines. Create a set of all changed lines |
406 # and lines within 3 lines range before the crashed line. | |
407 line_minus_n = set() | |
408 for line in lines: | |
409 for line_in_range in range(line - line_range, line + 1): | |
410 line_minus_n.add(line_in_range) | |
363 | 411 |
364 for changed_line in changed_line_numbers: | 412 for changed_line in changed_line_numbers: |
365 # If a CL does not change crashed line, check next line. | 413 # If a CL does not change crashed line, check next line. |
366 if changed_line not in line_minus_n: | 414 if changed_line not in line_minus_n: |
367 continue | 415 continue |
368 | 416 |
417 intersected_line = set() | |
369 # If the changed line is exactly the crashed line, add that line. | 418 # If the changed line is exactly the crashed line, add that line. |
370 if line in changed_line_numbers: | 419 for line in lines: |
371 intersected_line = line | 420 if line in changed_line_numbers: |
421 intersected_line.add(line) | |
372 | 422 |
373 # If the changed line is in 3 lines of the crashed line, add the line. | 423 # If the changed line is in 3 lines of the crashed line, add the line. |
374 else: | 424 else: |
375 intersected_line = changed_line | 425 intersected_line.add(changed_line) |
376 | 426 |
377 # Avoid adding the same line twice. | 427 # Avoid adding the same line twice. |
378 if intersected_line not in line_intersection: | 428 if intersected_line not in line_number_intersection: |
379 line_intersection.append(intersected_line) | 429 line_number_intersection.append(list(intersected_line)) |
380 stack_frame_index_intersection.append(stack_frame_index) | 430 stack_frame_index_intersection.append(stack_frame_index) |
381 | 431 function_intersection.append(function_name) |
382 break | 432 break |
383 | 433 |
384 return (line_intersection, stack_frame_index_intersection) | 434 return (line_number_intersection, stack_frame_index_intersection, |
435 function_intersection) | |
385 | 436 |
386 | 437 |
387 def MatchListToResultList(matches): | 438 def MatchListToResultList(matches): |
388 """Convert list of matches to the list of result objects. | 439 """Convert list of matches to the list of result objects. |
389 | 440 |
390 Args: | 441 Args: |
391 matches: A list of match objects along with its stack priority and revision | 442 matches: A list of match objects along with its stack priority and revision |
392 number/git hash | 443 number/git hash |
393 Returns: | 444 Returns: |
394 A list of result object. | 445 A list of result object. |
395 | 446 |
396 """ | 447 """ |
397 result_list = [] | 448 result_list = [] |
398 | 449 |
399 for _, cl, match in matches: | 450 for _, cl, match in matches: |
400 suspected_cl = cl | 451 suspected_cl = cl |
401 revision_url = match.revision_url | 452 revision_url = match.revision_url |
402 component_name = match.component_name | 453 component_name = match.component_name |
403 author = match.author | 454 author = match.author |
404 reason = match.reason | 455 reason = match.reason |
405 review_url = match.review_url | 456 review_url = match.review_url |
406 reviewers = match.reviewers | 457 reviewers = match.reviewers |
407 # For matches, line content does not exist. | 458 # For matches, line content does not exist. |
408 line_content = None | 459 line_content = None |
460 message = match.message | |
409 | 461 |
410 result = Result(suspected_cl, revision_url, component_name, author, reason, | 462 result = Result(suspected_cl, revision_url, component_name, author, reason, |
411 review_url, reviewers, line_content) | 463 review_url, reviewers, line_content, message) |
412 result_list.append(result) | 464 result_list.append(result) |
413 | 465 |
414 return result_list | 466 return result_list |
415 | 467 |
416 | 468 |
417 def BlameListToResultList(blame_list): | 469 def BlameListToResultList(blame_list): |
418 """Convert blame list to the list of result objects. | 470 """Convert blame list to the list of result objects. |
419 | 471 |
420 Args: | 472 Args: |
421 blame_list: A list of blame objects. | 473 blame_list: A list of blame objects. |
422 | 474 |
423 Returns: | 475 Returns: |
424 A list of result objects. | 476 A list of result objects. |
425 """ | 477 """ |
426 result_list = [] | 478 result_list = [] |
427 | 479 |
428 for blame in blame_list: | 480 for blame in blame_list: |
429 suspected_cl = blame.revision | 481 suspected_cl = blame.revision |
430 revision_url = blame.url | 482 revision_url = blame.url |
431 component_name = blame.component_name | 483 component_name = blame.component_name |
432 author = blame.author | 484 author = blame.author |
433 reason = ( | 485 reason = ( |
434 'The CL changes line %s of file %s from stack %d.' % | 486 'The CL last changed line %s of file %s, which is stack frame %d.' % |
435 (blame.line_number, blame.file, blame.stack_frame_index)) | 487 (blame.line_number, blame.file, blame.stack_frame_index)) |
436 # Blame object does not have review url and reviewers. | 488 # Blame object does not have review url and reviewers. |
437 review_url = None | 489 review_url = None |
438 reviewers = None | 490 reviewers = None |
439 line_content = blame.line_content | 491 line_content = blame.line_content |
492 message = blame.message | |
440 | 493 |
441 result = Result(suspected_cl, revision_url, component_name, author, reason, | 494 result = Result(suspected_cl, revision_url, component_name, author, reason, |
442 review_url, reviewers, line_content) | 495 review_url, reviewers, line_content, message) |
443 result_list.append(result) | 496 result_list.append(result) |
444 | 497 |
445 return result_list | 498 return result_list |
OLD | NEW |