Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(13)

Side by Side Diff: tools/findit/crash_utils.py

Issue 478763003: [Findit] Bug fixing and implemented some feature requests. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: fixed a bug in intersection Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright (c) 2014 The Chromium Authors. All rights reserved. 1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import cgi 5 import cgi
6 import ConfigParser 6 import ConfigParser
7 import json 7 import json
8 import logging 8 import logging
9 import os 9 import os
10 import time 10 import time
11 import urllib2 11 import urllib2
12 12
13 from common import utils
14 from result import Result 13 from result import Result
15 14
16 15
17 INFINITY = float('inf') 16 INFINITY = float('inf')
18 17
19 18
20 def ParseURLsFromConfig(file_name): 19 def ParseURLsFromConfig(file_name):
21 """Parses URLS from the config file. 20 """Parses URLS from the config file.
22 21
23 The file should be in python config format, where svn section is in the 22 The file should be in python config format, where svn section is in the
(...skipping 14 matching lines...) Expand all
38 # Get the absolute path of the config file, and read the file. If it fails, 37 # Get the absolute path of the config file, and read the file. If it fails,
39 # return none. 38 # return none.
40 config_file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 39 config_file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
41 file_name) 40 file_name)
42 config.read(config_file_path) 41 config.read(config_file_path)
43 if not config: 42 if not config:
44 logging.error('Config file with URLs does not exist.') 43 logging.error('Config file with URLs does not exist.')
45 return None 44 return None
46 45
47 # Iterate through the config file, check for sections. 46 # Iterate through the config file, check for sections.
48 parsed_config = {} 47 config_dict = {}
49 for section in config.sections(): 48 for section in config.sections():
50 # These two do not need another layer of dictionary, so add it and go 49 # These two do not need another layer of dictionary, so add it and go
51 # to next section. 50 # to next section.
52 if ':' not in section: 51 if ':' not in section:
53 for option in config.options(section): 52 for option in config.options(section):
54 if section not in parsed_config: 53 if section not in config_dict:
55 parsed_config[section] = {} 54 config_dict[section] = {}
56 55
57 url = config.get(section, option) 56 url = config.get(section, option)
58 parsed_config[section][option] = url 57 config_dict[section][option] = url
59 58
60 continue 59 continue
61 60
62 # Get repository type and component name from the section name. 61 # Get repository type and component name from the section name.
63 repository_type_and_component = section.split(':') 62 repository_type_and_component = section.split(':')
64 repository_type = repository_type_and_component[0] 63 repository_type = repository_type_and_component[0]
65 component_path = repository_type_and_component[1] 64 component_path = repository_type_and_component[1]
66 65
67 # Add 'svn' as the key, if it is not already there. 66 # Add 'svn' as the key, if it is not already there.
68 if repository_type not in parsed_config: 67 if repository_type not in config_dict:
69 parsed_config[repository_type] = {} 68 config_dict[repository_type] = {}
70 url_map_for_repository = parsed_config[repository_type] 69 url_map_for_repository = config_dict[repository_type]
71 70
72 # Add the path to the 'svn', if it is not already there. 71 # Add the path to the 'svn', if it is not already there.
73 if component_path not in url_map_for_repository: 72 if component_path not in url_map_for_repository:
74 url_map_for_repository[component_path] = {} 73 url_map_for_repository[component_path] = {}
75 type_to_url = url_map_for_repository[component_path] 74 type_to_url = url_map_for_repository[component_path]
76 75
77 # Add all URLs to this map. 76 # Add all URLs to this map.
78 for option in config.options(section): 77 for option in config.options(section):
79 url = config.get(section, option) 78 url = config.get(section, option)
80 type_to_url[option] = url 79 type_to_url[option] = url
81 80
82 return parsed_config 81 return config_dict
83 82
84 83
85 def NormalizePathLinux(path, parsed_deps): 84 def NormalizePath(path, parsed_deps):
86 """Normalizes linux path. 85 """Normalizes the path.
87 86
88 Args: 87 Args:
89 path: A string representing a path. 88 path: A string representing a path.
90 parsed_deps: A map from component path to its component name, repository, 89 parsed_deps: A map from component path to its component name, repository,
91 etc. 90 etc.
92 91
93 Returns: 92 Returns:
 94 A tuple containing a component this path is in (e.g. blink, skia, etc.) 93 A tuple containing a component this path is in (e.g. blink, skia, etc.)
95 and a path in that component's repository. 94 and a path in that component's repository. Returns None if the component
stgao 2014/08/22 06:50:53 It seems googlecode is not checked.
jeun 2014/08/22 22:58:43 Even if we don't check it here, it will fail later
 95 repository is not supported, i.e. from googlecode.
96 """ 96 """
 97 # First normalize the path by retrieving the absolute path. 97 # First normalize the path by retrieving the absolute path.
98 normalized_path = os.path.abspath(path) 98 normalized_path = os.path.normpath(path.replace('\\','/'))
stgao 2014/08/22 06:50:53 Please also update the comment.
jeun 2014/08/22 22:58:43 Done.
99 99
100 # Iterate through all component paths in the parsed DEPS, in the decreasing 100 # Iterate through all component paths in the parsed DEPS, in the decreasing
101 # order of the length of the file path. 101 # order of the length of the file path.
102 for component_path in sorted(parsed_deps, 102 for component_path in sorted(parsed_deps,
103 key=(lambda path: -len(path))): 103 key=(lambda path: -len(path))):
104 # New_path is the component path with 'src/' removed. 104 # new_component_path is the component path with 'src/' removed.
105 new_path = component_path 105 new_component_path = component_path
106 if new_path.startswith('src/') and new_path != 'src/': 106 if new_component_path.startswith('src/') and new_component_path != 'src/':
107 new_path = new_path[len('src/'):] 107 new_component_path = new_component_path[len('src/'):]
108
109 # We need to consider when the lowercased component path is in the path,
110 # because syzyasan build returns lowercased file path.
111 lower_component_path = new_component_path.lower()
108 112
109 # If this path is the part of file path, this file must be from this 113 # If this path is the part of file path, this file must be from this
110 # component. 114 # component.
111 if new_path in normalized_path: 115 if new_component_path in normalized_path or \
116 lower_component_path in normalized_path:
112 117
 113 # Currently does not support googlecode. 118 # Case when the retrieved path is in lowercase.
114 if 'googlecode' in parsed_deps[component_path]['repository']: 119 if lower_component_path in normalized_path:
115 return (None, '', '') 120 current_component_path = lower_component_path
121 else:
122 current_component_path = new_component_path
116 123
117 # Normalize the path by stripping everything off the component's relative 124 # Normalize the path by stripping everything off the component's relative
118 # path. 125 # path.
119 normalized_path = normalized_path.split(new_path,1)[1] 126 normalized_path = normalized_path.split(current_component_path, 1)[1]
127 lower_normalized_path = normalized_path.lower()
120 128
121 # Add 'src/' or 'Source/' at the front of the normalized path, depending 129 # Add 'src/' or 'Source/' at the front of the normalized path, depending
122 # on what prefix the component path uses. For example, blink uses 130 # on what prefix the component path uses. For example, blink uses
123 # 'Source' but chromium uses 'src/', and blink component path is 131 # 'Source' but chromium uses 'src/', and blink component path is
124 # 'src/third_party/WebKit/Source', so add 'Source/' in front of the 132 # 'src/third_party/WebKit/Source', so add 'Source/' in front of the
125 # normalized path. 133 # normalized path.
126 if not (normalized_path.startswith('src/') or 134 if not (lower_normalized_path.startswith('src/') or
127 normalized_path.startswith('Source/')): 135 lower_normalized_path.startswith('source/')):
128 136
129 if (new_path.lower().endswith('src/') or 137 if (lower_component_path.endswith('src/') or
130 new_path.lower().endswith('source/')): 138 lower_component_path.endswith('source/')):
131 normalized_path = new_path.split('/')[-2] + '/' + normalized_path 139 normalized_path = (current_component_path.split('/')[-2] + '/' +
140 normalized_path)
132 141
133 else: 142 else:
134 normalized_path = 'src/' + normalized_path 143 normalized_path = 'src/' + normalized_path
135 144
136 component_name = parsed_deps[component_path]['name'] 145 component_name = parsed_deps[component_path]['name']
137 146
138 return (component_path, component_name, normalized_path) 147 return (component_path, component_name, normalized_path)
139 148
140 # If the path does not match any component, default to chromium. 149 # If the path does not match any component, default to chromium.
141 return ('src/', 'chromium', normalized_path) 150 return ('src/', 'chromium', normalized_path)
(...skipping 11 matching lines...) Expand all
153 """ 162 """
154 if not regression: 163 if not regression:
155 return None 164 return None
156 165
157 revisions = regression.split(':') 166 revisions = regression.split(':')
158 167
159 # If regression information is not available, return none. 168 # If regression information is not available, return none.
160 if len(revisions) != 2: 169 if len(revisions) != 2:
161 return None 170 return None
162 171
163 # Strip 'r' from both start and end range. 172 range_start = revisions[0]
164 range_start = revisions[0].lstrip('r') 173 range_end = revisions[1]
165 range_end = revisions[1].lstrip('r') 174
175 # Strip 'r' off the range start/end. Not using lstrip to avoid the case when
Martin Barbella 2014/08/22 02:24:14 Maybe it would be best for us not to include the '
jeun 2014/08/22 22:58:43 Done.
176 # the range is in git hash and it starts with 'r'.
177 if range_start.startswith('r'):
178 range_start = range_start[1:]
179
180 if range_end.startswith('r'):
181 range_end = range_end[1:]
166 182
167 return [range_start, range_end] 183 return [range_start, range_end]
168 184
169 185
170 def LoadJSON(json_string): 186 def LoadJSON(json_string):
171 """Loads json object from string, or None. 187 """Loads json object from string, or None.
172 188
173 Args: 189 Args:
174 json_string: A string to get object from. 190 json_string: A string to get object from.
175 191
(...skipping 13 matching lines...) Expand all
189 205
190 Args: 206 Args:
191 url: URL to get data from. 207 url: URL to get data from.
192 retries: Number of times to retry connection. 208 retries: Number of times to retry connection.
193 sleep_time: Time in seconds to wait before retrying connection. 209 sleep_time: Time in seconds to wait before retrying connection.
194 timeout: Time in seconds to wait before time out. 210 timeout: Time in seconds to wait before time out.
195 211
196 Returns: 212 Returns:
197 None if the data retrieval fails, or the raw data. 213 None if the data retrieval fails, or the raw data.
198 """ 214 """
199 count = 0 215 data = None
200 while True: 216 for i in range(retries):
201 count += 1
202 # Retrieves data from URL. 217 # Retrieves data from URL.
203 try: 218 try:
204 _, data = utils.GetHttpClient().Get(url) 219 data = urllib2.urlopen(url, timeout=timeout)
stgao 2014/08/22 06:50:53 Is there a specific reason to revert my change to
jeun 2014/08/22 22:58:43 Done.
205 return data 220
221 # If retrieval is successful, return the data.
222 if data:
223 return data.read()
224
225 # If retrieval fails, try after sleep_time second.
226 except urllib2.URLError:
227 time.sleep(sleep_time)
228 continue
206 except IOError: 229 except IOError:
207 if count < retries: 230 time.sleep(sleep_time)
208 # If retrieval fails, try after sleep_time second. 231 continue
209 time.sleep(sleep_time)
210 else:
211 break
212 232
213 # Return None if it fails to read data from URL 'retries' times. 233 # Return None if it fails to read data from URL 'retries' times.
214 return None 234 return None
215 235
216 236
217 def FindMinLineDistance(crashed_line_list, changed_line_numbers): 237 def FindMinLineDistance(crashed_line_list, changed_line_numbers):
218 """Calculates how far the changed line is from one of the crashes. 238 """Calculates how far the changed line is from one of the crashes.
219 239
220 Finds the minimum distance between the lines that the file crashed on 240 Finds the minimum distance between the lines that the file crashed on
221 and the lines that the file changed. For example, if the file crashed on 241 and the lines that the file changed. For example, if the file crashed on
222 line 200 and the CL changes line 203,204 and 205, the function returns 3. 242 line 200 and the CL changes line 203,204 and 205, the function returns 3.
223 243
224 Args: 244 Args:
225 crashed_line_list: A list of lines that the file crashed on. 245 crashed_line_list: A list of lines that the file crashed on.
226 changed_line_numbers: A list of lines that the file changed. 246 changed_line_numbers: A list of lines that the file changed.
227 247
228 Returns: 248 Returns:
229 The minimum distance. If either of the input lists is empty, 249 The minimum distance. If either of the input lists is empty,
230 it returns inf. 250 it returns inf.
231 251
232 """ 252 """
233 min_distance = INFINITY 253 min_distance = INFINITY
234 254
235 for line in crashed_line_list: 255 crashed_line_numbers = [crashed_line
256 for crashed_line_range in crashed_line_list
257 for crashed_line in crashed_line_range]
258 for line in crashed_line_numbers:
236 for distance in changed_line_numbers: 259 for distance in changed_line_numbers:
237 # Find the current distance and update the min if current distance is 260 # Find the current distance and update the min if current distance is
238 # less than current min. 261 # less than current min.
239 current_distance = abs(line - distance) 262 current_distance = abs(line - distance)
240 if current_distance < min_distance: 263 if current_distance < min_distance:
241 min_distance = current_distance 264 min_distance = current_distance
242 265
243 return min_distance 266 return min_distance
244 267
245 268
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
342 lines that the file changes. The intersection looks within 3 lines 365 lines that the file changes. The intersection looks within 3 lines
343 of the line that caused the crash. 366 of the line that caused the crash.
344 367
345 Args: 368 Args:
346 crashed_line_list: A list of lines that the file crashed on. 369 crashed_line_list: A list of lines that the file crashed on.
347 stack_frame_index: A list of positions in stack for each of the lines. 370 stack_frame_index: A list of positions in stack for each of the lines.
348 changed_line_numbers: A list of lines that the file changed. 371 changed_line_numbers: A list of lines that the file changed.
349 line_range: Number of lines to look backwards from crashed lines. 372 line_range: Number of lines to look backwards from crashed lines.
350 373
351 Returns: 374 Returns:
352 line_intersection: Intersection between crashed_line_list and 375 line_number_intersection: Intersection between crashed_line_list and
353 changed_line_numbers. 376 changed_line_numbers.
354 stack_frame_index_intersection: Stack number for each of the intersections. 377 stack_frame_index_intersection: Stack number for each of the intersections.
355 """ 378 """
356 line_intersection = [] 379 line_number_intersection = []
357 stack_frame_index_intersection = [] 380 stack_frame_index_intersection = []
358 381
 359 # Iterate through the crashed lines, and its occurrence in stack. 382 # Iterate through the crashed lines, and its occurrence in stack.
360 for (line, stack_frame_index) in zip(crashed_line_list, stack_frame_index): 383 for (lines, stack_frame_index) in zip(crashed_line_list, stack_frame_index):
361 # Also check previous 'line_range' lines. 384 # Also check previous 'line_range' lines. Create a set of all changed lines
362 line_minus_n = range(line - line_range, line + 1) 385 # and lines within 3 lines range before the crashed line.
386 line_minus_n = set()
387 for line in lines:
388 for line_in_range in range(line - line_range, line + 1):
389 line_minus_n.add(line_in_range)
363 390
364 for changed_line in changed_line_numbers: 391 for changed_line in changed_line_numbers:
 365 # If a CL does not change crashed line, check next line. 392 # If a CL does not change crashed line, check next line.
366 if changed_line not in line_minus_n: 393 if changed_line not in line_minus_n:
367 continue 394 continue
368 395
396 intersected_line = set()
369 # If the changed line is exactly the crashed line, add that line. 397 # If the changed line is exactly the crashed line, add that line.
370 if line in changed_line_numbers: 398 for line in lines:
371 intersected_line = line 399 if line in changed_line_numbers:
400 intersected_line.add(line)
372 401
373 # If the changed line is in 3 lines of the crashed line, add the line. 402 # If the changed line is in 3 lines of the crashed line, add the line.
374 else: 403 else:
375 intersected_line = changed_line 404 intersected_line.add(changed_line)
376 405
377 # Avoid adding the same line twice. 406 # Avoid adding the same line twice.
378 if intersected_line not in line_intersection: 407 if intersected_line not in line_number_intersection:
379 line_intersection.append(intersected_line) 408 line_number_intersection.append(list(intersected_line))
380 stack_frame_index_intersection.append(stack_frame_index) 409 stack_frame_index_intersection.append(stack_frame_index)
381 410
382 break 411 break
383 412
384 return (line_intersection, stack_frame_index_intersection) 413 return (line_number_intersection, stack_frame_index_intersection)
385 414
386 415
387 def MatchListToResultList(matches): 416 def MatchListToResultList(matches):
388 """Convert list of matches to the list of result objects. 417 """Convert list of matches to the list of result objects.
389 418
390 Args: 419 Args:
391 matches: A list of match objects along with its stack priority and revision 420 matches: A list of match objects along with its stack priority and revision
392 number/git hash 421 number/git hash
393 Returns: 422 Returns:
394 A list of result object. 423 A list of result object.
395 424
396 """ 425 """
397 result_list = [] 426 result_list = []
398 427
399 for _, cl, match in matches: 428 for _, cl, match in matches:
400 suspected_cl = cl 429 suspected_cl = cl
401 revision_url = match.revision_url 430 revision_url = match.revision_url
402 component_name = match.component_name 431 component_name = match.component_name
403 author = match.author 432 author = match.author
404 reason = match.reason 433 reason = match.reason
405 review_url = match.review_url 434 review_url = match.review_url
406 reviewers = match.reviewers 435 reviewers = match.reviewers
407 # For matches, line content do not exist. 436 # For matches, line content do not exist.
408 line_content = None 437 line_content = None
438 message = match.message
409 439
410 result = Result(suspected_cl, revision_url, component_name, author, reason, 440 result = Result(suspected_cl, revision_url, component_name, author, reason,
411 review_url, reviewers, line_content) 441 review_url, reviewers, line_content, message)
412 result_list.append(result) 442 result_list.append(result)
413 443
414 return result_list 444 return result_list
415 445
416 446
417 def BlameListToResultList(blame_list): 447 def BlameListToResultList(blame_list):
418 """Convert blame list to the list of result objects. 448 """Convert blame list to the list of result objects.
419 449
420 Args: 450 Args:
421 blame_list: A list of blame objects. 451 blame_list: A list of blame objects.
422 452
423 Returns: 453 Returns:
424 A list of result objects. 454 A list of result objects.
425 """ 455 """
426 result_list = [] 456 result_list = []
427 457
428 for blame in blame_list: 458 for blame in blame_list:
429 suspected_cl = blame.revision 459 suspected_cl = blame.revision
430 revision_url = blame.url 460 revision_url = blame.url
431 component_name = blame.component_name 461 component_name = blame.component_name
432 author = blame.author 462 author = blame.author
433 reason = ( 463 reason = (
434 'The CL changes line %s of file %s from stack %d.' % 464 'The CL changes line %s of file %s, which is stack frame index %d.' %
Martin Barbella 2014/08/22 02:24:14 Nit: remove "index".
jeun 2014/08/22 22:58:43 Done.
435 (blame.line_number, blame.file, blame.stack_frame_index)) 465 (blame.line_number, blame.file, blame.stack_frame_index))
436 # Blame object does not have review url and reviewers. 466 # Blame object does not have review url and reviewers.
437 review_url = None 467 review_url = None
438 reviewers = None 468 reviewers = None
439 line_content = blame.line_content 469 line_content = blame.line_content
470 message = blame.message
440 471
441 result = Result(suspected_cl, revision_url, component_name, author, reason, 472 result = Result(suspected_cl, revision_url, component_name, author, reason,
442 review_url, reviewers, line_content) 473 review_url, reviewers, line_content, message)
443 result_list.append(result) 474 result_list.append(result)
444 475
445 return result_list 476 return result_list
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698