| OLD | NEW |
| 1 # Copyright 2014 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2014 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import base64 | 5 import base64 |
| 6 import logging | |
| 7 import os | |
| 8 import xml.dom.minidom as minidom | 6 import xml.dom.minidom as minidom |
| 9 from xml.parsers.expat import ExpatError | 7 from xml.parsers.expat import ExpatError |
| 10 | 8 |
| 11 import crash_utils | 9 import crash_utils |
| 12 from repository_parser_interface import ParserInterface | 10 from repository_parser_interface import ParserInterface |
| 13 | 11 |
| 12 FILE_CHANGE_TYPE_MAP = { |
| 13 'add': 'A', |
| 14 'delete': 'D', |
| 15 'modify': 'M' |
| 16 } |
| 17 |
| 14 | 18 |
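The new FILE_CHANGE_TYPE_MAP replaces two copies of the if/elif normalization chain further down. A minimal sketch of the lookup follows; the .get() fallback is a hypothetical hardening, not part of this CL, since direct indexing raises KeyError on any change type outside add/delete/modify:

```python
FILE_CHANGE_TYPE_MAP = {
    'add': 'A',
    'delete': 'D',
    'modify': 'M'
}

def normalize_change_type(change_type):
    # Fall back to the raw type rather than raising KeyError on an
    # unmapped value (hypothetical; the CL indexes the dict directly).
    return FILE_CHANGE_TYPE_MAP.get(change_type, change_type)

assert normalize_change_type('add') == 'A'
assert normalize_change_type('rename') == 'rename'
```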
| 15 class GitParser(ParserInterface): | 19 class GitParser(ParserInterface): |
| 16 """Parser for Git repository in googlesource. | 20 """Parser for Git repository in googlesource. |
| 17 | 21 |
| 18 Attributes: | 22 Attributes: |
| 19 parsed_deps: A map from component path to its repository name, regression, | 23 parsed_deps: A map from component path to its repository name, regression, |
| 20 etc. | 24 etc. |
| 21 url_parts_map: A map from url type to its url parts. These parts are | 25 url_parts_map: A map from url type to its url parts. These parts are |
| 22 appended to the base url to form different urls. | 26 appended to the base url to form different urls. |
| 23 """ | 27 """ |
| 24 | 28 |
| 25 def __init__(self, parsed_deps, url_parts_map): | 29 def __init__(self, parsed_deps, url_parts_map): |
| 26 self.component_to_url_map = parsed_deps | 30 self.component_to_url_map = parsed_deps |
| 27 self.url_parts_map = url_parts_map | 31 self.url_parts_map = url_parts_map |
| 28 | 32 |
| 29 def ParseChangelog(self, component_path, range_start, range_end): | 33 def ParseChangelog(self, component_path, range_start, range_end): |
| 30 file_to_revision_map = {} | 34 file_to_revision_map = {} |
| 31 revision_map = {} | 35 revision_map = {} |
| 32 base_url = self.component_to_url_map[component_path]['repository'] | 36 base_url = self.component_to_url_map[component_path]['repository'] |
| 33 changelog_url = base_url + self.url_parts_map['changelog_url'] | 37 changelog_url = base_url + self.url_parts_map['changelog_url'] |
| 34 revision_url = base_url + self.url_parts_map['revision_url'] | 38 revision_url = base_url + self.url_parts_map['revision_url'] |
| 35 | 39 |
| 36 # Retrieve data from the url, return empty maps if it fails. Html url is | 40 # Retrieve data from the url, return empty maps if it fails. Html url is |
| 37 # a url where the changelog can be parsed from html. | 41 # a url where the changelog can be parsed from html. |
| 38 url = changelog_url % (range_start, range_end) | 42 url = changelog_url % (range_start, range_end) |
| 39 html_url = url + '?pretty=fuller' | 43 html_url = url + '?pretty=fuller' |
| 40 response = crash_utils.GetDataFromURL(html_url) | 44 response = crash_utils.GetDataFromURL(html_url) |
| 41 if not response: | 45 if not response: |
| 42 logging.error('Failed to retrieve changelog from %s', html_url) | |
| 43 return (revision_map, file_to_revision_map) | 46 return (revision_map, file_to_revision_map) |
| 44 | 47 |
| 45 # Parse xml out of the returned string. If it fails, return empty map. | 48 # Parse xml out of the returned string. If it fails, try parsing |
| 49 # from JSON objects. |
| 46 try: | 50 try: |
| 47 dom = minidom.parseString(response) | 51 dom = minidom.parseString(response) |
| 48 except ExpatError: | 52 except ExpatError: |
| 49 logging.error('Failed to parse changelog from %s', url) | 53 self.ParseChangelogFromJSON(range_start, range_end, changelog_url, |
| 54 revision_url, revision_map, |
| 55 file_to_revision_map) |
| 50 return (revision_map, file_to_revision_map) | 56 return (revision_map, file_to_revision_map) |
| 51 | 57 |
| 52 # The revision information is in the divs from the third to the | 58 # The revision information is in the divs from the third to the |
| 53 # second to last one. | 59 # second to last one. |
| 54 divs = dom.getElementsByTagName('div')[2:-1] | 60 divs = dom.getElementsByTagName('div')[2:-1] |
| 55 pres = dom.getElementsByTagName('pre') | 61 pres = dom.getElementsByTagName('pre') |
| 56 uls = dom.getElementsByTagName('ul') | 62 uls = dom.getElementsByTagName('ul') |
| 57 | 63 |
| 58 # Divs, pres and uls each contain revision information for one CL, so | 64 # Divs, pres and uls each contain revision information for one CL, so |
| 59 # they should have the same length. | 65 # they should have the same length. |
| (...skipping 26 matching lines...) |
| 86 | 92 |
| 87 # Set url of this CL. | 93 # Set url of this CL. |
| 88 revision_url_part = self.url_parts_map['revision_url'] % githash | 94 revision_url_part = self.url_parts_map['revision_url'] % githash |
| 89 revision['url'] = base_url + revision_url_part | 95 revision['url'] = base_url + revision_url_part |
| 90 | 96 |
| 91 # Go through changed files, they are in li. | 97 # Go through changed files, they are in li. |
| 92 lis = ul.getElementsByTagName('li') | 98 lis = ul.getElementsByTagName('li') |
| 93 for li in lis: | 99 for li in lis: |
| 94 # Retrieve path and action of the changed file | 100 # Retrieve path and action of the changed file |
| 95 file_path = li.getElementsByTagName('a')[0].firstChild.nodeValue | 101 file_path = li.getElementsByTagName('a')[0].firstChild.nodeValue |
| 96 file_action = li.getElementsByTagName('span')[0].getAttribute('class') | 102 file_change_type = li.getElementsByTagName('span')[ |
| 103 0].getAttribute('class') |
| 97 | 104 |
| 98 # Normalize file action so that it is the same as the SVN parser's. | 105 # Normalize file action so that it is the same as the SVN parser's. |
| 99 if file_action == 'add': | 106 file_change_type = FILE_CHANGE_TYPE_MAP[file_change_type] |
| 100 file_action = 'A' | |
| 101 elif file_action == 'delete': | |
| 102 file_action = 'D' | |
| 103 elif file_action == 'modify': | |
| 104 file_action = 'M' | |
| 105 | 107 |
| 106 # Add the changed file to the map. | 108 # Add the changed file to the map. |
| 107 changed_file = os.path.basename(file_path) | 109 if file_path not in file_to_revision_map: |
| 108 if changed_file not in file_to_revision_map: | 110 file_to_revision_map[file_path] = [] |
| 109 file_to_revision_map[changed_file] = [] | 111 file_to_revision_map[file_path].append((githash, file_change_type)) |
| 110 file_to_revision_map[changed_file].append((githash, file_action, | |
| 111 file_path)) | |
| 112 | 112 |
| 113 # Add this revision object to the map. | 113 # Add this revision object to the map. |
| 114 revision_map[githash] = revision | 114 revision_map[githash] = revision |
| 115 | 115 |
| 116 # Parse one revision for the start range, because googlesource does not | 116 # Parse one revision for the start range, because googlesource does not |
| 117 # include the start of the range. | 117 # include the start of the range. |
| 118 self.ParseRevision(revision_url, range_start, revision_map, | 118 self.ParseRevision(revision_url, range_start, revision_map, |
| 119 file_to_revision_map) | 119 file_to_revision_map) |
| 120 | 120 |
| 121 return (revision_map, file_to_revision_map) | 121 return (revision_map, file_to_revision_map) |
| 122 | 122 |
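To make the URL templating in ParseChangelog concrete, here is a hedged usage sketch. The shapes of parsed_deps and url_parts_map are inferred from how the method indexes them; the gitiles path templates and the repository URL are illustrative assumptions, not values from this CL:

```python
# Assumed input shapes, inferred from the lookups in ParseChangelog.
parsed_deps = {
    'src/': {'repository': 'https://chromium.googlesource.com/chromium/src'},
}
url_parts_map = {
    'changelog_url': '/+log/%s..%s',  # filled with (range_start, range_end)
    'revision_url': '/+/%s',          # filled with a githash
    'diff_url': '/+/%s/%s',           # filled with (githash, file path)
    'blame_url': '/+blame/%s/%s',     # filled with (revision, file path)
}

parser = GitParser(parsed_deps, url_parts_map)
revision_map, file_to_revision_map = parser.ParseChangelog(
    'src/', 'deadbeef', 'cafef00d')
```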
| 123 def ParseChangelogFromJSON(self, range_start, range_end, changelog_url, | 123 def ParseChangelogFromJSON(self, range_start, range_end, changelog_url, |
| 124 revision_url, revision_map, file_to_revision_map): | 124 revision_url, revision_map, file_to_revision_map): |
| 125 """Parses changelog by going over the JSON file. | 125 """Parses changelog by going over the JSON file. |
| 126 | 126 |
| 127 Args: | 127 Args: |
| 128 range_start: Starting range of the regression. | 128 range_start: Starting range of the regression. |
| 129 range_end: Ending range of the regression. | 129 range_end: Ending range of the regression. |
| 130 changelog_url: The url to retrieve changelog from. | 130 changelog_url: The url to retrieve changelog from. |
| 131 revision_url: The url to retrieve individual revision from. | 131 revision_url: The url to retrieve individual revision from. |
| 132 revision_map: A map from a git hash number to its revision information. | 132 revision_map: A map from a git hash number to its revision information. |
| 133 file_to_revision_map: A map from file to a git hash in which it occurs. | 133 file_to_revision_map: A map from file to a git hash in which it occurs. |
| 134 """ | 134 """ |
| 135 # Compute URLs from the given range and retrieve the changelog. Stop if it fails. | 135 # Compute URLs from the given range and retrieve the changelog. Stop if it fails. |
| 136 changelog_url %= (range_start, range_end) | 136 changelog_url %= (range_start, range_end) |
| 137 json_url = changelog_url + '?format=json' | 137 json_url = changelog_url + '?format=json' |
| 138 response = crash_utils.GetDataFromURL(json_url) | 138 response = crash_utils.GetDataFromURL(json_url) |
| 139 if not response: | 139 if not response: |
| 140 logging.error('Failed to retrieve changelog from %s.', json_url) | |
| 141 return | 140 return |
| 142 | 141 |
| 143 # Parse changelog from the returned object. The returned string should | 142 # Parse changelog from the returned object. The returned string should |
| 144 # start with ")]}'\n", so start from the 6th character. | 143 # start with ")]}'\n", so start from the 6th character. |
| 145 revisions = crash_utils.LoadJSON(response[5:]) | 144 revisions = crash_utils.LoadJSON(response[5:]) |
| 146 if not revisions: | 145 if not revisions: |
| 147 logging.error('Failed to parse changelog from %s.', json_url) | |
| 148 return | 146 return |
| 149 | 147 |
| 150 # Parse individual revision in the log. | 148 # Parse individual revision in the log. |
| 151 for revision in revisions['log']: | 149 for revision in revisions['log']: |
| 152 githash = revision['commit'] | 150 githash = revision['commit'] |
| 153 self.ParseRevision(revision_url, githash, revision_map, | 151 self.ParseRevision(revision_url, githash, revision_map, |
| 154 file_to_revision_map) | 152 file_to_revision_map) |
| 155 | 153 |
| 156 # Parse the revision with range_start, because googlesource ignores | 154 # Parse the revision with range_start, because googlesource ignores |
| 157 # that one. | 155 # that one. |
| 158 self.ParseRevision(revision_url, range_start, revision_map, | 156 self.ParseRevision(revision_url, range_start, revision_map, |
| 159 file_to_revision_map) | 157 file_to_revision_map) |
| 160 | 158 |
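Both the HTML-fallback path and ParseChangelogFromJSON depend on stripping gitiles' anti-XSSI prefix before parsing, which is why the code slices response[5:]. crash_utils is not part of this CL, so the following stand-in for LoadJSON is an assumption about its behavior:

```python
import json

GITILES_XSSI_PREFIX = ")]}'\n"  # 5 characters, hence response[5:]

def load_gitiles_json(response):
    # Strip the anti-XSSI prefix if present, then parse. Returning None
    # on failure matches how callers test the result for falsiness.
    if response.startswith(GITILES_XSSI_PREFIX):
        response = response[len(GITILES_XSSI_PREFIX):]
    try:
        return json.loads(response)
    except ValueError:
        return None
```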
| 161 def ParseRevision(self, revision_url, githash, revision_map, | 159 def ParseRevision(self, revision_url, githash, revision_map, |
| 162 file_to_revision_map): | 160 file_to_revision_map): |
| 163 | 161 |
| 164 # Retrieve data from the URL, return if it fails. | 162 # Retrieve data from the URL, return if it fails. |
| 165 url = revision_url % githash | 163 url = revision_url % githash |
| 166 response = crash_utils.GetDataFromURL(url + '?format=json') | 164 response = crash_utils.GetDataFromURL(url + '?format=json') |
| 167 if not response: | 165 if not response: |
| 168 logging.warning('Failed to retrieve revision from %s.', url) | |
| 169 return | 166 return |
| 170 | 167 |
| 171 # Load JSON object from the string. If it fails, terminate the function. | 168 # Load JSON object from the string. If it fails, terminate the function. |
| 172 json_revision = crash_utils.LoadJSON(response[5:]) | 169 json_revision = crash_utils.LoadJSON(response[5:]) |
| 173 if not json_revision: | 170 if not json_revision: |
| 174 logging.warning('Failed to parse revision from %s.', url) | |
| 175 return | 171 return |
| 176 | 172 |
| 177 # Create a map for this revision and get its githash from the JSON object. | 173 # Create a map for this revision and get its githash from the JSON object. |
| 178 revision = {} | 174 revision = {} |
| 179 githash = json_revision['commit'] | 175 githash = json_revision['commit'] |
| 180 | 176 |
| 181 # Set author, message and URL of this CL. | 177 # Set author, message and URL of this CL. |
| 182 revision['author'] = json_revision['author']['name'] | 178 revision['author'] = json_revision['author']['name'] |
| 183 revision['message'] = json_revision['message'] | 179 revision['message'] = json_revision['message'] |
| 184 revision['url'] = url | 180 revision['url'] = url |
| 185 | 181 |
| 186 # Iterate through the changed files. | 182 # Iterate through the changed files. |
| 187 for diff in json_revision['tree_diff']: | 183 for diff in json_revision['tree_diff']: |
| 188 file_path = diff['new_path'] | 184 file_path = diff['new_path'] |
| 189 file_action = diff['type'] | 185 file_change_type = diff['type'] |
| 190 | 186 |
| 191 # Normalize file action so that it fits with svn_repository_parser. | 187 # Normalize file action so that it fits with svn_repository_parser. |
| 192 if file_action == 'add': | 188 file_change_type = FILE_CHANGE_TYPE_MAP[file_change_type] |
| 193 file_action = 'A' | |
| 194 elif file_action == 'delete': | |
| 195 file_action = 'D' | |
| 196 elif file_action == 'modify': | |
| 197 file_action = 'M' | |
| 198 | 189 |
| 199 # Add the file to the map. | 190 # Add the file to the map. |
| 200 changed_file = os.path.basename(file_path) | 191 if file_path not in file_to_revision_map: |
| 201 if changed_file not in file_to_revision_map: | 192 file_to_revision_map[file_path] = [] |
| 202 file_to_revision_map[changed_file] = [] | 193 file_to_revision_map[file_path].append((githash, file_change_type)) |
| 203 file_to_revision_map[changed_file].append( | |
| 204 (githash, file_action, file_path)) | |
| 205 | 194 |
| 206 # Add this CL to the map. | 195 # Add this CL to the map. |
| 207 revision_map[githash] = revision | 196 revision_map[githash] = revision |
| 208 | 197 |
| 209 return | 198 return |
| 210 | 199 |
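For reference, a trimmed illustration of the commit JSON that ParseRevision consumes. Only the fields the method reads are shown, and all values are made up:

```python
# Hypothetical gitiles commit JSON, reduced to the fields ParseRevision
# reads: 'type' feeds FILE_CHANGE_TYPE_MAP, and 'new_path' now keys the
# file_to_revision_map entries (full path, no longer the basename).
json_revision = {
    'commit': 'cafef00ddeadbeef',
    'author': {'name': 'someone@chromium.org'},
    'message': 'Fix use-after-free in Foo.\n\nBUG=123456',
    'tree_diff': [
        {'type': 'modify', 'new_path': 'content/browser/foo.cc'},
        {'type': 'add', 'new_path': 'content/browser/foo_unittest.cc'},
    ],
}
```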
| 211 def ParseLineDiff(self, path, component, file_action, githash): | 200 def ParseLineDiff(self, path, component, file_change_type, githash): |
| 212 changed_line_numbers = [] | 201 changed_line_numbers = [] |
| 213 changed_line_contents = [] | 202 changed_line_contents = [] |
| 214 base_url = self.component_to_url_map[component]['repository'] | 203 base_url = self.component_to_url_map[component]['repository'] |
| 215 backup_url = (base_url + self.url_parts_map['revision_url']) % githash | 204 backup_url = (base_url + self.url_parts_map['revision_url']) % githash |
| 216 | 205 |
| 217 # If the file is added (not modified), treat it as if it is not changed. | 206 # If the file is added (not modified), treat it as if it is not changed. |
| 218 if file_action == 'A': | 207 if file_change_type == 'A': |
| 219 return (backup_url, changed_line_numbers, changed_line_contents) | 208 return (backup_url, changed_line_numbers, changed_line_contents) |
| 220 | 209 |
| 221 # Retrieve the diff data from the URL; if it fails, return empty lines. | 210 # Retrieve the diff data from the URL; if it fails, return empty lines. |
| 222 url = (base_url + self.url_parts_map['diff_url']) % (githash, path) | 211 url = (base_url + self.url_parts_map['diff_url']) % (githash, path) |
| 223 data = crash_utils.GetDataFromURL(url + '?format=text') | 212 data = crash_utils.GetDataFromURL(url + '?format=text') |
| 224 if not data: | 213 if not data: |
| 225 logging.error('Failed to get diff from %s.', url) | |
| 226 return (backup_url, changed_line_numbers, changed_line_contents) | 214 return (backup_url, changed_line_numbers, changed_line_contents) |
| 227 | 215 |
| 228 # Decode the returned object into line diff info. | 216 # Decode the returned object into line diff info. |
| 229 diff = base64.b64decode(data).splitlines() | 217 diff = base64.b64decode(data).splitlines() |
| 230 | 218 |
| 231 # Iterate through the lines in the diff. Set current line to -1 so that we | 219 # Iterate through the lines in the diff. Set current line to -1 so that we |
| 232 # know whether the current line is part of a diff chunk. | 220 # know whether the current line is part of a diff chunk. |
| 233 current_line = -1 | 221 current_line = -1 |
| 234 for line in diff: | 222 for line in diff: |
| 235 line = line.strip() | 223 line = line.strip() |
| (...skipping 17 matching lines...) |
| 253 return (url, changed_line_numbers, changed_line_contents) | 241 return (url, changed_line_numbers, changed_line_contents) |
| 254 | 242 |
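The body of the diff-walking loop in ParseLineDiff is collapsed in this view. Since the method base64-decodes a unified diff and tracks current_line, the elided code presumably parses hunk headers and collects added lines; the following is a generic sketch of that technique, not the CL's actual loop:

```python
import re

# Matches '@@ -old_start,old_count +new_start,new_count @@'.
HUNK_HEADER = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@')

def parse_unified_diff(diff_lines):
    changed_line_numbers, changed_line_contents = [], []
    current_line = -1  # -1 means we are not inside a hunk yet
    for line in diff_lines:
        match = HUNK_HEADER.match(line)
        if match:
            # The hunk header carries the starting line in the new file.
            current_line = int(match.group(1))
        elif current_line != -1:
            if line.startswith('+'):
                changed_line_numbers.append(current_line)
                changed_line_contents.append(line[1:].strip())
            if not line.startswith('-'):
                # Context and added lines advance the new-file counter.
                current_line += 1
    return changed_line_numbers, changed_line_contents
```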
| 255 def ParseBlameInfo(self, component, file_path, line, revision): | 243 def ParseBlameInfo(self, component, file_path, line, revision): |
| 256 base_url = self.component_to_url_map[component]['repository'] | 244 base_url = self.component_to_url_map[component]['repository'] |
| 257 | 245 |
| 258 # Retrieve blame JSON file from googlesource. If it fails, return None. | 246 # Retrieve blame JSON file from googlesource. If it fails, return None. |
| 259 url_part = self.url_parts_map['blame_url'] % (revision, file_path) | 247 url_part = self.url_parts_map['blame_url'] % (revision, file_path) |
| 260 blame_url = base_url + url_part | 248 blame_url = base_url + url_part |
| 261 json_string = crash_utils.GetDataFromURL(blame_url) | 249 json_string = crash_utils.GetDataFromURL(blame_url) |
| 262 if not json_string: | 250 if not json_string: |
| 263 logging.error('Failed to retrieve annotation information from %s.', | |
| 264 blame_url) | |
| 265 return | 251 return |
| 266 | 252 |
| 267 # Parse JSON object from the string. The returned string should | 253 # Parse JSON object from the string. The returned string should |
| 268 # start with ")]}'\n", so start from the 6th character. | 254 # start with ")]}'\n", so start from the 6th character. |
| 269 annotation = crash_utils.LoadJSON(json_string[5:]) | 255 annotation = crash_utils.LoadJSON(json_string[5:]) |
| 270 if not annotation: | 256 if not annotation: |
| 271 logging.error('Failed to parse annotation information from %s.', | |
| 272 blame_url) | |
| 273 return | 257 return |
| 274 | 258 |
| 275 # Go through the regions; each region is a run of consecutive lines | 259 # Go through the regions; each region is a run of consecutive lines |
| 276 # with the same author/revision. | 260 # with the same author/revision. |
| 277 for blame_line in annotation['regions']: | 261 for blame_line in annotation['regions']: |
| 278 start = blame_line['start'] | 262 start = blame_line['start'] |
| 279 count = blame_line['count'] | 263 count = blame_line['count'] |
| 280 | 264 |
| 281 # For each region, check if the line we want blame info for falls in | 265 # For each region, check if the line we want blame info for falls in |
| 282 # this region. | 266 # this region. |
| 283 if start <= line and line <= start + count - 1: | 267 if start <= line and line <= start + count - 1: |
| 284 # If we are in the right region, get the information from the line. | 268 # If we are in the right region, get the information from the line. |
| 285 revision = blame_line['commit'] | 269 revision = blame_line['commit'] |
| 286 author = blame_line['author']['name'] | 270 author = blame_line['author']['name'] |
| 287 revision_url_parts = self.url_parts_map['revision_url'] % revision | 271 revision_url_parts = self.url_parts_map['revision_url'] % revision |
| 288 revision_url = base_url + revision_url_parts | 272 revision_url = base_url + revision_url_parts |
| 289 # TODO(jeun): Add a way to get content from JSON object. | 273 # TODO(jeun): Add a way to get content from JSON object. |
| 290 content = None | 274 content = None |
| 291 | 275 |
| 292 return (content, revision, author, revision_url) | 276 (revision_info, _) = self.ParseChangelog(component, revision, revision) |
| 277 message = revision_info[revision]['message'] |
| 278 return (content, revision, author, revision_url, message) |
| 293 | 279 |
| 294 # Return None if no region contains the line. | 280 # Return None if no region contains the line. |
| 295 return None | 281 return None |
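Finally, an illustration of the blame JSON regions that ParseBlameInfo walks. The structure is inferred from the fields the loop reads; values are made up. Note the inclusive bounds check in the code is equivalent to start <= line < start + count:

```python
# Hypothetical gitiles blame JSON, trimmed to the fields read above.
annotation = {
    'regions': [
        {'start': 1, 'count': 10,
         'commit': 'deadbeef', 'author': {'name': 'a@chromium.org'}},
        {'start': 11, 'count': 3,
         'commit': 'cafef00d', 'author': {'name': 'b@chromium.org'}},
    ],
}

line = 12
for region in annotation['regions']:
    if region['start'] <= line < region['start'] + region['count']:
        print(region['commit'], region['author']['name'])  # cafef00d, b@...
```

One review note on the NEW side: the added ParseChangelog(component, revision, revision) call fetches the commit message at the cost of an extra network round-trip per blame lookup, and callers of ParseBlameInfo now receive a 5-tuple instead of the old 4-tuple.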