| OLD | NEW |
| (Empty) |
| 1 # -*- test-case-name: buildbot.test.test_svnpoller -*- | |
| 2 | |
| 3 # Based on the work of Dave Peticolas for the P4poll | |
| 4 # Changed to svn (using xml.dom.minidom) by Niklaus Giger | |
| 5 # Hacked beyond recognition by Brian Warner | |
| 6 | |
| 7 from twisted.python import log | |
| 8 from twisted.internet import defer, reactor, utils | |
| 9 from twisted.internet.task import LoopingCall | |
| 10 | |
| 11 from buildbot import util | |
| 12 from buildbot.changes import base | |
| 13 from buildbot.changes.changes import Change | |
| 14 | |
| 15 import xml.dom.minidom | |
| 16 import urllib | |
| 17 | |
| 18 def _assert(condition, msg): | |
| 19 if condition: | |
| 20 return True | |
| 21 raise AssertionError(msg) | |
| 22 | |
def dbgMsg(myString):
    """Write `myString` to the Twisted log and return 1."""
    log.msg(myString)
    return 1
| 26 | |
| 27 # these split_file_* functions are available for use as values to the | |
| 28 # split_file= argument. | |
def split_file_alwaystrunk(path):
    """Treat every file as living on the trunk.

    Returns a (branch, filepath) tuple whose branch is always None and whose
    filepath is `path`, unchanged.
    """
    branch = None
    return (branch, path)
| 31 | |
def split_file_branches(path):
    """Split a repository-relative path for a trunk/ + branches/NAME/ layout.

    "trunk/subdir/file.c" becomes (None, "subdir/file.c") and
    "branches/1.5.x/subdir/file.c" becomes
    ("branches/1.5.x", "subdir/file.c"). Any other top-level directory
    yields None, which tells the caller to ignore the file.
    """
    parts = path.split('/')
    top = parts[0]
    if top == 'trunk':
        return (None, '/'.join(parts[1:]))
    if top == 'branches':
        branch = '/'.join(parts[:2])
        filepath = '/'.join(parts[2:])
        return (branch, filepath)
    return None
| 42 | |
| 43 | |
| 44 class SVNPoller(base.ChangeSource, util.ComparableMixin): | |
| 45 """This source will poll a Subversion repository for changes and submit | |
| 46 them to the change master.""" | |
| 47 | |
| 48 compare_attrs = ["svnurl", "split_file_function", | |
| 49 "svnuser", "svnpasswd", | |
| 50 "pollinterval", "histmax", | |
| 51 "svnbin", "category"] | |
| 52 | |
| 53 parent = None # filled in when we're added | |
| 54 last_change = None | |
| 55 loop = None | |
| 56 working = False | |
| 57 | |
def __init__(self, svnurl, split_file=None,
             svnuser=None, svnpasswd=None,
             pollinterval=10*60, histmax=100,
             svnbin='svn', revlinktmpl='', category=None):
    """
    Record the poller's configuration and build (but do not start) the
    LoopingCall that will run checksvn.

    @type  svnurl: string
    @param svnurl: the SVN URL that describes the repository and
                   subdirectory to watch. To follow a single branch,
                   point this at that branch, e.g.
                   svn://svn.twistedmatrix.com/svn/Twisted/trunk . To
                   follow several branches, point it at the directory
                   that contains all of them, e.g.
                   svn://svn.twistedmatrix.com/svn/Twisted , and also
                   supply a branch-determining function (split_file).
                   One trailing slash, if present, is removed.

                   Every file in the repository has a SVN URL of the
                   form (SVNURL)/(BRANCH)/(FILEPATH), where (BRANCH) may
                   or may not be empty, depending on the
                   branch-determining function. Only files that start
                   with (SVNURL)/(BRANCH) are monitored. The Change
                   objects sent to the Schedulers carry (FILEPATH) for
                   each modified file.

    @type  split_file: callable or None
    @param split_file: a function called with a string of the form
                       (BRANCH)/(FILEPATH) that returns a tuple
                       (BRANCH, FILEPATH). It should match the
                       repository's branch-naming policy. Each changed
                       file has a fully-qualified URL that splits into a
                       prefix (equal to the 'svnurl' argument) and a
                       suffix; the suffix is what split_file receives.

                       If the function returns None the file is ignored;
                       use this for files that are not part of this
                       project.

                       For a repository with the trunk in trunk/... and
                       branches in places like branches/1.5/..., the
                       function could look like this (it is available as
                       svnpoller.split_file_branches)::

                         pieces = path.split('/')
                         if pieces[0] == 'trunk':
                             return (None, '/'.join(pieces[1:]))
                         elif pieces[0] == 'branches':
                             return ('/'.join(pieces[0:2]),
                                     '/'.join(pieces[2:]))
                         else:
                             return None

                       If instead the trunk for ProjectA is in
                       trunk/ProjectA/... and its 1.5 branch is in
                       branches/1.5/ProjectA/..., the function could
                       look like::

                         pieces = path.split('/')
                         if pieces[0] == 'trunk':
                             branch = None
                             pieces.pop(0) # remove 'trunk'
                         elif pieces[0] == 'branches':
                             pieces.pop(0) # remove 'branches'
                             # grab branch name
                             branch = 'branches/' + pieces.pop(0)
                         else:
                             return None # something weird
                         projectname = pieces.pop(0)
                         if projectname != 'ProjectA':
                             return None # wrong project
                         return (branch, '/'.join(pieces))

                       The default, None, means no splitting is done,
                       which is equivalent to::

                         return (None, path)

                       Instead of passing split_file=, a subclass may
                       override the split_file method with the same sort
                       of function.

    @type  svnuser: string
    @param svnuser: if set, the --username option is added to the svn
                    commands. This may be needed to reach a private
                    repository.

    @type  svnpasswd: string
    @param svnpasswd: if set, the --password option is added.

    @type  pollinterval: int
    @param pollinterval: interval in seconds between polls. The default
                         is 600 seconds (10 minutes). Smaller values
                         reduce the latency between a change being
                         recorded and the buildbot noticing it, but
                         increase the system load.

    @type  histmax: int
    @param histmax: maximum number of changes to look back through. The
                    default is 100. Smaller values reduce system load,
                    but if more than histmax changes are recorded
                    between polls, the extra ones are silently lost.

    @type  svnbin: string
    @param svnbin: path to the svn binary, 'svn' by default. Use this if
                   the subversion command lives in an unusual location.

    @type  revlinktmpl: string
    @param revlinktmpl: a format string used to build hyperlinks to
                        revision information. For example,
                        "http://reposerver/websvn/revision.php?rev=%s"
                        would produce links on the build pages to the
                        websvn page for each revision.

    @type  category: string
    @param category: a single category associated with the changes, which
                     schedulers can use to watch for branches of a
                     certain name AND category.
    """

    # repository location and credentials
    self.svnurl = svnurl[:-1] if svnurl.endswith("/") else svnurl
    self.svnuser = svnuser
    self.svnpasswd = svnpasswd
    self.svnbin = svnbin

    # how changes are interpreted and presented
    if not split_file:
        split_file = split_file_alwaystrunk
    self.split_file_function = split_file
    self.revlinktmpl = revlinktmpl
    self.category = category

    # polling parameters and state
    self.pollinterval = pollinterval
    self.histmax = histmax
    self._prefix = None  # filled in by determine_prefix
    self.overrun_counter = 0
    self.loop = LoopingCall(self.checksvn)
| 197 | |
def split_file(self, path):
    """Run the configured splitter on `path` and return its result.

    The splitter lives in the split_file_function attribute and is called
    with `path` as its only argument (no extra 'self').
    """
    splitter = self.split_file_function
    return splitter(path)
| 203 | |
def startService(self):
    """Start the service and schedule the polling loop to begin."""
    log.msg("SVNPoller(%s) starting" % self.svnurl)
    base.ChangeSource.startService(self)
    # The reactor is not running yet, so the loop is not started directly.
    # Deferring it by one reactor turn lets the reactor install our SIGCHLD
    # handler before any svn process is spawned.
    interval = self.pollinterval
    reactor.callLater(0, self.loop.start, interval)
| 211 | |
def stopService(self):
    """Stop the polling loop (if it is running) and stop the service.

    Returns whatever base.ChangeSource.stopService returns.
    """
    log.msg("SVNPoller(%s) shutting down" % self.svnurl)
    # startService only *schedules* loop.start via callLater(0, ...), so the
    # loop may not be running yet when we are stopped. LoopingCall.stop()
    # asserts that the loop is running, so only call it when that holds.
    if self.loop.running:
        self.loop.stop()
    return base.ChangeSource.stopService(self)
| 216 | |
def describe(self):
    """Return a one-line description naming the URL being watched."""
    description = "SVNPoller watching %s" % self.svnurl
    return description
| 219 | |
def checksvn(self):
    """Run one polling cycle and return a Deferred for it.

    If the previous poll is still in progress, the overrun is logged and
    counted in overrun_counter, and an already-fired Deferred is returned.
    Otherwise the cycle is: determine the path prefix (first time only),
    fetch the log, parse it, keep only new entries, turn them into Changes
    and submit them. finished_ok / finished_failure clear the 'working'
    flag at the end. The return value is only used for unit testing.
    """
    # Background on the prefix: each SVNURL has the form
    # (ROOT)/(PROJECT)/(BRANCH)/(FILEPATH). (ROOT) is something like
    # svn://svn.twistedmatrix.com/svn/Twisted (a physical repository at
    # /svn/Twisted on that host). (PROJECT) is something like
    # Projects/Twisted: a directory inside the repository's own namespace
    # that does not appear on the repository host. (BRANCH) is something
    # like "trunk" or "branches/2.0.x", and (FILEPATH) is a tree-relative
    # filename like "twisted/internet/defer.py".
    #
    # self.svnurl holds (ROOT)/(PROJECT) joined in a way we cannot split
    # without svn's help (or (ROOT)/(PROJECT)/(BRANCH) when split_file= is
    # not used). The filenames 'svn log' reports have the form
    # (PROJECT)/(BRANCH)/(FILEPATH), and we want to drop the (PROJECT)
    # part. Rather than making the user tell us where ROOT ends, we run
    # 'svn info --xml', whose <root> element gives ROOT. Stripping ROOT from
    # self.svnurl yields PROJECT; stripping PROJECT from each reported
    # filename yields the (BRANCH)/(FILEPATH) that split_file() separates.

    if self.working:
        log.msg("SVNPoller(%s) overrun: timer fired but the previous "
                "poll had not yet finished." % self.svnurl)
        self.overrun_counter += 1
        return defer.succeed(None)
    self.working = True

    log.msg("SVNPoller polling")
    # Compare against None rather than testing truthiness: an empty prefix
    # ("") is a valid, already-determined value (svnurl is the repository
    # root), and must not trigger another 'svn info' on every poll.
    if self._prefix is None:
        # determine_prefix sets self._prefix and also fires with it,
        # because that makes the unit tests easier to write.
        d = self.get_root()
        d.addCallback(self.determine_prefix)
    else:
        d = defer.succeed(self._prefix)

    for stage in (self.get_logs,
                  self.parse_logs,
                  self.get_new_logentries,
                  self.create_changes,
                  self.submit_changes):
        d.addCallback(stage)
    d.addCallbacks(self.finished_ok, self.finished_failure)
    return d
| 276 | |
def getProcessOutput(self, args):
    """Run the svn binary with `args` and return the resulting Deferred.

    The command is run with an empty environment dict. This wrapper exists
    so the unit tests can override it.
    """
    return utils.getProcessOutput(self.svnbin, args, {})
| 281 | |
def get_root(self):
    """Run 'svn info --xml' on svnurl and return the output Deferred.

    --username / --password options are appended when svnuser / svnpasswd
    are set.
    """
    command = ["info", "--xml", "--non-interactive", self.svnurl]
    if self.svnuser:
        command.append("--username=%s" % self.svnuser)
    if self.svnpasswd:
        command.append("--password=%s" % self.svnpasswd)
    return self.getProcessOutput(command)
| 290 | |
def determine_prefix(self, output):
    """Derive self._prefix from 'svn info --xml' output and return it.

    The prefix is the part of svnurl that follows the <root> URL reported
    by svn, without a leading slash. When the output has no <root>
    element the prefix is the empty string.

    Raises the ExpatError from the XML parser (after logging it) when
    `output` is not well-formed, and AssertionError when svnurl does not
    start with the reported root.
    """
    try:
        document = xml.dom.minidom.parseString(output)
    except xml.parsers.expat.ExpatError:
        dbgMsg("_process_changes: ExpatError in %s" % output)
        log.msg("SVNPoller._determine_prefix_2: ExpatError in '%s'"
                % output)
        raise

    root_elements = document.getElementsByTagName("root")
    if root_elements:
        # root will be a unicode string
        root = "".join([node.data for node in root_elements[0].childNodes])
        _assert(self.svnurl.startswith(root),
                "svnurl='%s' doesn't start with <root>='%s'" %
                (self.svnurl, root))
        remainder = self.svnurl[len(root):]
        if remainder.startswith("/"):
            remainder = remainder[1:]
        self._prefix = remainder
        log.msg("SVNPoller: svnurl=%s, root=%s, so prefix=%s" %
                (self.svnurl, root, self._prefix))
    else:
        # this happens if the URL we gave was already the root. In this
        # case, our prefix is empty.
        self._prefix = ""
    return self._prefix
| 317 | |
def get_logs(self, ignored_prefix=None):
    """Run 'svn log --xml --verbose' on svnurl and return the output Deferred.

    At most histmax entries are requested. The argument is ignored; it is
    only there so this method can sit in a callback chain.
    """
    command = ["log", "--xml", "--verbose", "--non-interactive"]
    if self.svnuser:
        command.append("--username=%s" % self.svnuser)
    if self.svnpasswd:
        command.append("--password=%s" % self.svnpasswd)
    command.append("--limit=%d" % (self.histmax))
    command.append(self.svnurl)
    return self.getProcessOutput(command)
| 328 | |
def parse_logs(self, output):
    """Parse 'svn log --xml' output and return its <logentry> nodes.

    Raises the ExpatError from the XML parser (after logging it) when
    `output` is not well-formed.
    """
    try:
        document = xml.dom.minidom.parseString(output)
    except xml.parsers.expat.ExpatError:
        dbgMsg("_process_changes: ExpatError in %s" % output)
        log.msg("SVNPoller._parse_changes: ExpatError in '%s'" % output)
        raise
    return document.getElementsByTagName("logentry")
| 339 | |
| 340 | |
| 341 def _filter_new_logentries(self, logentries, last_change): | |
| 342 # given a list of logentries, return a tuple of (new_last_change, | |
| 343 # new_logentries), where new_logentries contains only the ones after | |
| 344 # last_change | |
| 345 if not logentries: | |
| 346 # no entries, so last_change must stay at None | |
| 347 return (None, []) | |
| 348 | |
| 349 mostRecent = int(logentries[0].getAttribute("revision")) | |
| 350 | |
| 351 if last_change is None: | |
| 352 # if this is the first time we've been run, ignore any changes | |
| 353 # that occurred before now. This prevents a build at every | |
| 354 # startup. | |
| 355 log.msg('svnPoller: starting at change %s' % mostRecent) | |
| 356 return (mostRecent, []) | |
| 357 | |
| 358 if last_change == mostRecent: | |
| 359 # an unmodified repository will hit this case | |
| 360 log.msg('svnPoller: _process_changes last %s mostRecent %s' % ( | |
| 361 last_change, mostRecent)) | |
| 362 return (mostRecent, []) | |
| 363 | |
| 364 new_logentries = [] | |
| 365 for el in logentries: | |
| 366 if last_change == int(el.getAttribute("revision")): | |
| 367 break | |
| 368 new_logentries.append(el) | |
| 369 new_logentries.reverse() # return oldest first | |
| 370 | |
| 371 # If the newest commit's author is chrome-bot, skip this commit. This | |
| 372 # is a guard to ensure that we don't poll on our mirror while it could | |
| 373 # be mid-sync. In that case, the author data could be wrong and would | |
| 374 # look like it was a commit by chrome-bot@google.com. A downside: the | |
| 375 # chrome-bot account may have a legitimate commit. This should not | |
| 376 # happen generally, so we're okay waiting to see it until there's a | |
| 377 # later commit with a non-chrome-bot author. | |
| 378 if len(new_logentries) > 0: | |
| 379 if new_logentries[-1].getAttribute("author") == 'chrome-bot@google.com
': | |
| 380 new_logentries.pop(-1) | |
| 381 mostRecent = int(logentries[1].getAttribute("revision")) | |
| 382 | |
| 383 return (mostRecent, new_logentries) | |
| 384 | |
def get_new_logentries(self, logentries):
    """Return the entries newer than self.last_change and advance it.

    self.last_change is replaced by the value _filter_new_logentries
    reports, and the old and new values are logged.
    """
    previous = self.last_change
    latest, fresh_entries = self._filter_new_logentries(logentries, previous)
    self.last_change = latest
    log.msg('svnPoller: _process_changes %s .. %s' %
            (previous, latest))
    return fresh_entries
| 394 | |
| 395 | |
| 396 def _get_text(self, element, tag_name): | |
| 397 try: | |
| 398 child_nodes = element.getElementsByTagName(tag_name)[0].childNodes | |
| 399 text = "".join([t.data for t in child_nodes]) | |
| 400 except: | |
| 401 text = "<unknown>" | |
| 402 return text | |
| 403 | |
| 404 def _transform_path(self, path): | |
| 405 _assert(path.startswith(self._prefix), | |
| 406 "filepath '%s' should start with prefix '%s'" % | |
| 407 (path, self._prefix)) | |
| 408 relative_path = path[len(self._prefix):] | |
| 409 if relative_path.startswith("/"): | |
| 410 relative_path = relative_path[1:] | |
| 411 where = self.split_file(relative_path) | |
| 412 # 'where' is either None or (branch, final_path) | |
| 413 return where | |
| 414 | |
def create_changes(self, new_logentries):
    """Build Change objects from <logentry> nodes.

    @param new_logentries: <logentry> DOM nodes, oldest first.
    @return: a list of Change objects, one per (revision, branch) pair.

    Files for which _transform_path() returns None are skipped. A revision
    whose only effect on a branch is deleting the branch itself produces
    no Change for that branch, and a revision with no <paths> element is
    skipped entirely.
    """
    changes = []

    for el in new_logentries:
        revision = str(el.getAttribute("revision"))

        revlink = ''
        if self.revlinktmpl and revision:
            revlink = self.revlinktmpl % urllib.quote_plus(revision)

        dbgMsg("Adding change revision %s" % (revision,))
        # TODO: the rest of buildbot may not be ready for unicode 'who'
        # values
        author = self._get_text(el, "author")
        comments = self._get_text(el, "msg")
        # There is a "date" field, but it provides localtime in the
        # repository's timezone, whereas we care about the buildmaster's
        # localtime (since this will get used to position the boxes on
        # the Waterfall display, etc). So the date field is ignored and
        # the Change is left to use our local clock.

        pathlists = el.getElementsByTagName("paths")
        if not pathlists:
            # A log entry can arrive without any <paths> element; there is
            # nothing to build from it, so skip it instead of failing the
            # whole poll with an IndexError.
            log.msg("SVNPoller: ignoring revision %s, which lists no paths"
                    % (revision,))
            continue

        # branch -> {'files': [...], 'action': action of its first file}
        branches = {}
        for p in pathlists[0].getElementsByTagName("path"):
            action = p.getAttribute("action")
            path = "".join([t.data for t in p.childNodes])
            # the rest of buildbot is certainly not yet ready to handle
            # unicode filenames, because they get put in RemoteCommands
            # which get sent via PB to the buildslave, and PB doesn't
            # handle unicode.
            path = path.encode("ascii")
            if path.startswith("/"):
                path = path[1:]
            where = self._transform_path(path)

            # if 'where' is None, the file was outside any project that
            # we care about and we should ignore it
            if not where:
                continue
            branch, filename = where
            if branch not in branches:
                branches[branch] = {'files': [], 'action': action}
            branches[branch]['files'].append(filename)

        for branch, info in branches.items():
            action = info['action']
            files = info['files']

            if action == u'D' and len(files) == 1 and files[0] == '':
                log.msg("Ignoring deletion of branch '%s'" % branch)
                continue
            changes.append(Change(who=author,
                                  files=files,
                                  comments=comments,
                                  revision=revision,
                                  branch=branch,
                                  revlink=revlink,
                                  category=self.category))

    return changes
| 484 | |
def submit_changes(self, changes):
    """Pass each item of `changes`, in order, to self.parent.addChange."""
    for change in changes:
        self.parent.addChange(change)
| 488 | |
def finished_ok(self, res):
    """Success callback for a poll: log, clear 'working', pass `res` on."""
    log.msg("SVNPoller finished polling")
    dbgMsg('_finished : %s' % res)
    # a poll can only finish if one was in progress
    assert self.working
    self.working = False
    return res
| 495 | |
def finished_failure(self, f):
    """Failure callback for a poll: log, clear 'working', drop the failure.

    Returning None consumes the failure `f`, so it is not passed further
    down the callback chain.
    """
    log.msg("SVNPoller failed")
    dbgMsg('_finished : %s' % f)
    # a poll can only fail if one was in progress
    assert self.working
    self.working = False
    return None  # eat the failure
| OLD | NEW |