| OLD | NEW |
| (Empty) |
| 1 #!/usr/bin/python | |
| 2 | |
| 3 # Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 4 # Use of this source code is governed by a BSD-style license that can be | |
| 5 # found in the LICENSE file. | |
| 6 | |
| 7 '''This utility converts the html files as emitted by doxygen into ezt files | |
| 8 that are suitable for inclusion into Google code site. | |
| 9 | |
| 10 EZT stands for "EaZy Templating (for Python)". For more information, see | |
| 11 http://code.google.com/p/ezt/ | |
| 12 ''' | |
| 13 | |
| 14 import optparse | |
| 15 import os | |
| 16 import re | |
| 17 import shutil | |
| 18 import string | |
| 19 import sys | |
try:
  from BeautifulSoup import BeautifulSoup, Tag
except (ImportError, NotImplementedError):
  # BeautifulSoup is a hard requirement of this tool: tell the user how to
  # get it, then re-raise so the original traceback is still reported.
  print ("This tool requires the BeautifulSoup package "
         "(see http://www.crummy.com/software/BeautifulSoup/).\n"
         "Make sure that the file BeautifulSoup.py is either in this directory "
         "or is available in your PYTHONPATH")
  raise
| 28 | |
| 29 | |
class EZTFixer(object):
  '''This class converts the html strings as produced by Doxygen into ezt
  strings as used by the Google code site tools.

  The html is parsed once by the constructor. The Fix*/Remove* methods edit
  the resulting parse tree in place, and str() serializes the current tree.
  '''

  def __init__(self, html):
    '''Parses |html|, a string of html, into a BeautifulSoup tree.'''
    self.soup = BeautifulSoup(html)

  def FixTableHeadings(self):
    '''Fixes the doxygen table headings to EZT's liking.

    This includes using <th> instead of <h2> for the heading, and putting
    the "name" attribute into the "id" attribute of the <tr> tag.

    For example, this html:
      <tr><td colspan="2"><h2><a name="pub-attribs"></a>
      Data Fields List</h2></td></tr>

    would be converted to this:
      <tr id="pub-attribs"><th colspan="2">Data Fields List</th></tr>

    Also, this function splits up tables into multiple separate tables if
    a table heading appears in the middle of a table.
    '''

    table_headers = []
    for tag in self.soup.findAll('tr'):
      # get() instead of ['name']: an anchor without a "name" attribute is
      # simply not a heading row, and must not abort the run with KeyError.
      if tag.td and tag.td.h2 and tag.td.h2.a and tag.td.h2.a.get('name'):
        # The id must be read before the cell's content is replaced below,
        # because that replacement discards the <h2><a> subtree.
        tag['id'] = tag.td.h2.a.get('name')
        # The element following the anchor is used as the heading text.
        tag.td.string = tag.td.h2.a.next
        tag.td.name = 'th'
        table_headers.append(tag)

    # reverse the list so that earlier tags don't delete later tags
    table_headers.reverse()
    # Split up tables that have multiple table header (th) rows
    for tag in table_headers:
      # Is this a heading in the middle of a table?
      if tag.findPreviousSibling('tr') and tag.parent.name == 'table':
        table = tag.parent
        table_parent = table.parent
        table_index = table_parent.contents.index(table)
        # The new table is inserted right after the one being split and
        # receives the same attributes.
        new_table = Tag(self.soup, name='table', attrs=table.attrs)
        table_parent.insert(table_index + 1, new_table)
        # Everything from the heading row onwards moves to the new table.
        # NOTE(review): the rows are moved by slicing the contents lists
        # directly; nothing here updates the moved rows' parent links.
        tag_index = table.contents.index(tag)
        new_table.contents = table.contents[tag_index:]
        del table.contents[tag_index:]

  def RemoveTopHeadings(self):
    '''Removes <div> sections with a header, tabs, or navpath class attribute'''
    header_tags = self.soup.findAll(
        name='div',
        attrs={'class' : re.compile('^(header|tabs[0-9]*|navpath)$')})
    # extract() is called only for its side effect of detaching the tag.
    for tag in header_tags:
      tag.extract()

  def FixAll(self):
    '''Applies FixTableHeadings() and then RemoveTopHeadings().'''
    self.FixTableHeadings()
    self.RemoveTopHeadings()

  def __str__(self):
    '''Returns the current parse tree serialized with str().'''
    return str(self.soup)
| 91 | |
| 92 | |
def main():
  '''Main entry for the html2ezt utility.

  html2ezt takes a list of html files and creates a set of ezt files with
  the same basename and in the same directory as the original html files.
  Each new ezt file is suitable for presentation on Google Codesite using
  the EZT tool.

  A file name that does not end in ".html" gets ".ezt" appended instead, so
  that the output never overwrites the input file.

  With -m/--move, each html file is moved, once converted, into an
  "original_html" directory located next to the directory that holds the
  html file; that directory is created if it does not exist.

  Returns:
    0 when all files were processed, 1 (after printing the usage line) when
    no files were given on the command line.

  Raises:
    Any exception raised while processing a file is re-raised after the
    name of the offending file has been printed.
  '''

  parser = optparse.OptionParser(usage='Usage: %prog [options] files...')

  parser.add_option('-m', '--move', dest='move', action='store_true',
                    default=False, help='move html files to "original_html"')

  options, files = parser.parse_args()

  if not files:
    parser.print_usage()
    return 1

  # Compiled once, outside the loop; raw string so "\." is a regex escape.
  html_suffix = re.compile(r'\.html$')

  for filename in files:
    try:
      with open(filename, 'r') as html_file:
        html = html_file.read()

      fixer = EZTFixer(html)
      fixer.FixAll()
      new_name, replaced = html_suffix.subn('.ezt', filename)
      if not replaced:
        # Without a ".html" suffix the substitution is a no-op and the
        # output would clobber the input file, so append the suffix.
        new_name = filename + '.ezt'
      with open(new_name, 'w') as ezt_file:
        ezt_file.write(str(fixer))
      if options.move:
        # "original_html" is a sibling of the directory holding the file.
        new_directory = os.path.join(
            os.path.dirname(os.path.dirname(filename)), 'original_html')
        if not os.path.exists(new_directory):
          os.mkdir(new_directory)
        shutil.move(filename, new_directory)
    except:
      # Deliberately catches everything: the handler only names the file
      # being processed and always re-raises.
      print("Error while processing %s" % filename)
      raise

  return 0
| 133 | |
if __name__ == '__main__':
  # Run the converter and hand main()'s return value to the shell as the
  # process exit status.
  exit_status = main()
  sys.exit(exit_status)
| OLD | NEW |