OLD | NEW |
| (Empty) |
1 """ | |
2 Table of Contents Extension for Python-Markdown | |
3 * * * | |
4 | |
5 (c) 2008 [Jack Miller](http://codezen.org) | |
6 | |
7 Dependencies: | |
8 * [Markdown 2.1+](http://packages.python.org/Markdown/) | |
9 | |
10 """ | |
11 | |
12 from __future__ import absolute_import | |
13 from __future__ import unicode_literals | |
14 from . import Extension | |
15 from ..treeprocessors import Treeprocessor | |
16 from ..util import etree | |
17 from .headerid import slugify, unique, itertext | |
18 import re | |
19 | |
20 | |
def order_toc_list(toc_list):
    """Given an unsorted list with errors and skips, return a nested one.

        [{'level': 1}, {'level': 2}]
        =>
        [{'level': 1, 'children': [{'level': 2, 'children': []}]}]

    A wrong list is also converted:

        [{'level': 2}, {'level': 1}]
        =>
        [{'level': 2, 'children': []}, {'level': 1, 'children': []}]

    Each entry of ``toc_list`` is a dict with at least a ``'level'`` key.
    Entries are updated in place: a ``'children'`` list is added when it is
    missing, and nested entries are appended to their parent's list.  The
    returned list holds the top-level entries (the same dict objects) in
    document order.  ``toc_list`` itself is only read, never modified.

    An entry becomes a child of the closest preceding entry whose level is
    strictly smaller; when there is none it is treated as top-level, so a
    sequence such as levels ``[8, 1, 1]`` yields three top-level entries.

    The nesting is built in a single iterative pass, so the number of
    headers is not limited by the interpreter's recursion depth.
    """
    ordered_list = []
    # Chain of entries that can still receive children, outermost first.
    open_ancestors = []

    for current in toc_list:
        if 'children' not in current:
            current['children'] = []

        # An entry can only be nested under a strictly shallower one, so
        # discard every open ancestor that is at the same level or deeper.
        while open_ancestors and current['level'] <= open_ancestors[-1]['level']:
            open_ancestors.pop()

        if open_ancestors:
            open_ancestors[-1]['children'].append(current)
        else:
            # Nothing shallower is open: this entry starts a new top-level
            # branch.
            ordered_list.append(current)
        open_ancestors.append(current)

    return ordered_list
81 | |
82 | |
class TocTreeprocessor(Treeprocessor):
    """Treeprocessor that builds a table of contents from the headers.

    ``run`` gives every ``h1``-``h6`` element an ``id`` (unless it already
    has one), collects the headers into a nested ``<ul>`` inside a
    ``<div class="toc">`` and swaps that div in for the element whose text
    is the configured marker.  ``self.config`` is expected to be set by the
    extension before ``run`` is called.
    """

    # Iterator wrapper to get parent and child all at once
    def iterparent(self, root):
        """Yield ``(parent, child)`` for every element found under *root*.

        The list of parents is snapshotted up front because ``run`` replaces
        elements in the tree while consuming this generator.
        """
        for parent in list(root.iter()):
            for child in parent:
                yield parent, child

    def add_anchor(self, c, elem_id):  #@ReservedAssignment
        """Wrap the content of header *c* in a self link to ``#elem_id``.

        Does nothing unless ``self.use_anchors`` (set by ``run``) is true.
        The header's text and all of its children are moved into a new
        ``<a class="toclink">`` element, which becomes the only child of *c*.
        """
        if self.use_anchors:
            anchor = etree.Element("a")
            anchor.text = c.text
            anchor.attrib["href"] = "#" + elem_id
            anchor.attrib["class"] = "toclink"
            c.text = ""
            # Iterate over a copy: children are removed from ``c`` inside
            # the loop, which would otherwise skip elements.
            for elem in list(c):
                anchor.append(elem)
                c.remove(elem)
            c.append(anchor)

    def build_toc_etree(self, div, toc_list):
        """Fill *div* with the TOC markup for the nested *toc_list*.

        *toc_list* is a list of dicts with a ``'children'`` list and,
        optionally, ``'name'`` and ``'id'`` keys.  When the ``title`` config
        value is set, a ``<span class="toctitle">`` is added first.  Returns
        the top-level ``<ul>`` element that was appended to *div*.
        """
        # Add title to the div
        if self.config["title"]:
            header = etree.SubElement(div, "span")
            header.attrib["class"] = "toctitle"
            header.text = self.config["title"]

        def build_etree_ul(toc_list, parent):
            ul = etree.SubElement(parent, "ul")
            for item in toc_list:
                # List item link, to be inserted into the toc div
                li = etree.SubElement(ul, "li")
                link = etree.SubElement(li, "a")
                link.text = item.get('name', '')
                link.attrib["href"] = '#' + item.get('id', '')
                if item['children']:
                    build_etree_ul(item['children'], li)
            return ul

        return build_etree_ul(toc_list, div)

    def run(self, doc):
        """Insert the table of contents into *doc*.

        Headers receive unique ids, the marker element (if any) is replaced
        by the TOC ``<div>``, and when no marker is found the serialized TOC
        is stored on ``self.markdown.toc`` instead.
        """

        div = etree.Element("div")
        div.attrib["class"] = "toc"
        header_rgx = re.compile("[Hh][123456]")

        self.use_anchors = self.config["anchorlink"] in [1, '1', True, 'True', 'true']

        # Get a list of id attributes
        used_ids = set()
        for c in doc.iter():
            if "id" in c.attrib:
                used_ids.add(c.attrib["id"])

        toc_list = []
        marker_found = False
        for (p, c) in self.iterparent(doc):
            text = ''.join(itertext(c)).strip()
            if not text:
                continue

            # To keep the output from screwing up the
            # validation by putting a <div> inside of a <p>
            # we actually replace the <p> in its entirety.
            # We do not allow the marker inside a header as that
            # would cause an endless loop of placing a new TOC
            # inside previously generated TOC.
            if c.text and c.text.strip() == self.config["marker"] and \
               not header_rgx.match(c.tag) and c.tag not in ['pre', 'code']:
                for i, sibling in enumerate(p):
                    if sibling is c:
                        p[i] = div
                        break
                marker_found = True

            if header_rgx.match(c.tag):

                # Do not override pre-existing ids
                if "id" not in c.attrib:
                    elem_id = unique(self.config["slugify"](text, '-'), used_ids)
                    c.attrib["id"] = elem_id
                else:
                    elem_id = c.attrib["id"]

                # The last character of the tag name is the header level.
                tag_level = int(c.tag[-1])

                toc_list.append({'level': tag_level,
                                 'id': elem_id,
                                 'name': text})

                self.add_anchor(c, elem_id)

        toc_list_nested = order_toc_list(toc_list)
        self.build_toc_etree(div, toc_list_nested)
        prettify = self.markdown.treeprocessors.get('prettify')
        if prettify:
            prettify.run(div)
        if not marker_found:
            # serialize and attach to markdown instance.
            toc = self.markdown.serializer(div)
            for pp in self.markdown.postprocessors.values():
                toc = pp.run(toc)
            self.markdown.toc = toc
186 | |
187 | |
class TocExtension(Extension):
    """Markdown extension that registers the table-of-contents processor."""

    # Treeprocessor class instantiated by ``extendMarkdown``; subclasses can
    # override it.
    TreeProcessorClass = TocTreeprocessor

    def __init__(self, configs=None):
        """Set the default options and apply the overrides in *configs*.

        *configs* may be an iterable of ``(key, value)`` pairs or a mapping;
        ``None`` (the default) means "no overrides".
        """
        self.config = { "marker" : ["[TOC]",
                            "Text to find and replace with Table of Contents - "
                            "Defaults to \"[TOC]\""],
                        "slugify" : [slugify,
                            "Function to generate anchors based on header text - "
                            "Defaults to the headerid ext's slugify function."],
                        "title" : [None,
                            "Title to insert into TOC <div> - "
                            "Defaults to None"],
                        "anchorlink" : [0,
                            "1 if header should be a self link - "
                            "Defaults to 0"]}

        if configs is None:
            configs = []
        elif hasattr(configs, 'items'):
            # A mapping iterates over its keys only; use its pairs instead.
            configs = configs.items()

        for key, value in configs:
            self.setConfig(key, value)

    def extendMarkdown(self, md, md_globals):
        """Create the treeprocessor and add it to *md* as ``"toc"``."""
        tocext = self.TreeProcessorClass(md)
        tocext.config = self.getConfigs()
        # Headerid ext is set to '>prettify'. With this set to '_end',
        # it should always come after headerid ext (and honor ids assigned
        # by the header id extension) if both are used. Same goes for
        # attr_list extension. This must come last because we don't want
        # to redefine ids after toc is created. But we do want toc prettified.
        md.treeprocessors.add("toc", tocext, "_end")
218 | |
219 | |
def makeExtension(configs=None):
    """Return a ``TocExtension`` configured with *configs*.

    *configs* may be an iterable of ``(key, value)`` pairs or a mapping;
    ``None`` (the default) means "no overrides".  A mapping is converted to
    a list of pairs before it is handed to ``TocExtension``.
    """
    if configs is None:
        configs = []
    elif hasattr(configs, 'items'):
        configs = list(configs.items())
    return TocExtension(configs=configs)
OLD | NEW |