OLD | NEW |
| (Empty) |
1 """ | |
2 Attribute List Extension for Python-Markdown | |
3 ============================================ | |
4 | |
5 Adds attribute list syntax. Inspired by | |
6 [maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s | |
7 feature of the same name. | |
8 | |
9 See <https://pythonhosted.org/Markdown/extensions/attr_list.html> | |
10 for documentation. | |
11 | |
12 Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/). | |
13 | |
14 All changes Copyright 2011-2014 The Python Markdown Project | |
15 | |
16 License: [BSD](http://www.opensource.org/licenses/bsd-license.php) | |
17 | |
18 """ | |
19 | |
20 from __future__ import absolute_import | |
21 from __future__ import unicode_literals | |
22 from . import Extension | |
23 from ..treeprocessors import Treeprocessor | |
24 from ..util import isBlockLevel | |
25 import re | |
26 | |
# Bind ``Scanner`` from the ``re`` module when it exposes one; otherwise
# fall back to importing it from the legacy ``sre`` module.
try:
    Scanner = re.Scanner
except AttributeError:  # pragma: no cover
    # ``re`` has no ``Scanner`` attribute: must be on Python 2.4
    from sre import Scanner
32 | |
33 | |
34 def _handle_double_quote(s, t): | |
35 k, v = t.split('=') | |
36 return k, v.strip('"') | |
37 | |
38 | |
39 def _handle_single_quote(s, t): | |
40 k, v = t.split('=') | |
41 return k, v.strip("'") | |
42 | |
43 | |
44 def _handle_key_value(s, t): | |
45 return t.split('=') | |
46 | |
47 | |
48 def _handle_word(s, t): | |
49 if t.startswith('.'): | |
50 return '.', t[1:] | |
51 if t.startswith('#'): | |
52 return 'id', t[1:] | |
53 return t, t | |
54 | |
# Tokenizer for the text of an attribute list. Each rule pairs a pattern
# with the handler that converts the matched text into an attribute pair;
# the rules are listed from the most specific form to the least specific.
_scanner = Scanner([
    (r'[^ ]+=".*?"', _handle_double_quote),  # key="double quoted value"
    (r"[^ ]+='.*?'", _handle_single_quote),  # key='single quoted value'
    (r'[^ ]+=[^ =]+', _handle_key_value),  # key=value (unquoted)
    (r'[^ =]+', _handle_word),  # .class, #id or a bare word
    (r' ', None)  # separator: a single space has no handler
])
62 | |
63 | |
def get_attrs(str):
    """ Tokenize an attribute list string and return its attribute tuples. """
    # scan() hands back the parsed tokens together with whatever trailing
    # text it could not match; only the tokens are of interest here.
    parsed, _unparsed = _scanner.scan(str)
    return parsed
67 | |
68 | |
def isheader(elem):
    """ Return True when ``elem.tag`` is one of ``h1`` through ``h6``. """
    header_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    return elem.tag in header_tags
71 | |
72 | |
class AttrListTreeprocessor(Treeprocessor):
    """ Tree processor that applies ``{: ... }`` attribute lists to elements.

    For block level elements the attribute list is looked for at the end of
    the element's text (or of the tail of its last relevant child); for all
    other elements it is looked for at the very start of the element's tail.
    A matched attribute list is removed from the text and its attributes
    are set on the element.
    """

    # An attribute list: ``{...}`` or ``{:...}``. Group 1 is the raw text
    # between the braces.
    BASE_RE = r'\{\:?([^\}]*)\}'
    # Attribute list at the end of a line, preceded by at least one space.
    HEADER_RE = re.compile(r'[ ]+%s[ ]*$' % BASE_RE)
    # Attribute list alone on the last line of the text.
    BLOCK_RE = re.compile(r'\n[ ]*%s[ ]*$' % BASE_RE)
    # Attribute list at the very start of the text.
    INLINE_RE = re.compile(r'^%s' % BASE_RE)
    # Runs of characters that sanitize_name() replaces with '_'.
    NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
                         r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
                         r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
                         r'\uf900-\ufdcf\ufdf0-\ufffd'
                         r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')

    def run(self, doc):
        """ Walk every element of ``doc`` and apply any attribute list found.

        Elements are modified in place: attributes are set on the element
        and the attribute list text is cut out of its ``text``/``tail``.
        """
        # Element.getiterator() was deprecated and later removed from
        # ElementTree (it is gone in Python 3.9); prefer iter() and only
        # fall back to getiterator() on trees that predate iter().
        try:
            walk = doc.iter
        except AttributeError:
            walk = doc.getiterator
        for elem in walk():
            if isBlockLevel(elem.tag):
                # Block level: check for attrs on last line of text
                RE = self.BLOCK_RE
                if isheader(elem) or elem.tag == 'dt':
                    # header or def-term: check for attrs at end of line
                    RE = self.HEADER_RE
                if len(elem) and elem.tag == 'li':
                    # special case list items. children may include a ul or ol.
                    pos = None
                    # find the ul or ol position
                    for i, child in enumerate(elem):
                        if child.tag in ['ul', 'ol']:
                            pos = i
                            break
                    if pos is None and elem[-1].tail:
                        # use tail of last child. no ul or ol.
                        m = RE.search(elem[-1].tail)
                        if m:
                            self.assign_attrs(elem, m.group(1))
                            elem[-1].tail = elem[-1].tail[:m.start()]
                    elif pos is not None and pos > 0 and elem[pos-1].tail:
                        # use tail of last child before ul or ol
                        m = RE.search(elem[pos-1].tail)
                        if m:
                            self.assign_attrs(elem, m.group(1))
                            elem[pos-1].tail = elem[pos-1].tail[:m.start()]
                    elif elem.text:
                        # use text. ul is first child.
                        m = RE.search(elem.text)
                        if m:
                            self.assign_attrs(elem, m.group(1))
                            elem.text = elem.text[:m.start()]
                elif len(elem) and elem[-1].tail:
                    # has children. Get from tail of last child
                    m = RE.search(elem[-1].tail)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem[-1].tail = elem[-1].tail[:m.start()]
                        if isheader(elem):
                            # clean up trailing #s
                            elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
                elif elem.text:
                    # no children. Get from text.
                    m = RE.search(elem.text)
                    if not m and elem.tag == 'td':
                        # table cells: accept an attr list anywhere in text
                        m = re.search(self.BASE_RE, elem.text)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem.text = elem.text[:m.start()]
                        if isheader(elem):
                            # clean up trailing #s
                            elem.text = elem.text.rstrip('#').rstrip()
            else:
                # inline: check for attrs at start of tail
                if elem.tail:
                    m = self.INLINE_RE.match(elem.tail)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem.tail = elem.tail[m.end():]

    def assign_attrs(self, elem, attrs):
        """ Assign attrs to element.

        ``attrs`` is the raw attribute list text. Each ``.name`` entry is
        appended to the element's ``class`` attribute; every other entry is
        set as an attribute whose name is first passed through
        sanitize_name().
        """
        for k, v in get_attrs(attrs):
            if k == '.':
                # add to class
                cls = elem.get('class')
                if cls:
                    elem.set('class', '%s %s' % (cls, v))
                else:
                    elem.set('class', v)
            else:
                # assign attr k with v
                elem.set(self.sanitize_name(k), v)

    def sanitize_name(self, name):
        """
        Sanitize name as 'an XML Name, minus the ":"'.
        See http://www.w3.org/TR/REC-xml-names/#NT-NCName

        Every run of characters matched by NAME_RE is replaced with a
        single underscore.
        """
        return self.NAME_RE.sub('_', name)
167 | |
168 | |
class AttrListExtension(Extension):
    """ Markdown extension that installs the attribute list tree processor. """

    def extendMarkdown(self, md, md_globals):
        """ Register an AttrListTreeprocessor on ``md`` under 'attr_list'.

        The processor is added with the '>prettify' location string.
        """
        register = md.treeprocessors.add
        register('attr_list', AttrListTreeprocessor(md), '>prettify')
174 | |
175 | |
def makeExtension(*args, **kwargs):
    """ Build an AttrListExtension, forwarding every argument to it. """
    extension = AttrListExtension(*args, **kwargs)
    return extension
OLD | NEW |