OLD | NEW |
(Empty) | |
| 1 """ |
| 2 Attribute List Extension for Python-Markdown |
| 3 ============================================ |
| 4 |
| 5 Adds attribute list syntax. Inspired by |
| 6 [maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s |
| 7 feature of the same name. |
| 8 |
| 9 See <https://pythonhosted.org/Markdown/extensions/attr_list.html> |
| 10 for documentation. |
| 11 |
| 12 Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/). |
| 13 |
| 14 All changes Copyright 2011-2014 The Python Markdown Project |
| 15 |
| 16 License: [BSD](http://www.opensource.org/licenses/bsd-license.php) |
| 17 |
| 18 """ |
| 19 |
| 20 from __future__ import absolute_import |
| 21 from __future__ import unicode_literals |
| 22 from . import Extension |
| 23 from ..treeprocessors import Treeprocessor |
| 24 from ..util import isBlockLevel |
| 25 import re |
| 26 |
# Use the Scanner class exposed by ``re`` when it is there; otherwise (the
# original note says this happens on Python 2.4) import it from ``sre``.
Scanner = getattr(re, 'Scanner', None)
if Scanner is None:  # pragma: no cover
    from sre import Scanner
| 32 |
| 33 |
| 34 def _handle_double_quote(s, t): |
| 35 k, v = t.split('=') |
| 36 return k, v.strip('"') |
| 37 |
| 38 |
| 39 def _handle_single_quote(s, t): |
| 40 k, v = t.split('=') |
| 41 return k, v.strip("'") |
| 42 |
| 43 |
| 44 def _handle_key_value(s, t): |
| 45 return t.split('=') |
| 46 |
| 47 |
| 48 def _handle_word(s, t): |
| 49 if t.startswith('.'): |
| 50 return '.', t[1:] |
| 51 if t.startswith('#'): |
| 52 return 'id', t[1:] |
| 53 return t, t |
| 54 |
# Tokenizer for the text inside an attribute list. The rules are listed from
# the most specific token shape to the least specific one:
#   key="value"  -> _handle_double_quote
#   key='value'  -> _handle_single_quote
#   key=value    -> _handle_key_value
#   bare word    -> _handle_word (also covers the .class and #id forms)
#   single space -> no handler (None)
# NOTE(review): this relies on Scanner trying the rules in listed order and
# on a None action producing no token for the separator — confirm.
_scanner = Scanner([
    (r'[^ ]+=".*?"', _handle_double_quote),
    (r"[^ ]+='.*?'", _handle_single_quote),
    (r'[^ ]+=[^ =]+', _handle_key_value),
    (r'[^ =]+', _handle_word),
    (r' ', None)
])
| 62 |
| 63 |
def get_attrs(str):
    """Tokenize attribute-list text and return the list of parsed items.

    Only the first element of the scanner's result is returned; the second
    element of that result is ignored.
    """
    parsed, _ignored = _scanner.scan(str)
    return parsed
| 67 |
| 68 |
def isheader(elem):
    """Return True when ``elem.tag`` is one of ``h1`` through ``h6``."""
    heading_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    return elem.tag in heading_tags
| 71 |
| 72 |
class AttrListTreeprocessor(Treeprocessor):
    """Tree processor that applies ``{: ... }`` attribute lists to elements.

    For every element in the tree it looks for an attribute list in the
    element's text or in the tail of one of its children, sets the parsed
    attributes on the element and removes the attribute-list text.
    """

    # An attribute list: '{', an optional ':', anything except '}', then '}'.
    BASE_RE = r'\{\:?([^\}]*)\}'
    # Headers / definition terms: list at the end of the line, after spaces.
    HEADER_RE = re.compile(r'[ ]+%s[ ]*$' % BASE_RE)
    # Other block elements: list alone on the last line of the text.
    BLOCK_RE = re.compile(r'\n[ ]*%s[ ]*$' % BASE_RE)
    # Inline elements: list at the very start of the element's tail.
    INLINE_RE = re.compile(r'^%s' % BASE_RE)
    # Runs of characters that sanitize_name() replaces in attribute names.
    NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
                         r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
                         r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
                         r'\uf900-\ufdcf\ufdf0-\ufffd'
                         r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')

    def run(self, doc):
        """Apply attribute lists to every element under ``doc``, in place.

        Block-level elements are searched at the end of their text (or of the
        relevant child's tail); all other elements are searched at the start
        of their own tail. Nothing is returned.
        """
        # Element.getiterator() was removed in Python 3.9. Prefer iter() and
        # fall back to the old name only for trees that do not provide it.
        walk = getattr(doc, 'iter', None) or doc.getiterator
        for elem in walk():
            if isBlockLevel(elem.tag):
                # Block level: check for attrs on last line of text
                RE = self.BLOCK_RE
                if isheader(elem) or elem.tag == 'dt':
                    # header or def-term: check for attrs at end of line
                    RE = self.HEADER_RE
                if len(elem) and elem.tag == 'li':
                    # special case list items. children may include a ul or ol.
                    pos = None
                    # find the ul or ol position
                    for i, child in enumerate(elem):
                        if child.tag in ['ul', 'ol']:
                            pos = i
                            break
                    if pos is None and elem[-1].tail:
                        # use tail of last child. no ul or ol.
                        m = RE.search(elem[-1].tail)
                        if m:
                            self.assign_attrs(elem, m.group(1))
                            elem[-1].tail = elem[-1].tail[:m.start()]
                    elif pos is not None and pos > 0 and elem[pos-1].tail:
                        # use tail of last child before ul or ol
                        m = RE.search(elem[pos-1].tail)
                        if m:
                            self.assign_attrs(elem, m.group(1))
                            elem[pos-1].tail = elem[pos-1].tail[:m.start()]
                    elif elem.text:
                        # use text. ul is first child.
                        m = RE.search(elem.text)
                        if m:
                            self.assign_attrs(elem, m.group(1))
                            elem.text = elem.text[:m.start()]
                elif len(elem) and elem[-1].tail:
                    # has children. Get from tail of last child
                    m = RE.search(elem[-1].tail)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem[-1].tail = elem[-1].tail[:m.start()]
                        if isheader(elem):
                            # clean up trailing #s
                            elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
                elif elem.text:
                    # no children. Get from text.
                    m = RE.search(elem.text)
                    if not m and elem.tag == 'td':
                        # table cells: accept an attribute list anywhere in
                        # the text, not only on its last line
                        m = re.search(self.BASE_RE, elem.text)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem.text = elem.text[:m.start()]
                        if isheader(elem):
                            # clean up trailing #s
                            elem.text = elem.text.rstrip('#').rstrip()
            else:
                # inline: check for attrs at start of tail
                if elem.tail:
                    m = self.INLINE_RE.match(elem.tail)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem.tail = elem.tail[m.end():]

    def assign_attrs(self, elem, attrs):
        """Parse the attribute-list text ``attrs`` and set it on ``elem``.

        Items keyed ``'.'`` are appended to the element's ``class`` attribute
        (space separated); every other item is set as an attribute whose name
        is first passed through ``sanitize_name``.
        """
        for k, v in get_attrs(attrs):
            if k == '.':
                # add to class
                cls = elem.get('class')
                if cls:
                    elem.set('class', '%s %s' % (cls, v))
                else:
                    elem.set('class', v)
            else:
                # assign attr k with v
                elem.set(self.sanitize_name(k), v)

    def sanitize_name(self, name):
        """
        Sanitize name as 'an XML Name, minus the ":"'.
        See http://www.w3.org/TR/REC-xml-names/#NT-NCName

        Each run of characters matched by ``NAME_RE`` is replaced with a
        single underscore; the resulting string is returned.
        """
        return self.NAME_RE.sub('_', name)
| 167 |
| 168 |
class AttrListExtension(Extension):
    """Extension that installs the attribute-list tree processor."""

    def extendMarkdown(self, md, md_globals):
        """Add an ``AttrListTreeprocessor`` to ``md.treeprocessors``.

        The processor is registered under the name ``attr_list`` with the
        position spec ``'>prettify'``. ``md_globals`` is not used.
        """
        processor = AttrListTreeprocessor(md)
        md.treeprocessors.add('attr_list', processor, '>prettify')
| 174 |
| 175 |
def makeExtension(*args, **kwargs):
    """Create an ``AttrListExtension``, forwarding every argument to it."""
    extension = AttrListExtension(*args, **kwargs)
    return extension
OLD | NEW |