OLD | NEW |
1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
2 # markdown is released under the BSD license | |
3 # Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later) | |
4 # Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) | |
5 # Copyright 2004 Manfred Stienstra (the original version) | |
6 # | |
7 # All rights reserved. | |
8 # | |
9 # Redistribution and use in source and binary forms, with or without | |
10 # modification, are permitted provided that the following conditions are met: | |
11 # | |
12 # * Redistributions of source code must retain the above copyright | |
13 # notice, this list of conditions and the following disclaimer. | |
14 # * Redistributions in binary form must reproduce the above copyright | |
15 # notice, this list of conditions and the following disclaimer in the | |
16 # documentation and/or other materials provided with the distribution. | |
17 # * Neither the name of the <organization> nor the | |
18 # names of its contributors may be used to endorse or promote products | |
19 # derived from this software without specific prior written permission. | |
20 # | |
21 # THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY | |
22 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
23 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
24 # DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT | |
25 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
26 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
27 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
28 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
29 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
30 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
31 # POSSIBILITY OF SUCH DAMAGE. | |
32 | |
33 | |
34 from __future__ import unicode_literals | 2 from __future__ import unicode_literals |
35 import re | 3 import re |
36 import sys | 4 import sys |
37 | 5 |
38 | 6 |
39 """ | 7 """ |
40 Python 3 Stuff | 8 Python 3 Stuff |
41 ============================================================================= | 9 ============================================================================= |
42 """ | 10 """ |
43 PY3 = sys.version_info[0] == 3 | 11 PY3 = sys.version_info[0] == 3 |
44 | 12 |
45 if PY3: | 13 if PY3: # pragma: no cover |
46 string_type = str | 14 string_type = str |
47 text_type = str | 15 text_type = str |
48 int2str = chr | 16 int2str = chr |
49 else: | 17 else: # pragma: no cover |
50 string_type = basestring | 18 string_type = basestring # noqa |
51 text_type = unicode | 19 text_type = unicode # noqa |
52 int2str = unichr | 20 int2str = unichr # noqa |
53 | 21 |
54 | 22 |
55 """ | 23 """ |
56 Constants you might want to modify | 24 Constants you might want to modify |
57 ----------------------------------------------------------------------------- | 25 ----------------------------------------------------------------------------- |
58 """ | 26 """ |
59 | 27 |
60 BLOCK_LEVEL_ELEMENTS = re.compile("^(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul" | 28 |
61 "|script|noscript|form|fieldset|iframe|math" | 29 BLOCK_LEVEL_ELEMENTS = re.compile( |
62 "|hr|hr/|style|li|dt|dd|thead|tbody" | 30 "^(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul" |
63 "|tr|th|td|section|footer|header|group|figure" | 31 "|script|noscript|form|fieldset|iframe|math" |
64 "|figcaption|aside|article|canvas|output" | 32 "|hr|hr/|style|li|dt|dd|thead|tbody" |
65 "|progress|video)$", re.IGNORECASE) | 33 "|tr|th|td|section|footer|header|group|figure" |
| 34 "|figcaption|aside|article|canvas|output" |
| 35 "|progress|video|nav)$", |
| 36 re.IGNORECASE |
| 37 ) |
66 # Placeholders | 38 # Placeholders |
67 STX = '\u0002' # Use STX ("Start of text") for start-of-placeholder | 39 STX = '\u0002' # Use STX ("Start of text") for start-of-placeholder |
68 ETX = '\u0003' # Use ETX ("End of text") for end-of-placeholder | 40 ETX = '\u0003' # Use ETX ("End of text") for end-of-placeholder |
69 INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:" | 41 INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:" |
70 INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX | 42 INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX |
71 INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]{4})') | 43 INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)') |
72 AMP_SUBSTITUTE = STX+"amp"+ETX | 44 AMP_SUBSTITUTE = STX+"amp"+ETX |
| 45 HTML_PLACEHOLDER = STX + "wzxhzdk:%s" + ETX |
| 46 HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)') |
| 47 TAG_PLACEHOLDER = STX + "hzzhzkh:%s" + ETX |
| 48 |
73 | 49 |
74 """ | 50 """ |
75 Constants you probably do not need to change | 51 Constants you probably do not need to change |
76 ----------------------------------------------------------------------------- | 52 ----------------------------------------------------------------------------- |
77 """ | 53 """ |
78 | 54 |
79 RTL_BIDI_RANGES = ( ('\u0590', '\u07FF'), | 55 RTL_BIDI_RANGES = ( |
80 # Hebrew (0590-05FF), Arabic (0600-06FF), | 56 ('\u0590', '\u07FF'), |
81 # Syriac (0700-074F), Arabic supplement (0750-077F), | 57 # Hebrew (0590-05FF), Arabic (0600-06FF), |
82 # Thaana (0780-07BF), Nko (07C0-07FF). | 58 # Syriac (0700-074F), Arabic supplement (0750-077F), |
83 ('\u2D30', '\u2D7F'), # Tifinagh | 59 # Thaana (0780-07BF), Nko (07C0-07FF). |
84 ) | 60 ('\u2D30', '\u2D7F') # Tifinagh |
| 61 ) |
85 | 62 |
86 # Extensions should use "markdown.util.etree" instead of "etree" (or do `from | 63 # Extensions should use "markdown.util.etree" instead of "etree" (or do `from |
87 # markdown.util import etree`). Do not import it by yourself. | 64 # markdown.util import etree`). Do not import it by yourself. |
88 | 65 |
89 try: # Is the C implemenation of ElementTree available? | 66 try: # pragma: no cover |
| 67 # Is the C implementation of ElementTree available? |
90 import xml.etree.cElementTree as etree | 68 import xml.etree.cElementTree as etree |
91 from xml.etree.ElementTree import Comment | 69 from xml.etree.ElementTree import Comment |
92 # Serializers (including ours) test with non-c Comment | 70 # Serializers (including ours) test with non-c Comment |
93 etree.test_comment = Comment | 71 etree.test_comment = Comment |
94 if etree.VERSION < "1.0.5": | 72 if etree.VERSION < "1.0.5": |
95 raise RuntimeError("cElementTree version 1.0.5 or higher is required.") | 73 raise RuntimeError("cElementTree version 1.0.5 or higher is required.") |
96 except (ImportError, RuntimeError): | 74 except (ImportError, RuntimeError): # pragma: no cover |
97 # Use the Python implementation of ElementTree? | 75 # Use the Python implementation of ElementTree? |
98 import xml.etree.ElementTree as etree | 76 import xml.etree.ElementTree as etree |
99 if etree.VERSION < "1.1": | 77 if etree.VERSION < "1.1": |
100 raise RuntimeError("ElementTree version 1.1 or higher is required") | 78 raise RuntimeError("ElementTree version 1.1 or higher is required") |
101 | 79 |
102 | 80 |
103 """ | 81 """ |
104 AUXILIARY GLOBAL FUNCTIONS | 82 AUXILIARY GLOBAL FUNCTIONS |
105 ============================================================================= | 83 ============================================================================= |
106 """ | 84 """ |
107 | 85 |
108 | 86 |
109 def isBlockLevel(tag): | 87 def isBlockLevel(tag): |
110 """Check if the tag is a block level HTML tag.""" | 88 """Check if the tag is a block level HTML tag.""" |
111 if isinstance(tag, string_type): | 89 if isinstance(tag, string_type): |
112 return BLOCK_LEVEL_ELEMENTS.match(tag) | 90 return BLOCK_LEVEL_ELEMENTS.match(tag) |
113 # Some ElementTree tags are not strings, so return False. | 91 # Some ElementTree tags are not strings, so return False. |
114 return False | 92 return False |
115 | 93 |
| 94 |
| 95 def parseBoolValue(value, fail_on_errors=True, preserve_none=False): |
| 96 """Parses a string representing bool value. If parsing was successful, |
| 97 returns True or False. If preserve_none=True, returns True, False, |
| 98 or None. If parsing was not successful, raises ValueError, or, if |
| 99 fail_on_errors=False, returns None.""" |
| 100 if not isinstance(value, string_type): |
| 101 if preserve_none and value is None: |
| 102 return value |
| 103 return bool(value) |
| 104 elif preserve_none and value.lower() == 'none': |
| 105 return None |
| 106 elif value.lower() in ('true', 'yes', 'y', 'on', '1'): |
| 107 return True |
| 108 elif value.lower() in ('false', 'no', 'n', 'off', '0', 'none'): |
| 109 return False |
| 110 elif fail_on_errors: |
| 111 raise ValueError('Cannot parse bool value: %r' % value) |
| 112 |
| 113 |
116 """ | 114 """ |
117 MISC AUXILIARY CLASSES | 115 MISC AUXILIARY CLASSES |
118 ============================================================================= | 116 ============================================================================= |
119 """ | 117 """ |
120 | 118 |
| 119 |
121 class AtomicString(text_type): | 120 class AtomicString(text_type): |
122 """A string which should not be further processed.""" | 121 """A string which should not be further processed.""" |
123 pass | 122 pass |
124 | 123 |
125 | 124 |
126 class Processor(object): | 125 class Processor(object): |
127 def __init__(self, markdown_instance=None): | 126 def __init__(self, markdown_instance=None): |
128 if markdown_instance: | 127 if markdown_instance: |
129 self.markdown = markdown_instance | 128 self.markdown = markdown_instance |
130 | 129 |
131 | 130 |
132 class HtmlStash(object): | 131 class HtmlStash(object): |
133 """ | 132 """ |
134 This class is used for stashing HTML objects that we extract | 133 This class is used for stashing HTML objects that we extract |
135 in the beginning and replace with place-holders. | 134 in the beginning and replace with place-holders. |
136 """ | 135 """ |
137 | 136 |
138 def __init__ (self): | 137 def __init__(self): |
139 """ Create a HtmlStash. """ | 138 """ Create a HtmlStash. """ |
140 self.html_counter = 0 # for counting inline html segments | 139 self.html_counter = 0 # for counting inline html segments |
141 self.rawHtmlBlocks=[] | 140 self.rawHtmlBlocks = [] |
| 141 self.tag_counter = 0 |
| 142 self.tag_data = [] # list of dictionaries in the order tags appear |
142 | 143 |
143 def store(self, html, safe=False): | 144 def store(self, html, safe=False): |
144 """ | 145 """ |
145 Saves an HTML segment for later reinsertion. Returns a | 146 Saves an HTML segment for later reinsertion. Returns a |
146 placeholder string that needs to be inserted into the | 147 placeholder string that needs to be inserted into the |
147 document. | 148 document. |
148 | 149 |
149 Keyword arguments: | 150 Keyword arguments: |
150 | 151 |
151 * html: an html segment | 152 * html: an html segment |
152 * safe: label an html segment as safe for safemode | 153 * safe: label an html segment as safe for safemode |
153 | 154 |
154 Returns : a placeholder string | 155 Returns : a placeholder string |
155 | 156 |
156 """ | 157 """ |
157 self.rawHtmlBlocks.append((html, safe)) | 158 self.rawHtmlBlocks.append((html, safe)) |
158 placeholder = self.get_placeholder(self.html_counter) | 159 placeholder = self.get_placeholder(self.html_counter) |
159 self.html_counter += 1 | 160 self.html_counter += 1 |
160 return placeholder | 161 return placeholder |
161 | 162 |
162 def reset(self): | 163 def reset(self): |
163 self.html_counter = 0 | 164 self.html_counter = 0 |
164 self.rawHtmlBlocks = [] | 165 self.rawHtmlBlocks = [] |
165 | 166 |
166 def get_placeholder(self, key): | 167 def get_placeholder(self, key): |
167 return "%swzxhzdk:%d%s" % (STX, key, ETX) | 168 return HTML_PLACEHOLDER % key |
168 | 169 |
| 170 def store_tag(self, tag, attrs, left_index, right_index): |
| 171 """Store tag data and return a placeholder.""" |
| 172 self.tag_data.append({'tag': tag, 'attrs': attrs, |
| 173 'left_index': left_index, |
| 174 'right_index': right_index}) |
| 175 placeholder = TAG_PLACEHOLDER % str(self.tag_counter) |
| 176 self.tag_counter += 1 # equal to the tag's index in self.tag_data |
| 177 return placeholder |
OLD | NEW |