OLD | NEW |
| (Empty) |
1 # -*- coding: utf-8 -*- | |
2 from __future__ import unicode_literals | |
3 import re | |
4 import sys | |
5 | |
6 | |
7 """ | |
8 Python 3 Stuff | |
9 ============================================================================= | |
10 """ | |
# True when the running interpreter's major version is 3.
PY3 = sys.version_info[0] == 3

# Pick the string helpers that match the running interpreter.
if not PY3:  # pragma: no cover
    string_type = basestring  # noqa
    text_type = unicode  # noqa
    int2str = unichr  # noqa
else:  # pragma: no cover
    string_type = str
    text_type = str
    int2str = chr
21 | |
22 | |
23 """ | |
24 Constants you might want to modify | |
25 ----------------------------------------------------------------------------- | |
26 """ | |
27 | |
28 | |
# Case-insensitive pattern that matches a whole tag name against the
# alternatives listed below (each entry is a regex fragment, e.g. "h[1-6]").
BLOCK_LEVEL_ELEMENTS = re.compile(
    "^(" + "|".join((
        "p", "div", "h[1-6]", "blockquote", "pre", "table", "dl", "ol", "ul",
        "script", "noscript", "form", "fieldset", "iframe", "math",
        "hr", "hr/", "style", "li", "dt", "dd", "thead", "tbody",
        "tr", "th", "td", "section", "footer", "header", "group", "figure",
        "figcaption", "aside", "article", "canvas", "output",
        "progress", "video", "nav",
    )) + ")$",
    re.IGNORECASE
)
# Placeholders
# Every placeholder is wrapped in the two control characters below.
STX = '\u0002'  # STX ("Start of text") opens a placeholder
ETX = '\u0003'  # ETX ("End of text") closes a placeholder

AMP_SUBSTITUTE = STX + "amp" + ETX

# Inline placeholders: prefix, "%s" template, and a regex capturing the id.
INLINE_PLACEHOLDER_PREFIX = STX + "klzzwxh:"
INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)')

# HTML placeholders: "%s" template and a regex capturing the numeric id.
HTML_PLACEHOLDER = STX + "wzxhzdk:%s" + ETX
HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')

# Tag placeholders: "%s" template only.
TAG_PLACEHOLDER = STX + "hzzhzkh:%s" + ETX
48 | |
49 | |
50 """ | |
51 Constants you probably do not need to change | |
52 ----------------------------------------------------------------------------- | |
53 """ | |
54 | |
# Pairs of (first, last) characters delimiting the listed script ranges.
RTL_BIDI_RANGES = (
    # Hebrew (0590-05FF), Arabic (0600-06FF),
    # Syriac (0700-074F), Arabic supplement (0750-077F),
    # Thaana (0780-07BF), Nko (07C0-07FF).
    ('\u0590', '\u07FF'),
    # Tifinagh
    ('\u2D30', '\u2D7F'),
)
62 | |
63 # Extensions should use "markdown.util.etree" instead of "etree" (or do `from | |
64 # markdown.util import etree`). Do not import it by yourself. | |
65 | |
def _version_tuple(version):
    """Return the numeric components of a version string as a tuple of ints.

    Used so that versions are compared numerically; compared as plain
    strings, "1.0.10" would sort below "1.0.5".
    """
    return tuple(int(part) for part in re.findall(r'[0-9]+', version))


try:  # pragma: no cover
    # Is the C implementation of ElementTree available?
    import xml.etree.cElementTree as etree
    from xml.etree.ElementTree import Comment
    # Serializers (including ours) test with non-c Comment
    etree.test_comment = Comment
    if _version_tuple(etree.VERSION) < (1, 0, 5):
        # Raised inside the ``try`` so the handler below falls back to the
        # pure-Python implementation.
        raise RuntimeError("cElementTree version 1.0.5 or higher is required.")
except (ImportError, RuntimeError):  # pragma: no cover
    # Use the Python implementation of ElementTree?
    import xml.etree.ElementTree as etree
    if _version_tuple(etree.VERSION) < (1, 1):
        raise RuntimeError("ElementTree version 1.1 or higher is required")
79 | |
80 | |
81 """ | |
82 AUXILIARY GLOBAL FUNCTIONS | |
83 ============================================================================= | |
84 """ | |
85 | |
86 | |
def isBlockLevel(tag):
    """Check if the tag is a block level HTML tag.

    For a string ``tag`` the result of matching it against
    ``BLOCK_LEVEL_ELEMENTS`` is returned (a match object, or ``None`` when
    it does not match). Any non-string ``tag`` yields ``False``.
    """
    # Some ElementTree tags are not strings; those are never block level.
    if not isinstance(tag, string_type):
        return False
    return BLOCK_LEVEL_ELEMENTS.match(tag)
93 | |
94 | |
def parseBoolValue(value, fail_on_errors=True, preserve_none=False):
    """Parse a string representing a bool value.

    Non-string values are converted with ``bool()``, except that ``None``
    is returned unchanged when ``preserve_none`` is true.

    String values are compared case-insensitively:

    * 'true', 'yes', 'y', 'on', '1' give ``True``;
    * 'false', 'no', 'n', 'off', '0', 'none' give ``False``, except that
      'none' gives ``None`` when ``preserve_none`` is true.

    Any other string raises ``ValueError`` when ``fail_on_errors`` is true
    and returns ``None`` otherwise.
    """
    if not isinstance(value, string_type):
        if value is None and preserve_none:
            return None
        return bool(value)

    lowered = value.lower()
    # Must be tested before the "false" words, which also contain 'none'.
    if lowered == 'none' and preserve_none:
        return None
    if lowered in ('true', 'yes', 'y', 'on', '1'):
        return True
    if lowered in ('false', 'no', 'n', 'off', '0', 'none'):
        return False
    if fail_on_errors:
        raise ValueError('Cannot parse bool value: %r' % value)
    return None
112 | |
113 | |
114 """ | |
115 MISC AUXILIARY CLASSES | |
116 ============================================================================= | |
117 """ | |
118 | |
119 | |
class AtomicString(text_type):
    """A string which should not be further processed."""
123 | |
124 | |
class Processor(object):
    """Holds an optional reference to a Markdown instance."""

    def __init__(self, markdown_instance=None):
        """Store ``markdown_instance`` as ``self.markdown`` when it is truthy.

        When the argument is falsy (including the default ``None``), no
        ``markdown`` attribute is set on the instance.
        """
        if not markdown_instance:
            return
        self.markdown = markdown_instance
129 | |
130 | |
class HtmlStash(object):
    """
    This class is used for stashing HTML objects that we extract
    in the beginning and replace with place-holders.
    """

    def __init__(self):
        """ Create a HtmlStash. """
        self.html_counter = 0  # for counting inline html segments
        self.rawHtmlBlocks = []  # (html, safe) tuples in the order stored
        self.tag_counter = 0  # number of entries added by store_tag()
        self.tag_data = []  # list of dictionaries in the order tags appear

    def store(self, html, safe=False):
        """
        Saves an HTML segment for later reinsertion.  Returns a
        placeholder string that needs to be inserted into the
        document.

        Keyword arguments:

        * html: an html segment
        * safe: label an html segment as safe for safemode

        Returns : a placeholder string

        """
        self.rawHtmlBlocks.append((html, safe))
        placeholder = self.get_placeholder(self.html_counter)
        self.html_counter += 1
        return placeholder

    def reset(self):
        """Return the stash to its freshly-constructed, empty state.

        Both the raw HTML store and the tag store are cleared, so tag
        records do not accumulate across resets.
        """
        self.html_counter = 0
        self.rawHtmlBlocks = []
        self.tag_counter = 0
        self.tag_data = []

    def get_placeholder(self, key):
        """Return the HTML placeholder string for the given key."""
        return HTML_PLACEHOLDER % key

    def store_tag(self, tag, attrs, left_index, right_index):
        """Store tag data and return a placeholder."""
        self.tag_data.append({'tag': tag, 'attrs': attrs,
                              'left_index': left_index,
                              'right_index': right_index})
        placeholder = TAG_PLACEHOLDER % str(self.tag_counter)
        self.tag_counter += 1  # equal to the tag's index in self.tag_data
        return placeholder
OLD | NEW |