third_party/markdown/preprocessors.py - Issue 93743005: Support markdown template for html editor

Side by Side Diff: third_party/markdown/preprocessors.py

Issue 93743005: Support markdown template for html editor (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: fix path without dir Created 6 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 """

	2 PRE-PROCESSORS

	3 =============================================================================

	4

	5 Preprocessors work on source text before we start doing anything too

	6 complicated.

	7 """

	8

	9 from __future__ import absolute_import

	10 from __future__ import unicode_literals

	11 from . import util

	12 from . import odict

	13 import re

	14

	15

	16 def build_preprocessors(md_instance, **kwargs):

	17 """ Build the default set of preprocessors used by Markdown. """

	18 preprocessors = odict.OrderedDict()

	19 preprocessors['normalize_whitespace'] = NormalizeWhitespace(md_instance)

	20 if md_instance.safeMode != 'escape':

	21 preprocessors["html_block"] = HtmlBlockPreprocessor(md_instance)

	22 preprocessors["reference"] = ReferencePreprocessor(md_instance)

	23 return preprocessors

	24

	25

	26 class Preprocessor(util.Processor):

	27 """

	28 Preprocessors are run after the text is broken into lines.

	29

	30 Each preprocessor implements a "run" method that takes a pointer to a

	31 list of lines of the document, modifies it as necessary and returns

	32 either the same pointer or a pointer to a new list.

	33

	34 Preprocessors must extend markdown.Preprocessor.

	35

	36 """

	37 def run(self, lines):

	38 """

	39 Each subclass of Preprocessor should override the `run` method, which

	40 takes the document as a list of strings split by newlines and returns

	41 the (possibly modified) list of lines.

	42

	43 """

	44 pass

	45

	46

	47 class NormalizeWhitespace(Preprocessor):

	48 """ Normalize whitespace for consistant parsing. """

	49

	50 def run(self, lines):

	51 source = '\n'.join(lines)

	52 source = source.replace(util.STX, "").replace(util.ETX, "")

	53 source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"

	54 source = source.expandtabs(self.markdown.tab_length)

	55 source = re.sub(r'(?<=\n) +\n', '\n', source)

	56 return source.split('\n')

	57

	58

	59 class HtmlBlockPreprocessor(Preprocessor):

	60 """Remove html blocks from the text and store them for later retrieval."""

	61

	62 right_tag_patterns = ["</%s>", "%s>"]

	63 attrs_pattern = r"""

	64 \s+(?P<attr>[^>"'/= ]+)=(?P<q>['"])(?P<value>.*?)(?P=q) # attr="value"

	65 \| # OR

	66 \s+(?P<attr1>[^>"'/= ]+)=(?P<value1>[^> ]+) # attr=value

	67 \| # OR

	68 \s+(?P<attr2>[^>"'/= ]+) # attr

	69 """

	70 left_tag_pattern = r'^\<(?P<tag>[^> ]+)(?P<attrs>(%s))\s\/?\>?' % attrs_pa ttern

	71 attrs_re = re.compile(attrs_pattern, re.VERBOSE)

	72 left_tag_re = re.compile(left_tag_pattern, re.VERBOSE)

	73 markdown_in_raw = False

	74

	75 def _get_left_tag(self, block):

	76 m = self.left_tag_re.match(block)

	77 if m:

	78 tag = m.group('tag')

	79 raw_attrs = m.group('attrs')

	80 attrs = {}

	81 if raw_attrs:

	82 for ma in self.attrs_re.finditer(raw_attrs):

	83 if ma.group('attr'):

	84 if ma.group('value'):

	85 attrs[ma.group('attr').strip()] = ma.group('value')

	86 else:

	87 attrs[ma.group('attr').strip()] = ""

	88 elif ma.group('attr1'):

	89 if ma.group('value1'):

	90 attrs[ma.group('attr1').strip()] = ma.group('value1' )

	91 else:

	92 attrs[ma.group('attr1').strip()] = ""

	93 elif ma.group('attr2'):

	94 attrs[ma.group('attr2').strip()] = ""

	95 return tag, len(m.group(0)), attrs

	96 else:

	97 tag = block[1:].split(">", 1)[0].lower()

	98 return tag, len(tag)+2, {}

	99

	100 def _recursive_tagfind(self, ltag, rtag, start_index, block):

	101 while 1:

	102 i = block.find(rtag, start_index)

	103 if i == -1:

	104 return -1

	105 j = block.find(ltag, start_index)

	106 # if no ltag, or rtag found before another ltag, return index

	107 if (j > i or j == -1):

	108 return i + len(rtag)

	109 # another ltag found before rtag, use end of ltag as starting

	110 # point and search again

	111 j = block.find('>', j)

	112 start_index = self._recursive_tagfind(ltag, rtag, j + 1, block)

	113 if start_index == -1:

	114 # HTML potentially malformed- ltag has no corresponding

	115 # rtag

	116 return -1

	117

	118 def _get_right_tag(self, left_tag, left_index, block):

	119 for p in self.right_tag_patterns:

	120 tag = p % left_tag

	121 i = self._recursive_tagfind("<%s" % left_tag, tag, left_index, block )

	122 if i > 2:

	123 return tag.lstrip("<").rstrip(">"), i

	124 return block.rstrip()[-left_index:-1].lower(), len(block)

	125

	126 def _equal_tags(self, left_tag, right_tag):

	127 if left_tag[0] in ['?', '@', '%']: # handle PHP, etc.

	128 return True

	129 if ("/" + left_tag) == right_tag:

	130 return True

	131 if (right_tag == "--" and left_tag == "--"):

	132 return True

	133 elif left_tag == right_tag[1:] \

	134 and right_tag[0] == "/":

	135 return True

	136 else:

	137 return False

	138

	139 def _is_oneliner(self, tag):

	140 return (tag in ['hr', 'hr/'])

	141

	142 def run(self, lines):

	143 text = "\n".join(lines)

	144 new_blocks = []

	145 text = text.rsplit("\n\n")

	146 items = []

	147 left_tag = ''

	148 right_tag = ''

	149 in_tag = False # flag

	150

	151 while text:

	152 block = text[0]

	153 if block.startswith("\n"):

	154 block = block[1:]

	155 text = text[1:]

	156

	157 if block.startswith("\n"):

	158 block = block[1:]

	159

	160 if not in_tag:

	161 if block.startswith("<") and len(block.strip()) > 1:

	162

	163 if block[1] == "!":

	164 # is a comment block

	165 left_tag, left_index, attrs = "--", 2, {}

	166 else:

	167 left_tag, left_index, attrs = self._get_left_tag(block)

	168 right_tag, data_index = self._get_right_tag(left_tag,

	169 left_index,

	170 block)

	171 # keep checking conditions below and maybe just append

	172

	173 if data_index < len(block) \

	174 and (util.isBlockLevel(left_tag)

	175 or left_tag == '--'):

	176 text.insert(0, block[data_index:])

	177 block = block[:data_index]

	178

	179 if not (util.isBlockLevel(left_tag) \

	180 or block[1] in ["!", "?", "@", "%"]):

	181 new_blocks.append(block)

	182 continue

	183

	184 if self._is_oneliner(left_tag):

	185 new_blocks.append(block.strip())

	186 continue

	187

	188 if block.rstrip().endswith(">") \

	189 and self._equal_tags(left_tag, right_tag):

	190 if self.markdown_in_raw and 'markdown' in attrs.keys():

	191 start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?',

	192 '', block[:left_index])

	193 end = block[-len(right_tag)-2:]

	194 block = block[left_index:-len(right_tag)-2]

	195 new_blocks.append(

	196 self.markdown.htmlStash.store(start))

	197 new_blocks.append(block)

	198 new_blocks.append(

	199 self.markdown.htmlStash.store(end))

	200 else:

	201 new_blocks.append(

	202 self.markdown.htmlStash.store(block.strip()))

	203 continue

	204 else:

	205 # if is block level tag and is not complete

	206

	207 if util.isBlockLevel(left_tag) or left_tag == "--" \

	208 and not block.rstrip().endswith(">"):

	209 items.append(block.strip())

	210 in_tag = True

	211 else:

	212 new_blocks.append(

	213 self.markdown.htmlStash.store(block.strip()))

	214

	215 continue

	216

	217 new_blocks.append(block)

	218

	219 else:

	220 items.append(block)

	221

	222 right_tag, data_index = self._get_right_tag(left_tag, 0, block)

	223

	224 if self._equal_tags(left_tag, right_tag):

	225 # if find closing tag

	226

	227 if data_index < len(block):

	228 # we have more text after right_tag

	229 items[-1] = block[:data_index]

	230 text.insert(0, block[data_index:])

	231

	232 in_tag = False

	233 if self.markdown_in_raw and 'markdown' in attrs.keys():

	234 start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?',

	235 '', items[0][:left_index])

	236 items[0] = items[0][left_index:]

	237 end = items[-1][-len(right_tag)-2:]

	238 items[-1] = items[-1][:-len(right_tag)-2]

	239 new_blocks.append(

	240 self.markdown.htmlStash.store(start))

	241 new_blocks.extend(items)

	242 new_blocks.append(

	243 self.markdown.htmlStash.store(end))

	244 else:

	245 new_blocks.append(

	246 self.markdown.htmlStash.store('\n\n'.join(items)))

	247 items = []

	248

	249 if items:

	250 if self.markdown_in_raw and 'markdown' in attrs.keys():

	251 start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?',

	252 '', items[0][:left_index])

	253 items[0] = items[0][left_index:]

	254 end = items[-1][-len(right_tag)-2:]

	255 items[-1] = items[-1][:-len(right_tag)-2]

	256 new_blocks.append(

	257 self.markdown.htmlStash.store(start))

	258 new_blocks.extend(items)

	259 if end.strip():

	260 new_blocks.append(

	261 self.markdown.htmlStash.store(end))

	262 else:

	263 new_blocks.append(

	264 self.markdown.htmlStash.store('\n\n'.join(items)))

	265 #new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items)) )

	266 new_blocks.append('\n')

	267

	268 new_text = "\n\n".join(new_blocks)

	269 return new_text.split("\n")

	270

	271

	272 class ReferencePreprocessor(Preprocessor):

	273 """ Remove reference definitions from text and store for later use. """

	274

	275 TITLE = r'[ ](\"(.)\"\|\'(.)\'\|$(.)$)[ ]*'

	276 RE = re.compile(r'^[ ]{0,3}\[([^\]])\]:\s([^ ])[ ](%s)?$' % TITLE, re.DO TALL)

	277 TITLE_RE = re.compile(r'^%s$' % TITLE)

	278

	279 def run (self, lines):

	280 new_text = [];

	281 while lines:

	282 line = lines.pop(0)

	283 m = self.RE.match(line)

	284 if m:

	285 id = m.group(1).strip().lower()

	286 link = m.group(2).lstrip('<').rstrip('>')

	287 t = m.group(5) or m.group(6) or m.group(7)

	288 if not t:

	289 # Check next line for title

	290 tm = self.TITLE_RE.match(lines[0])

	291 if tm:

	292 lines.pop(0)

	293 t = tm.group(2) or tm.group(3) or tm.group(4)

	294 self.markdown.references[id] = (link, t)

	295 else:

	296 new_text.append(line)

	297

	298 return new_text #+ "\n"

OLD	NEW

« no previous file with comments | « third_party/markdown/postprocessors.py ('k') | third_party/markdown/serializers.py » ('j') | no next file with comments »