| Index: appengine/monorail/third_party/markdown.py
|
| diff --git a/appengine/monorail/third_party/markdown.py b/appengine/monorail/third_party/markdown.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..cd415dca01949cfd05945175dd94ee0b6c501806
|
| --- /dev/null
|
| +++ b/appengine/monorail/third_party/markdown.py
|
| @@ -0,0 +1,677 @@
|
| +#!/usr/bin/python
|
import hashlib
import md5
import re
import string
import sys
|
| +
|
| +"""markdown.py: A Markdown-styled-text to HTML converter in Python.
|
| +
|
| +Usage:
|
| + ./markdown.py textfile.markdown
|
| +
|
| +Calling:
|
| + import markdown
|
| + somehtml = markdown.markdown(sometext)
|
| +
|
| +For other versions of markdown, see:
|
| + http://www.freewisdom.org/projects/python-markdown/
|
| + http://en.wikipedia.org/wiki/Markdown
|
| +"""
|
| +
|
| +__version__ = '1.0.1-2' # port of 1.0.1
|
| +__license__ = "GNU GPL 2"
|
| +__author__ = [
|
| + 'John Gruber <http://daringfireball.net/>',
|
| + 'Tollef Fog Heen <tfheen@err.no>',
|
| + 'Aaron Swartz <me@aaronsw.com>'
|
| +]
|
| +
|
def htmlquote(text):
    """Encode `text` for raw use in HTML.

    Replaces the five characters that are significant in HTML text and
    attribute values (``&``, ``<``, ``>``, ``'`` and ``"``) with their
    entity / character references and returns the resulting string.
    """
    replacements = (
        ("&", "&amp;"),  # Must be done first, or later entities get re-escaped!
        ("<", "&lt;"),
        (">", "&gt;"),
        ("'", "&#39;"),
        ('"', "&quot;"),
    )
    for raw, escaped in replacements:
        text = text.replace(raw, escaped)
    return text
|
| +
|
def semirandom(seed):
    """Return a deterministic pseudo-random float in [0, 1] derived from `seed`.

    The value is the sum of the 16 bytes of the MD5 digest of `seed`,
    divided by the largest possible sum (255 * 16).  The same seed always
    yields the same value.

    `seed` may be a byte string or a text string; text is encoded as UTF-8
    before hashing.
    """
    if not isinstance(seed, bytes):
        seed = seed.encode('utf-8')
    # bytearray yields integers when iterated on both Python 2 and Python 3,
    # so no ord() call is needed.
    digest = bytearray(hashlib.md5(seed).digest())
    return sum(digest) / (255 * 16.)
|
| +
|
def _md5_hexdigest(text):
    """Return the hexadecimal MD5 digest of `text`.

    Accepts byte strings as-is; text strings are encoded as UTF-8 first.
    """
    if not isinstance(text, bytes):
        text = text.encode('utf-8')
    return hashlib.md5(text).hexdigest()


class _Markdown:
    """Markdown-styled-text to HTML converter.

    Call `parse(text)` to convert a document.  `parse` resets all
    per-document state (`urls`, `titles`, `html_blocks`, `list_level`), so a
    single instance can be reused for several documents.

    Characters that must survive later substitution passes are temporarily
    replaced by their MD5 hex digest (see `escapetable`) and restored by
    `_UnescapeSpecialChars` at the end of `parse`.
    """

    emptyelt = " />"  # suffix used to close empty elements such as <hr />
    tabwidth = 4

    # Characters that may be backslash-escaped, and the placeholder (MD5 hex
    # digest) that stands in for each one while the text is being processed.
    escapechars = '\\`*_{}[]()>#+-.!'
    escapetable = dict((char, _md5_hexdigest(char)) for char in escapechars)

    r_multiline = re.compile("\n{2,}")
    r_stripspace = re.compile(r"^[ \t]+$", re.MULTILINE)

    def parse(self, text):
        """Convert the Markdown document `text` to HTML and return it."""
        self.urls = {}         # link id -> url
        self.titles = {}       # link id -> title
        self.html_blocks = {}  # md5 placeholder -> protected HTML block
        self.list_level = 0    # current list nesting depth

        # Normalise line endings and make sure the text ends with a blank
        # line, which several of the block-level patterns rely on.
        text = text.replace("\r\n", "\n")
        text = text.replace("\r", "\n")
        text += "\n\n"
        text = self._Detab(text)
        text = self.r_stripspace.sub("", text)
        text = self._HashHTMLBlocks(text)
        text = self._StripLinkDefinitions(text)
        text = self._RunBlockGamut(text)
        text = self._UnescapeSpecialChars(text)
        return text

    r_StripLinkDefinitions = re.compile(r"""
        ^[ ]{0,%d}\[(.+)\]:     # id = $1
          [ \t]*\n?[ \t]*
        <?(\S+?)>?              # url = $2
          [ \t]*\n?[ \t]*
        (?:
            (?<=\s)             # lookbehind for whitespace
            [\"\(]              # " is backslashed so it colorizes our code right
            (.+?)               # title = $3
            [\"\)]
            [ \t]*
        )?                      # title is optional
        (?:\n+|\Z)
        """ % (tabwidth-1), re.MULTILINE|re.VERBOSE)

    def _StripLinkDefinitions(self, text):
        """Remove link definitions from `text`, recording them on the instance.

        Each definition's url goes to `self.urls` and its optional title to
        `self.titles`, both keyed by the lower-cased link id.
        """
        def replacefunc(matchobj):
            (link_id, url, title) = matchobj.groups()
            #@@ case sensitivity?
            link_id = link_id.lower()
            self.urls[link_id] = self._EncodeAmpsAndAngles(url)
            if title is not None:
                self.titles[link_id] = title.replace('"', '&quot;')
            return ""

        return self.r_StripLinkDefinitions.sub(replacefunc, text)

    blocktagsb = r"p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|math"
    blocktagsa = blocktagsb + "|ins|del"

    # Block element whose closing tag sits at the start of a line.
    r_HashHTMLBlocks1 = re.compile(r"""
        (                       # save in $1
            ^                   # start of line (with /m)
            <(%s)               # start tag = $2
            \b                  # word break
            (.*\n)*?            # any number of lines, minimally matching
            </\2>               # the matching end tag
            [ \t]*              # trailing spaces/tabs
            (?=\n+|$)           # followed by a newline or end of document
        )
        """ % blocktagsa, re.MULTILINE | re.VERBOSE)

    # Same, but the closing tag may appear anywhere on its line.
    r_HashHTMLBlocks2 = re.compile(r"""
        (                       # save in $1
            ^                   # start of line (with /m)
            <(%s)               # start tag = $2
            \b                  # word break
            (.*\n)*?            # any number of lines, minimally matching
            .*</\2>             # the matching end tag
            [ \t]*              # trailing spaces/tabs
            (?=\n+|\Z)          # followed by a newline or end of document
        )
        """ % blocktagsb, re.MULTILINE | re.VERBOSE)

    r_HashHR = re.compile(r"""
        (?:
            (?<=\n\n)           # Starting after a blank line
            |                   # or
            \A\n?               # the beginning of the doc
        )
        (                       # save in $1
            [ ]{0,%d}
            <(hr)               # start tag = $2
            \b                  # word break
            ([^<>])*?           #
            /?>                 # the matching end tag
            [ \t]*
            (?=\n{2,}|\Z)       # followed by a blank line or end of document
        )
        """ % (tabwidth-1), re.VERBOSE)

    r_HashComment = re.compile(r"""
        (?:
            (?<=\n\n)           # Starting after a blank line
            |                   # or
            \A\n?               # the beginning of the doc
        )
        (                       # save in $1
            [ ]{0,%d}
            (?:
                <!
                (--.*?--\s*)+
                >
            )
            [ \t]*
            (?=\n{2,}|\Z)       # followed by a blank line or end of document
        )
        """ % (tabwidth-1), re.VERBOSE)

    def _HashHTMLBlocks(self, text):
        """Replace block-level HTML in `text` with MD5 placeholders.

        Each matched block is stored in `self.html_blocks` under its MD5 hex
        digest and replaced by that digest surrounded by blank lines, so that
        later passes leave it alone.  `_FormParagraphs` restores the blocks.
        """
        def handler(m):
            block = m.group(1)
            key = _md5_hexdigest(block)
            self.html_blocks[key] = block
            return "\n\n%s\n\n" % key

        for pattern in (self.r_HashHTMLBlocks1, self.r_HashHTMLBlocks2,
                        self.r_HashHR, self.r_HashComment):
            text = pattern.sub(handler, text)
        return text

    #@@@ wrong!
    r_hr1 = re.compile(r'^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$', re.M)
    r_hr2 = re.compile(r'^[ ]{0,2}([ ]?-[ ]?){3,}[ \t]*$', re.M)
    r_hr3 = re.compile(r'^[ ]{0,2}([ ]?_[ ]?){3,}[ \t]*$', re.M)

    def _RunBlockGamut(self, text):
        """Apply all block-level transformations to `text`."""
        text = self._DoHeaders(text)
        rule = "\n<hr%s\n" % self.emptyelt
        for pattern in (self.r_hr1, self.r_hr2, self.r_hr3):
            text = pattern.sub(rule, text)
        text = self._DoLists(text)
        text = self._DoCodeBlocks(text)
        text = self._DoBlockQuotes(text)

        # We did this in parse()
        # to escape the source
        # now it's stuff _we_ made
        # so we don't wrap it in <p>s.
        text = self._HashHTMLBlocks(text)
        text = self._FormParagraphs(text)
        return text

    r_NewLine = re.compile(" {2,}\n")

    def _RunSpanGamut(self, text):
        """Apply all span-level (inline) transformations to `text`."""
        text = self._DoCodeSpans(text)
        text = self._EscapeSpecialChars(text)
        text = self._DoImages(text)
        text = self._DoAnchors(text)
        text = self._DoAutoLinks(text)
        text = self._EncodeAmpsAndAngles(text)
        text = self._DoItalicsAndBold(text)
        # Two or more spaces at the end of a line become a hard line break.
        text = self.r_NewLine.sub(" <br%s\n" % self.emptyelt, text)
        return text

    def _EscapeSpecialChars(self, text):
        """Protect `*` and `_` inside HTML tags and encode backslash escapes.

        Inside tags, `*` and `_` are replaced by their placeholders so they
        are not mistaken for emphasis; outside tags, backslash-escaped
        characters are replaced by their placeholders.
        """
        pieces = []
        for kind, chunk in self._TokenizeHTML(text):
            if kind == "tag":
                chunk = chunk.replace('*', self.escapetable["*"])
                chunk = chunk.replace('_', self.escapetable["_"])
                pieces.append(chunk)
            else:
                pieces.append(self._EncodeBackslashEscapes(chunk))
        return "".join(pieces)

    r_DoAnchors1 = re.compile(
        r""" (                  # wrap whole match in $1
               \[
                 (.*?)          # link text = $2
                 # [for bracket nesting, see below]
               \]

               [ ]?             # one optional space
               (?:\n[ ]*)?      # one optional newline followed by spaces

               \[
                 (.*?)          # id = $3
               \]
             )
        """, re.S|re.VERBOSE)
    r_DoAnchors2 = re.compile(
        r""" (                  # wrap whole match in $1
               \[
                 (.*?)          # link text = $2
               \]
               \(               # literal paren
                 [ \t]*
                 <?(.+?)>?      # href = $3
                 [ \t]*
                 (              # $4
                   ([\'\"])     # quote char = $5
                   (.*?)        # Title = $6
                   \5           # matching quote
                 )?             # title is optional
               \)
             )
        """, re.S|re.VERBOSE)

    def _DoAnchors(self, text):
        """Turn reference-style and inline Markdown links into <a> tags."""
        # We here don't do the same as the perl version, as python's regex
        # engine gives us no way to match brackets.

        def protect(value):
            # Keep * and _ in attribute values from being read as emphasis.
            value = value.replace("*", self.escapetable["*"])
            return value.replace("_", self.escapetable["_"])

        def handler1(m):
            # Reference-style link: [text][id]
            whole_match = m.group(1)
            link_text = m.group(2)
            link_id = m.group(3).lower()
            if not link_id:
                # Implicit id: [text][] uses the link text as the id.
                link_id = link_text.lower()

            if link_id not in self.urls:
                # Unknown reference: leave the source text untouched.
                return whole_match

            res = '<a href="%s"' % htmlquote(protect(self.urls[link_id]))
            title = self.titles.get(link_id, None)
            if title:
                res += ' title="%s"' % htmlquote(protect(title))
            res += ">%s</a>" % htmlquote(link_text)
            return res

        def handler2(m):
            # Inline link: [text](url "title")
            link_text = m.group(2)
            url = m.group(3)
            title = m.group(6)

            res = '''<a href="%s"''' % htmlquote(protect(url))
            if title:
                title = title.replace('"', '&quot;')
                res += ' title="%s"' % htmlquote(protect(title))
            res += ">%s</a>" % htmlquote(link_text)
            return res

        text = self.r_DoAnchors1.sub(handler1, text)
        text = self.r_DoAnchors2.sub(handler2, text)
        return text

    r_DoImages1 = re.compile(
        r""" (                  # wrap whole match in $1
               !\[
                 (.*?)          # alt text = $2
               \]

               [ ]?             # one optional space
               (?:\n[ ]*)?      # one optional newline followed by spaces

               \[
                 (.*?)          # id = $3
               \]

             )
        """, re.VERBOSE|re.S)

    r_DoImages2 = re.compile(
        r""" (                  # wrap whole match in $1
               !\[
                 (.*?)          # alt text = $2
               \]
               \(               # literal paren
                 [ \t]*
                 <?(\S+?)>?     # src url = $3
                 [ \t]*
                 (              # $4
                   ([\'\"])     # quote char = $5
                   (.*?)        # title = $6
                   \5           # matching quote
                   [ \t]*
                 )?             # title is optional
               \)
             )
        """, re.VERBOSE|re.S)

    def _DoImages(self, text):
        """Turn reference-style and inline Markdown images into <img> tags."""
        def protect(value):
            # Keep * and _ in attribute values from being read as emphasis.
            value = value.replace("*", self.escapetable["*"])
            return value.replace("_", self.escapetable["_"])

        def handler1(m):
            # Reference-style image: ![alt][id]
            whole_match = m.group(1)
            alt_text = m.group(2)
            link_id = m.group(3).lower()

            if not link_id:
                # Implicit id: ![alt][] uses the alt text as the id.
                link_id = alt_text.lower()

            if link_id not in self.urls:
                # Unknown reference: leave the source text untouched.
                return whole_match

            alt_text = alt_text.replace('"', "&quot;")
            url = protect(self.urls[link_id])
            res = '''<img src="%s" alt="%s"''' % (htmlquote(url), htmlquote(alt_text))
            if link_id in self.titles:
                res += ' title="%s"' % htmlquote(protect(self.titles[link_id]))
            res += self.emptyelt
            return res

        def handler2(m):
            # Inline image: ![alt](url "title")
            alt_text = m.group(2)
            url = m.group(3)
            title = m.group(6) or ''

            alt_text = alt_text.replace('"', "&quot;")
            title = title.replace('"', "&quot;")
            res = '<img src="%s" alt="%s"' % (htmlquote(protect(url)), htmlquote(alt_text))
            # The title attribute is always emitted for inline images; it is
            # empty when the source gave no title.
            res += ' title="%s"' % htmlquote(protect(title))
            res += self.emptyelt
            return res

        text = self.r_DoImages1.sub(handler1, text)
        text = self.r_DoImages2.sub(handler2, text)
        return text

    r_DoHeaders = re.compile(r"^(\#{1,6})[ \t]*(.+?)[ \t]*\#*\n+", re.VERBOSE|re.M)

    def _DoHeaders(self, text):
        """Convert setext (underlined) and atx (`#`-prefixed) headers."""
        def findheader(text, c, n):
            # Setext headers: a non-blank title line, a line made up only of
            # the character `c`, then a blank line.  `n` is the header level
            # as a string.
            textl = text.split('\n')
            for i in range(len(textl)):
                # Lines are removed as headers are found, so the list may be
                # shorter than the range; an underline also needs a line
                # after it.
                if i + 1 >= len(textl):
                    break
                # The first line has no title line above it.
                if i == 0:
                    continue
                stripped = textl[i].strip()
                count = stripped.count(c)
                if (count > 0 and count == len(stripped)
                        and textl[i+1].strip() == ''
                        and textl[i-1].strip() != ''):
                    del textl[i]  # drop the underline
                    textl[i-1] = '<h'+n+'>'+self._RunSpanGamut(textl[i-1])+'</h'+n+'>'
                    del textl[i]  # drop the blank line that followed it
            return '\n'.join(textl)

        def handler(m):
            # atx headers: the number of leading #s is the header level.
            level = len(m.group(1))
            header = self._RunSpanGamut(m.group(2))
            return "<h%s>%s</h%s>\n\n" % (level, header, level)

        text = findheader(text, '=', '1')
        text = findheader(text, '-', '2')
        text = self.r_DoHeaders.sub(handler, text)
        return text

    rt_l = r"""
        (
            (
                [ ]{0,%d}
                ([*+-]|\d+[.])
                [ \t]+
            )
            (?:.+?)
            (
                \Z
                |
                \n{2,}
                (?=\S)
                (?![ \t]* ([*+-]|\d+[.])[ \t]+)
            )
        )
        """ % (tabwidth - 1)
    r_DoLists = re.compile('^'+rt_l, re.M | re.VERBOSE | re.S)
    r_DoListsTop = re.compile(
        r'(?:\A\n?|(?<=\n\n))'+rt_l, re.M | re.VERBOSE | re.S)

    def _DoLists(self, text):
        """Convert Markdown lists in `text` into <ul>/<ol> elements."""
        def handler(m):
            if m.group(3) in ("*", "-", "+"):
                list_type = "ul"
            else:
                list_type = "ol"
            listn = self.r_multiline.sub("\n\n\n", m.group(1))
            res = self._ProcessListItems(listn)
            return "<%s>\n%s</%s>\n" % (list_type, res, list_type)

        # Inside a list any line start may begin a sub-list; at the top level
        # a list has to start the document or follow a blank line.
        if self.list_level:
            pattern = self.r_DoLists
        else:
            pattern = self.r_DoListsTop
        return pattern.sub(handler, text)

    r_multiend = re.compile(r"\n{2,}\Z")
    r_ProcessListItems = re.compile(r"""
        (\n)?                           # leading line = $1
        (^[ \t]*)                       # leading whitespace = $2
        ([*+-]|\d+[.]) [ \t]+           # list marker = $3
        ((?:.+?)                        # list item text = $4
        (\n{1,2}))
        (?= \n* (\Z | \2 ([*+-]|\d+[.]) [ \t]+))
        """, re.VERBOSE | re.M | re.S)

    def _ProcessListItems(self, text):
        """Turn the body of one list into a sequence of <li> elements."""
        self.list_level += 1
        text = self.r_multiend.sub("\n", text)

        def handler(m):
            item = m.group(4)
            leading_line = m.group(1)

            if leading_line or self.r_multiline.search(item):
                # Items separated by blank lines get full block processing.
                item = self._RunBlockGamut(self._Outdent(item))
            else:
                # Recursion for sub-lists.
                item = self._DoLists(self._Outdent(item))
                if item.endswith("\n"):
                    item = item[:-1]  # chomp
                item = self._RunSpanGamut(item)
            return "<li>%s</li>\n" % item

        text = self.r_ProcessListItems.sub(handler, text)
        self.list_level -= 1
        return text

    r_DoCodeBlocks = re.compile(r"""
        (?:\n\n|\A)
        (                               # $1 = the code block
            (?:
                (?:[ ]{%d} | \t)        # Lines must start with a tab or equiv
                .*\n+
            )+
        )
        ((?=^[ ]{0,%d}\S)|\Z)           # Lookahead for non-space/end of doc
        """ % (tabwidth, tabwidth), re.M | re.VERBOSE)

    def _DoCodeBlocks(self, text):
        """Wrap indented code blocks in <pre><code> elements."""
        def handler(m):
            codeblock = self._EncodeCode(self._Outdent(m.group(1)))
            codeblock = self._Detab(codeblock)
            codeblock = codeblock.lstrip("\n")
            codeblock = codeblock.rstrip()
            return "\n\n<pre><code>%s\n</code></pre>\n\n" % codeblock

        return self.r_DoCodeBlocks.sub(handler, text)

    r_DoCodeSpans = re.compile(r"""
        (`+)            # $1 = Opening run of `
        (.+?)           # $2 = The code block
        (?<!`)
        \1              # Matching closer
        (?!`)
        """, re.I|re.VERBOSE)

    def _DoCodeSpans(self, text):
        """Wrap backtick-delimited spans in <code> elements."""
        def handler(m):
            return "<code>%s</code>" % self._EncodeCode(m.group(2).strip())

        return self.r_DoCodeSpans.sub(handler, text)

    def _EncodeCode(self, text):
        """Escape `text` so it is shown literally inside a code element.

        Encodes `&`, `<` and `>` as HTML entities, then replaces the Markdown
        characters `*_{}[]\\` with their placeholders so later passes do not
        interpret them.
        """
        text = text.replace("&", "&amp;")  # must come before the others
        text = text.replace("<", "&lt;")
        text = text.replace(">", "&gt;")
        for c in "*_{}[]\\":
            text = text.replace(c, self.escapetable[c])
        return text

    r_DoBold = re.compile(r"(\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1", re.VERBOSE | re.S)
    r_DoItalics = re.compile(r"(\*|_) (?=\S) (.+?) (?<=\S) \1", re.VERBOSE | re.S)

    def _DoItalicsAndBold(self, text):
        """Convert `**x**`/`__x__` to <strong> and `*x*`/`_x_` to <em>."""
        # Bold must be handled first so its doubled markers are not consumed
        # as two italics markers.
        text = self.r_DoBold.sub(r"<strong>\2</strong>", text)
        text = self.r_DoItalics.sub(r"<em>\2</em>", text)
        return text

    r_start = re.compile(r"^", re.M)
    r_DoBlockQuotes1 = re.compile(r"^[ \t]*>[ \t]?", re.M)
    r_DoBlockQuotes2 = re.compile(r"^[ \t]+$", re.M)
    r_DoBlockQuotes3 = re.compile(r"""
        (                       # Wrap whole match in $1
            (
                ^[ \t]*>[ \t]?  # '>' at the start of a line
                .+\n            # rest of the first line
                (.+\n)*         # subsequent consecutive lines
                \n*             # blanks
            )+
        )""", re.M | re.VERBOSE)
    r_protectpre = re.compile(r'(\s*<pre>.+?</pre>)', re.S)
    r_propre = re.compile(r'^ ', re.M)

    def _DoBlockQuotes(self, text):
        """Convert `>`-prefixed blocks into <blockquote> elements."""
        def prehandler(m):
            # Undo the indentation added below inside <pre> blocks, where
            # leading whitespace is significant.
            return self.r_propre.sub('', m.group(1))

        def handler(m):
            bq = m.group(1)
            bq = self.r_DoBlockQuotes1.sub("", bq)  # trim one level of quoting
            bq = self.r_DoBlockQuotes2.sub("", bq)  # trim whitespace-only lines
            bq = self._RunBlockGamut(bq)            # recurse
            bq = self.r_start.sub(" ", bq)          # indent the quoted content
            bq = self.r_protectpre.sub(prehandler, bq)
            return "<blockquote>\n%s\n</blockquote>\n\n" % bq

        return self.r_DoBlockQuotes3.sub(handler, text)

    r_tabbed = re.compile(r"^([ \t]*)")

    def _FormParagraphs(self, text):
        """Wrap paragraphs in <p> tags and restore hashed HTML blocks.

        `text` is split on blank lines.  A chunk that is an `html_blocks`
        placeholder is replaced by the stored HTML; every other chunk is run
        through the span gamut and wrapped in <p>...</p>.
        """
        grafs = self.r_multiline.split(text.strip("\n"))

        for index, graf in enumerate(grafs):
            t = graf.strip() #@@?
            if t in self.html_blocks:
                grafs[index] = self.html_blocks[t]
            else:
                t = self._RunSpanGamut(t)
                grafs[index] = self.r_tabbed.sub(r"<p>", t) + "</p>"

        return "\n\n".join(grafs)

    r_EncodeAmps = re.compile(r"&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)")
    r_EncodeAngles = re.compile(r"<(?![a-z/?\$!])")

    def _EncodeAmpsAndAngles(self, text):
        """Encode ampersands and `<` characters that are not already markup.

        An `&` that does not start an entity becomes `&amp;`; a `<` that does
        not start a tag becomes `&lt;`.
        """
        text = self.r_EncodeAmps.sub("&amp;", text)
        text = self.r_EncodeAngles.sub("&lt;", text)
        return text

    def _EncodeBackslashEscapes(self, text):
        """Replace backslash-escaped characters with their placeholders."""
        for char in self.escapechars:
            text = text.replace("\\" + char, self.escapetable[char])
        return text

    r_link = re.compile(r"<((https?|ftp):[^\'\">\s]+)>", re.I)
    r_email = re.compile(r"""
        <
        (?:mailto:)?
        (
            [-.\w]+
            \@
            [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
        )
        >""", re.VERBOSE|re.I)

    def _DoAutoLinks(self, text):
        """Convert `<http://...>` and `<user@host>` into links."""
        text = self.r_link.sub(r'<a href="\1">\1</a>', text)

        def handler(m):
            address = m.group(1)
            return self._EncodeEmailAddress(self._UnescapeSpecialChars(address))

        return self.r_email.sub(handler, text)

    r_EncodeEmailAddress = re.compile(r">.+?:")

    def _EncodeEmailAddress(self, text):
        """Return an obfuscated mailto: link for the e-mail address `text`.

        Each character is written as a decimal reference, a hexadecimal
        reference, or itself.  The choice is driven by `semirandom`, seeded
        with the output built so far, so the result is deterministic.
        """
        encode = [
            lambda x: "&#%s;" % ord(x),
            lambda x: "&#x%X;" % ord(x),
            lambda x: x
        ]

        text = "mailto:" + text
        addr = ""
        for c in text:
            if c == ':':
                # The colon is always left as-is.
                addr += c
                continue

            r = semirandom(addr)
            if r < 0.45:
                addr += encode[1](c)
            elif r > 0.9 and c != '@':
                # '@' is never written literally.
                addr += encode[2](c)
            else:
                addr += encode[0](c)

        text = '<a href="%s">%s</a>' % (addr, addr)
        # Strip the "mailto:" part from the visible link text.
        text = self.r_EncodeEmailAddress.sub('>', text)
        return text

    def _UnescapeSpecialChars(self, text):
        """Swap every placeholder in `text` back to its literal character."""
        for char, placeholder in self.escapetable.items():
            text = text.replace(placeholder, char)
        return text

    # Tags are matched with up to `tokenize_depth` levels of nested <...>.
    tokenize_depth = 6
    tokenize_nested_tags = '|'.join([r'(?:<[a-z/!$](?:[^<>]'] * tokenize_depth) + (')*>)' * tokenize_depth)
    r_TokenizeHTML = re.compile(
        r"""(?: <! ( -- .*? -- \s* )+ > ) |  # comment
            (?: <\? .*? \?> ) |              # processing instruction
            %s                               # nested tags
        """ % tokenize_nested_tags, re.I|re.VERBOSE)

    def _TokenizeHTML(self, text):
        """Split `text` into a list of `["tag", s]` and `["text", s]` tokens.

        Tokens appear in document order, and concatenating their second
        elements reproduces `text`.
        """
        pos = 0
        tokens = []
        for matchobj in self.r_TokenizeHTML.finditer(text):
            tag_start = matchobj.start()
            if pos < tag_start:
                tokens.append(["text", text[pos:tag_start]])
            tokens.append(["tag", matchobj.group(0)])
            pos = matchobj.end()

        if pos < len(text):
            tokens.append(["text", text[pos:]])
        return tokens

    r_Outdent = re.compile(r"""^(\t|[ ]{1,%d})""" % tabwidth, re.M)

    def _Outdent(self, text):
        """Remove one level of indentation from every line of `text`."""
        return self.r_Outdent.sub("", text)

    def _Detab(self, text):
        """Expand tabs in `text` to `tabwidth`-column tab stops."""
        return text.expandtabs(self.tabwidth)
|
| +
|
def Markdown(*args, **kw):
    """Convert Markdown text to HTML.

    Builds a fresh `_Markdown` converter and forwards every positional and
    keyword argument unchanged to its `parse` method, returning the result.
    """
    converter = _Markdown()
    return converter.parse(*args, **kw)


# Lower-case alias, so callers can write `markdown.markdown(text)`.
markdown = Markdown
|
| +
|
if __name__ == '__main__':
    # Convert the file named on the command line, or standard input when no
    # file is given, and print the resulting HTML.
    if len(sys.argv) > 1:
        # Use a context manager so the input file is closed deterministically
        # instead of being left for the garbage collector.
        with open(sys.argv[1]) as source_file:
            source_text = source_file.read()
    else:
        source_text = sys.stdin.read()
    # A parenthesised single argument prints identically whether `print` is
    # a statement or a function.
    print(Markdown(source_text))
|
|
|