| Index: third_party/Python-Markdown/markdown/inlinepatterns.py
|
| diff --git a/third_party/markdown/inlinepatterns.py b/third_party/Python-Markdown/markdown/inlinepatterns.py
|
| similarity index 76%
|
| copy from third_party/markdown/inlinepatterns.py
|
| copy to third_party/Python-Markdown/markdown/inlinepatterns.py
|
| index 1d6fce423b0a982092a634202259ce856bb58815..95d358d7156ccd341f5c5f33a2a48d1255d122dd 100644
|
| --- a/third_party/markdown/inlinepatterns.py
|
| +++ b/third_party/Python-Markdown/markdown/inlinepatterns.py
|
| @@ -1,35 +1,3 @@
|
| -# markdown is released under the BSD license
|
| -# Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
|
| -# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
|
| -# Copyright 2004 Manfred Stienstra (the original version)
|
| -#
|
| -# All rights reserved.
|
| -#
|
| -# Redistribution and use in source and binary forms, with or without
|
| -# modification, are permitted provided that the following conditions are met:
|
| -#
|
| -# * Redistributions of source code must retain the above copyright
|
| -# notice, this list of conditions and the following disclaimer.
|
| -# * Redistributions in binary form must reproduce the above copyright
|
| -# notice, this list of conditions and the following disclaimer in the
|
| -# documentation and/or other materials provided with the distribution.
|
| -# * Neither the name of the <organization> nor the
|
| -# names of its contributors may be used to endorse or promote products
|
| -# derived from this software without specific prior written permission.
|
| -#
|
| -# THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY
|
| -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
| -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
| -# DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT
|
| -# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
| -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
| -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
| -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
| -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
| -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
| -# POSSIBILITY OF SUCH DAMAGE.
|
| -
|
| -
|
| """
|
| INLINE PATTERNS
|
| =============================================================================
|
| @@ -78,13 +46,13 @@ from __future__ import unicode_literals
|
| from . import util
|
| from . import odict
|
| import re
|
| -try:
|
| +try: # pragma: no cover
|
| from urllib.parse import urlparse, urlunparse
|
| -except ImportError:
|
| +except ImportError: # pragma: no cover
|
| from urlparse import urlparse, urlunparse
|
| -try:
|
| +try: # pragma: no cover
|
| from html import entities
|
| -except ImportError:
|
| +except ImportError: # pragma: no cover
|
| import htmlentitydefs as entities
|
|
|
|
|
| @@ -96,10 +64,12 @@ def build_inlinepatterns(md_instance, **kwargs):
|
| inlinePatterns["reference"] = ReferencePattern(REFERENCE_RE, md_instance)
|
| inlinePatterns["link"] = LinkPattern(LINK_RE, md_instance)
|
| inlinePatterns["image_link"] = ImagePattern(IMAGE_LINK_RE, md_instance)
|
| - inlinePatterns["image_reference"] = \
|
| - ImageReferencePattern(IMAGE_REFERENCE_RE, md_instance)
|
| - inlinePatterns["short_reference"] = \
|
| - ReferencePattern(SHORT_REF_RE, md_instance)
|
| + inlinePatterns["image_reference"] = ImageReferencePattern(
|
| + IMAGE_REFERENCE_RE, md_instance
|
| + )
|
| + inlinePatterns["short_reference"] = ReferencePattern(
|
| + SHORT_REF_RE, md_instance
|
| + )
|
| inlinePatterns["autolink"] = AutolinkPattern(AUTOLINK_RE, md_instance)
|
| inlinePatterns["automail"] = AutomailPattern(AUTOMAIL_RE, md_instance)
|
| inlinePatterns["linebreak"] = SubstituteTagPattern(LINE_BREAK_RE, 'br')
|
| @@ -107,7 +77,8 @@ def build_inlinepatterns(md_instance, **kwargs):
|
| inlinePatterns["html"] = HtmlPattern(HTML_RE, md_instance)
|
| inlinePatterns["entity"] = HtmlPattern(ENTITY_RE, md_instance)
|
| inlinePatterns["not_strong"] = SimpleTextPattern(NOT_STRONG_RE)
|
| - inlinePatterns["strong_em"] = DoubleTagPattern(STRONG_EM_RE, 'strong,em')
|
| + inlinePatterns["em_strong"] = DoubleTagPattern(EM_STRONG_RE, 'strong,em')
|
| + inlinePatterns["strong_em"] = DoubleTagPattern(STRONG_EM_RE, 'em,strong')
|
| inlinePatterns["strong"] = SimpleTagPattern(STRONG_RE, 'strong')
|
| inlinePatterns["emphasis"] = SimpleTagPattern(EMPHASIS_RE, 'em')
|
| if md_instance.smart_emphasis:
|
| @@ -122,46 +93,84 @@ The actual regular expressions for patterns
|
| """
|
|
|
| NOBRACKET = r'[^\]\[]*'
|
| -BRK = ( r'\[('
|
| - + (NOBRACKET + r'(\[')*6
|
| - + (NOBRACKET+ r'\])*')*6
|
| - + NOBRACKET + r')\]' )
|
| +BRK = (
|
| + r'\[(' +
|
| + (NOBRACKET + r'(\[')*6 +
|
| + (NOBRACKET + r'\])*')*6 +
|
| + NOBRACKET + r')\]'
|
| +)
|
| NOIMG = r'(?<!\!)'
|
|
|
| -BACKTICK_RE = r'(?<!\\)(`+)(.+?)(?<!`)\2(?!`)' # `e=f()` or ``e=f("`")``
|
| -ESCAPE_RE = r'\\(.)' # \<
|
| -EMPHASIS_RE = r'(\*)([^\*]+)\2' # *emphasis*
|
| -STRONG_RE = r'(\*{2}|_{2})(.+?)\2' # **strong**
|
| -STRONG_EM_RE = r'(\*{3}|_{3})(.+?)\2' # ***strong***
|
| -SMART_EMPHASIS_RE = r'(?<!\w)(_)(?!_)(.+?)(?<!_)\2(?!\w)' # _smart_emphasis_
|
| -EMPHASIS_2_RE = r'(_)(.+?)\2' # _emphasis_
|
| -LINK_RE = NOIMG + BRK + \
|
| -r'''\(\s*(<.*?>|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*?)\12\s*)?\)'''
|
| +# `e=f()` or ``e=f("`")``
|
| +BACKTICK_RE = r'(?<!\\)(`+)(.+?)(?<!`)\2(?!`)'
|
| +
|
| +# \<
|
| +ESCAPE_RE = r'\\(.)'
|
| +
|
| +# *emphasis*
|
| +EMPHASIS_RE = r'(\*)([^\*]+)\2'
|
| +
|
| +# **strong**
|
| +STRONG_RE = r'(\*{2}|_{2})(.+?)\2'
|
| +
|
| +# ***strongem*** or ***em*strong**
|
| +EM_STRONG_RE = r'(\*|_)\2{2}(.+?)\2(.*?)\2{2}'
|
| +
|
| +# ***strong**em*
|
| +STRONG_EM_RE = r'(\*|_)\2{2}(.+?)\2{2}(.*?)\2'
|
| +
|
| +# _smart_emphasis_
|
| +SMART_EMPHASIS_RE = r'(?<!\w)(_)(?!_)(.+?)(?<!_)\2(?!\w)'
|
| +
|
| +# _emphasis_
|
| +EMPHASIS_2_RE = r'(_)(.+?)\2'
|
| +
|
| # [text](url) or [text](<url>) or [text](url "title")
|
| +LINK_RE = NOIMG + BRK + \
|
| + r'''\(\s*(<.*?>|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*?)\12\s*)?\)'''
|
|
|
| -IMAGE_LINK_RE = r'\!' + BRK + r'\s*\((<.*?>|([^\)]*))\)'
|
| #  or 
|
| -REFERENCE_RE = NOIMG + BRK+ r'\s?\[([^\]]*)\]' # [Google][3]
|
| -SHORT_REF_RE = NOIMG + r'\[([^\]]+)\]' # [Google]
|
| -IMAGE_REFERENCE_RE = r'\!' + BRK + '\s?\[([^\]]*)\]' # ![alt text][2]
|
| -NOT_STRONG_RE = r'((^| )(\*|_)( |$))' # stand-alone * or _
|
| -AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^>]*)>' # <http://www.123.com>
|
| -AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>' # <me@example.com>
|
| +IMAGE_LINK_RE = r'\!' + BRK + r'\s*\((<.*?>|([^")]+"[^"]*"|[^\)]*))\)'
|
| +
|
| +# [Google][3]
|
| +REFERENCE_RE = NOIMG + BRK + r'\s?\[([^\]]*)\]'
|
| +
|
| +# [Google]
|
| +SHORT_REF_RE = NOIMG + r'\[([^\]]+)\]'
|
| +
|
| +# ![alt text][2]
|
| +IMAGE_REFERENCE_RE = r'\!' + BRK + '\s?\[([^\]]*)\]'
|
| +
|
| +# stand-alone * or _
|
| +NOT_STRONG_RE = r'((^| )(\*|_)( |$))'
|
| +
|
| +# <http://www.123.com>
|
| +AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^>]*)>'
|
| +
|
| +# <me@example.com>
|
| +AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>'
|
| +
|
| +# <...>
|
| +HTML_RE = r'(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)'
|
| +
|
| +# &
|
| +ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)'
|
|
|
| -HTML_RE = r'(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)' # <...>
|
| -ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)' # &
|
| -LINE_BREAK_RE = r' \n' # two spaces at end of line
|
| +# two spaces at end of line
|
| +LINE_BREAK_RE = r' \n'
|
|
|
|
|
| def dequote(string):
|
| """Remove quotes from around a string."""
|
| - if ( ( string.startswith('"') and string.endswith('"'))
|
| - or (string.startswith("'") and string.endswith("'")) ):
|
| + if ((string.startswith('"') and string.endswith('"')) or
|
| + (string.startswith("'") and string.endswith("'"))):
|
| return string[1:-1]
|
| else:
|
| return string
|
|
|
| -ATTR_RE = re.compile("\{@([^\}]*)=([^\}]*)}") # {@id=123}
|
| +
|
| +ATTR_RE = re.compile("\{@([^\}]*)=([^\}]*)}") # {@id=123}
|
| +
|
|
|
| def handleAttributes(text, parent):
|
| """Set values of an element based on attribute definitions ({@id=123})."""
|
| @@ -175,6 +184,7 @@ The pattern classes
|
| -----------------------------------------------------------------------------
|
| """
|
|
|
| +
|
| class Pattern(object):
|
| """Base class that inline patterns subclass. """
|
|
|
| @@ -188,7 +198,7 @@ class Pattern(object):
|
|
|
| """
|
| self.pattern = pattern
|
| - self.compiled_re = re.compile("^(.*?)%s(.*?)$" % pattern,
|
| + self.compiled_re = re.compile("^(.*?)%s(.*?)$" % pattern,
|
| re.DOTALL | re.UNICODE)
|
|
|
| # Api for Markdown to pass safe_mode into instance
|
| @@ -210,7 +220,7 @@ class Pattern(object):
|
| * m: A re match object containing a match of the pattern.
|
|
|
| """
|
| - pass
|
| + pass # pragma: no cover
|
|
|
| def type(self):
|
| """ Return class name, to define pattern type """
|
| @@ -220,9 +230,10 @@ class Pattern(object):
|
| """ Return unescaped text given text with an inline placeholder. """
|
| try:
|
| stash = self.markdown.treeprocessors['inline'].stashed_nodes
|
| - except KeyError:
|
| + except KeyError: # pragma: no cover
|
| return text
|
| - def itertext(el):
|
| +
|
| + def itertext(el): # pragma: no cover
|
| ' Reimplement Element.itertext for older python versions '
|
| tag = el.tag
|
| if not isinstance(tag, util.string_type) and tag is not None:
|
| @@ -234,6 +245,7 @@ class Pattern(object):
|
| yield s
|
| if e.tail:
|
| yield e.tail
|
| +
|
| def get_stash(m):
|
| id = m.group(1)
|
| if id in stash:
|
| @@ -242,17 +254,14 @@ class Pattern(object):
|
| return value
|
| else:
|
| # An etree Element - return text content only
|
| - return ''.join(itertext(value))
|
| + return ''.join(itertext(value))
|
| return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)
|
|
|
|
|
| class SimpleTextPattern(Pattern):
|
| """ Return a simple text of group(2) of a Pattern. """
|
| def handleMatch(self, m):
|
| - text = m.group(2)
|
| - if text == util.INLINE_PLACEHOLDER_PREFIX:
|
| - return None
|
| - return text
|
| + return m.group(2)
|
|
|
|
|
| class EscapePattern(Pattern):
|
| @@ -263,7 +272,7 @@ class EscapePattern(Pattern):
|
| if char in self.markdown.ESCAPED_CHARS:
|
| return '%s%s%s' % (util.STX, ord(char), util.ETX)
|
| else:
|
| - return '\\%s' % char
|
| + return None
|
|
|
|
|
| class SimpleTagPattern(Pattern):
|
| @@ -272,7 +281,7 @@ class SimpleTagPattern(Pattern):
|
| of a Pattern.
|
|
|
| """
|
| - def __init__ (self, pattern, tag):
|
| + def __init__(self, pattern, tag):
|
| Pattern.__init__(self, pattern)
|
| self.tag = tag
|
|
|
| @@ -284,13 +293,13 @@ class SimpleTagPattern(Pattern):
|
|
|
| class SubstituteTagPattern(SimpleTagPattern):
|
| """ Return an element of type `tag` with no children. """
|
| - def handleMatch (self, m):
|
| + def handleMatch(self, m):
|
| return util.etree.Element(self.tag)
|
|
|
|
|
| class BacktickPattern(Pattern):
|
| """ Return a `<code>` element containing the matching text. """
|
| - def __init__ (self, pattern):
|
| + def __init__(self, pattern):
|
| Pattern.__init__(self, pattern)
|
| self.tag = "code"
|
|
|
| @@ -311,12 +320,14 @@ class DoubleTagPattern(SimpleTagPattern):
|
| el1 = util.etree.Element(tag1)
|
| el2 = util.etree.SubElement(el1, tag2)
|
| el2.text = m.group(3)
|
| + if len(m.groups()) == 5:
|
| + el2.tail = m.group(4)
|
| return el1
|
|
|
|
|
| class HtmlPattern(Pattern):
|
| """ Store raw inline html and return a placeholder. """
|
| - def handleMatch (self, m):
|
| + def handleMatch(self, m):
|
| rawhtml = self.unescape(m.group(2))
|
| place_holder = self.markdown.htmlStash.store(rawhtml)
|
| return place_holder
|
| @@ -325,8 +336,9 @@ class HtmlPattern(Pattern):
|
| """ Return unescaped text given text with an inline placeholder. """
|
| try:
|
| stash = self.markdown.treeprocessors['inline'].stashed_nodes
|
| - except KeyError:
|
| + except KeyError: # pragma: no cover
|
| return text
|
| +
|
| def get_stash(m):
|
| id = m.group(1)
|
| value = stash.get(id)
|
| @@ -335,7 +347,7 @@ class HtmlPattern(Pattern):
|
| return self.markdown.serializer(value)
|
| except:
|
| return '\%s' % value
|
| -
|
| +
|
| return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)
|
|
|
|
|
| @@ -355,7 +367,7 @@ class LinkPattern(Pattern):
|
| el.set("href", "")
|
|
|
| if title:
|
| - title = dequote(self.unescape(title))
|
| + title = dequote(self.unescape(title))
|
| el.set("title", title)
|
| return el
|
|
|
| @@ -376,35 +388,36 @@ class LinkPattern(Pattern):
|
| `username:password@host:port`.
|
|
|
| """
|
| - url = url.replace(' ', '%20')
|
| if not self.markdown.safeMode:
|
| # Return immediately bipassing parsing.
|
| return url
|
| -
|
| +
|
| try:
|
| scheme, netloc, path, params, query, fragment = url = urlparse(url)
|
| - except ValueError:
|
| + except ValueError: # pragma: no cover
|
| # Bad url - so bad it couldn't be parsed.
|
| return ''
|
| -
|
| +
|
| locless_schemes = ['', 'mailto', 'news']
|
| allowed_schemes = locless_schemes + ['http', 'https', 'ftp', 'ftps']
|
| if scheme not in allowed_schemes:
|
| # Not a known (allowed) scheme. Not safe.
|
| return ''
|
| -
|
| - if netloc == '' and scheme not in locless_schemes:
|
| +
|
| + if netloc == '' and scheme not in locless_schemes: # pragma: no cover
|
| # This should not happen. Treat as suspect.
|
| return ''
|
|
|
| for part in url[2:]:
|
| if ":" in part:
|
| - # A colon in "path", "parameters", "query" or "fragment" is suspect.
|
| + # A colon in "path", "parameters", "query"
|
| + # or "fragment" is suspect.
|
| return ''
|
|
|
| # Url passes all tests. Return url as-is.
|
| return urlunparse(url)
|
|
|
| +
|
| class ImagePattern(LinkPattern):
|
| """ Return a img element from the given match. """
|
| def handleMatch(self, m):
|
| @@ -428,6 +441,7 @@ class ImagePattern(LinkPattern):
|
| el.set('alt', self.unescape(truealt))
|
| return el
|
|
|
| +
|
| class ReferencePattern(LinkPattern):
|
| """ Match to a stored reference and return link element. """
|
|
|
| @@ -445,7 +459,7 @@ class ReferencePattern(LinkPattern):
|
|
|
| # Clean up linebreaks in id
|
| id = self.NEWLINE_CLEANUP_RE.sub(' ', id)
|
| - if not id in self.markdown.references: # ignore undefined refs
|
| + if id not in self.markdown.references: # ignore undefined refs
|
| return None
|
| href, title = self.markdown.references[id]
|
|
|
| @@ -486,6 +500,7 @@ class AutolinkPattern(Pattern):
|
| el.text = util.AtomicString(m.group(2))
|
| return el
|
|
|
| +
|
| class AutomailPattern(Pattern):
|
| """
|
| Return a mailto link Element given an automail link (`<foo@example.com>`).
|
| @@ -512,4 +527,3 @@ class AutomailPattern(Pattern):
|
| ord(letter) for letter in mailto])
|
| el.set('href', mailto)
|
| return el
|
| -
|
|
|