| OLD | NEW |
| (Empty) |
| 1 <?php | |
| 2 # | |
| 3 # Markdown Extra - A text-to-HTML conversion tool for web writers | |
| 4 # | |
| 5 # PHP Markdown & Extra | |
| 6 # Copyright (c) 2004-2008 Michel Fortin | |
| 7 # <http://www.michelf.com/projects/php-markdown/> | |
| 8 # | |
| 9 # Original Markdown | |
| 10 # Copyright (c) 2004-2006 John Gruber | |
| 11 # <http://daringfireball.net/projects/markdown/> | |
| 12 # | |
| 13 | |
| 14 | |
define( 'MARKDOWN_VERSION', "1.0.1m" ); # Sat 21 Jun 2008
define( 'MARKDOWNEXTRA_VERSION', "1.2.3" ); # Wed 31 Dec 2008


#
# Global default settings:
#
# Every constant below is only a default. A value defined by the host
# application before this file is loaded is kept as is. The explicit
# defined() guard replaces the former error-suppressed define(), so no
# redefinition error is raised (and then hidden) in the first place.
#

# Change to ">" for HTML output
if (!defined('MARKDOWN_EMPTY_ELEMENT_SUFFIX')) {
	define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX', " />");
}

# Define the width of a tab for code blocks.
if (!defined('MARKDOWN_TAB_WIDTH')) {
	define( 'MARKDOWN_TAB_WIDTH', 4 );
}

# Optional title attribute for footnote links and backlinks.
if (!defined('MARKDOWN_FN_LINK_TITLE')) {
	define( 'MARKDOWN_FN_LINK_TITLE', "" );
}
if (!defined('MARKDOWN_FN_BACKLINK_TITLE')) {
	define( 'MARKDOWN_FN_BACKLINK_TITLE', "" );
}

# Optional class attribute for footnote links and backlinks.
if (!defined('MARKDOWN_FN_LINK_CLASS')) {
	define( 'MARKDOWN_FN_LINK_CLASS', "" );
}
if (!defined('MARKDOWN_FN_BACKLINK_CLASS')) {
	define( 'MARKDOWN_FN_BACKLINK_CLASS', "" );
}


#
# WordPress settings:
#

# Change to false to remove Markdown from posts and/or comments.
if (!defined('MARKDOWN_WP_POSTS')) {
	define( 'MARKDOWN_WP_POSTS', true );
}
if (!defined('MARKDOWN_WP_COMMENTS')) {
	define( 'MARKDOWN_WP_COMMENTS', true );
}



### Standard Function Interface ###

# Name of the class instantiated by Markdown() and mdwp_MarkdownPost().
if (!defined('MARKDOWN_PARSER_CLASS')) {
	define( 'MARKDOWN_PARSER_CLASS', 'MarkdownExtra_Parser' );
}
| 51 | |
function Markdown($text) {
#
# Convert $text with the configured parser and return the result.
#
# One parser object of class MARKDOWN_PARSER_CLASS is created on the first
# call and kept in a static variable, so later calls reuse it.
#
	static $parser = null;

	if ($parser === null) {
		$class_name = MARKDOWN_PARSER_CLASS;
		$parser = new $class_name;
	}

	$html = $parser->transform($text);
	return $html;
}
| 66 | |
| 67 | |
| 68 ### WordPress Plugin Interface ### | |
| 69 | |
| 70 /* | |
| 71 Plugin Name: Markdown Extra | |
| 72 Plugin URI: http://www.michelf.com/projects/php-markdown/ | |
| 73 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdo
wn syntax</a> allows you to write using an easy-to-read, easy-to-write plain tex
t format. Based on the original Perl version by <a href="http://daringfireball.n
et/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">Mo
re...</a> | |
| 74 Version: 1.2.3 | |
| 75 Author: Michel Fortin | |
| 76 Author URI: http://www.michelf.com/ | |
| 77 */ | |
| 78 | |
| 79 if (isset($wp_version)) { | |
| 80 # More details about how it works here: | |
| 81 # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/> | |
| 82 | |
| 83 # Post content and excerpts | |
| 84 # - Remove WordPress paragraph generator. | |
| 85 # - Run Markdown on excerpt, then remove all tags. | |
| 86 # - Add paragraph tag around the excerpt, but remove it for the excerpt rss. | |
if (MARKDOWN_WP_POSTS) {
	# Filters are grouped per hook below. Calls that target different hooks
	# (or different priorities on one hook) do not depend on each other.

	# the_content: no wpautop, Markdown at 6, tag balancing at 50.
	remove_filter('the_content', 'wpautop');
	add_filter('the_content', 'mdwp_MarkdownPost', 6);
	add_filter('the_content', 'balanceTags', 50);

	# the_content_rss: no wpautop, Markdown at 6.
	remove_filter('the_content_rss', 'wpautop');
	add_filter('the_content_rss', 'mdwp_MarkdownPost', 6);

	# get_the_excerpt: Markdown at 6, trim at 7, tag balancing at 9.
	add_filter('get_the_excerpt', 'mdwp_MarkdownPost', 6);
	add_filter('get_the_excerpt', 'trim', 7);
	add_filter('get_the_excerpt', 'balanceTags', 9);

	# the_excerpt gets paragraph tags added; the_excerpt_rss gets them removed.
	remove_filter('the_excerpt', 'wpautop');
	add_filter('the_excerpt', 'mdwp_add_p');
	add_filter('the_excerpt_rss', 'mdwp_strip_p');

	# Tag balancing is taken off the save hooks; it is applied on output above.
	remove_filter('content_save_pre', 'balanceTags', 50);
	remove_filter('excerpt_save_pre', 'balanceTags', 50);
}
| 103 | |
| 104 # Add a footnote id prefix to posts when inside a loop. | |
function mdwp_MarkdownPost($text) {
#
# Transform post text with a parser private to this function.
#
# On single posts, pages and feeds the footnote id prefix is empty;
# everywhere else it is the current post ID followed by a dot.
#
	static $parser;

	if (!$parser) {
		$class_name = MARKDOWN_PARSER_CLASS;
		$parser = new $class_name;
	}

	$standalone = is_single() || is_page() || is_feed();
	$parser->fn_id_prefix = $standalone ? "" : get_the_ID() . ".";

	return $parser->transform($text);
}
| 118 | |
| 119 # Comments | |
| 120 # - Remove WordPress paragraph generator. | |
| 121 # - Remove WordPress auto-link generator. | |
| 122 # - Scramble important tags before passing them to the kses filter. | |
| 123 # - Run Markdown on excerpt then remove paragraph tags. | |
if (MARKDOWN_WP_COMMENTS) {
	# comment_text: drop the paragraph generator and the auto-linker.
	remove_filter('comment_text', 'wpautop', 30);
	remove_filter('comment_text', 'make_clickable');

	# pre_comment_content: Markdown at 6, then the listed tags are swapped
	# for placeholders at 8 and swapped back at 12. The original note says
	# this scrambles important tags before they reach the kses filter.
	add_filter('pre_comment_content', 'Markdown', 6);
	add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
	add_filter('pre_comment_content', 'mdwp_show_tags', 12);

	# Comment text and excerpts: Markdown at 6; excerpts then lose <p> tags.
	add_filter('get_comment_text', 'Markdown', 6);
	add_filter('get_comment_excerpt', 'Markdown', 6);
	add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);

	# Parallel lists used by mdwp_hide_tags() and mdwp_show_tags(): entry N
	# of one list is replaced by entry N of the other.
	global $mdwp_hidden_tags, $mdwp_placeholders;
	$mdwp_hidden_tags = explode(' ',
		'<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
	$mdwp_placeholders = explode(' ', str_rot13(
		'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
		'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));
}
| 141 | |
function mdwp_add_p($text) {
#
# Wrap $text in paragraph tags, splitting paragraphs at runs of two or more
# newlines. Text that is empty or already starts with a <p>, <ul>, <ol>,
# <dl>, <pre> or <blockquote> tag is returned unchanged.
#
	if (preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
		return $text;
	}

	$wrapped = '<p>'.$text.'</p>';
	return preg_replace('{\n{2,}}', "</p>\n\n<p>", $wrapped);
}
| 149 | |
function mdwp_strip_p($t) {
#
# Remove every attribute-less <p> and </p> tag (any letter case) from $t.
#
	$without_p = preg_replace('{</?p>}i', '', $t);
	return $without_p;
}
| 151 | |
function mdwp_hide_tags($text) {
#
# Replace each tag listed in the global $mdwp_hidden_tags with the
# placeholder at the same position in $mdwp_placeholders.
#
	global $mdwp_hidden_tags, $mdwp_placeholders;

	$hidden = str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
	return $hidden;
}
function mdwp_show_tags($text) {
#
# Inverse of mdwp_hide_tags(): replace each placeholder listed in the global
# $mdwp_placeholders with the tag at the same position in $mdwp_hidden_tags.
#
	global $mdwp_hidden_tags, $mdwp_placeholders;

	$restored = str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
	return $restored;
}
| 160 } | |
| 161 | |
| 162 | |
| 163 ### bBlog Plugin Info ### | |
| 164 | |
function identify_modifier_markdown() {
#
# Return the descriptive array for the "markdown" modifier plugin: name,
# type, display name, description, authors, licence, version and help HTML.
#
	$info = array();

	$info['name'] = 'markdown';
	$info['type'] = 'modifier';
	$info['nicename'] = 'PHP Markdown Extra';
	$info['description'] = 'A text-to-HTML conversion tool for web writers';
	$info['authors'] = 'Michel Fortin and John Gruber';
	$info['licence'] = 'GPL';
	$info['version'] = MARKDOWNEXTRA_VERSION;
	$info['help'] = '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>';

	return $info;
}
| 177 | |
| 178 | |
| 179 ### Smarty Modifier Interface ### | |
| 180 | |
function smarty_modifier_markdown($text) {
#
# Modifier entry point: passes $text to Markdown() and returns its result.
#
	$html = Markdown($text);
	return $html;
}
| 184 | |
| 185 | |
| 186 ### Textile Compatibility Mode ### | |
| 187 | |
| 188 # Rename this file to "classTextile.php" and it can replace Textile everywhere. | |
| 189 | |
if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
	# Optional: load PHP SmartyPants if the file can be found. A failed
	# include is deliberately silenced.
	@include_once 'smartypants.php';

	# Stand-in for the Textile class. It sends text through Markdown instead.
	class Textile {
		# Run Markdown on $text only when both $lite and $encode are empty,
		# then SmartyPants when that function exists.
		function TextileThis($text, $lite='', $encode='') {
			$full_mode = ($lite == '' && $encode == '');
			if ($full_mode) {
				$text = Markdown($text);
			}
			if (function_exists('SmartyPants')) {
				$text = SmartyPants($text);
			}
			return $text;
		}

		# Restricted mode is not supported: this forwards to TextileThis()
		# and ignores $noimage.
		function TextileRestricted($text, $lite='', $noimage='') {
			return $this->TextileThis($text, $lite);
		}

		# Returns $text untouched (kept for TextPattern 4.0.3 compatibility,
		# per the original note).
		function blockLite($text) {
			return $text;
		}
	}
}
| 208 | |
| 209 | |
| 210 | |
| 211 # | |
| 212 # Markdown Parser Class | |
| 213 # | |
| 214 | |
| 215 class Markdown_Parser { | |
| 216 | |
| 217 # Regex to match balanced [brackets]. | |
| 218 # Needed to insert a maximum bracket depth while converting to PHP. | |
| 219 var $nested_brackets_depth = 6; | |
| 220 var $nested_brackets_re; | |
| 221 | |
| 222 var $nested_url_parenthesis_depth = 4; | |
| 223 var $nested_url_parenthesis_re; | |
| 224 | |
| 225 # Table of hash values for escaped characters: | |
| 226 var $escape_chars = '\`*_{}[]()>#+-.!'; | |
| 227 var $escape_chars_re; | |
| 228 | |
| 229 # Change to ">" for HTML output. | |
| 230 var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX; | |
| 231 var $tab_width = MARKDOWN_TAB_WIDTH; | |
| 232 | |
| 233 # Change to `true` to disallow markup or entities. | |
| 234 var $no_markup = false; | |
| 235 var $no_entities = false; | |
| 236 | |
| 237 # Predefined urls and titles for reference links and images. | |
| 238 var $predef_urls = array(); | |
| 239 var $predef_titles = array(); | |
| 240 | |
| 241 | |
function __construct() {
#
# Constructor. Initializes the member variables derived from the
# configurable ones.
#
# PHP 8 no longer treats a method named after its class as a constructor,
# so the initialization lives here. It is declared before the class-named
# method so that this one is the constructor wherever both are recognized.
#
	$this->_initDetab();
	$this->prepareItalicsAndBold();

	# Balanced [brackets], unrolled to a fixed maximum nesting depth.
	$this->nested_brackets_re =
		str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
		str_repeat('\])*', $this->nested_brackets_depth);

	# Balanced (parentheses) inside URLs, same technique.
	$this->nested_url_parenthesis_re =
		str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
		str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);

	# Character class matching any single escapable character.
	$this->escape_chars_re = '['.preg_quote($this->escape_chars).']';

	# Sort document, block, and span gamut in ascending priority order.
	asort($this->document_gamut);
	asort($this->block_gamut);
	asort($this->span_gamut);
}

function Markdown_Parser() {
#
# Former PHP 4 style constructor, kept so that code invoking it by name
# (for example parent::Markdown_Parser() in a subclass) still works.
#
	$this->__construct();
}
| 264 | |
| 265 | |
| 266 # Internal hashes used during transformation. | |
| 267 var $urls = array(); | |
| 268 var $titles = array(); | |
| 269 var $html_hashes = array(); | |
| 270 | |
| 271 # Status flag to avoid invalid nesting. | |
| 272 var $in_anchor = false; | |
| 273 | |
| 274 | |
function setup() {
#
# Called before the transformation process starts to set up parser
# state.
#
	# Start from the predefined reference links and an empty hash table.
	$this->urls = $this->predef_urls;
	$this->titles = $this->predef_titles;
	$this->html_hashes = array();

	# Reset the member flag. The previous code assigned a throw-away local
	# variable, so a flag left true by an interrupted run was never cleared
	# and doAnchors() would then skip all links.
	$this->in_anchor = false;
}
| 287 | |
function teardown() {
#
# Called after the transformation process. Empties the per-document
# tables so they do not hold on to memory between runs.
#
	$this->urls = $this->titles = $this->html_hashes = array();
}
| 297 | |
| 298 | |
function transform($text) {
#
# Main entry point: normalizes the input text, then passes it through
# every method of the document gamut in order. Returns the result with
# one trailing newline appended.
#
	$this->setup();

	# Drop a UTF-8 BOM at the start and every \x1A character. That
	# character is the marker hashPart() puts in its tokens.
	$text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

	# Normalize DOS (\r\n) and Mac (\r) line endings to Unix (\n).
	$text = preg_replace('{\r\n?}', "\n", $text);

	# Guarantee the text ends with a couple of newlines.
	$text = $text . "\n\n";

	# Convert all tabs to spaces.
	$text = $this->detab($text);

	# Turn block-level HTML blocks into hash entries.
	$text = $this->hashHTMLBlocks($text);

	# Empty out lines made only of spaces, so later patterns can match
	# consecutive blank lines with a plain \n+.
	$text = preg_replace('/^[ ]+$/m', '', $text);

	# Run the document gamut; the array keys are the method names.
	foreach (array_keys($this->document_gamut) as $step) {
		$text = $this->$step($text);
	}

	$this->teardown();

	return $text . "\n";
}
| 337 | |
| 338 var $document_gamut = array( | |
| 339 # Strip link definitions, store in hashes. | |
| 340 "stripLinkDefinitions" => 20, | |
| 341 | |
| 342 "runBasicBlockGamut" => 30, | |
| 343 ); | |
| 344 | |
| 345 | |
function stripLinkDefinitions($text) {
#
# Remove link definitions from $text. Each one is handed to
# _stripLinkDefinitions_callback(), which records its URL and title.
#
	$max_indent = $this->tab_width - 1;

	# Link defs are in the form: ^[id]: url "optional title"
	$link_def_re = '{
		^[ ]{0,'.$max_indent.'}\[(.+)\][ ]?: # id = $1
		[ ]*
		\n? # maybe *one* newline
		[ ]*
		<?(\S+?)>? # url = $2
		[ ]*
		\n? # maybe one newline
		[ ]*
		(?:
		(?<=\s) # lookbehind for whitespace
		["(]
		(.*?) # title = $3
		[")]
		[ ]*
		)? # title is optional
		(?:\n+|\Z)
		}xm';

	return preg_replace_callback(
		$link_def_re,
		array(&$this, '_stripLinkDefinitions_callback'),
		$text);
}
function _stripLinkDefinitions_callback($matches) {
#
# Store one link definition under its lower-cased id and return the empty
# string that replaces the definition in the text.
#
	$id = strtolower($matches[1]);

	$this->urls[$id] = $matches[2];
	# Bound by reference so that a definition without a title (no $3 in the
	# match) yields an unset-like entry without reading a missing index.
	$this->titles[$id] =& $matches[3];

	return '';
}
| 382 | |
| 383 | |
function hashHTMLBlocks($text) {
#
# Replace block-level raw HTML found in $text with hash tokens (see
# _hashHTMLBlocks_callback) so later passes leave it alone. Returns $text
# unchanged when markup is disallowed ($this->no_markup).
#
	if ($this->no_markup) return $text;

	$less_than_tab = $this->tab_width - 1;

	# Hashify HTML blocks:
	# We only want to do this for block-level HTML tags, such as headers,
	# lists, and tables. That's because we still want to wrap <p>s around
	# "paragraphs" that are wrapped in non-block-level tags, such as anchors,
	# phrase emphasis, and spans. The list of tags we're looking for is
	# hard-coded:
	#
	# * List "a" is made of tags which can be both inline or block-level.
	#   These will be treated block-level when the start tag is alone on
	#   its line, otherwise they're not matched here and will be taken as
	#   inline later.
	# * List "b" is made of tags which are always block-level;
	#
	$block_tags_a_re = 'ins|del';
	$block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
		'script|noscript|form|fieldset|iframe|math';

	# Regular expression for the content of a block tag. Nesting of the
	# same tag is supported up to $nested_tags_level levels deep.
	$nested_tags_level = 4;
	$attr = '
		(?> # optional tag attributes
		\s # starts with whitespace
		(?>
		[^>"/]+ # text outside quotes
		|
		/+(?!>) # slash not followed by ">"
		|
		"[^"]*" # text inside double quotes (tolerate ">")
		|
		\'[^\']*\' # text inside single quotes (tolerate ">")
		)*
		)?
		';
	$content =
		str_repeat('
			(?>
			[^<]+ # content without tag
			|
			<\2 # nested opening tag
			'.$attr.' # attributes
			(?>
			/>
			|
			>', $nested_tags_level). # end of opening tag
		'.*?'. # last level nested tag content
		str_repeat('
			</\2\s*> # closing nested tag
			)
			|
			<(?!/\2\s*> # other tags with a different name
			)
			)*',
			$nested_tags_level);
	# Same content pattern for list "a", whose tag name is captured as $3.
	$content2 = str_replace('\2', '\3', $content);

	# First, look for nested blocks, e.g.:
	# 	<div>
	# 		<div>
	# 		tags for inner block must be indented.
	# 		</div>
	# 	</div>
	#
	# The outermost tags must start at the left margin for this to match, and
	# the inner nested divs must be indented.
	# We need to do this before the next, more liberal match, because the next
	# match will start at the first `<div>` and stop at the first `</div>`.
	#
	# Capturing groups of the combined pattern: $1 whole block, $2 list "b"
	# tag name, $3 list "a" tag name, $4 "hr", $5 processing-instruction
	# delimiter. The processing-instruction branch used to back-reference
	# \2, which is never set in that branch, so the branch could not match;
	# it now refers to its own group, \5.
	$text = preg_replace_callback('{(?>
		(?>
		(?<=\n\n) # Starting after a blank line
		| # or
		\A\n? # the beginning of the doc
		)
		( # save in $1

		# Match from `\n<tag>` to `</tag>\n`, handling nested tags
		# in between.

		[ ]{0,'.$less_than_tab.'}
		<('.$block_tags_b_re.')# start tag = $2
		'.$attr.'> # attributes followed by > and \n
		'.$content.' # content, support nesting
		</\2> # the matching end tag
		[ ]* # trailing spaces/tabs
		(?=\n+|\Z) # followed by a newline or end of document

		| # Special version for tags of group a.

		[ ]{0,'.$less_than_tab.'}
		<('.$block_tags_a_re.')# start tag = $3
		'.$attr.'>[ ]*\n # attributes followed by >
		'.$content2.' # content, support nesting
		</\3> # the matching end tag
		[ ]* # trailing spaces/tabs
		(?=\n+|\Z) # followed by a newline or end of document

		| # Special case just for <hr />. It was easier to make a special
		# case than to make the other regex more complicated.

		[ ]{0,'.$less_than_tab.'}
		<(hr) # start tag = $4
		'.$attr.' # attributes
		/?> # the matching end tag
		[ ]*
		(?=\n{2,}|\Z) # followed by a blank line or end of document

		| # Special case for standalone HTML comments:

		[ ]{0,'.$less_than_tab.'}
		(?s:
		<!-- .*? -->
		)
		[ ]*
		(?=\n{2,}|\Z) # followed by a blank line or end of document

		| # PHP and ASP-style processor instructions (<? and <%)

		[ ]{0,'.$less_than_tab.'}
		(?s:
		<([?%]) # $5
		.*?
		\5>
		)
		[ ]*
		(?=\n{2,}|\Z) # followed by a blank line or end of document

		)
		)}Sxmi',
		array(&$this, '_hashHTMLBlocks_callback'),
		$text);

	return $text;
}
function _hashHTMLBlocks_callback($matches) {
#
# Hash the matched HTML block ($1) and return its token surrounded by
# blank lines.
#
	$token = $this->hashBlock($matches[1]);
	return "\n\n" . $token . "\n\n";
}
| 526 | |
| 527 | |
function hashPart($text, $boundary = 'X') {
#
# Store $text in the hash table and return the unique token standing for
# it. Call this whenever a function inserts an atomic element into the
# text stream; unhash turns tokens back into their text.
#
# $boundary is the character placed on both sides of the token. By
# convention, "B" is used for block elements that need not be wrapped
# into paragraph tags at the end, ":" is used for elements that are word
# separators and "X" is used in the general case.
#
	# Expand any token already inside $text now, so that unhashing does not
	# have to be repeated at the end.
	$text = $this->unhash($text);

	# Token layout: boundary, \x1A marker, running counter, boundary.
	static $i = 0;
	$i++;
	$token = $boundary . "\x1A" . $i . $boundary;

	$this->html_hashes[$token] = $text;
	return $token;
}
| 549 | |
| 550 | |
function hashBlock($text) {
#
# Same as hashPart(), using the block-level boundary character "B".
#
	$token = $this->hashPart($text, 'B');
	return $token;
}
| 557 | |
| 558 | |
| 559 var $block_gamut = array( | |
| 560 # | |
| 561 # These are all the transformations that form block-level | |
| 562 # tags like paragraphs, headers, and list items. | |
| 563 # | |
| 564 "doHeaders" => 10, | |
| 565 "doHorizontalRules" => 20, | |
| 566 | |
| 567 "doLists" => 40, | |
| 568 "doCodeBlocks" => 50, | |
| 569 "doBlockQuotes" => 60, | |
| 570 ); | |
| 571 | |
function runBlockGamut($text) {
#
# Hash raw HTML blocks in $text, then run the block gamut on the result.
#
# The hashing has to be repeated for each block, not only once at the
# beginning of the document: HTML blocks can be part of list items and
# could have been indented, in which case the earlier hashHTMLBlocks pass
# would have seen them as code blocks.
#
	return $this->runBasicBlockGamut($this->hashHTMLBlocks($text));
}
| 585 | |
function runBasicBlockGamut($text) {
#
# Run every block gamut method on $text without hashing HTML blocks
# first. Use this when HTML blocks are known to be hashed already, as in
# the first whole-document pass.
#
	# The array keys are the method names.
	foreach (array_keys($this->block_gamut) as $step) {
		$text = $this->$step($text);
	}

	# Finally form paragraphs and restore hashed blocks.
	return $this->formParagraphs($text);
}
| 601 | |
| 602 | |
function doHorizontalRules($text) {
#
# Replace each horizontal rule line (three or more of the same -, * or _
# marker, optionally separated by spaces) with a hashed <hr> tag.
#
	$rule_re = '{
		^[ ]{0,3} # Leading space
		([-*_]) # $1: First marker
		(?> # Repeated marker group
		[ ]{0,2} # Zero, one, or two spaces.
		\1 # Marker character
		){2,} # Group repeated at least twice
		[ ]* # Tailing spaces
		$ # End of line.
		}mx';

	# One token is created up front and reused for every rule found.
	$hr_token = $this->hashBlock("<hr$this->empty_element_suffix");

	return preg_replace($rule_re, "\n".$hr_token."\n", $text);
}
| 619 | |
| 620 | |
| 621 var $span_gamut = array( | |
| 622 # | |
| 623 # These are all the transformations that occur *within* block-level | |
| 624 # tags like paragraphs, headers, and list items. | |
| 625 # | |
| 626 # Process character escapes, code spans, and inline HTML | |
| 627 # in one shot. | |
| 628 "parseSpan" => -30, | |
| 629 | |
| 630 # Process anchor and image tags. Images must come first, | |
| 631 # because ![foo][f] looks like an anchor. | |
| 632 "doImages" => 10, | |
| 633 "doAnchors" => 20, | |
| 634 | |
| 635 # Make links out of things like `<http://example.com/>` | |
| 636 # Must come after doAnchors, because you can use < and > | |
| 637 # delimiters in inline links like [this](<url>). | |
| 638 "doAutoLinks" => 30, | |
| 639 "encodeAmpsAndAngles" => 40, | |
| 640 | |
| 641 "doItalicsAndBold" => 50, | |
| 642 "doHardBreaks" => 60, | |
| 643 ); | |
| 644 | |
function runSpanGamut($text) {
#
# Run every span gamut method on $text, in the order of the gamut array,
# and return the result.
#
	# The array keys are the method names.
	foreach (array_keys($this->span_gamut) as $step) {
		$text = $this->$step($text);
	}
	return $text;
}
| 655 | |
| 656 | |
function doHardBreaks($text) {
#
# Turn two or more spaces followed by a newline into a hard line break.
#
	$break_re = '/ {2,}\n/';
	$on_break = array(&$this, '_doHardBreaks_callback');

	return preg_replace_callback($break_re, $on_break, $text);
}
function _doHardBreaks_callback($matches) {
#
# Return the hashed <br> tag (plus newline) replacing one hard break.
#
	$br_tag = "<br$this->empty_element_suffix\n";
	return $this->hashPart($br_tag);
}
| 665 | |
| 666 | |
function doAnchors($text) {
#
# Turn Markdown link shortcuts into XHTML <a> tags.
#
# The in_anchor flag prevents links inside links: the callbacks run the
# span gamut on the link text, and the span gamut includes doAnchors.
#
	if ($this->in_anchor) return $text;
	$this->in_anchor = true;

	#
	# First, handle reference-style links: [link text] [id]
	#
	$text = preg_replace_callback('{
		( # wrap whole match in $1
		\[
		('.$this->nested_brackets_re.') # link text = $2
		\]

		[ ]? # one optional space
		(?:\n[ ]*)? # one optional newline followed by spaces

		\[
		(.*?) # id = $3
		\]
		)
		}xs',
		array(&$this, '_doAnchors_reference_callback'), $text);

	#
	# Next, inline-style links: [link text](url "optional title")
	#
	# The callback name is spelled exactly like the method declaration
	# (it was previously written with a capital "D").
	$text = preg_replace_callback('{
		( # wrap whole match in $1
		\[
		('.$this->nested_brackets_re.') # link text = $2
		\]
		\( # literal paren
		[ ]*
		(?:
		<(\S*)> # href = $3
		|
		('.$this->nested_url_parenthesis_re.') # href = $4
		)
		[ ]*
		( # $5
		([\'"]) # quote char = $6
		(.*?) # Title = $7
		\6 # matching quote
		[ ]* # ignore any spaces/tabs between closing quote and )
		)? # title is optional
		\)
		)
		}xs',
		array(&$this, '_doAnchors_inline_callback'), $text);

	#
	# Last, handle reference-style shortcuts: [link text]
	# These must come last in case you've also got [link test][1]
	# or [link test](/foo)
	#
	# This pass is disabled in this copy; the code is kept for reference.
	#
	// $text = preg_replace_callback('{
	// ( # wrap whole match in $1
	// \[
	// ([^\[\]]+) # link text = $2; can\'t contain [ or ]
	// \]
	// )
	// }xs',
	// array(&$this, '_doAnchors_reference_callback'), $text);

	$this->in_anchor = false;
	return $text;
}
function _doAnchors_reference_callback($matches) {
#
# Build the <a> tag for one reference-style link. Returns the hashed tag,
# or the untouched match ($1) when the link id has no known URL.
#
	# Bound by reference so that a missing $3 does not read an undefined index.
	$link_id =& $matches[3];

	if ($link_id == "") {
		# for shortcut links like [this][] or [this].
		$link_id = $matches[2];
	}

	# lower-case and turn embedded newlines into spaces
	$link_id = preg_replace('{[ ]?\n}', ' ', strtolower($link_id));

	if (!isset($this->urls[$link_id])) {
		return $matches[1];
	}

	$href = $this->encodeAttribute($this->urls[$link_id]);
	$tag = "<a href=\"$href\"";

	if (isset($this->titles[$link_id])) {
		$title_attr = $this->encodeAttribute($this->titles[$link_id]);
		$tag .= " title=\"$title_attr\"";
	}

	$label = $this->runSpanGamut($matches[2]);
	$tag .= ">$label</a>";

	return $this->hashPart($tag);
}
function _doAnchors_inline_callback($matches) {
#
# Build and hash the <a> tag for one inline-style link. The URL comes
# from $3 (angle-bracket form) or, when that is empty, from $4; the
# optional title is $7.
#
	$label = $this->runSpanGamut($matches[2]);
	$href = $matches[3] == '' ? $matches[4] : $matches[3];
	# Bound by reference so that a link without a title reads as unset.
	$title =& $matches[7];

	$href = $this->encodeAttribute($href);
	$tag = "<a href=\"$href\"";

	if (isset($title)) {
		$title = $this->encodeAttribute($title);
		$tag .= " title=\"$title\"";
	}

	# NOTE(review): the link text goes through the span gamut a second
	# time here, as in the original code -- confirm whether this second
	# pass is needed before removing it.
	$label = $this->runSpanGamut($label);
	$tag .= ">$label</a>";

	return $this->hashPart($tag);
}
| 790 | |
| 791 | |
function doImages($text) {
#
# Turn Markdown image shortcuts into <img> tags. Reference-style images
# are handled first, inline images second.
#
	# Reference-style labeled images: ![alt text][id]
	$reference_re = '{
		( # wrap whole match in $1
		!\[
		('.$this->nested_brackets_re.') # alt text = $2
		\]

		[ ]? # one optional space
		(?:\n[ ]*)? # one optional newline followed by spaces

		\[
		(.*?) # id = $3
		\]

		)
		}xs';
	$text = preg_replace_callback($reference_re,
		array(&$this, '_doImages_reference_callback'), $text);

	# Inline images: 
	# Don't forget: encode * and _
	$inline_re = '{
		( # wrap whole match in $1
		!\[
		('.$this->nested_brackets_re.') # alt text = $2
		\]
		\s? # One optional whitespace character
		\( # literal paren
		[ ]*
		(?:
		<(\S*)> # src url = $3
		|
		('.$this->nested_url_parenthesis_re.') # src url = $4
		)
		[ ]*
		( # $5
		([\'"]) # quote char = $6
		(.*?) # title = $7
		\6 # matching quote
		[ ]*
		)? # title is optional
		\)
		)
		}xs';
	$text = preg_replace_callback($inline_re,
		array(&$this, '_doImages_inline_callback'), $text);

	return $text;
}
function _doImages_reference_callback($matches) {
#
# Build the <img> tag for one reference-style image. Returns the hashed
# tag, or the untouched match ($1) when the link id has no known URL.
#
	$alt = $matches[2];
	$id = strtolower($matches[3]);

	if ($id == "") {
		# for shortcut links like ![this][].
		$id = strtolower($alt);
	}

	$alt = $this->encodeAttribute($alt);

	if (!isset($this->urls[$id])) {
		# If there's no such link ID, leave intact:
		return $matches[1];
	}

	$src = $this->encodeAttribute($this->urls[$id]);
	$tag = "<img src=\"$src\" alt=\"$alt\"";

	if (isset($this->titles[$id])) {
		$title_attr = $this->encodeAttribute($this->titles[$id]);
		$tag .= " title=\"$title_attr\"";
	}

	$tag .= $this->empty_element_suffix;
	return $this->hashPart($tag);
}
function _doImages_inline_callback($matches) {
#
# Build and hash the <img> tag for one inline image. The URL comes from
# $3 (angle-bracket form) or, when that is empty, from $4; the optional
# title is $7.
#
	$src = $matches[3] == '' ? $matches[4] : $matches[3];
	# Bound by reference so that an image without a title reads as unset.
	$title =& $matches[7];

	$alt = $this->encodeAttribute($matches[2]);
	$src = $this->encodeAttribute($src);

	$tag = "<img src=\"$src\" alt=\"$alt\"";
	if (isset($title)) {
		$title = $this->encodeAttribute($title);
		$tag .= " title=\"$title\"";
	}
	$tag .= $this->empty_element_suffix;

	return $this->hashPart($tag);
}
| 892 | |
| 893 | |
function doHeaders($text) {
#
# Convert both header syntaxes; the callbacks build the <hN> blocks.
#
	# Setext-style headers:
	#	  Header 1
	#	  ========
	#
	#	  Header 2
	#	  --------
	#
	$setext_re = '{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx';
	$text = preg_replace_callback($setext_re,
		array(&$this, '_doHeaders_callback_setext'), $text);

	# atx-style headers:
	#	# Header 1
	#	## Header 2
	#	## Header 2 with closing hashes ##
	#	...
	#	###### Header 6
	#
	$atx_re = '{
		^(\#{1,6}) # $1 = string of #\'s
		[ ]*
		(.+?) # $2 = Header text
		[ ]*
		\#* # optional closing #\'s (not counted)
		\n+
		}xm';
	$text = preg_replace_callback($atx_re,
		array(&$this, '_doHeaders_callback_atx'), $text);

	return $text;
}
function _doHeaders_callback_setext($matches) {
	#
	# Regex callback for Setext-style headers.
	#
	# $matches[1] is the header text and $matches[2] the underline made of
	# "=" or "-" characters. An underline starting with "=" yields <h1>,
	# anything else <h2>. Returns the hashed header block surrounded by
	# newlines, or the untouched match when it looks like an empty list item.
	#

	# Terrible hack to check we haven't found an empty list item.
	if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
		return $matches[0];

	# Use square-bracket string offsets: the curly-brace form ($str{0}) is
	# deprecated in PHP 7.4 and is a parse error in PHP 8.
	$level = $matches[2][0] == '=' ? 1 : 2;
	$block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
	return "\n" . $this->hashBlock($block) . "\n\n";
}
function _doHeaders_callback_atx($matches) {
	#
	# Regex callback for atx-style headers. The header level is the number
	# of leading "#" characters captured in $matches[1]; $matches[2] is the
	# header text. Returns the hashed header block surrounded by newlines.
	#
	$depth = strlen($matches[1]);
	$inner = $this->runSpanGamut($matches[2]);
	$header = "<h$depth>" . $inner . "</h$depth>";

	return "\n" . $this->hashBlock($header) . "\n\n";
}
| 938 | |
| 939 | |
function doLists($text) {
	#
	# Form HTML ordered (numbered) and unordered (bulleted) lists.
	#
	# The text is scanned twice, once for bulleted lists and once for
	# numbered lists; every whole list found is handed to _doLists_callback.
	#
	$less_than_tab = $this->tab_width - 1;

	# We use a different prefix before nested lists than top-level lists.
	# See extended comment in processListItems().
	if ($this->list_level) {
		# Inside a list: a sub-list may start at the beginning of any line.
		$anchor_re = '^';
	}
	else {
		# Top level: the list must follow a blank line or start the text.
		$anchor_re = '(?:(?<=\n)\n|\A\n?) # Must eat the newline';
	}

	# Marker patterns: bullets first, then numbers.
	foreach (array('[*+-]', '\d+[.]') as $marker_re) {
		# Pattern matching one entire ul or ol list:
		$whole_list_re = '
			(								# $1 = whole list
			  (								# $2
				[ ]{0,'.$less_than_tab.'}
				('.$marker_re.')			# $3 = first list item marker
				[ ]+
			  )
			  (?s:.+?)
			  (								# $4
				  \z
				|
				  \n{2,}
				  (?=\S)
				  (?!						# Negative lookahead for another list item marker
					[ ]*
					'.$marker_re.'[ ]+
				  )
			  )
			)
		'; // mx

		$text = preg_replace_callback('{
				'.$anchor_re.'
				'.$whole_list_re.'
			}mx',
			array(&$this, '_doLists_callback'), $text);
	}

	return $text;
}
function _doLists_callback($matches) {
	#
	# Regex callback for one whole list. $matches[1] is the list text and
	# $matches[3] its first item marker, which decides between <ul> and <ol>.
	# Returns the hashed list block surrounded by newlines.
	#
	if (preg_match("/[*+-]/", $matches[3])) {
		$list_type = "ul";
		$item_marker_re = '[*+-]';
	}
	else {
		$list_type = "ol";
		$item_marker_re = '\d+[.]';
	}

	# Items are only split on markers of the same kind as the first one.
	$items = $this->processListItems($matches[1] . "\n", $item_marker_re);

	$block = $this->hashBlock("<$list_type>\n" . $items . "</$list_type>");
	return "\n". $block ."\n\n";
}
| 1014 | |
# Current list nesting depth: incremented by processListItems() on entry and
# decremented on exit, so it is zero whenever no list is being processed.
var $list_level = 0;
| 1016 | |
function processListItems($list_str, $marker_any_re) {
	#
	# Split the contents of a single ordered or unordered list into its
	# items and return the concatenated result of _processListItems_callback
	# for each of them.
	#
	# $list_str is the raw list text; $marker_any_re is the regex fragment
	# matching the item markers of this list.
	#
	# $this->list_level tracks whether we are inside a list: it is raised
	# for the duration of this call and lowered again before returning, so
	# it is zero when no list is being processed.
	#
	# This matters because outside of a list we want text such as:
	#
	#		I recommend upgrading to version
	#		8. Oops, now this line is treated
	#		as a sub-list.
	#
	# to stay a single paragraph even though its second line begins with a
	# digit-period-space sequence, whereas inside a list (or sub-list) such
	# a line starts a sub-list. It is a kludge: this part of Markdown's
	# syntax is hard to parse perfectly without resorting to mind-reading.
	# Perhaps sub-lists should be required to start with a starting
	# cardinal number, e.g. "1." or "a.".
	#
	$item_re = '{
		(\n)?							# leading line = $1
		(^[ ]*)							# leading whitespace = $2
		('.$marker_any_re.'				# list marker and space = $3
			(?:[ ]+|(?=\n))	# space only required if item is not empty
		)
		((?s:.*?))						# list item text   = $4
		(?:(\n+(?=\n))|\n)				# tailing blank line = $5
		(?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
		}xm';

	$this->list_level++;

	# Collapse trailing blank lines into a single newline.
	$list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

	$list_str = preg_replace_callback($item_re,
		array(&$this, '_processListItems_callback'), $list_str);

	$this->list_level--;
	return $list_str;
}
function _processListItems_callback($matches) {
	#
	# Regex callback for a single list item; returns it wrapped in <li>.
	#
	# Groups: 1 = leading blank line, 2 = leading whitespace, 3 = marker
	# plus following space, 4 = item text, 5 = trailing blank line.
	#
	$item = $matches[4];

	# An item is rendered through the block gamut when a blank line
	# surrounds it or appears inside it; otherwise through the span gamut.
	$is_block_item = !empty($matches[1])
		|| !empty($matches[5])
		|| preg_match('/\n{2,}/', $item);

	if ($is_block_item) {
		# Replace marker with the appropriate whitespace indentation
		$padding = str_repeat(' ', strlen($matches[3]));
		$item = $matches[2] . $padding . $item;
		$item = $this->runBlockGamut($this->outdent($item)."\n");
	}
	else {
		# Recursion for sub-lists:
		$item = $this->doLists($this->outdent($item));
		$item = preg_replace('/\n+$/', '', $item);
		$item = $this->runSpanGamut($item);
	}

	return "<li>" . $item . "</li>\n";
}
| 1086 | |
| 1087 | |
function doCodeBlocks($text) {
	#
	# Process Markdown `<pre><code>` blocks: every run of lines indented by
	# a tab-width of spaces is handed to _doCodeBlocks_callback.
	#
	$indent = $this->tab_width;

	$code_block_re = '{
			(?:\n\n|\A\n?)
			(	            # $1 = the code block -- one or more lines, starting with a space/tab
			  (?>
				[ ]{'.$indent.'}  # Lines must start with a tab or a tab-width of spaces
				.*\n+
			  )+
			)
			((?=^[ ]{0,'.$indent.'}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
		}xm';

	return preg_replace_callback($code_block_re,
		array(&$this, '_doCodeBlocks_callback'), $text);
}
function _doCodeBlocks_callback($matches) {
	#
	# Regex callback for one indented code block ($matches[1]). Removes one
	# level of indentation, escapes HTML special characters (quotes are left
	# alone) and returns the hashed <pre><code> block.
	#
	$code = htmlspecialchars($this->outdent($matches[1]), ENT_NOQUOTES);

	# Drop blank lines at both ends of the block.
	$code = preg_replace('/\A\n+|\n+\z/', '', $code);

	$html = "<pre><code>$code\n</code></pre>";
	return "\n\n".$this->hashBlock($html)."\n\n";
}
| 1118 | |
| 1119 | |
function makeCodeSpan($code) {
	#
	# Build the markup for an inline code span. Called from handleSpanToken.
	# The code is trimmed, HTML-escaped (quotes untouched), wrapped in
	# <code> and passed through hashPart().
	#
	$escaped = htmlspecialchars(trim($code), ENT_NOQUOTES);
	$span = "<code>$escaped</code>";

	return $this->hashPart($span);
}
| 1127 | |
| 1128 | |
# Regular expressions for emphasis markers, keyed by the marker that is
# currently open. The '' entry is used when no marker of that kind is open
# and matches either "*" or "_" style; the other entries match the same
# marker again, this time preceded by a non-space character.
var $em_relist = array(
'' => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S)(?![.,:;]\s)',
'*' => '(?<=\S)(?<!\*)\*(?!\*)',
'_' => '(?<=\S)(?<!_)_(?!_)',
);
# Same layout for the two-character strong markers ("**" and "__").
var $strong_relist = array(
'' => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S)(?![.,:;]\s)',
'**' => '(?<=\S)(?<!\*)\*\*(?!\*)',
'__' => '(?<=\S)(?<!_)__(?!_)',
);
# Same layout for the three-character combined markers ("***" and "___").
var $em_strong_relist = array(
'' => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S)(?![.,:;]\s)',
'***' => '(?<=\S)(?<!\*)\*\*\*(?!\*)',
'___' => '(?<=\S)(?<!_)___(?!_)',
);
# Combined token expressions, filled in by prepareItalicsAndBold() and keyed
# by the concatenation of an $em_relist key and a $strong_relist key.
var $em_strong_prepared_relist;
| 1145 | |
function prepareItalicsAndBold() {
	#
	# Prepare regular expressions for searching emphasis tokens in any
	# context.
	#
	# For every combination of an $em_relist key and a $strong_relist key,
	# one master expression is stored in $this->em_strong_prepared_relist
	# under the concatenated key.
	#
	foreach ($this->em_relist as $em => $em_re) {
		foreach ($this->strong_relist as $strong => $strong_re) {
			$context = "$em$strong";

			# The three-character expression, when one exists for this
			# context, must come first in the alternation.
			if (isset($this->em_strong_relist[$context])) {
				$alternatives = array(
					$this->em_strong_relist[$context], $em_re, $strong_re);
			}
			else {
				$alternatives = array($em_re, $strong_re);
			}

			$this->em_strong_prepared_relist[$context] =
				'{('. implode('|', $alternatives) .')}';
		}
	}
}
| 1167 | |
function doItalicsAndBold($text) {
	#
	# Convert emphasis markers in $text into <em> and <strong> spans.
	#
	# The text is consumed token by token. $token_stack holds the markers
	# that are currently open (innermost first) and $text_stack the text
	# collected for each of them; index 0 is always the innermost level.
	# $em and $strong hold the open em / strong marker ('' when none), and
	# together select the token expression prepared by
	# prepareItalicsAndBold(). Markers still open at the end of the text
	# are put back as plain text. Returns the processed text.
	#
	$token_stack = array('');
	$text_stack = array('');
	$em = '';
	$strong = '';
	$tree_char_em = false;

	while (1) {
		#
		# Get prepared regular expression for searching emphasis tokens
		# in current context.
		#
		$token_re = $this->em_strong_prepared_relist["$em$strong"];

		#
		# Each loop iteration searches for the next emphasis token and
		# handles it below.
		#
		$parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
		$text_stack[0] .= $parts[0];
		# References: $parts[1] and $parts[2] do not exist when no token
		# was found, and binding by reference avoids an undefined-index
		# notice in that case.
		$token =& $parts[1];
		$text =& $parts[2];

		if (empty($token)) {
			# Reached end of text span: empty stack without emitting
			# any more emphasis.
			while ($token_stack[0]) {
				$text_stack[1] .= array_shift($token_stack);
				$text_stack[0] .= array_shift($text_stack);
			}
			break;
		}

		$token_len = strlen($token);
		if ($tree_char_em) {
			# Reached closing marker while inside a three-char emphasis.
			if ($token_len == 3) {
				# Three-char closing marker, close em and strong.
				array_shift($token_stack);
				$span = array_shift($text_stack);
				$span = $this->runSpanGamut($span);
				$span = "<strong><em>$span</em></strong>";
				$text_stack[0] .= $this->hashPart($span);
				$em = '';
				$strong = '';
			} else {
				# Other closing marker: close one em or strong and
				# change current token state to match the other.
				# (Square-bracket offset: $token{0} is invalid in PHP 8.)
				$token_stack[0] = str_repeat($token[0], 3-$token_len);
				$tag = $token_len == 2 ? "strong" : "em";
				$span = $text_stack[0];
				$span = $this->runSpanGamut($span);
				$span = "<$tag>$span</$tag>";
				$text_stack[0] = $this->hashPart($span);
				$$tag = ''; # $$tag stands for $em or $strong
			}
			$tree_char_em = false;
		} else if ($token_len == 3) {
			if ($em) {
				# Reached closing marker for both em and strong.
				# Close the two innermost spans, one after the other:
				for ($i = 0; $i < 2; ++$i) {
					$shifted_token = array_shift($token_stack);
					$tag = strlen($shifted_token) == 2 ? "strong" : "em";
					$span = array_shift($text_stack);
					$span = $this->runSpanGamut($span);
					$span = "<$tag>$span</$tag>";
					$text_stack[0] .= $this->hashPart($span);
					$$tag = ''; # $$tag stands for $em or $strong
				}
			} else {
				# Reached opening three-char emphasis marker. Push on token
				# stack; will be handled by the special condition above.
				$em = $token[0];
				$strong = "$em$em";
				array_unshift($token_stack, $token);
				array_unshift($text_stack, '');
				$tree_char_em = true;
			}
		} else if ($token_len == 2) {
			if ($strong) {
				# Unwind any dangling emphasis marker:
				if (strlen($token_stack[0]) == 1) {
					$text_stack[1] .= array_shift($token_stack);
					$text_stack[0] .= array_shift($text_stack);
				}
				# Closing strong marker:
				array_shift($token_stack);
				$span = array_shift($text_stack);
				$span = $this->runSpanGamut($span);
				$span = "<strong>$span</strong>";
				$text_stack[0] .= $this->hashPart($span);
				$strong = '';
			} else {
				# Opening strong marker.
				array_unshift($token_stack, $token);
				array_unshift($text_stack, '');
				$strong = $token;
			}
		} else {
			# Here $token_len == 1
			if ($em) {
				if (strlen($token_stack[0]) == 1) {
					# Closing emphasis marker:
					array_shift($token_stack);
					$span = array_shift($text_stack);
					$span = $this->runSpanGamut($span);
					$span = "<em>$span</em>";
					$text_stack[0] .= $this->hashPart($span);
					$em = '';
				} else {
					# The innermost open marker is not the em marker:
					# keep this token as plain text.
					$text_stack[0] .= $token;
				}
			} else {
				# Opening emphasis marker.
				array_unshift($token_stack, $token);
				array_unshift($text_stack, '');
				$em = $token;
			}
		}
	}
	return $text_stack[0];
}
| 1289 | |
| 1290 | |
function doBlockQuotes($text) {
	#
	# Find blockquotes (runs of lines introduced by ">") in $text and
	# replace each of them with the result of _doBlockQuotes_callback.
	#
	$blockquote_re = '/
		  (								# Wrap whole match in $1
			(?>
			  ^[ ]*>[ ]?			# ">" at the start of a line
				.+\n					# rest of the first line
			  (.+\n)*					# subsequent consecutive lines
			  \n*						# blanks
			)+
		  )
		/xm';

	return preg_replace_callback($blockquote_re,
		array(&$this, '_doBlockQuotes_callback'), $text);
}
function _doBlockQuotes_callback($matches) {
	#
	# Regex callback for one blockquote ($matches[1]). Strips one level of
	# quoting, runs the block gamut on the content (which handles nested
	# quotes) and returns the hashed <blockquote> block.
	#

	# Trim one level of quoting and empty out whitespace-only lines.
	$inner = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $matches[1]);
	$inner = $this->runBlockGamut($inner);		# recurse

	# Indent every line of the rendered content...
	$inner = preg_replace('/^/m', " ", $inner);
	# ...except inside <pre>, where leading spaces cause problems, so the
	# indentation is taken out again there.
	$inner = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
		array(&$this, '_DoBlockQuotes_callback2'), $inner);

	$block = $this->hashBlock("<blockquote>\n$inner\n</blockquote>");
	return "\n". $block ."\n\n";
}
function _doBlockQuotes_callback2($matches) {
	#
	# Regex callback for a <pre> block found inside a blockquote: removes
	# the leading space at the start of each line of $matches[1].
	#
	return preg_replace('/^ /m', '', $matches[1]);
}
| 1325 | |
| 1326 | |
function formParagraphs($text) {
	#
	#	Params:
	#		$text - string to process with html <p> tags
	#
	# Splits $text on blank lines. A chunk consisting solely of a block
	# hash key is replaced by the stored block; every other chunk is run
	# through the span gamut, wrapped in <p>...</p> and unhashed. The
	# chunks are returned joined by blank lines.
	#

	# Strip leading and trailing lines:
	$text = preg_replace('/\A\n+|\n+\z/', '', $text);

	$grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

	foreach ($grafs as $key => $value) {
		if (preg_match('/^B\x1A[0-9]+B$/', $value)) {
			# Is a block: swap the hash key for the stored block.
			$grafs[$key] = $this->html_hashes[$value];
			continue;
		}

		# Is a paragraph: leading spaces are replaced by the opening tag.
		$paragraph = $this->runSpanGamut($value);
		$paragraph = preg_replace('/^([ ]*)/', "<p>", $paragraph) . "</p>";
		$grafs[$key] = $this->unhash($paragraph);
	}

	return implode("\n\n", $grafs);
}
| 1395 | |
| 1396 | |
function encodeAttribute($text) {
	#
	# Encode text for a double-quoted HTML attribute. This function
	# is *not* suitable for attributes enclosed in single quotes.
	#
	# Ampersands and angle brackets are handled by encodeAmpsAndAngles();
	# double quotes are then turned into the &quot; entity so the value
	# cannot terminate the attribute it is placed in.
	#
	$text = $this->encodeAmpsAndAngles($text);
	$text = str_replace('"', '&quot;', $text);
	return $text;
}
| 1406 | |
| 1407 | |
function encodeAmpsAndAngles($text) {
	#
	# Smart processing for ampersands and angle brackets that need to
	# be encoded. Valid character entities are left alone unless the
	# no-entities mode is set.
	#
	# Returns $text with "&" replaced by &amp; (subject to the rule above)
	# and every "<" replaced by &lt;.
	#
	if ($this->no_entities) {
		# No-entities mode: every ampersand is encoded.
		$text = str_replace('&', '&amp;', $text);
	} else {
		# Ampersand-encoding based entirely on Nat Irons's Amputator
		# MT plugin: <http://bumppo.net/projects/amputator/>
		# Only ampersands that do not start something shaped like an
		# entity are encoded.
		$text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
			'&amp;', $text);
	}
	# Encode remaining <'s
	$text = str_replace('<', '&lt;', $text);

	return $text;
}
| 1427 | |
| 1428 | |
function doAutoLinks($text) {
	#
	# Turn <scheme:...> URLs and <address@domain> e-mail addresses written
	# between angle brackets into links, using the two callbacks below.
	#

	# URLs: http, https, ftp and dict schemes.
	$url_re = '{<((https?|ftp|dict):[^\'">\s]+)>}i';

	# Email addresses: <address@domain.foo>
	$email_re = '{
		<
		(?:mailto:)?
		(
			[-.\w\x80-\xFF]+
			\@
			[-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
		)
		>
		}xi';

	$text = preg_replace_callback($url_re,
		array(&$this, '_doAutoLinks_url_callback'), $text);

	return preg_replace_callback($email_re,
		array(&$this, '_doAutoLinks_email_callback'), $text);
}
function _doAutoLinks_url_callback($matches) {
	#
	# Regex callback for an automatic URL link: the attribute-encoded URL
	# in $matches[1] is used both as the href and as the link text.
	#
	$href = $this->encodeAttribute($matches[1]);

	return $this->hashPart("<a href=\"$href\">$href</a>");
}
function _doAutoLinks_email_callback($matches) {
	#
	# Regex callback for an automatic e-mail link: the address in
	# $matches[1] is turned into an obfuscated mailto link and hashed.
	#
	$mailto_link = $this->encodeEmailAddress($matches[1]);

	return $this->hashPart($mailto_link);
}
| 1458 | |
| 1459 | |
function encodeEmailAddress($addr) {
	#
	#	Input: an email address, e.g. "foo@example.com"
	#
	#	Output: the email address as a mailto link, with each character
	#		of the address encoded as either a decimal or hex entity, in
	#		the hopes of foiling most address harvesting spam bots. Both
	#		the href (including the "mailto:" prefix) and the link text
	#		are encoded this way; a few characters are left as-is.
	#
	#	Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
	#   With some optimizations by Milian Wolff.
	#
	$addr = "mailto:" . $addr;
	$chars = preg_split('/(?<!^)(?!$)/', $addr);

	# Deterministic seed: the same address always encodes the same way.
	$seed = (int)abs(crc32($addr) / strlen($addr));

	foreach ($chars as $key => $char) {
		$ord = ord($char);

		# Ignore non-ascii chars.
		if ($ord >= 128) continue;

		# Pseudo-random function: roughly 10% raw, 45% hex, 45% dec.
		$r = ($seed * (1 + $key)) % 100;

		# '@' *must* be encoded. I insist.
		if ($r > 90 && $char != '@') continue;

		$chars[$key] = ($r < 45)
			? '&#x'.dechex($ord).';'
			: '&#'.$ord.';';
	}

	$href = implode('', $chars);
	$label = implode('', array_slice($chars, 7)); # text without `mailto:`

	return "<a href=\"$href\">$label</a>";
}
| 1499 | |
| 1500 | |
function parseSpan($str) {
	#
	# Take the string $str and parse it into tokens, hashing embedded HTML,
	# escaped characters and handling code spans. Returns the text with
	# every token replaced by the result of handleSpanToken().
	#

	# HTML markup is only tokenized when the no_markup mode is off.
	$markup_re = $this->no_markup ? '' : '
			|
				<!--    .*?     -->		# comment
			|
				<\?.*?\?> | <%.*?%>		# processing instruction
			|
				<[/!$]?[-a-zA-Z0-9:]+	# regular tags
				(?>
					\s
					(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
				)?
				>
		';

	$span_re = '{
			(
				\\\\'.$this->escape_chars_re.'
			|
				(?<![`\\\\])
				`+						# code span marker
		'.$markup_re.'
			)
			}xs';

	$output = '';

	for (;;) {
		#
		# Each iteration searches for either the next tag, the next
		# opening code span marker, or the next escaped character.
		#
		$parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

		# Text preceding the token is copied through unchanged.
		$output .= $parts[0];

		# No token means the end of the text was reached.
		if (!isset($parts[1])) break;

		# handleSpanToken() receives the remaining text by reference and
		# may consume part of it, so $str is updated only afterwards.
		$output .= $this->handleSpanToken($parts[1], $parts[2]);
		$str = $parts[2];
	}

	return $output;
}
| 1555 | |
| 1556 | |
function handleSpanToken($token, &$str) {
	#
	# Handle $token provided by parseSpan by determining its nature and
	# returning the corresponding value that should replace it.
	#
	# $str is the text following the token, passed by reference: when a
	# code span is recognised, the span's content and closing marker are
	# removed from it.
	#
	# String offsets use square brackets: the curly-brace form ($token{0})
	# is deprecated in PHP 7.4 and is a parse error in PHP 8.
	#
	switch ($token[0]) {
		case "\\":
			# Backslash escape: emit the escaped character as a numeric
			# entity.
			return $this->hashPart("&#". ord($token[1]). ";");
		case "`":
			# Search for end marker in remaining text.
			if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
				$str, $matches))
			{
				$str = $matches[2];
				$codespan = $this->makeCodeSpan($matches[1]);
				return $this->hashPart($codespan);
			}
			return $token; // return as text since no ending marker found.
		default:
			# Any other token is hashed unchanged.
			return $this->hashPart($token);
	}
}
| 1579 | |
| 1580 | |
function outdent($text) {
	#
	# Remove one level of line-leading tabs or spaces: at the start of each
	# line, either a single tab or up to tab_width spaces.
	#
	$indent_re = '/^(\t|[ ]{1,'.$this->tab_width.'})/m';

	return preg_replace($indent_re, '', $text);
}
| 1587 | |
| 1588 | |
# String length function for detab. `_initDetab` will create a function to
# handle UTF-8 if the default function does not exist.
var $utf8_strlen = 'mb_strlen';
| 1592 | |
function detab($text) {
	#
	# Replace tabs with the appropriate amount of space.
	#
	# Only lines that actually contain a tab are rewritten; each of them
	# is rebuilt by _detab_callback.
	#
	return preg_replace_callback('/^.*\t.*$/m',
		array(&$this, '_detab_callback'), $text);
}
function _detab_callback($matches) {
	#
	# Regex callback for one line containing tabs ($matches[0]). The line
	# is cut at each tab and put back together, padding with spaces up to
	# the next multiple of tab_width before every piece after the first.
	#
	$strlen = $this->utf8_strlen; # strlen function for UTF-8.
	$line = '';

	foreach (explode("\t", $matches[0]) as $index => $block) {
		if ($index == 0) {
			# The first piece starts the line and needs no padding.
			$line = $block;
			continue;
		}
		# Number of spaces needed to reach the next tab stop.
		$amount = $this->tab_width -
			$strlen($line, 'UTF-8') % $this->tab_width;
		$line .= str_repeat(" ", $amount) . $block;
	}

	return $line;
}
function _initDetab() {
	#
	# Check for the availability of the function in the `utf8_strlen` property
	# (initially `mb_strlen`). If the function is not available, fall back to
	# a function that will loosely count the number of UTF-8 characters with
	# a regular expression.
	#
	if (function_exists($this->utf8_strlen)) return;

	# create_function() is deprecated in PHP 7.2 and removed in PHP 8.0, so
	# the fallback is declared as an ordinary named function the first time
	# it is needed. The guard keeps a second parser instance from declaring
	# it twice.
	if (!function_exists('_markdown_utf8_strlen_fallback')) {
		function _markdown_utf8_strlen_fallback($text) {
			# Counts single bytes below 0xC0 and lead bytes together with
			# their continuation bytes. Extra arguments passed by the
			# caller (the encoding name) are ignored.
			return preg_match_all(
				"/[\\x00-\\xBF]|[\\xC0-\\xFF][\\x80-\\xBF]*/",
				$text, $m);
		}
	}
	$this->utf8_strlen = '_markdown_utf8_strlen_fallback';
}
| 1635 | |
| 1636 | |
function unhash($text) {
	#
	# Swap back in all the tags hashed by _HashHTMLBlocks: every hash key
	# found in $text is replaced through _unhash_callback.
	#
	$hash_key_re = '/(.)\x1A[0-9]+\1/';

	return preg_replace_callback($hash_key_re,
		array(&$this, '_unhash_callback'), $text);
}
function _unhash_callback($matches) {
	#
	# Regex callback: look up the text stored under the matched hash key.
	#
	$hash_key = $matches[0];
	return $this->html_hashes[$hash_key];
}
| 1647 | |
| 1648 } | |
| 1649 | |
| 1650 | |
| 1651 # | |
| 1652 # Markdown Extra Parser Class | |
| 1653 # | |
| 1654 | |
| 1655 class MarkdownExtra_Parser extends Markdown_Parser { | |
| 1656 | |
| 1657 # Prefix for footnote ids. | |
| 1658 var $fn_id_prefix = ""; | |
| 1659 | |
| 1660 # Optional title attribute for footnote links and backlinks. | |
| 1661 var $fn_link_title = MARKDOWN_FN_LINK_TITLE; | |
| 1662 var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE; | |
| 1663 | |
| 1664 # Optional class attribute for footnote links and backlinks. | |
| 1665 var $fn_link_class = MARKDOWN_FN_LINK_CLASS; | |
| 1666 var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS; | |
| 1667 | |
| 1668 # Predefined abbreviations. | |
| 1669 var $predef_abbr = array(); | |
| 1670 | |
| 1671 | |
function MarkdownExtra_Parser() {
	#
	# Constructor function. Initialize the parser object.
	#
	# Everything below is registered before the parent constructor runs:
	# the parent initializes the escape table and does the sorting of the
	# gamuts.
	#

	# Extra transformations, each mapped to its priority.
	$extra_document_gamut = array(
		"doFencedCodeBlocks" => 5,
		"stripFootnotes"     => 15,
		"stripAbbreviations" => 25,
		"appendFootnotes"    => 50,
		);
	$extra_block_gamut = array(
		"doFencedCodeBlocks" => 5,
		"doTables"           => 15,
		"doDefLists"         => 45,
		);
	$extra_span_gamut = array(
		"doFootnotes"        => 5,
		"doAbbreviations"    => 70,
		);

	# Extra escapable characters.
	$this->escape_chars .= ':|';

	# The array union keeps entries that already exist under the same key.
	$this->document_gamut += $extra_document_gamut;
	$this->block_gamut += $extra_block_gamut;
	$this->span_gamut += $extra_span_gamut;

	parent::Markdown_Parser();
}
| 1700 | |
| 1701 | |
| 1702 # Extra variables used during extra transformations. | |
| 1703 var $footnotes = array(); | |
| 1704 var $footnotes_ordered = array(); | |
| 1705 var $abbr_desciptions = array(); | |
| 1706 var $abbr_word_re = ''; | |
| 1707 | |
| 1708 # Give the current footnote number. | |
| 1709 var $footnote_counter = 1; | |
| 1710 | |
| 1711 | |
function setup() {
	#
	# Setting up Extra-specific variables: footnote state is reset and the
	# abbreviation tables are rebuilt from the predefined abbreviations.
	#
	parent::setup();

	$this->footnotes = array();
	$this->footnotes_ordered = array();
	$this->footnote_counter = 1;

	# Build the alternation of abbreviation words and their descriptions.
	$word_re = '';
	$descriptions = array();
	foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
		if ($word_re) {
			$word_re .= '|';
		}
		$word_re .= preg_quote($abbr_word);
		$descriptions[$abbr_word] = trim($abbr_desc);
	}

	$this->abbr_word_re = $word_re;
	$this->abbr_desciptions = $descriptions;
}
| 1731 | |
function teardown() {
	#
	# Clearing Extra-specific variables, then letting the parent class do
	# its own teardown.
	#
	$this->abbr_word_re = '';
	$this->abbr_desciptions = array();
	$this->footnotes_ordered = array();
	$this->footnotes = array();

	parent::teardown();
}
| 1743 | |
| 1744 | |
| 1745 ### HTML Block Parser ### | |
| 1746 | |
| 1747 # Tags that are always treated as block tags: | |
| 1748 var $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|for
m|fieldset|iframe|hr|legend'; | |
| 1749 | |
| 1750 # Tags treated as block tags only if the opening tag is alone on its line: | |
| 1751 var $context_block_tags_re = 'script|noscript|math|ins|del'; | |
| 1752 | |
| 1753 # Tags where markdown="1" default to span mode: | |
| 1754 var $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address'; | |
| 1755 | |
| 1756 # Tags which must not have their contents modified, no matter where | |
| 1757 # they appear: | |
| 1758 var $clean_tags_re = 'script|math'; | |
| 1759 | |
| 1760 # Tags that do not need to be closed. | |
| 1761 var $auto_close_tags_re = 'hr|img'; | |
| 1762 | |
| 1763 | |
function hashHTMLBlocks($text) {
	#
	# Hashify HTML Blocks and "clean tags".
	#
	# We only want to do this for block-level HTML tags, such as headers,
	# lists, and tables. That's because we still want to wrap <p>s around
	# "paragraphs" that are wrapped in non-block-level tags, such as anchors,
	# phrase emphasis, and spans. The list of tags we're looking for is
	# hard-coded.
	#
	# This works by calling _HashHTMLBlocks_InMarkdown, which then calls
	# _HashHTMLBlocks_InHTML when it encounters block tags. When the
	# markdown="1" attribute is found within a tag, _HashHTMLBlocks_InHTML
	# calls back _HashHTMLBlocks_InMarkdown to handle the Markdown syntax
	# within the tag. These two functions are calling each other. It's
	# recursive!
	#

	# The HTML-in-Markdown hasher returns ( processed text, remaining text );
	# only the processed text is of interest here.
	$result = $this->_hashHTMLBlocks_inMarkdown($text);

	return $result[0];
}
function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
									$enclosing_tag_re = '', $span = false)
{
	#
	# Parse markdown text, calling _hashHTMLBlocks_inHTML for block tags.
	#
	# *	$indent is the number of spaces to be ignored when checking for code
	#	blocks. This is important because if we don't take the indent into
	#	account, something like this (which looks right) won't work as
	#	expected:
	#
	#	<div>
	#		<div markdown="1">
	#		Hello World.  <-- Is this a Markdown code block or text?
	#		</div>        <-- Is this a Markdown code block or a real tag?
	#	<div>
	#
	#	If you don't like this, just don't indent the tag on which
	#	you apply the markdown="1" attribute.
	#
	# *	If $enclosing_tag_re is not empty, stops at the first unmatched
	#	closing tag with that name. Nested tags supported.
	#
	# *	If $span is true, text inside must be treated as span. An empty
	#	span-level hash is inserted around every newline so that no block
	#	element gets triggered.
	#
	# Returns an array of that form: ( processed text , remaining text )
	#
	if ($text === '') return array('', '');

	# Regex to check for the presence of newlines around a block tag.
	$newline_before_re = '/(?:^\n?|\n\n)*$/';
	$newline_after_re =
		'{
			^						# Start of text following the tag.
			(?>[ ]*<!--.*?-->)?		# Optional comment.
			[ ]*\n					# Must be followed by newline.
		}xs';

	# Regex to match any tag.
	$block_tag_re =
		'{
			(					# $2: Capture hole tag.
				</?					# Any opening or closing tag.
					(?>				# Tag name.
						'.$this->block_tags_re.'			|
						'.$this->context_block_tags_re.'	|
						'.$this->clean_tags_re.'			|
						(?!\s)'.$enclosing_tag_re.'
					)
					(?:
						(?=[\s"\'/a-zA-Z0-9])	# Allowed characters after tag name.
						(?>
							".*?"		|	# Double quotes (can contain `>`)
							\'.*?\'		|	# Single quotes (can contain `>`)
							.+?				# Anything but quotes and `>`.
						)*?
					)?
				>					# End of tag.
			|
				<!--    .*?     -->	# HTML Comment
			|
				<\?.*?\?> | <%.*?%>	# Processing instruction
			|
				<!\[CDATA\[.*?\]\]>	# CData Block
			|
				# Code span marker
				`+
			'. ( !$span ? ' # If not in span.
			|
				# Indented code block
				(?> ^[ ]*\n? | \n[ ]*\n )
				[ ]{'.($indent+4).'}[^\n]* \n
				(?>
					(?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
				)*
			|
				# Fenced code block marker
				(?> ^ | \n )
				[ ]{'.($indent).'}~~~+[ ]*\n
			' : '' ). ' # End (if not is span).
			)
		}xs';


	$depth = 0;		# Current depth inside the tag tree.
	$parsed = "";	# Parsed text that will be returned.

	#
	# Loop through every tag until we find the closing tag of the parent
	# or loop until reaching the end of text if no parent tag specified.
	#
	do {
		#
		# Split the text using the first $block_tag_re match found.
		# Text before the match will be first in the array, text after
		# the match will be at the end, and between will be the captured
		# tag or marker.
		#
		$parts = preg_split($block_tag_re, $text, 2,
							PREG_SPLIT_DELIM_CAPTURE);

		# If in Markdown span mode, add an empty-string span-level hash
		# after each newline to prevent triggering any block element.
		if ($span) {
			$void = $this->hashPart("", ':');
			$newline = "$void\n";
			$parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
		}

		$parsed .= $parts[0]; # Text before current tag.

		# If end of $text has been reached. Stop loop.
		if (count($parts) < 3) {
			$text = "";
			break;
		}

		$tag  = $parts[1]; # Tag to handle.
		$text = $parts[2]; # Remaining text after current tag.

		# NOTE: string offsets below use [] rather than the {} form, which
		# is deprecated since PHP 7.4 and a parse error since PHP 8.0.

		#
		# Check for: Code span marker
		#
		if ($tag[0] == "`") {
			# Find corresponding end marker.
			$tag_re = preg_quote($tag);
			if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
				$text, $matches))
			{
				# End marker found: pass text unchanged until marker.
				$parsed .= $tag . $matches[0];
				$text = substr($text, strlen($matches[0]));
			}
			else {
				# Unmatched marker: just skip it.
				$parsed .= $tag;
			}
		}
		#
		# Check for: Indented code block or fenced code block marker.
		#
		else if ($tag[0] == "\n" || $tag[0] == "~") {
			if ($tag[1] == "\n" || $tag[1] == " ") {
				# Indented code block: pass it unchanged, will be handled
				# later.
				$parsed .= $tag;
			}
			else {
				# Fenced code block marker: find matching end marker.
				$tag_re = preg_quote(trim($tag));
				if (preg_match('{^(?>.*\n)+?'.$tag_re.' *\n}', $text,
					$matches))
				{
					# End marker found: pass text unchanged until marker.
					$parsed .= $tag . $matches[0];
					$text = substr($text, strlen($matches[0]));
				}
				else {
					# No end marker: just skip it.
					$parsed .= $tag;
				}
			}
		}
		#
		# Check for: Opening Block level tag or
		#            Opening Context Block tag (like ins and del)
		#               used as a block tag (tag is alone on its line).
		#
		else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
			(	preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
				preg_match($newline_before_re, $parsed) &&
				preg_match($newline_after_re, $text)	)
			)
		{
			# Need to parse tag and following text using the HTML parser.
			list($block_text, $text) =
				$this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);

			# Make sure it stays outside of any paragraph by adding newlines.
			$parsed .= "\n\n$block_text\n\n";
		}
		#
		# Check for: Clean tag (like script, math)
		#            HTML Comments, processing instructions.
		#
		else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
			$tag[1] == '!' || $tag[1] == '?')
		{
			# Need to parse tag and following text using the HTML parser.
			# (don't check for markdown attribute)
			list($block_text, $text) =
				$this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);

			$parsed .= $block_text;
		}
		#
		# Check for: Tag with same name as enclosing tag.
		#
		else if ($enclosing_tag_re !== '' &&
			# Same name as enclosing tag.
			preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
		{
			#
			# Increase/decrease nested tag count. A self-closed tag
			# (ending with "/>") leaves the depth untouched.
			#
			if ($tag[1] == '/')						$depth--;
			else if ($tag[strlen($tag)-2] != '/')	$depth++;

			if ($depth < 0) {
				#
				# Going out of parent element. Put the closing tag back in
				# front of the remaining text and break so we return to the
				# calling function.
				#
				$text = $tag . $text;
				break;
			}

			$parsed .= $tag;
		}
		else {
			$parsed .= $tag;
		}
	} while ($depth >= 0);

	return array($parsed, $text);
}
function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
	#
	# Parse HTML, calling _hashHTMLBlocks_inMarkdown for block tags.
	#
	# *	Calls $hash_method (a method name on this object) to convert any
	#	blocks.
	# *	Stops when the first opening tag closes.
	# *	$md_attr indicates if the use of the `markdown="1"` attribute is
	#	allowed. (it is not inside clean tags)
	#
	# Returns an array of that form: ( processed text , remaining text )
	#
	if ($text === '') return array('', '');

	# Regex to match `markdown` attribute inside of a tag.
	$markdown_attr_re = '
		{
			\s*			# Eat whitespace before the `markdown` attribute
			markdown
			\s*=\s*
			(?>
				(["\'])		# $1: quote delimiter
				(.*?)		# $2: attribute value
				\1			# matching delimiter
			|
				([^\s>]*)	# $3: unquoted attribute value
			)
			()				# $4: make $3 always defined (avoid warnings)
		}xs';

	# Regex to match any tag.
	$tag_re = '{
			(					# $2: Capture hole tag.
				</?					# Any opening or closing tag.
					[\w:$]+			# Tag name.
					(?:
						(?=[\s"\'/a-zA-Z0-9])	# Allowed characters after tag name.
						(?>
							".*?"		|	# Double quotes (can contain `>`)
							\'.*?\'		|	# Single quotes (can contain `>`)
							.+?				# Anything but quotes and `>`.
						)*?
					)?
				>					# End of tag.
			|
				<!--    .*?     -->	# HTML Comment
			|
				<\?.*?\?> | <%.*?%>	# Processing instruction
			|
				<!\[CDATA\[.*?\]\]>	# CData Block
			)
		}xs';

	$original_text = $text;		# Save original text in case of failure.

	$depth		= 0;	# Current depth inside the tag tree.
	$block_text	= "";	# Temporary text holder for current text.
	$parsed		= "";	# Parsed text that will be returned.

	#
	# Get the name of the starting tag.
	# (This pattern makes $base_tag_name_re safe without quoting.)
	#
	# Default to an empty name so the variable is always defined: text
	# starting with a comment or processing instruction does not match
	# the pattern below.
	#
	$base_tag_name_re = '';
	if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
		$base_tag_name_re = $matches[1];

	#
	# Loop through every tag until we find the corresponding closing tag.
	#
	do {
		#
		# Split the text using the first $tag_re match found.
		# Text before the match will be first in the array, text after
		# the match will be at the end, and between will be the captured
		# tag.
		#
		$parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

		if (count($parts) < 3) {
			#
			# End of $text reached with unbalanced tag(s).
			# In that case, we return original text unchanged and pass the
			# first character as filtered to prevent an infinite loop in the
			# parent function.
			#
			return array($original_text[0], substr($original_text, 1));
		}

		$block_text .= $parts[0]; # Text before current tag.
		$tag         = $parts[1]; # Tag to handle.
		$text        = $parts[2]; # Remaining text after current tag.

		#
		# Check for: Auto-close tag (like <hr/>)
		#			 Comments and Processing Instructions.
		#
		if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
			$tag[1] == '!' || $tag[1] == '?')
		{
			# Just add the tag to the block as if it was text.
			$block_text .= $tag;
		}
		else {
			#
			# Increase/decrease nested tag count. Only do so if
			# the tag's name matches the base tag's. A self-closed tag
			# (ending with "/>") leaves the depth untouched.
			#
			if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
				if ($tag[1] == '/')						$depth--;
				else if ($tag[strlen($tag)-2] != '/')	$depth++;
			}

			#
			# Check for `markdown="1"` attribute and handle it.
			# The alternation is grouped so that only the exact values
			# "1", "block" and "span" are accepted; ungrouped, `^` and `$`
			# would each bind to a single alternative only.
			#
			if ($md_attr &&
				preg_match($markdown_attr_re, $tag, $attr_m) &&
				preg_match('/^(?:1|block|span)$/', $attr_m[2] . $attr_m[3]))
			{
				# Remove `markdown` attribute from opening tag.
				$tag = preg_replace($markdown_attr_re, '', $tag);

				# Check if text inside this tag must be parsed in span mode:
				# either explicitly requested, or implied by the tag name
				# unless block mode was explicitly requested.
				$this->mode = $attr_m[2] . $attr_m[3];
				$span_mode = $this->mode == 'span' || $this->mode != 'block' &&
					preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);

				# Calculate indent before tag.
				if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
					$strlen = $this->utf8_strlen;
					$indent = $strlen($matches[1], 'UTF-8');
				} else {
					$indent = 0;
				}

				# End preceding block with this tag.
				$block_text .= $tag;
				$parsed .= $this->$hash_method($block_text);

				# Get enclosing tag name for the ParseMarkdown function.
				# (This pattern makes $tag_name_re safe without quoting.)
				preg_match('/^<([\w:$]*)\b/', $tag, $matches);
				$tag_name_re = $matches[1];

				# Parse the content using the HTML-in-Markdown parser.
				list ($block_text, $text)
					= $this->_hashHTMLBlocks_inMarkdown($text, $indent,
						$tag_name_re, $span_mode);

				# Outdent markdown text.
				if ($indent > 0) {
					$block_text = preg_replace("/^[ ]{1,$indent}/m", "",
												$block_text);
				}

				# Append tag content to parsed text.
				if (!$span_mode)	$parsed .= "\n\n$block_text\n\n";
				else				$parsed .= "$block_text";

				# Start over a new block.
				$block_text = "";
			}
			else $block_text .= $tag;
		}

	} while ($depth > 0);

	#
	# Hash last block text that wasn't processed inside the loop.
	#
	$parsed .= $this->$hash_method($block_text);

	return array($parsed, $text);
}
| 2188 | |
| 2189 | |
function hashClean($text) {
	#
	# Hash a "clean" tag. Delegates to hashPart with the 'C' boundary,
	# which is the marker formParagraphs later recognises as a clean-tag
	# hash. Hashing the tag keeps it escaped and blocks invalid nested
	# overlap.
	#
	$hashed = $this->hashPart($text, 'C');
	return $hashed;
}
| 2198 | |
| 2199 | |
function doHeaders($text) {
	#
	# Redefined to add id attribute support.
	#
	# Both header styles accept an optional trailing {#id}:
	#
	#	Setext-style:
	#		Header 1  {#header1}
	#		========
	#
	#		Header 2  {#header2}
	#		--------
	#
	#	atx-style:
	#		# Header 1        {#header1}
	#		## Header 2       {#header2}
	#		## Header 2 with closing hashes ##  {#header3}
	#		...
	#		###### Header 6   {#header2}
	#
	$setext_re = '{
			(^.+?)								# $1: Header text
			(?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})?	# $2: Id attribute
			[ ]*\n(=+|-+)[ ]*\n+				# $3: Header footer
		}mx';

	$atx_re = '{
			^(\#{1,6})	# $1 = string of #\'s
			[ ]*
			(.+?)		# $2 = Header text
			[ ]*
			\#*			# optional closing #\'s (not counted)
			(?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute
			[ ]*
			\n+
		}xm';

	# Setext headers are converted first, then atx headers.
	$with_setext = preg_replace_callback($setext_re,
		array(&$this, '_doHeaders_callback_setext'), $text);

	return preg_replace_callback($atx_re,
		array(&$this, '_doHeaders_callback_atx'), $with_setext);
}
function _doHeaders_attr($attr) {
	# Build the ` id="..."` attribute string for a header, or an empty
	# string when no id was given. Note that empty() also treats "0" as
	# no id.
	return empty($attr) ? "" : ' id="' . $attr . '"';
}
function _doHeaders_callback_setext($matches) {
	#
	# Callback for Setext-style headers.
	#
	#	$matches[1]: header text
	#	$matches[2]: optional id (empty when absent)
	#	$matches[3]: underline made of "=" (level 1) or "-" (level 2)
	#
	# Returns the hashed <hN> block surrounded by newlines.
	#
	# A single dash under a line starting with "- " is left untouched:
	# the original text is returned instead of a header.
	if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
		return $matches[0];

	# String offset uses [] rather than {}, which is deprecated since
	# PHP 7.4 and a parse error since PHP 8.0.
	$level = $matches[3][0] == '=' ? 1 : 2;
	$id = isset($matches[2]) ? $matches[2] : null;
	$attr  = $this->_doHeaders_attr($id);
	$block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
	return "\n" . $this->hashBlock($block) . "\n\n";
}
function _doHeaders_callback_atx($matches) {
	#
	# Callback for atx-style headers.
	#
	#	$matches[1]: leading run of "#" characters; its length is the level
	#	$matches[2]: header text
	#	$matches[3]: optional id (may be missing from the match array)
	#
	$depth   = strlen($matches[1]);
	$id_attr = $this->_doHeaders_attr(isset($matches[3]) ? $matches[3] : null);
	$inner   = $this->runSpanGamut($matches[2]);

	$html = "<h$depth$id_attr>" . $inner . "</h$depth>";
	return "\n" . $this->hashBlock($html) . "\n\n";
}
| 2258 | |
| 2259 | |
function doTables($text) {
	#
	# Form HTML tables.
	#
	# Two syntaxes are recognised, in this order:
	#
	#	With a leading pipe on every line:
	#		| Header 1 | Header 2
	#		| -------- | --------
	#		| Cell 1   | Cell 2
	#		| Cell 3   | Cell 4
	#
	#	Without a leading pipe:
	#		Header 1 | Header 2
	#		-------- | --------
	#		Cell 1   | Cell 2
	#		Cell 3   | Cell 4
	#
	$max_indent = $this->tab_width - 1;

	$leading_pipe_re = '
		{
			^							# Start of a line
			[ ]{0,'.$max_indent.'}	# Allowed whitespace.
			[|]							# Optional leading pipe (present)
			(.+) \n						# $1: Header row (at least one pipe)

			[ ]{0,'.$max_indent.'}	# Allowed whitespace.
			[|] ([ ]*[-:]+[-| :]*) \n	# $2: Header underline

			(							# $3: Cells
				(?>
					[ ]*				# Allowed whitespace.
					[|] .* \n			# Row content.
				)*
			)
			(?=\n|\Z)					# Stop at final double newline.
		}xm';

	$no_leading_pipe_re = '
		{
			^							# Start of a line
			[ ]{0,'.$max_indent.'}	# Allowed whitespace.
			(\S.*[|].*) \n				# $1: Header row (at least one pipe)

			[ ]{0,'.$max_indent.'}	# Allowed whitespace.
			([-:]+[ ]*[|][-| :]*) \n	# $2: Header underline

			(							# $3: Cells
				(?>
					.* [|] .* \n		# Row content
				)*
			)
			(?=\n|\Z)					# Stop at final double newline.
		}xm';

	$text = preg_replace_callback($leading_pipe_re,
		array(&$this, '_doTable_leadingPipe_callback'), $text);

	return preg_replace_callback($no_leading_pipe_re,
		array(&$this, '_DoTable_callback'), $text);
}
function _doTable_leadingPipe_callback($matches) {
	#
	# Callback for tables written with a leading pipe. The header and
	# underline were captured without their leading pipe; strip it from
	# every body row as well, then delegate to the generic table callback.
	#
	$rows = preg_replace('/^ *[|]/m', '', $matches[3]);

	return $this->_doTable_callback(
		array($matches[0], $matches[1], $matches[2], $rows));
}
function _doTable_callback($matches) {
	#
	# Build the HTML for one table.
	#
	#	$matches[1]: header row
	#	$matches[2]: header underline (carries per-column alignment)
	#	$matches[3]: body rows, one per line
	#
	# Returns the hashed <table> block followed by a newline.
	#
	$head		= $matches[1];
	$underline	= $matches[2];
	$content	= $matches[3];

	# Remove any trailing pipes for each line.
	$head		= preg_replace('/[|] *$/m', '', $head);
	$underline	= preg_replace('/[|] *$/m', '', $underline);
	$content	= preg_replace('/[|] *$/m', '', $content);

	# Reading alignment from header underline:
	#   "---:" right, ":---:" center, ":---" left, anything else none.
	$attr = array();
	$separators	= preg_split('/ *[|] */', $underline);
	foreach ($separators as $n => $s) {
		if (preg_match('/^ *-+: *$/', $s))		$attr[$n] = ' align="right"';
		else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"';
		else if (preg_match('/^ *:-+ *$/', $s))	$attr[$n] = ' align="left"';
		else									$attr[$n] = '';
	}

	# Parsing span elements, including code spans, character escapes,
	# and inline HTML tags, so that pipes inside those gets ignored.
	$head		= $this->parseSpan($head);
	$headers	= preg_split('/ *[|] */', $head);
	$col_count	= count($headers);

	# The underline may define fewer columns than the header row; pad the
	# alignment list so every column index used below is defined.
	$attr		= array_pad($attr, $col_count, '');

	# Write column headers.
	$text = "<table>\n";
	$text .= "<thead>\n";
	$text .= "<tr>\n";
	foreach ($headers as $n => $header)
		$text .= "  <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
	$text .= "</tr>\n";
	$text .= "</thead>\n";

	# Split content by row.
	$rows = explode("\n", trim($content, "\n"));

	$text .= "<tbody>\n";
	foreach ($rows as $row) {
		# Parsing span elements, including code spans, character escapes,
		# and inline HTML tags, so that pipes inside those gets ignored.
		$row = $this->parseSpan($row);

		# Split row by cell. Extra cells are folded into the last column
		# by the split limit; missing cells are padded with empty strings.
		$row_cells = preg_split('/ *[|] */', $row, $col_count);
		$row_cells = array_pad($row_cells, $col_count, '');

		$text .= "<tr>\n";
		foreach ($row_cells as $n => $cell)
			$text .= "  <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
		$text .= "</tr>\n";
	}
	$text .= "</tbody>\n";
	$text .= "</table>";

	return $this->hashBlock($text) . "\n";
}
| 2388 | |
| 2389 | |
function doDefLists($text) {
	#
	# Form HTML definition lists.
	#
	# A list is only recognised at the very start of the text or right
	# after a blank line; each match is handed to _doDefLists_callback.
	#
	$max_indent = $this->tab_width - 1;

	# Re-usable pattern to match any entire dl list:
	$whole_list_re = '(?>
		(								# $1 = whole list
		  (								# $2
			[ ]{0,'.$max_indent.'}
			((?>.*\S.*\n)+)				# $3 = defined term
			\n?
			[ ]{0,'.$max_indent.'}:[ ]+ # colon starting definition
		  )
		  (?s:.+?)
		  (								# $4
			  \z
			|
			  \n{2,}
			  (?=\S)
			  (?!						# Negative lookahead for another term
				[ ]{0,'.$max_indent.'}
				(?: \S.*\n )+?			# defined term
				\n?
				[ ]{0,'.$max_indent.'}:[ ]+ # colon starting definition
			  )
			  (?!						# Negative lookahead for another definition
				[ ]{0,'.$max_indent.'}:[ ]+ # colon starting definition
			  )
		  )
		)
	)'; // mx

	$anchored_list_re = '{
			(?>\A\n?|(?<=\n\n))
			'.$whole_list_re.'
		}mx';

	return preg_replace_callback($anchored_list_re,
		array(&$this, '_doDefLists_callback'), $text);
}
function _doDefLists_callback($matches) {
	#
	# Callback for one whole definition list ($matches[1]): convert its
	# items, wrap them in <dl>, and return the hashed block followed by
	# a blank line.
	#
	$items = trim($this->processDefListItems($matches[1]));
	$html  = "<dl>\n" . $items . "\n</dl>";

	return $this->hashBlock($html) . "\n\n";
}
| 2442 | |
| 2443 | |
function processDefListItems($list_str) {
	#
	# Process the contents of a single definition list, splitting it
	# into individual term and definition list items.
	#
	# Terms are converted first (to <dt> elements), then definitions:
	# the definition pattern relies on the generated <dt> tags to know
	# where a definition ends.
	#
	$less_than_tab = $this->tab_width - 1;

	# trim trailing blank lines:
	$list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

	# Process definition terms.
	# The final lookahead uses $less_than_tab like every other
	# definition-mark pattern, instead of a hard-coded 3.
	$list_str = preg_replace_callback('{
		(?>\A\n?|\n\n+)					# leading line
		(								# definition terms = $1
			[ ]{0,'.$less_than_tab.'}	# leading whitespace
			(?![:][ ]|[ ])				# negative lookahead for a definition
										#   mark (colon) or more whitespace.
			(?> \S.* \n)+?				# actual term (not whitespace).
		)
		(?=\n?[ ]{0,'.$less_than_tab.'}:[ ])	# lookahead for following line feed
										#   with a definition mark.
		}xm',
		array(&$this, '_processDefListItems_callback_dt'), $list_str);

	# Process actual definitions.
	$list_str = preg_replace_callback('{
		\n(\n+)?						# leading line = $1
		(								# marker space = $2
			[ ]{0,'.$less_than_tab.'}	# whitespace before colon
			[:][ ]+						# definition mark (colon)
		)
		((?s:.+?))						# definition text = $3
		(?= \n+ 						# stop at next definition mark,
			(?:							# next term or end of text
				[ ]{0,'.$less_than_tab.'} [:][ ]	|
				<dt> | \z
			)
		)
		}xm',
		array(&$this, '_processDefListItems_callback_dd'), $list_str);

	return $list_str;
}
function _processDefListItems_callback_dt($matches) {
	#
	# Callback for a group of definition terms ($matches[1], one term per
	# line). Each term goes through the span gamut and becomes its own
	# <dt> element on a new line.
	#
	$items = array();
	foreach (explode("\n", trim($matches[1])) as $line) {
		$items[] = "\n<dt>" . $this->runSpanGamut(trim($line)) . "</dt>";
	}

	return implode('', $items) . "\n";
}
function _processDefListItems_callback_dd($matches) {
	#
	# Callback for a single definition.
	#
	#	$matches[1]: extra blank line(s) before the definition, if any
	#	$matches[2]: indentation + colon + spaces preceding the text
	#	$matches[3]: definition text
	#
	# A definition preceded by a blank line, or containing one, is
	# rendered as block content; otherwise as span content.
	#
	$body = $matches[3];
	$as_block = $matches[1] || preg_match('/\n{2,}/', $body);

	if (!$as_block) {
		$inline = $this->runSpanGamut($this->outdent(rtrim($body)));
		return "\n<dd>" . $inline . "</dd>\n";
	}

	# Replace marker with the appropriate whitespace indentation so the
	# first line lines up with the following ones before outdenting.
	$padded = str_repeat(' ', strlen($matches[2])) . $body;
	$blocks = $this->runBlockGamut($this->outdent($padded . "\n\n"));

	return "\n<dd>" . "\n" . $blocks . "\n" . "</dd>\n";
}
| 2514 | |
| 2515 | |
function doFencedCodeBlocks($text) {
	#
	# Adding the fenced code block syntax to regular Markdown:
	#
	# ~~~
	# Code block
	# ~~~
	#
	# The closing marker must be the exact same run of tildes as the
	# opening one (back-reference \1). Each block is handed to
	# _doFencedCodeBlocks_callback.
	#
	# (The unused local holding the tab width was removed: the pattern
	# does not depend on it.)
	#
	$text = preg_replace_callback('{
			(?:\n|\A)
			# 1: Opening marker
			(
				~{3,} # Marker: three tilde or more.
			)
			[ ]* \n # Whitespace and newline following marker.

			# 2: Content
			(
				(?>
					(?!\1 [ ]* \n)	# Not a closing marker.
					.*\n+
				)+
			)

			# Closing marker.
			\1 [ ]* \n
		}xm',
		array(&$this, '_doFencedCodeBlocks_callback'), $text);

	return $text;
}
function _doFencedCodeBlocks_callback($matches) {
	#
	# Callback for one fenced code block; $matches[2] is the raw content.
	# The content is HTML-escaped (quotes left alone), leading newlines
	# are turned into <br> elements, and the result is wrapped in
	# <pre><code> and hashed as a block.
	#
	$escaped = htmlspecialchars($matches[2], ENT_NOQUOTES);
	$escaped = preg_replace_callback('/^\n+/',
		array(&$this, '_doFencedCodeBlocks_newlines'), $escaped);

	$html = "<pre><code>" . $escaped . "</code></pre>";
	return "\n\n" . $this->hashBlock($html) . "\n\n";
}
function _doFencedCodeBlocks_newlines($matches) {
	# Emit one <br> element per newline character in the matched run.
	$br = "<br" . $this->empty_element_suffix;
	$count = strlen($matches[0]);

	return str_repeat($br, $count);
}
| 2561 | |
| 2562 | |
#
# Redefining emphasis markers so that emphasis by underscore does not
# work in the middle of a word.
#
# In each list the '' entry requires a non-space character after the
# marker, and the entries keyed by a marker require a non-space character
# before it -- presumably the opening pattern and the per-marker closing
# patterns; confirm against the span parser that consumes these lists.
# The underscore variants additionally refuse a letter, digit or
# underscore on the outer side of the marker.
#
var $em_relist = array(
	''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\S)(?![.,:;]\s)',
	'*' => '(?<=\S)(?<!\*)\*(?!\*)',
	'_' => '(?<=\S)(?<!_)_(?![a-zA-Z0-9_])',
	);
var $strong_relist = array(
	''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\S)(?![.,:;]\s)',
	'**' => '(?<=\S)(?<!\*)\*\*(?!\*)',
	'__' => '(?<=\S)(?<!_)__(?![a-zA-Z0-9_])',
	);
var $em_strong_relist = array(
	''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\S)(?![.,:;]\s)',
	'***' => '(?<=\S)(?<!\*)\*\*\*(?!\*)',
	'___' => '(?<=\S)(?<!_)___(?![a-zA-Z0-9_])',
	);
| 2582 | |
| 2583 | |
function formParagraphs($text) {
	#
	#	Params:
	#		$text - string to process with html <p> tags
	#
	# Splits $text on blank lines, runs the span gamut on every chunk,
	# wraps chunks in <p> unless they are block or clean-tag hashes, and
	# finally unhashes the whole text.
	#
	# Strip leading and trailing lines:
	$stripped = preg_replace('/\A\n+|\n+\z/', '', $text);

	$chunks = preg_split('/\n{2,}/', $stripped, -1, PREG_SPLIT_NO_EMPTY);

	$blocks = array();
	foreach ($chunks as $chunk) {
		$html = trim($this->runSpanGamut($chunk));

		# Clean tag hashes & block tag hashes are left alone; everything
		# else is enclosed in a paragraph.
		# NOTE(review): only the clean-hash alternative is anchored at
		# the end ($); a chunk merely *starting* with a block hash is
		# also left unwrapped. Confirm this asymmetry is intended.
		$is_hash = preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $html);

		$blocks[] = $is_hash ? $html : "<p>$html</p>";
	}

	# Join blocks in one text, then remove any tag hashes still present.
	return $this->unhash(implode("\n\n", $blocks));
}
| 2618 | |
| 2619 | |
| 2620 ### Footnotes | |
| 2621 | |
function stripFootnotes($text) {
	#
	# Strips footnote definitions from text; their content is stored by
	# _stripFootnotes_callback, keyed by note id.
	#
	# Footnote definitions are in the form: [^id]: text of the note
	#
	$max_indent = $this->tab_width - 1;

	$footnote_def_re = '{
		^[ ]{0,'.$max_indent.'}\[\^(.+?)\][ ]?:	# note_id = $1
		  [ ]*
		  \n?					# maybe *one* newline
		(						# text = $2 (no blank lines allowed)
			(?:
				.+				# actual text
			|
				\n				# newlines but
				(?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
				(?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
								# by non-indented content
			)*
		)
		}xm';

	return preg_replace_callback($footnote_def_re,
		array(&$this, '_stripFootnotes_callback'), $text);
}
function _stripFootnotes_callback($matches) {
	# Store the outdented footnote text ($matches[2]) under its prefixed
	# id ($matches[1]), and remove the definition from the document by
	# returning an empty replacement string.
	$key = $this->fn_id_prefix . $matches[1];
	$body = $this->outdent($matches[2]);
	$this->footnotes[$key] = $body;

	return '';
}
| 2654 | |
| 2655 | |
function doFootnotes($text) {
	#
	# Replace footnote references in $text [^id] with a special text-token
	# which will be replaced by the actual footnote marker in
	# appendFootnotes. Nothing is replaced while inside an anchor.
	#
	if ($this->in_anchor) {
		return $text;
	}

	return preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
}
| 2666 | |
| 2667 | |
function appendFootnotes($text) {
	#
	# Append footnote list to text.
	#
	# First turns every footnote token left by doFootnotes into its final
	# marker (via _appendFootnotes_callback, which also queues the
	# referenced notes in $this->footnotes_ordered), then renders the
	# queued notes as an ordered list inside <div class="footnotes">.
	#
	$text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
		array(&$this, '_appendFootnotes_callback'), $text);

	if (!empty($this->footnotes_ordered)) {
		$text .= "\n\n";
		$text .= "<div class=\"footnotes\">\n";
		# Use the parser's own suffix, as the other methods of this class
		# do, rather than the global constant.
		$text .= "<hr". $this->empty_element_suffix ."\n";
		$text .= "<ol>\n\n";

		# Attribute template shared by all backlinks; "%%" in the class
		# or title stands for the footnote number.
		$attr = " rev=\"footnote\"";
		if ($this->fn_backlink_class != "") {
			$class = $this->fn_backlink_class;
			$class = $this->encodeAttribute($class);
			$attr .= " class=\"$class\"";
		}
		if ($this->fn_backlink_title != "") {
			$title = $this->fn_backlink_title;
			$title = $this->encodeAttribute($title);
			$attr .= " title=\"$title\"";
		}
		$num = 0;

		# Consume the queue from the front: processing a note may queue
		# further notes referenced from inside it.
		while (!empty($this->footnotes_ordered)) {
			$footnote = reset($this->footnotes_ordered);
			$note_id = key($this->footnotes_ordered);
			unset($this->footnotes_ordered[$note_id]);

			$footnote .= "\n"; # Need to append newline before parsing.
			$footnote = $this->runBlockGamut("$footnote\n");
			$footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
				array(&$this, '_appendFootnotes_callback'), $footnote);

			# Expand the number placeholder into a per-note copy so the
			# template keeps its "%%" for the following notes.
			$note_attr = str_replace("%%", (string) ++$num, $attr);
			$note_id = $this->encodeAttribute($note_id);

			# Add backlink to last paragraph; create new paragraph if needed.
			$backlink = "<a href=\"#fnref:$note_id\"$note_attr>↩</a>";
			if (preg_match('{</p>$}', $footnote)) {
				$footnote = substr($footnote, 0, -4) . " $backlink</p>";
			} else {
				$footnote .= "\n\n<p>$backlink</p>";
			}

			$text .= "<li id=\"fn:$note_id\">\n";
			$text .= $footnote . "\n";
			$text .= "</li>\n\n";
		}

		$text .= "</ol>\n";
		$text .= "</div>";
	}
	return $text;
}
function _appendFootnotes_callback($matches) {
	#
	# Callback for footnote reference tokens; $matches[1] is the raw id.
	#
	# Returns the <sup> marker linking to the footnote when a definition
	# is still available for that id, otherwise the literal "[^id]" text.
	#
	$fn_key = $this->fn_id_prefix . $matches[1];

	# No definition, or the definition was already claimed by an earlier
	# marker: put the original reference text back.
	if (!isset($this->footnotes[$fn_key])) {
		return "[^".$matches[1]."]";
	}

	# Transfer the footnote content to the ordered list so that it is
	# emitted in order of first reference and cannot be claimed twice.
	$this->footnotes_ordered[$fn_key] = $this->footnotes[$fn_key];
	unset($this->footnotes[$fn_key]);

	$num = $this->footnote_counter++;

	$link_attr = " rel=\"footnote\"";
	if ($this->fn_link_class != "") {
		$link_class = $this->encodeAttribute($this->fn_link_class);
		$link_attr .= " class=\"$link_class\"";
	}
	if ($this->fn_link_title != "") {
		$link_title = $this->encodeAttribute($this->fn_link_title);
		$link_attr .= " title=\"$link_title\"";
	}

	# A "%%" placeholder in the class or title becomes the footnote number.
	$link_attr = str_replace("%%", $num, $link_attr);
	$fn_key = $this->encodeAttribute($fn_key);

	$marker  = "<sup id=\"fnref:$fn_key\">";
	$marker .= "<a href=\"#fn:$fn_key\"$link_attr>$num</a>";
	$marker .= "</sup>";
	return $marker;
}
| 2759 | |
| 2760 | |
| 2761 ### Abbreviations ### | |
| 2762 | |
function stripAbbreviations($text) {
	#
	# Remove abbreviation definitions from $text, handing each one to
	# _stripAbbreviations_callback, and return the remaining text.
	#
	$max_indent = $this->tab_width - 1;

	# Abbreviation definitions are in the form: *[id]: description
	# They may be indented by fewer spaces than a tab and occupy one line.
	$pattern = '{
			^[ ]{0,'.$max_indent.'}\*\[(.+?)\][ ]?:	# abbr_id = $1
			(.*)					# text = $2 (no blank lines allowed)
		}xm';

	return preg_replace_callback($pattern,
		array(&$this, '_stripAbbreviations_callback'),
		$text);
}
function _stripAbbreviations_callback($matches) {
	#
	# Callback for abbreviation definitions: $matches[1] is the abbreviated
	# word and $matches[2] its description.
	#
	# Adds the quoted word to the alternation in $this->abbr_word_re,
	# stores the trimmed description in $this->abbr_desciptions, and
	# returns an empty string so the definition is removed from the text.
	#
	$abbr_word = $matches[1];
	$abbr_desc = $matches[2];

	# Compare against the empty string instead of testing truthiness: an
	# accumulated pattern of "0" is falsy in PHP, which would skip the
	# separator and glue the next word onto it (e.g. "0HTML").
	if ((string) $this->abbr_word_re !== '')
		$this->abbr_word_re .= '|';
	$this->abbr_word_re .= preg_quote($abbr_word);
	$this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
	return ''; # String that will replace the block
}
| 2787 | |
| 2788 | |
function doAbbreviations($text) {
	#
	# Find defined abbreviations in $text and pass each occurrence to
	# _doAbbreviations_callback, which wraps it in an <abbr> element.
	#
	# Returns $text unchanged when no abbreviation pattern has been built.
	#
	# The pattern is compared against the empty string instead of being
	# tested for truthiness, so a pattern consisting solely of "0" (falsy
	# in PHP) is still applied.
	if ((string) $this->abbr_word_re !== '') {
		// cannot use the /x modifier because abbr_word_re may
		// contain significant spaces:
		$text = preg_replace_callback('{'.
			'(?<![\w\x1A])'.
			'(?:'.$this->abbr_word_re.')'.
			'(?![\w\x1A])'.
			'}',
			array(&$this, '_doAbbreviations_callback'), $text);
	}
	return $text;
}
function _doAbbreviations_callback($matches) {
	#
	# Callback for abbreviation occurrences; $matches[0] is the matched
	# word.
	#
	# Returns the hashed <abbr> element for a known abbreviation (with a
	# title attribute when a description exists), or the matched text
	# unchanged when no description entry is stored for it.
	#
	$abbr = $matches[0];
	if (!isset($this->abbr_desciptions[$abbr])) {
		return $matches[0];
	}

	$desc = $this->abbr_desciptions[$abbr];

	# Only a truly empty description omits the title: empty() would also
	# discard the legitimate description "0".
	if ((string) $desc === '') {
		return $this->hashPart("<abbr>$abbr</abbr>");
	}

	$desc = $this->encodeAttribute($desc);
	return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
}
| 2819 | |
| 2820 } | |
| 2821 | |
| 2822 | |
| 2823 /* | |
| 2824 | |
| 2825 PHP Markdown Extra | |
| 2826 ================== | |
| 2827 | |
| 2828 Description | |
| 2829 ----------- | |
| 2830 | |
| 2831 This is a PHP port of the original Markdown formatter written in Perl | |
| 2832 by John Gruber. This special "Extra" version of PHP Markdown features | |
| 2833 further enhancements to the syntax for making additional constructs | |
| 2834 such as tables and definition lists. | |
| 2835 | |
| 2836 Markdown is a text-to-HTML filter; it translates an easy-to-read / | |
| 2837 easy-to-write structured text format into HTML. Markdown's text format | |
| 2838 is most similar to that of plain text email, and supports features such | |
| 2839 as headers, *emphasis*, code blocks, blockquotes, and links. | |
| 2840 | |
| 2841 Markdown's syntax is designed not as a generic markup language, but | |
| 2842 specifically to serve as a front-end to (X)HTML. You can use span-level | |
| 2843 HTML tags anywhere in a Markdown document, and you can use block level | |
| 2844 HTML tags (like <div> and <table>) as well. | |
| 2845 | |
| 2846 For more information about Markdown's syntax, see: | |
| 2847 | |
| 2848 <http://daringfireball.net/projects/markdown/> | |
| 2849 | |
| 2850 | |
| 2851 Bugs | |
| 2852 ---- | |
| 2853 | |
| 2854 To file bug reports please send email to: | |
| 2855 | |
| 2856 <michel.fortin@michelf.com> | |
| 2857 | |
| 2858 Please include with your report: (1) the example input; (2) the output you | |
| 2859 expected; (3) the output Markdown actually produced. | |
| 2860 | |
| 2861 | |
| 2862 Version History | |
| 2863 --------------- | |
| 2864 | |
| 2865 See the readme file for detailed release notes for this version. | |
| 2866 | |
| 2867 | |
| 2868 Copyright and License | |
| 2869 --------------------- | |
| 2870 | |
| 2871 PHP Markdown & Extra | |
| 2872 Copyright (c) 2004-2008 Michel Fortin | |
| 2873 <http://www.michelf.com/> | |
| 2874 All rights reserved. | |
| 2875 | |
| 2876 Based on Markdown | |
| 2877 Copyright (c) 2003-2006 John Gruber | |
| 2878 <http://daringfireball.net/> | |
| 2879 All rights reserved. | |
| 2880 | |
| 2881 Redistribution and use in source and binary forms, with or without | |
| 2882 modification, are permitted provided that the following conditions are | |
| 2883 met: | |
| 2884 | |
| 2885 * Redistributions of source code must retain the above copyright notice, | |
| 2886 this list of conditions and the following disclaimer. | |
| 2887 | |
| 2888 * Redistributions in binary form must reproduce the above copyright | |
| 2889 notice, this list of conditions and the following disclaimer in the | |
| 2890 documentation and/or other materials provided with the distribution. | |
| 2891 | |
| 2892 * Neither the name "Markdown" nor the names of its contributors may | |
| 2893 be used to endorse or promote products derived from this software | |
| 2894 without specific prior written permission. | |
| 2895 | |
| 2896 This software is provided by the copyright holders and contributors "as | |
| 2897 is" and any express or implied warranties, including, but not limited | |
| 2898 to, the implied warranties of merchantability and fitness for a | |
| 2899 particular purpose are disclaimed. In no event shall the copyright owner | |
| 2900 or contributors be liable for any direct, indirect, incidental, special, | |
| 2901 exemplary, or consequential damages (including, but not limited to, | |
| 2902 procurement of substitute goods or services; loss of use, data, or | |
| 2903 profits; or business interruption) however caused and on any theory of | |
| 2904 liability, whether in contract, strict liability, or tort (including | |
| 2905 negligence or otherwise) arising in any way out of the use of this | |
| 2906 software, even if advised of the possibility of such damage. | |
| 2907 | |
| 2908 */ | |
| 2909 ?> | |
| OLD | NEW |