| OLD | NEW |
| (Empty) |
| 1 <?php | |
| 2 /** | |
| 3 * GeSHi - Generic Syntax Highlighter | |
| 4 * | |
| 5 * The GeSHi class for Generic Syntax Highlighting. Please refer to the | |
| 6 * documentation at http://qbnz.com/highlighter/documentation.php for more | |
| 7 * information about how to use this class. | |
| 8 * | |
| 9 * For changes, release notes, TODOs etc, see the relevant files in the docs/ | |
| 10 * directory. | |
| 11 * | |
| 12 * This file is part of GeSHi. | |
| 13 * | |
| 14 * GeSHi is free software; you can redistribute it and/or modify | |
| 15 * it under the terms of the GNU General Public License as published by | |
| 16 * the Free Software Foundation; either version 2 of the License, or | |
| 17 * (at your option) any later version. | |
| 18 * | |
| 19 * GeSHi is distributed in the hope that it will be useful, | |
| 20 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 22 * GNU General Public License for more details. | |
| 23 * | |
| 24 * You should have received a copy of the GNU General Public License | |
| 25 * along with GeSHi; if not, write to the Free Software | |
| 26 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
| 27 * | |
| 28 * @package geshi | |
| 29 * @subpackage core | |
| 30 * @author Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de> | |
| 31 * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann | |
| 32 * @license http://gnu.org/copyleft/gpl.html GNU GPL | |
| 33 * | |
| 34 */ | |
| 35 | |
| 36 // | |
| 37 // GeSHi Constants | |
| 38 // You should use these constant names in your programs instead of | |
| 39 // their values - you never know when a value may change in a future | |
| 40 // version | |
| 41 // | |
| 42 | |
| 43 /** The version of this GeSHi file */ | |
| 44 define('GESHI_VERSION', '1.0.8.3'); | |
| 45 | |
| 46 // Define the root directory for the GeSHi code tree | |
| 47 if (!defined('GESHI_ROOT')) { | |
| 48 /** The root directory for GeSHi */ | |
| 49 define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR); | |
| 50 } | |
| 51 /** The language file directory for GeSHi | |
| 52 @access private */ | |
| 53 define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR); | |
| 54 | |
| 55 // Define if GeSHi should be paranoid about security | |
| 56 if (!defined('GESHI_SECURITY_PARANOID')) { | |
| 57 /** Tells GeSHi to be paranoid about security settings */ | |
| 58 define('GESHI_SECURITY_PARANOID', false); | |
| 59 } | |
| 60 | |
| 61 // Line numbers - use with enable_line_numbers() | |
| 62 /** Use no line numbers when building the result */ | |
| 63 define('GESHI_NO_LINE_NUMBERS', 0); | |
| 64 /** Use normal line numbers when building the result */ | |
| 65 define('GESHI_NORMAL_LINE_NUMBERS', 1); | |
| 66 /** Use fancy line numbers when building the result */ | |
| 67 define('GESHI_FANCY_LINE_NUMBERS', 2); | |
| 68 | |
| 69 // Container HTML type | |
| 70 /** Use nothing to surround the source */ | |
| 71 define('GESHI_HEADER_NONE', 0); | |
| 72 /** Use a "div" to surround the source */ | |
| 73 define('GESHI_HEADER_DIV', 1); | |
| 74 /** Use a "pre" to surround the source */ | |
| 75 define('GESHI_HEADER_PRE', 2); | |
| 76 /** Use a pre to wrap lines when line numbers are enabled or to wrap the whole c
ode. */ | |
| 77 define('GESHI_HEADER_PRE_VALID', 3); | |
| 78 /** | |
| 79 * Use a "table" to surround the source: | |
| 80 * | |
| 81 * <table> | |
| 82 * <thead><tr><td colspan="2">$header</td></tr></thead> | |
| 83 * <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code</pre></td></tr>
</tbody> | |
| 84 * <tfoot><tr><td colspan="2">$footer</td></tr></tfoot> | |
| 85 * </table> | |
| 86 * | |
| 87 * this is essentially only a workaround for Firefox, see sf#1651996 or take a l
ook at | |
| 88 * https://bugzilla.mozilla.org/show_bug.cgi?id=365805 | |
| 89 * @note when linenumbers are disabled this is essentially the same as GESHI_HEA
DER_PRE | |
| 90 */ | |
| 91 define('GESHI_HEADER_PRE_TABLE', 4); | |
| 92 | |
| 93 // Capitalisation constants | |
| 94 /** Leave keywords found as the case that they are */ | |
| 95 define('GESHI_CAPS_NO_CHANGE', 0); | |
| 96 /** Uppercase keywords found */ | |
| 97 define('GESHI_CAPS_UPPER', 1); | |
| 98 /** Lowercase keywords found */ | |
| 99 define('GESHI_CAPS_LOWER', 2); | |
| 100 | |
| 101 // Link style constants | |
| 102 /** Links in the source in the :link state */ | |
| 103 define('GESHI_LINK', 0); | |
| 104 /** Links in the source in the :hover state */ | |
| 105 define('GESHI_HOVER', 1); | |
| 106 /** Links in the source in the :active state */ | |
| 107 define('GESHI_ACTIVE', 2); | |
| 108 /** Links in the source in the :visited state */ | |
| 109 define('GESHI_VISITED', 3); | |
| 110 | |
| 111 // Important string starter/finisher | |
| 112 // Note that if you change these, they should be as-is: i.e., don't | |
| 113 // write them as if they had been run through htmlentities() | |
| 114 /** The starter for important parts of the source */ | |
| 115 define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>'); | |
| 116 /** The ender for important parts of the source */ | |
| 117 define('GESHI_END_IMPORTANT', '<END GeSHi>'); | |
| 118 | |
| 119 /**#@+ | |
| 120 * @access private | |
| 121 */ | |
| 122 // When strict mode applies for a language | |
| 123 /** Strict mode never applies (this is the most common) */ | |
| 124 define('GESHI_NEVER', 0); | |
| 125 /** Strict mode *might* apply, and can be enabled or | |
| 126 disabled by {@link GeSHi->enable_strict_mode()} */ | |
| 127 define('GESHI_MAYBE', 1); | |
| 128 /** Strict mode always applies */ | |
| 129 define('GESHI_ALWAYS', 2); | |
| 130 | |
| 131 // Advanced regexp handling constants, used in language files | |
| 132 /** The key of the regex array defining what to search for */ | |
| 133 define('GESHI_SEARCH', 0); | |
| 134 /** The key of the regex array defining what bracket group in a | |
| 135 matched search to use as a replacement */ | |
| 136 define('GESHI_REPLACE', 1); | |
| 137 /** The key of the regex array defining any modifiers to the regular expression
*/ | |
| 138 define('GESHI_MODIFIERS', 2); | |
| 139 /** The key of the regex array defining what bracket group in a | |
| 140 matched search to put before the replacement */ | |
| 141 define('GESHI_BEFORE', 3); | |
| 142 /** The key of the regex array defining what bracket group in a | |
| 143 matched search to put after the replacement */ | |
| 144 define('GESHI_AFTER', 4); | |
| 145 /** The key of the regex array defining a custom keyword to use | |
| 146 for this regexp's html tag class */ | |
| 147 define('GESHI_CLASS', 5); | |
| 148 | |
| 149 /** Used in language files to mark comments */ | |
| 150 define('GESHI_COMMENTS', 0); | |
| 151 | |
| 152 /** Used to work around missing PHP features **/ | |
| 153 define('GESHI_PHP_PRE_433', !(version_compare(PHP_VERSION, '4.3.3') === 1)); | |
| 154 | |
/**
 * Make sure we can call stripos() on PHP versions that do not ship it.
 *
 * The fallback performs a case-insensitive search (note the "i" modifier)
 * and always returns the match position relative to the start of the
 * original $haystack, or false when $needle is not found.
 **/
if (!function_exists('stripos')) {
    // the offset param of preg_match is not supported below PHP 4.3.3
    if (GESHI_PHP_PRE_433) {
        /**
         * @ignore
         */
        function stripos($haystack, $needle, $offset = null) {
            $offset = is_null($offset) ? 0 : (int) $offset;
            if ($offset < 0) {
                // A negative offset counts from the end of the string
                $offset = max(0, strlen($haystack) + $offset);
            }
            if ($offset > 0) {
                $haystack = substr($haystack, $offset);
            }
            if (preg_match('/' . preg_quote($needle, '/') . '/i', $haystack, $match, PREG_OFFSET_CAPTURE)) {
                // The match position is relative to the shortened haystack,
                // so add the skipped prefix back on
                return $match[0][1] + $offset;
            }
            return false;
        }
    }
    else {
        /**
         * @ignore
         */
        function stripos($haystack, $needle, $offset = null) {
            if (preg_match('/' . preg_quote($needle, '/') . '/i', $haystack, $match, PREG_OFFSET_CAPTURE, (int) $offset)) {
                return $match[0][1];
            }
            return false;
        }
    }
}
| 184 | |
| 185 /** some old PHP / PCRE subpatterns only support up to xxx subpatterns in | |
| 186 regular expressions. Set this to false if your PCRE lib is up to date | |
| 187 @see GeSHi->optimize_regexp_list() | |
| 188 **/ | |
| 189 define('GESHI_MAX_PCRE_SUBPATTERNS', 500); | |
| 190 /** it's also important not to generate too long regular expressions | |
| 191 be generous here... but keep in mind, that when reaching this limit we | |
| 192 still have to close open patterns. 12k should do just fine on a 16k limit. | |
| 193 @see GeSHi->optimize_regexp_list() | |
| 194 **/ | |
| 195 define('GESHI_MAX_PCRE_LENGTH', 12288); | |
| 196 | |
| 197 //Number format specification | |
| 198 /** Basic number format for integers */ | |
| 199 define('GESHI_NUMBER_INT_BASIC', 1); //Default integers \d+ | |
| 200 /** Enhanced number format for integers like seen in C */ | |
| 201 define('GESHI_NUMBER_INT_CSTYLE', 2); //Default C-Style \d+[lL]? | |
| 202 /** Number format to highlight binary numbers with a suffix "b" */ | |
| 203 define('GESHI_NUMBER_BIN_SUFFIX', 16); //[01]+[bB] | |
| 204 /** Number format to highlight binary numbers with a prefix % */ | |
| 205 define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32); //%[01]+ | |
| 206 /** Number format to highlight binary numbers with a prefix 0b (C) */ | |
| 207 define('GESHI_NUMBER_BIN_PREFIX_0B', 64); //0b[01]+ | |
| 208 /** Number format to highlight octal numbers with a leading zero */ | |
| 209 define('GESHI_NUMBER_OCT_PREFIX', 256); //0[0-7]+ | |
| 210 /** Number format to highlight octal numbers with a suffix of o */ | |
| 211 define('GESHI_NUMBER_OCT_SUFFIX', 512); //[0-7]+[oO] | |
| 212 /** Number format to highlight hex numbers with a prefix 0x */ | |
| 213 define('GESHI_NUMBER_HEX_PREFIX', 4096); //0x[0-9a-fA-F]+ | |
| 214 /** Number format to highlight hex numbers with a suffix of h */ | |
| 215 define('GESHI_NUMBER_HEX_SUFFIX', 8192); //[0-9][0-9a-fA-F]*h | |
| 216 /** Number format to highlight floating-point numbers without support for scient
ific notation */ | |
| 217 define('GESHI_NUMBER_FLT_NONSCI', 65536); //\d+\.\d+ | |
| 218 /** Number format to highlight floating-point numbers with an "f" suffix, withou
t support for scientific notation */ | |
| 219 define('GESHI_NUMBER_FLT_NONSCI_F', 131072); //\d+(\.\d+)?f | |
| 220 /** Number format to highlight floating-point numbers with support for scientifi
c notation (E) and optional leading zero */ | |
| 221 define('GESHI_NUMBER_FLT_SCI_SHORT', 262144); //\.\d+e\d+ | |
| 222 /** Number format to highlight floating-point numbers with support for scientifi
c notation (E) and required leading digit */ | |
| 223 define('GESHI_NUMBER_FLT_SCI_ZERO', 524288); //\d+(\.\d+)?e\d+ | |
| 224 //Custom formats are passed by RX array | |
| 225 | |
| 226 // Error detection - use these to analyse faults | |
| 227 /** No sourcecode to highlight was specified | |
| 228 * @deprecated | |
| 229 */ | |
| 230 define('GESHI_ERROR_NO_INPUT', 1); | |
| 231 /** The language specified does not exist */ | |
| 232 define('GESHI_ERROR_NO_SUCH_LANG', 2); | |
| 233 /** GeSHi could not open a file for reading (generally a language file) */ | |
| 234 define('GESHI_ERROR_FILE_NOT_READABLE', 3); | |
| 235 /** The header type passed to {@link GeSHi->set_header_type()} was invalid */ | |
| 236 define('GESHI_ERROR_INVALID_HEADER_TYPE', 4); | |
| 237 /** The line number type passed to {@link GeSHi->enable_line_numbers()} was inva
lid */ | |
| 238 define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5); | |
| 239 /**#@-*/ | |
| 240 | |
| 241 | |
| 242 /** | |
| 243 * The GeSHi Class. | |
| 244 * | |
| 245 * Please refer to the documentation for GeSHi 1.0.X that is available | |
| 246 * at http://qbnz.com/highlighter/documentation.php for more information | |
| 247 * about how to use this class. | |
| 248 * | |
| 249 * @package geshi | |
| 250 * @author Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de> | |
| 251 * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann | |
| 252 */ | |
| 253 class GeSHi { | |
| 254 /**#@+ | |
| 255 * @access private | |
| 256 */ | |
| 257 /** | |
| 258 * The source code to highlight | |
| 259 * @var string | |
| 260 */ | |
| 261 var $source = ''; | |
| 262 | |
| 263 /** | |
| 264 * The language to use when highlighting | |
| 265 * @var string | |
| 266 */ | |
| 267 var $language = ''; | |
| 268 | |
| 269 /** | |
| 270 * The data for the language used | |
| 271 * @var array | |
| 272 */ | |
| 273 var $language_data = array(); | |
| 274 | |
| 275 /** | |
| 276 * The path to the language files | |
| 277 * @var string | |
| 278 */ | |
| 279 var $language_path = GESHI_LANG_ROOT; | |
| 280 | |
| 281 /** | |
| 282 * The error message associated with an error | |
| 283 * @var string | |
| 284 * @todo check err reporting works | |
| 285 */ | |
| 286 var $error = false; | |
| 287 | |
| 288 /** | |
| 289 * Possible error messages | |
| 290 * @var array | |
| 291 */ | |
| 292 var $error_messages = array( | |
| 293 GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE
} (using path {PATH})', | |
| 294 GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file
was not readable', | |
| 295 GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid
', | |
| 296 GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified
is invalid' | |
| 297 ); | |
| 298 | |
| 299 /** | |
| 300 * Whether highlighting is strict or not | |
| 301 * @var boolean | |
| 302 */ | |
| 303 var $strict_mode = false; | |
| 304 | |
| 305 /** | |
| 306 * Whether to use CSS classes in output | |
| 307 * @var boolean | |
| 308 */ | |
| 309 var $use_classes = false; | |
| 310 | |
| 311 /** | |
| 312 * The type of header to use. Can be one of the following | |
| 313 * values: | |
| 314 * | |
| 315 * - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element. | |
| 316 * - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element. | |
| 317 * - GESHI_HEADER_NONE: No header is outputted. | |
| 318 * | |
| 319 * @var int | |
| 320 */ | |
| 321 var $header_type = GESHI_HEADER_PRE; | |
| 322 | |
| 323 /** | |
| 324 * Array of permissions for which lexics should be highlighted | |
| 325 * @var array | |
| 326 */ | |
| 327 var $lexic_permissions = array( | |
| 328 'KEYWORDS' => array(), | |
| 329 'COMMENTS' => array('MULTI' => true), | |
| 330 'REGEXPS' => array(), | |
| 331 'ESCAPE_CHAR' => true, | |
| 332 'BRACKETS' => true, | |
| 333 'SYMBOLS' => false, | |
| 334 'STRINGS' => true, | |
| 335 'NUMBERS' => true, | |
| 336 'METHODS' => true, | |
| 337 'SCRIPT' => true | |
| 338 ); | |
| 339 | |
| 340 /** | |
| 341 * The time it took to parse the code | |
| 342 * @var double | |
| 343 */ | |
| 344 var $time = 0; | |
| 345 | |
| 346 /** | |
| 347 * The content of the header block | |
| 348 * @var string | |
| 349 */ | |
| 350 var $header_content = ''; | |
| 351 | |
| 352 /** | |
| 353 * The content of the footer block | |
| 354 * @var string | |
| 355 */ | |
| 356 var $footer_content = ''; | |
| 357 | |
| 358 /** | |
| 359 * The style of the header block | |
| 360 * @var string | |
| 361 */ | |
| 362 var $header_content_style = ''; | |
| 363 | |
| 364 /** | |
| 365 * The style of the footer block | |
| 366 * @var string | |
| 367 */ | |
| 368 var $footer_content_style = ''; | |
| 369 | |
| 370 /** | |
| 371 * Tells if a block around the highlighted source should be forced | |
| 372 * if not using line numbering | |
| 373 * @var boolean | |
| 374 */ | |
| 375 var $force_code_block = false; | |
| 376 | |
| 377 /** | |
| 378 * The styles for hyperlinks in the code | |
| 379 * @var array | |
| 380 */ | |
| 381 var $link_styles = array(); | |
| 382 | |
| 383 /** | |
| 384 * Whether important blocks should be recognised or not | |
| 385 * @var boolean | |
| 386 * @deprecated | |
| 387 * @todo REMOVE THIS FUNCTIONALITY! | |
| 388 */ | |
| 389 var $enable_important_blocks = false; | |
| 390 | |
| 391 /** | |
| 392 * Styles for important parts of the code | |
| 393 * @var string | |
| 394 * @deprecated | |
| 395 * @todo As above - rethink the whole idea of important blocks as it is bugg
y and | |
| 396 * will be hard to implement in 1.2 | |
| 397 */ | |
| 398 var $important_styles = 'font-weight: bold; color: red;'; // Styles for impo
rtant parts of the code | |
| 399 | |
| 400 /** | |
| 401 * Whether CSS IDs should be added to the code | |
| 402 * @var boolean | |
| 403 */ | |
| 404 var $add_ids = false; | |
| 405 | |
| 406 /** | |
| 407 * Lines that should be highlighted extra | |
| 408 * @var array | |
| 409 */ | |
| 410 var $highlight_extra_lines = array(); | |
| 411 | |
| 412 /** | |
| 413 * Styles of lines that should be highlighted extra | |
| 414 * @var array | |
| 415 */ | |
| 416 var $highlight_extra_lines_styles = array(); | |
| 417 | |
| 418 /** | |
| 419 * Styles of extra-highlighted lines | |
| 420 * @var string | |
| 421 */ | |
| 422 var $highlight_extra_lines_style = 'background-color: #ffc;'; | |
| 423 | |
| 424 /** | |
| 425 * The line ending | |
| 426 * If null, nl2br() will be used on the result string. | |
| 427 * Otherwise, all instances of \n will be replaced with $line_ending | |
| 428 * @var string | |
| 429 */ | |
| 430 var $line_ending = null; | |
| 431 | |
| 432 /** | |
| 433 * Number at which line numbers should start at | |
| 434 * @var int | |
| 435 */ | |
| 436 var $line_numbers_start = 1; | |
| 437 | |
| 438 /** | |
| 439 * The overall style for this code block | |
| 440 * @var string | |
| 441 */ | |
| 442 var $overall_style = 'font-family:monospace;'; | |
| 443 | |
| 444 /** | |
| 445 * The style for the actual code | |
| 446 * @var string | |
| 447 */ | |
| 448 var $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; paddin
g:0; background:none; vertical-align:top;'; | |
| 449 | |
| 450 /** | |
| 451 * The overall class for this code block | |
| 452 * @var string | |
| 453 */ | |
| 454 var $overall_class = ''; | |
| 455 | |
| 456 /** | |
| 457 * The overall ID for this code block | |
| 458 * @var string | |
| 459 */ | |
| 460 var $overall_id = ''; | |
| 461 | |
| 462 /** | |
| 463 * Line number styles | |
| 464 * @var string | |
| 465 */ | |
| 466 var $line_style1 = 'font-weight: normal; vertical-align:top;'; | |
| 467 | |
| 468 /** | |
| 469 * Line number styles for fancy lines | |
| 470 * @var string | |
| 471 */ | |
| 472 var $line_style2 = 'font-weight: bold; vertical-align:top;'; | |
| 473 | |
| 474 /** | |
| 475 * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen | |
| 476 * @var string | |
| 477 */ | |
| 478 var $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0
2px;vertical-align:top;'; | |
| 479 | |
| 480 /** | |
| 481 * Flag for how line numbers are displayed | |
| 482 * @var int | |
| 483 */ | |
| 484 var $line_numbers = GESHI_NO_LINE_NUMBERS; | |
| 485 | |
| 486 /** | |
| 487 * Flag to decide if multi line spans are allowed. Set it to false to make s
ure | |
| 488 * each tag is closed before and reopened after each linefeed. | |
| 489 * @var boolean | |
| 490 */ | |
| 491 var $allow_multiline_span = true; | |
| 492 | |
| 493 /** | |
| 494 * The "nth" value for fancy line highlighting | |
| 495 * @var int | |
| 496 */ | |
| 497 var $line_nth_row = 0; | |
| 498 | |
| 499 /** | |
| 500 * The size of tab stops | |
| 501 * @var int | |
| 502 */ | |
| 503 var $tab_width = 8; | |
| 504 | |
| 505 /** | |
| 506 * Should we use language-defined tab stop widths? | |
| 507 * @var boolean | |
| 508 */ | |
| 509 var $use_language_tab_width = false; | |
| 510 | |
| 511 /** | |
| 512 * Default target for keyword links | |
| 513 * @var string | |
| 514 */ | |
| 515 var $link_target = ''; | |
| 516 | |
| 517 /** | |
| 518 * The encoding to use for entity encoding | |
| 519 * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#20375
98) | |
| 520 * @var string | |
| 521 */ | |
| 522 var $encoding = 'utf-8'; | |
| 523 | |
| 524 /** | |
| 525 * Should keywords be linked? | |
| 526 * @var boolean | |
| 527 */ | |
| 528 var $keyword_links = true; | |
| 529 | |
| 530 /** | |
| 531 * Currently loaded language file | |
| 532 * @var string | |
| 533 * @since 1.0.7.22 | |
| 534 */ | |
| 535 var $loaded_language = ''; | |
| 536 | |
| 537 /** | |
| 538 * Whether the caches needed for parsing are built or not | |
| 539 * | |
| 540 * @var bool | |
| 541 * @since 1.0.8 | |
| 542 */ | |
| 543 var $parse_cache_built = false; | |
| 544 | |
| 545 /** | |
| 546 * Work around for Suhosin Patch with disabled /e modifier | |
| 547 * | |
| 548 * Note from suhosins author in config file: | |
| 549 * <blockquote> | |
| 550 * The /e modifier inside <code>preg_replace()</code> allows code executio
n. | |
| 551 * Often it is the cause for remote code execution exploits. It is wise to | |
| 552 * deactivate this feature and test where in the application it is used. | |
| 553 * The developer using the /e modifier should be made aware that he should | |
| 554 * use <code>preg_replace_callback()</code> instead | |
| 555 * </blockquote> | |
| 556 * | |
| 557 * @var array | |
| 558 * @since 1.0.8 | |
| 559 */ | |
| 560 var $_kw_replace_group = 0; | |
| 561 var $_rx_key = 0; | |
| 562 | |
| 563 /** | |
| 564 * some "callback parameters" for handle_multiline_regexps | |
| 565 * | |
| 566 * @since 1.0.8 | |
| 567 * @access private | |
| 568 * @var string | |
| 569 */ | |
| 570 var $_hmr_before = ''; | |
| 571 var $_hmr_replace = ''; | |
| 572 var $_hmr_after = ''; | |
| 573 var $_hmr_key = 0; | |
| 574 | |
| 575 /**#@-*/ | |
| 576 | |
/**
 * Creates a new GeSHi object, with source and language
 *
 * Declared as __construct() so that it is run on PHP versions that no
 * longer treat a method named after the class as the constructor. It is
 * declared before the legacy GeSHi() method on purpose.
 *
 * @param string The source code to highlight
 * @param string The language to highlight the source with
 * @param string The path to the language file directory. <b>This
 *               is deprecated!</b> I've backported the auto path
 *               detection from the 1.1.X dev branch, so now it
 *               should be automatically set correctly. If you have
 *               renamed the language directory however, you will
 *               still need to set the path using this parameter or
 *               {@link GeSHi->set_language_path()}
 * @since 1.0.0
 */
function __construct($source = '', $language = '', $path = '') {
    // empty() treats the string "0" as empty, but "0" is valid source code
    if (!empty($source) || '0' === $source) {
        $this->set_source($source);
    }
    if (!empty($language)) {
        $this->set_language($language);
    }
    $this->set_language_path($path);
}

/**
 * PHP 4 style constructor, kept so that existing code calling or
 * relying on GeSHi() keeps working. Simply forwards to __construct().
 *
 * @param string The source code to highlight
 * @param string The language to highlight the source with
 * @param string The path to the language file directory (deprecated)
 * @since 1.0.0
 */
function GeSHi($source = '', $language = '', $path = '') {
    $this->__construct($source, $language, $path);
}
| 600 | |
/**
 * Builds the HTML error message for the last GeSHi operation that failed.
 *
 * The message template is looked up in $this->error_messages using the
 * current error code; the {LANGUAGE} and {PATH} placeholders are filled
 * in from $this->language and $this->language_path.
 *
 * @return string|false The formatted error message if an error has
 *                      occurred, else false
 * @since 1.0.0
 */
function error() {
    if (!$this->error) {
        return false;
    }

    // Template variables available to the error messages for debugging
    $placeholders = array('{LANGUAGE}', '{PATH}');
    $values = array($this->language, $this->language_path);

    $msg = str_replace($placeholders, $values, $this->error_messages[$this->error]);

    return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />";
}
| 624 | |
/**
 * Gets a human-readable language name (thanks to Simon Patterson
 * for the idea :))
 *
 * When the loaded language data has no 'LANG_NAME' entry (for example
 * because no language file was loaded), the identifier stored in
 * $this->language is used instead of reading a missing array key.
 * If the last error is GESHI_ERROR_NO_SUCH_LANG, ' (Unknown Language)'
 * is appended to the name.
 *
 * @return string The name for the current language
 * @since 1.0.2
 */
function get_language_name() {
    $name = isset($this->language_data['LANG_NAME'])
        ? $this->language_data['LANG_NAME']
        : $this->language;

    if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
        return $name . ' (Unknown Language)';
    }
    return $name;
}
| 638 | |
/**
 * Replaces the source code held by this object.
 *
 * The list of extra-highlighted lines is cleared as well.
 *
 * @param string The source code to highlight
 * @since 1.0.0
 */
function set_source($source) {
    // Forget the extra-highlighted lines recorded so far
    $this->highlight_extra_lines = array();
    $this->source = $source;
}
| 649 | |
/**
 * Selects the language used for highlighting.
 *
 * The name is stripped of every character other than letters, digits,
 * "-" and "_" and then lowercased; the language file is looked up as
 * <language_path><name>.php. If that file is the one recorded in
 * $this->loaded_language nothing happens. Otherwise the error state and
 * strict mode are reset and the file is passed to load_language(), or
 * GESHI_ERROR_NO_SUCH_LANG is recorded if the file is not readable.
 *
 * @note since 1.0.8 this function won't reset language-settings by default anymore!
 *       if you need this set $force_reset = true
 *
 * @param string The name of the language to use
 * @param boolean Whether to forget the currently loaded language first,
 *                so the language file is processed again
 * @since 1.0.0
 */
function set_language($language, $force_reset = false) {
    if ($force_reset) {
        $this->loaded_language = false;
    }

    // Clean up the language name to prevent malicious code injection,
    // then normalise it to lower case
    $language = strtolower(preg_replace('#[^a-zA-Z0-9\-_]#', '', $language));

    // Retrieve the full filename
    $file_name = $this->language_path . $language . '.php';

    if ($file_name != $this->loaded_language) {
        $this->language = $language;

        $this->error = false;
        $this->strict_mode = GESHI_NEVER;

        if (is_readable($file_name)) {
            // Load the language for parsing
            $this->load_language($file_name);
        } else {
            // We cannot read the desired file
            $this->error = GESHI_ERROR_NO_SUCH_LANG;
        }
    }
    // else: this language is already loaded!
}
| 690 | |
/**
 * Sets the path to the directory containing the language files.
 *
 * The path is silently ignored (the current path is kept) when:
 *  - it contains a colon after its first character, unless
 *    DIRECTORY_SEPARATOR is a backslash and the only colon is the one
 *    directly following a single leading letter (e.g. "C:");
 *  - it contains a character rejected by the whitelist pattern below;
 *  - GESHI_SECURITY_PARANOID is enabled and it contains "/." or "..".
 *
 * An accepted, non-empty path gets a trailing "/" appended if missing and
 * the current language is then set again so the new path takes effect.
 *
 * Original note: this path is relative to the directory of the script
 * that included geshi.php, NOT geshi.php itself.
 *
 * @param string The path to the language directory
 * @since 1.0.0
 * @deprecated The path to the language files should now be automatically
 *             detected, so this method should no longer be needed. The
 *             1.1.X branch handles manual setting of the path differently
 *             so this method will disappear in 1.2.0.
 */
function set_language_path($path) {
    if (strpos($path, ':')) {
        // Security Fix to prevent external directories using fopen wrappers.
        if (DIRECTORY_SEPARATOR != "\\") {
            return;
        }
        // Only a drive prefix is acceptable, and no further colon after it
        if (!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
            return;
        }
    }

    // Security Fix to prevent external directories using fopen wrappers.
    if (preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
        return;
    }

    // Security Fix to prevent external directories using fopen wrappers.
    if (GESHI_SECURITY_PARANOID
        && (false !== strpos($path, '/.') || false !== strpos($path, '..'))) {
        return;
    }

    if ($path) {
        if ('/' != $path[strlen($path) - 1]) {
            $path .= '/';
        }
        $this->language_path = $path;
        $this->set_language($this->language); // otherwise set_language_path has no effect
    }
}
| 731 | |
/**
 * Chooses the kind of container the highlighted code is wrapped in.
 *
 * Accepted values are GESHI_HEADER_NONE, GESHI_HEADER_DIV,
 * GESHI_HEADER_PRE, GESHI_HEADER_PRE_VALID and GESHI_HEADER_PRE_TABLE.
 * Any other value leaves the current header type untouched and records
 * GESHI_ERROR_INVALID_HEADER_TYPE.
 *
 * If GESHI_HEADER_DIV is used, the code is surrounded in a "div". This
 * means more source code but more control over tab width and line-wrapping.
 * GESHI_HEADER_PRE means that a "pre" is used - less source, but less
 * control. Default is GESHI_HEADER_PRE.
 *
 * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code
 * should be outputted.
 *
 * @param int The type of header to be used
 * @since 1.0.0
 */
function set_header_type($type) {
    $valid_types = array(
        GESHI_HEADER_NONE,
        GESHI_HEADER_DIV,
        GESHI_HEADER_PRE,
        GESHI_HEADER_PRE_VALID,
        GESHI_HEADER_PRE_TABLE
    );

    if (in_array($type, $valid_types)) {
        // Set that new header type
        $this->header_type = $type;
    } else {
        // Not a header type we know about
        $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
    }
}
| 757 | |
/**
 * Sets the style of the whole outputted code block. The style should be
 * a string of valid stylesheet declarations.
 *
 * @param string The overall style for the outputted code block
 * @param boolean If true the style is appended to the current overall
 *                style, otherwise it replaces it
 * @since 1.0.0
 */
function set_overall_style($style, $preserve_defaults = false) {
    $this->overall_style = $preserve_defaults
        ? $this->overall_style . $style
        : $style;
}
| 774 | |
/**
 * Stores the class name for this block of code, so that a stylesheet
 * can target this object's output through it.
 *
 * @param string The class name to use for this block of code
 * @since 1.0.0
 */
function set_overall_class($class) {
    $this->overall_class = $class;
}
| 786 | |
/**
 * Stores the ID for this block of code, so that a stylesheet can
 * target this object's output through it.
 *
 * @param string The ID to use for this block of code
 * @since 1.0.0
 */
function set_overall_id($id) {
    $this->overall_id = $id;
}
| 797 | |
/**
 * Switches the use of CSS classes for highlighting on or off. Classes
 * are off by default; calling this method without arguments turns them on.
 *
 * @param boolean Whether to turn classes on or not (any value is
 *                converted to a boolean)
 * @since 1.0.0
 */
function enable_classes($flag = true) {
    $this->use_classes = (bool) $flag;
}
| 808 | |
/**
 * Sets the style for the actual code. This should be a string
 * containing valid stylesheet declarations.
 *
 * With $preserve_defaults set, the given declarations are appended to
 * the current code style; otherwise they replace it.
 *
 * Note: Use this method to override any style changes you made to
 * the line numbers if you are using line numbers, else the line of
 * code will have the same style as the line number! Consult the
 * GeSHi documentation for more information about this.
 *
 * @param string The style to use for actual code
 * @param boolean Whether to merge the current styles with the new styles
 * @since 1.0.2
 */
function set_code_style($style, $preserve_defaults = false) {
    $this->code_style = $preserve_defaults
        ? $this->code_style . $style
        : $style;
}
| 831 | |
/**
 * Sets the styles for the line numbers.
 *
 * May be called as set_line_style($normal, $fancy, $merge) or as
 * set_line_style($normal, $merge): when the second argument is a
 * boolean it is taken as the merge flag and the fancy style is treated
 * as an empty string.
 *
 * @param string The style for the line numbers that are "normal"
 * @param string|boolean If a string, this is the style of the line
 *        numbers that are "fancy", otherwise if boolean then this
 *        defines whether the normal styles should be merged with the
 *        new normal styles or not
 * @param boolean If set, is the flag for whether to merge the "fancy"
 *        styles with the current styles or not
 * @since 1.0.2
 */
function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
    // Two-parameter call: the second argument is really the merge flag
    if (is_bool($style2)) {
        $preserve_defaults = $style2;
        $style2 = '';
    }

    if ($preserve_defaults) {
        // Append to the styles that are already set
        $this->line_style1 .= $style1;
        $this->line_style2 .= $style2;
        return;
    }

    // Replace the current styles
    $this->line_style1 = $style1;
    $this->line_style2 = $style2;
}
| 860 | |
/**
 * Sets whether line numbers should be displayed.
 *
 * Valid values for the first parameter are:
 *
 *  - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed
 *  - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed
 *  - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed
 *
 * For fancy line numbers, the second parameter is used to signal which
 * lines are to be fancy. For example, if the value of this parameter is
 * 5 then every 5th line will be fancy.
 *
 * If the first parameter is none of the values above, the error
 * GESHI_ERROR_INVALID_LINE_NUMBER_TYPE is recorded; the value is still
 * stored.
 *
 * @param int How line numbers should be displayed
 * @param int Defines which lines are fancy
 * @since 1.0.0
 */
function enable_line_numbers($flag, $nth_row = 5) {
    $known_modes = array(
        GESHI_NO_LINE_NUMBERS,
        GESHI_NORMAL_LINE_NUMBERS,
        GESHI_FANCY_LINE_NUMBERS
    );

    // Loose comparison on purpose, matching how the constants are compared
    if (!in_array($flag, $known_modes)) {
        $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
    }

    $this->line_numbers = $flag;
    $this->line_nth_row = $nth_row;
}
| 886 | |
/**
 * Sets whether spans and other HTML markup generated by GeSHi can
 * span over multiple lines or not. Defaults to true to reduce overhead.
 * Set it to false if you want to manipulate the output or manually display
 * the code in an ordered list.
 *
 * @param boolean Whether multiline spans are allowed or not
 * @since 1.0.7.22
 */
function enable_multiline_span($flag) {
    $allowed = $flag ? true : false;
    $this->allow_multiline_span = $allowed;
}
| 899 | |
/**
 * Returns the current multiline span setting, as stored by
 * GeSHi->enable_multiline_span().
 *
 * @see enable_multiline_span
 * @return bool
 */
function get_multiline_span() {
    $current = $this->allow_multiline_span;
    return $current;
}
| 909 | |
/**
 * Sets the style for a keyword group. If $preserve_defaults is true the
 * new style is appended to the style already stored for the group;
 * otherwise the stored style is replaced.
 *
 * If no highlighting permission exists yet for the group, highlighting
 * is switched on for it.
 *
 * @param int The key of the keyword group to change the styles of
 * @param string The style to make the keywords
 * @param boolean Whether to merge the new styles with the old or just
 *        to overwrite them
 * @since 1.0.0
 */
function set_keyword_group_style($key, $style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
    } else {
        $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
    }

    // An already existing permission (on or off) is left untouched
    if (isset($this->lexic_permissions['KEYWORDS'][$key])) {
        return;
    }
    $this->lexic_permissions['KEYWORDS'][$key] = true;
}
| 934 | |
/**
 * Switches highlighting on or off for one keyword group.
 *
 * @param int The key of the keyword group to turn on or off
 * @param boolean Whether to turn highlighting for that group on or off
 * @since 1.0.0
 */
function set_keyword_group_highlighting($key, $flag = true) {
    // Permissions are stored as strict booleans
    $this->lexic_permissions['KEYWORDS'][$key] = (bool) $flag;
}
| 945 | |
/**
 * Sets the style for a comment group. If $preserve_defaults is true the
 * new style is appended to the style already stored for the group;
 * otherwise the stored style is replaced.
 *
 * @param int The key of the comment group to change the styles of
 * @param string The style to make the comments
 * @param boolean Whether to merge the new styles with the old or just
 *        to overwrite them
 * @since 1.0.0
 */
function set_comments_style($key, $style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
    } else {
        $this->language_data['STYLES']['COMMENTS'][$key] = $style;
    }
}
| 964 | |
/**
 * Switches highlighting on or off for one comment group.
 *
 * @param int The key of the comment group to turn on or off
 * @param boolean Whether to turn highlighting for that group on or off
 * @since 1.0.0
 */
function set_comments_highlighting($key, $flag = true) {
    // Permissions are stored as strict booleans
    $this->lexic_permissions['COMMENTS'][$key] = (bool) $flag;
}
| 975 | |
/**
 * Sets the style for escaped characters. If $preserve_defaults is true
 * the new style is appended to the style already stored for the group;
 * otherwise the stored style is replaced.
 *
 * @param string The style to make the escape characters
 * @param boolean Whether to merge the new styles with the old or just
 *        to overwrite them
 * @param int The key of the escape character group whose style is set
 *        (defaults to group 0)
 * @since 1.0.0
 */
function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
    } else {
        $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
    }
}
| 993 | |
/**
 * Switches highlighting on or off for escaped characters.
 *
 * @param boolean Whether to turn highlighting for escape characters on or off
 * @since 1.0.0
 */
function set_escape_characters_highlighting($flag = true) {
    // Permissions are stored as strict booleans
    $this->lexic_permissions['ESCAPE_CHAR'] = (bool) $flag;
}
| 1003 | |
/**
 * Sets the style for brackets. If $preserve_defaults is true the new
 * style is appended to the style already stored; otherwise the stored
 * style is replaced.
 *
 * This method is DEPRECATED: use set_symbols_style instead.
 * This method will be removed in 1.2.X
 *
 * @param string The style to make the brackets
 * @param boolean Whether to merge the new styles with the old or just
 *        to overwrite them
 * @since 1.0.0
 * @deprecated In favour of set_symbols_style
 */
function set_brackets_style($style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['BRACKETS'][0] .= $style;
    } else {
        $this->language_data['STYLES']['BRACKETS'][0] = $style;
    }
}
| 1025 | |
/**
 * Switches highlighting on or off for brackets.
 *
 * This method is DEPRECATED: use set_symbols_highlighting instead.
 * This method will be removed in 1.2.X
 *
 * @param boolean Whether to turn highlighting for brackets on or off
 * @since 1.0.0
 * @deprecated In favour of set_symbols_highlighting
 */
function set_brackets_highlighting($flag) {
    // Permissions are stored as strict booleans
    $this->lexic_permissions['BRACKETS'] = (bool) $flag;
}
| 1039 | |
/**
 * Sets the style for a symbol group. If $preserve_defaults is true the
 * new style is appended to the style already stored for the group;
 * otherwise the stored style is replaced.
 *
 * For backward compatibility, a change to group 0 is also forwarded to
 * set_brackets_style().
 *
 * @param string The style to make the symbols
 * @param boolean Whether to merge the new styles with the old or just
 *        to overwrite them
 * @param int Tells the group of symbols for which style should be set.
 * @since 1.0.1
 */
function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
    } else {
        $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
    }

    // Keep the deprecated brackets style in step with symbol group 0
    if ($group == 0) {
        $this->set_brackets_style($style, $preserve_defaults);
    }
}
| 1064 | |
/**
 * Switches highlighting on or off for symbols. The same flag is also
 * forwarded to set_brackets_highlighting() for backward compatibility.
 *
 * @param boolean Whether to turn highlighting for symbols on or off
 * @since 1.0.0
 */
function set_symbols_highlighting($flag) {
    // Permissions are stored as strict booleans
    $this->lexic_permissions['SYMBOLS'] = (bool) $flag;

    // Keep the deprecated brackets permission in step
    $this->set_brackets_highlighting($flag);
}
| 1078 | |
/**
 * Sets the style for strings. If $preserve_defaults is true the new
 * style is appended to the style already stored; otherwise the stored
 * style is replaced.
 *
 * @param string The style to make the strings
 * @param boolean Whether to merge the new styles with the old or just
 *        to overwrite them
 * @since 1.0.0
 */
function set_strings_style($style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['STRINGS'][0] .= $style;
    } else {
        $this->language_data['STYLES']['STRINGS'][0] = $style;
    }
}
| 1096 | |
/**
 * Switches highlighting on or off for strings.
 *
 * @param boolean Whether to turn highlighting for strings on or off
 * @since 1.0.0
 */
function set_strings_highlighting($flag) {
    // Permissions are stored as strict booleans
    $this->lexic_permissions['STRINGS'] = (bool) $flag;
}
| 1106 | |
/**
 * Sets the style for numbers. If $preserve_defaults is true the new
 * style is appended to the style already stored; otherwise the stored
 * style is replaced.
 *
 * @param string The style to make the numbers
 * @param boolean Whether to merge the new styles with the old or just
 *        to overwrite them
 * @since 1.0.0
 */
function set_numbers_style($style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['NUMBERS'][0] .= $style;
    } else {
        $this->language_data['STYLES']['NUMBERS'][0] = $style;
    }
}
| 1124 | |
/**
 * Switches highlighting on or off for numbers.
 *
 * @param boolean Whether to turn highlighting for numbers on or off
 * @since 1.0.0
 */
function set_numbers_highlighting($flag) {
    // Permissions are stored as strict booleans
    $this->lexic_permissions['NUMBERS'] = (bool) $flag;
}
| 1134 | |
/**
 * Sets the style for methods. $key is a number that references the
 * appropriate "object splitter" - see the language file for the language
 * you are highlighting to get this number. If $preserve_defaults is true
 * the new style is appended to the style already stored for that key;
 * otherwise the stored style is replaced.
 *
 * @param int The key of the object splitter to change the styles of
 * @param string The style to make the methods
 * @param boolean Whether to merge the new styles with the old or just
 *        to overwrite them
 * @since 1.0.0
 */
function set_methods_style($key, $style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['METHODS'][$key] .= $style;
    } else {
        $this->language_data['STYLES']['METHODS'][$key] = $style;
    }
}
| 1155 | |
/**
 * Switches highlighting on or off for methods.
 *
 * @param boolean Whether to turn highlighting for methods on or off
 * @since 1.0.0
 */
function set_methods_highlighting($flag) {
    // Permissions are stored as strict booleans
    $this->lexic_permissions['METHODS'] = (bool) $flag;
}
| 1165 | |
/**
 * Sets the style for a regular expression group. If $preserve_defaults
 * is true the new style is appended to the style already stored for the
 * group; otherwise the stored style is replaced.
 *
 * @param int The key of the regular expression group to change the
 *        styles of
 * @param string The style to make the regular expression matches
 * @param boolean Whether to merge the new styles with the old or just
 *        to overwrite them
 * @since 1.0.0
 */
function set_regexps_style($key, $style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
    } else {
        $this->language_data['STYLES']['REGEXPS'][$key] = $style;
    }
}
| 1183 | |
/**
 * Switches highlighting on or off for one regular expression group.
 *
 * @param int The key of the regular expression group to turn on or off
 * @param boolean Whether to turn highlighting for the regular expression
 *        group on or off
 * @since 1.0.0
 */
function set_regexps_highlighting($key, $flag) {
    // Permissions are stored as strict booleans
    $this->lexic_permissions['REGEXPS'][$key] = (bool) $flag;
}
| 1194 | |
/**
 * Sets whether a keyword group is matched in a case sensitive manner.
 *
 * @param int The key of the keyword group to change the case sensitivity of
 * @param boolean Whether to check in a case sensitive manner or not
 * @since 1.0.0
 */
function set_case_sensitivity($key, $case) {
    // Stored as a strict boolean
    $this->language_data['CASE_SENSITIVE'][$key] = (bool) $case;
}
| 1205 | |
/**
 * Sets the case that keywords should use when found. Use the constants:
 *
 *  - GESHI_CAPS_NO_CHANGE: leave keywords as-is
 *  - GESHI_CAPS_UPPER: convert all keywords to uppercase where found
 *  - GESHI_CAPS_LOWER: convert all keywords to lowercase where found
 *
 * Any other value is ignored and the current setting is kept.
 *
 * @param int A constant specifying what to do with matched keywords
 * @since 1.0.1
 */
function set_case_keywords($case) {
    $accepted = array(GESHI_CAPS_NO_CHANGE, GESHI_CAPS_UPPER, GESHI_CAPS_LOWER);

    if (!in_array($case, $accepted)) {
        // Unknown mode: leave the current setting alone
        return;
    }

    $this->language_data['CASE_KEYWORDS'] = $case;
}
| 1222 | |
/**
 * Sets how many spaces a tab is substituted for.
 *
 * The value is converted with intval(); a resulting width smaller than 1
 * is replaced by the default width of 8.
 *
 * @param int The tab width
 * @since 1.0.0
 */
function set_tab_width($width) {
    $requested = intval($width);

    // Anything below one column falls back to the default of 8
    $this->tab_width = ($requested < 1) ? 8 : $requested;
}
| 1240 | |
/**
 * Sets whether or not to use the tab-stop width specified by the language.
 *
 * @param boolean Whether to use language-specific tab-stop widths
 * @since 1.0.7.20
 */
function set_use_language_tab_width($use) {
    $this->use_language_tab_width = $use ? true : false;
}
| 1250 | |
/**
 * Returns the tab width to use, based on the current language and user
 * preference. The language's TAB_WIDTH is returned only when
 * language-specific widths are enabled and the language defines one;
 * in every other case the user-set tab width is returned.
 *
 * @return int Tab width
 * @since 1.0.7.20
 */
function get_real_tab_width() {
    $language_has_width = isset($this->language_data['TAB_WIDTH']);

    if ($this->use_language_tab_width && $language_has_width) {
        return $this->language_data['TAB_WIDTH'];
    }

    return $this->tab_width;
}
| 1266 | |
/**
 * Enables/disables strict highlighting. Default is off, calling this
 * method without parameters will turn it on. See documentation
 * for more details on strict mode and where to use it.
 *
 * The setting only changes when the language's STRICT_MODE_APPLIES
 * value is GESHI_MAYBE; otherwise the call has no effect.
 *
 * @param boolean Whether to enable strict mode or not
 * @since 1.0.0
 */
function enable_strict_mode($mode = true) {
    if (GESHI_MAYBE != $this->language_data['STRICT_MODE_APPLIES']) {
        // The language has fixed the strict mode behaviour itself
        return;
    }

    if ($mode) {
        $this->strict_mode = GESHI_ALWAYS;
    } else {
        $this->strict_mode = GESHI_NEVER;
    }
}
| 1280 | |
/**
 * Disables all highlighting. Shorthand for enable_highlighting(false).
 *
 * @since 1.0.0
 * @deprecated In favour of enable_highlighting
 */
function disable_highlighting() {
    $enable = false;
    $this->enable_highlighting($enable);
}
| 1291 | |
/**
 * Enables all highlighting
 *
 * The optional flag parameter was added in version 1.0.7.21 and can be used
 * to enable (true) or disable (false) all highlighting.
 *
 * Every entry of the lexic permissions is set to the flag; entries that
 * are arrays have each of their elements set instead. The highlighting
 * of important blocks is set to the same flag.
 *
 * @since 1.0.0
 * @param boolean A flag specifying whether to enable or disable all
 *        highlighting
 */
function enable_highlighting($flag = true) {
    $state = (bool) $flag;

    foreach (array_keys($this->lexic_permissions) as $type) {
        if (!is_array($this->lexic_permissions[$type])) {
            // Single on/off switch for this element type
            $this->lexic_permissions[$type] = $state;
            continue;
        }

        // One switch per group of this element type
        foreach (array_keys($this->lexic_permissions[$type]) as $group) {
            $this->lexic_permissions[$type][$group] = $state;
        }
    }

    // Context blocks
    $this->enable_important_blocks = $state;
}
| 1317 | |
| 1318 /** | |
| 1319 * Given a file extension, this method returns either a valid geshi language | |
| 1320 * name, or the empty string if it couldn't be found | |
| 1321 * | |
| 1322 * @param string The extension to get a language name for | |
| 1323 * @param array A lookup array to use instead of the default one | |
| 1324 * @since 1.0.5 | |
| 1325 * @todo Re-think about how this method works (maybe make it private and/or
make it | |
| 1326 * a extension->lang lookup?) | |
| 1327 * @todo static? | |
| 1328 */ | |
| 1329 function get_language_name_from_extension( $extension, $lookup = array() ) { | |
| 1330 if ( !is_array($lookup) || empty($lookup)) { | |
| 1331 $lookup = array( | |
| 1332 'actionscript' => array('as'), | |
| 1333 'ada' => array('a', 'ada', 'adb', 'ads'), | |
| 1334 'apache' => array('conf'), | |
| 1335 'asm' => array('ash', 'asm', 'inc'), | |
| 1336 'asp' => array('asp'), | |
| 1337 'bash' => array('sh'), | |
| 1338 'bf' => array('bf'), | |
| 1339 'c' => array('c', 'h'), | |
| 1340 'c_mac' => array('c', 'h'), | |
| 1341 'caddcl' => array(), | |
| 1342 'cadlisp' => array(), | |
| 1343 'cdfg' => array('cdfg'), | |
| 1344 'cobol' => array('cbl'), | |
| 1345 'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'), | |
| 1346 'csharp' => array('cs'), | |
| 1347 'css' => array('css'), | |
| 1348 'd' => array('d'), | |
| 1349 'delphi' => array('dpk', 'dpr', 'pp', 'pas'), | |
| 1350 'diff' => array('diff', 'patch'), | |
| 1351 'dos' => array('bat', 'cmd'), | |
| 1352 'gettext' => array('po', 'pot'), | |
| 1353 'gml' => array('gml'), | |
| 1354 'gnuplot' => array('plt'), | |
| 1355 'groovy' => array('groovy'), | |
| 1356 'haskell' => array('hs'), | |
| 1357 'html4strict' => array('html', 'htm'), | |
| 1358 'ini' => array('ini', 'desktop'), | |
| 1359 'java' => array('java'), | |
| 1360 'javascript' => array('js'), | |
| 1361 'klonec' => array('kl1'), | |
| 1362 'klonecpp' => array('klx'), | |
| 1363 'latex' => array('tex'), | |
| 1364 'lisp' => array('lisp'), | |
| 1365 'lua' => array('lua'), | |
| 1366 'matlab' => array('m'), | |
| 1367 'mpasm' => array(), | |
| 1368 'mysql' => array('sql'), | |
| 1369 'nsis' => array(), | |
| 1370 'objc' => array(), | |
| 1371 'oobas' => array(), | |
| 1372 'oracle8' => array(), | |
| 1373 'oracle10' => array(), | |
| 1374 'pascal' => array('pas'), | |
| 1375 'perl' => array('pl', 'pm'), | |
| 1376 'php' => array('php', 'php5', 'phtml', 'phps'), | |
| 1377 'povray' => array('pov'), | |
| 1378 'providex' => array('pvc', 'pvx'), | |
| 1379 'prolog' => array('pl'), | |
| 1380 'python' => array('py'), | |
| 1381 'qbasic' => array('bi'), | |
| 1382 'reg' => array('reg'), | |
| 1383 'ruby' => array('rb'), | |
| 1384 'sas' => array('sas'), | |
| 1385 'scala' => array('scala'), | |
| 1386 'scheme' => array('scm'), | |
| 1387 'scilab' => array('sci'), | |
| 1388 'smalltalk' => array('st'), | |
| 1389 'smarty' => array(), | |
| 1390 'tcl' => array('tcl'), | |
| 1391 'vb' => array('bas'), | |
| 1392 'vbnet' => array(), | |
| 1393 'visualfoxpro' => array(), | |
| 1394 'whitespace' => array('ws'), | |
| 1395 'xml' => array('xml', 'svg'), | |
| 1396 'z80' => array('z80', 'asm', 'inc') | |
| 1397 ); | |
| 1398 } | |
| 1399 | |
| 1400 foreach ($lookup as $lang => $extensions) { | |
| 1401 if (in_array($extension, $extensions)) { | |
| 1402 return $lang; | |
| 1403 } | |
| 1404 } | |
| 1405 return ''; | |
| 1406 } | |
| 1407 | |
/**
 * Given a file name, this method loads its contents in, and attempts
 * to set the language automatically. An optional lookup table can be
 * passed for looking up the language name. If not specified a default
 * table is used
 *
 * The language is chosen from whatever follows the last '.' in the file
 * name. If the file is not readable, the error
 * GESHI_ERROR_FILE_NOT_READABLE is recorded and nothing else happens.
 *
 * The language table is in the form
 * <pre>array(
 *   'lang_name' => array('extension', 'extension', ...),
 *   'lang_name' ...
 * );</pre>
 *
 * @param string The filename to load the source from
 * @param array A lookup array to use instead of the default one
 * @todo Complete rethink of this and above method
 * @since 1.0.5
 */
function load_from_file($file_name, $lookup = array()) {
    if (!is_readable($file_name)) {
        $this->error = GESHI_ERROR_FILE_NOT_READABLE;
        return;
    }

    $this->set_source(file_get_contents($file_name));

    // Everything after the last dot is treated as the extension
    $extension = substr(strrchr($file_name, '.'), 1);
    $language = $this->get_language_name_from_extension($extension, $lookup);
    $this->set_language($language);
}
| 1433 | |
/**
 * Adds a keyword to a keyword group for highlighting
 *
 * If the keyword group does not exist yet (or is not an array) it is
 * created as an empty list first, so that the membership check below
 * never runs against a missing group. A word that is already in the
 * group is not added again.
 *
 * When the parse cache has already been built, the cached regexp list
 * of the group is updated as well: normally the quoted word is appended
 * as one more alternative to the last cached pattern. If no cached
 * pattern exists for the group yet, the list is compiled from scratch
 * instead, since there is nothing to append to.
 *
 * @param int The key of the keyword group to add the keyword to
 * @param string The word to add to the keyword group
 * @since 1.0.0
 */
function add_keyword($key, $word) {
    // Make sure there is a list to search and to append to
    if (!isset($this->language_data['KEYWORDS'][$key])
        || !is_array($this->language_data['KEYWORDS'][$key])) {
        $this->language_data['KEYWORDS'][$key] = array();
    }

    if (in_array($word, $this->language_data['KEYWORDS'][$key])) {
        // Already known, nothing to do
        return;
    }

    $this->language_data['KEYWORDS'][$key][] = $word;

    if (!$this->parse_cache_built) {
        // The cache is built later from the keyword list itself
        return;
    }

    if (empty($this->language_data['CACHED_KEYWORD_LISTS'][$key])) {
        // No cached pattern to extend; appending here would create a
        // bogus entry at index -1, so compile the group instead
        $this->optimize_keyword_group($key);
        return;
    }

    //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
    $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
    $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
}
| 1452 | |
/**
 * Removes a keyword from a keyword group
 *
 * If the word is not part of the group, nothing happens.
 *
 * @param int The key of the keyword group to remove the keyword from
 * @param string The word to remove from the keyword group
 * @param bool Whether to automatically recompile the optimized regexp list or not.
 *        Note: if you set this to false and GeSHi->parse_code() was already
 *        called once for the current language, you have to manually call
 *        GeSHi->optimize_keyword_group() or the removed keyword will stay in
 *        cache and still be highlighted! On the other hand it might be too
 *        expensive to recompile the regexp list for every removal if you
 *        want to remove a lot of keywords.
 * @see parse_code
 * @see optimize_keyword_group
 * @since 1.0.0
 */
function remove_keyword($key, $word, $recompile = true) {
    $position = array_search($word, $this->language_data['KEYWORDS'][$key]);
    if ($position === false) {
        // Word is not in this group
        return;
    }

    unset($this->language_data['KEYWORDS'][$key][$position]);

    //NEW in 1.0.8, optionally recompile keyword group
    $cache_is_stale = $this->parse_cache_built;
    if ($recompile && $cache_is_stale) {
        $this->optimize_keyword_group($key);
    }
}
| 1477 | |
/**
 * Creates a new keyword group
 *
 * The words are cast to an array first. An empty word list is rejected
 * and false is returned without changing anything; in every other case
 * the group is stored, highlighting is enabled for it, and no value is
 * returned. If the parse cache has already been built, the regexp list
 * for the new group is compiled immediately.
 *
 * @param int The key of the keyword group to create
 * @param string The styles for the keyword group
 * @param boolean Whether the keyword group is case sensitive or not
 * @param array The words to use for the keyword group
 * @since 1.0.0
 */
function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
    $word_list = (array) $words;
    if (count($word_list) == 0) {
        // empty word lists mess up highlighting
        return false;
    }

    // Register the group: words, style, case handling and permission
    $this->language_data['KEYWORDS'][$key] = $word_list;
    $this->lexic_permissions['KEYWORDS'][$key] = true;
    $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
    $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;

    //NEW in 1.0.8, cache keyword regexp
    if ($this->parse_cache_built) {
        $this->optimize_keyword_group($key);
    }
}
| 1505 | |
/**
 * Removes a keyword group, together with its highlighting permission,
 * case sensitivity setting, style and cached regexp list.
 *
 * @param int The key of the keyword group to remove
 * @since 1.0.0
 */
function remove_keyword_group ($key) {
    // Words, case handling and style live in the language data
    unset(
        $this->language_data['KEYWORDS'][$key],
        $this->language_data['CASE_SENSITIVE'][$key],
        $this->language_data['STYLES']['KEYWORDS'][$key]
    );

    // The permission is kept separately
    unset($this->lexic_permissions['KEYWORDS'][$key]);

    //NEW in 1.0.8
    unset($this->language_data['CACHED_KEYWORD_LISTS'][$key]);
}
| 1522 | |
/**
 * Compiles the optimized regexp list for a keyword group and stores it
 * in the keyword cache.
 *
 * If the language's parser control enables SPACE_AS_WHITESPACE, either
 * for all keywords or for this group (the group setting takes
 * precedence), every space in the compiled patterns is replaced by
 * the pattern \s+.
 *
 * @param int The key of the keyword group to compile & optimize
 * @since 1.0.8
 */
function optimize_keyword_group($key) {
    $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
        $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);

    // isset() on the full path is false as soon as any level is missing,
    // so the intermediate levels need no checks of their own
    $space_as_whitespace = false;
    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
        $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
    }
    // A setting for this particular group overrides the general one
    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
        $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
    }

    if (!$space_as_whitespace) {
        return;
    }

    foreach ($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $index => $pattern) {
        $this->language_data['CACHED_KEYWORD_LISTS'][$key][$index] =
            str_replace(" ", "\\s+", $pattern);
    }
}
| 1552 | |
/**
 * Stores the content to use for the header block.
 *
 * @param string The content of the header block
 * @since 1.0.2
 */
function set_header_content($content) {
    // Stored as given
    $this->header_content = $content;
}
| 1562 | |
/**
 * Stores the content to use for the footer block.
 *
 * @param string The content of the footer block
 * @since 1.0.2
 */
function set_footer_content($content) {
    // Stored as given
    $this->footer_content = $content;
}
| 1572 | |
/**
 * Stores the style to use for the header content.
 *
 * @param string The style for the header content
 * @since 1.0.2
 */
function set_header_content_style($style) {
    // Stored as given
    $this->header_content_style = $style;
}
| 1582 | |
/**
 * Stores the style to use for the footer content.
 *
 * @param string The style for the footer content
 * @since 1.0.2
 */
function set_footer_content_style($style) {
    // Stored as given
    $this->footer_content_style = $style;
}
| 1592 | |
/**
 * Sets whether to force a surrounding block around
 * the highlighted code or not
 *
 * @param boolean Tells whether to enable or disable this feature
 * @since 1.0.7.20
 */
function enable_inner_code_block($flag) {
    $this->force_code_block = $flag ? true : false;
}
| 1603 | |
/**
 * Sets the base URL to be used for the keywords of one keyword group
 *
 * @param int The key of the keyword group to set the URL for
 * @param string The URL to set for the group. If {FNAME} is in
 *        the url somewhere, it is replaced by the keyword
 *        that the URL is being made for
 * @since 1.0.2
 */
function set_url_for_keyword_group($group, $url) {
    // Stored as given, keyed by keyword group
    $this->language_data['URLS'][$group] = $url;
}
| 1616 | |
/**
 * Sets the styles for links in code, one link state at a time
 *
 * @param int A constant that specifies what state the style is being
 *        set for - e.g. :hover or :visited
 * @param string The styles to use for that state
 * @since 1.0.2
 */
function set_link_styles($type, $styles) {
    // Stored as given, keyed by link state
    $this->link_styles[$type] = $styles;
}
| 1628 | |
/**
 * Sets the target for links in code
 *
 * The value is stored as a ready-made ' target="..."' attribute string.
 * An empty (falsy) target clears the stored attribute.
 *
 * @param string The target for links in the code, e.g. _blank
 * @since 1.0.3
 */
function set_link_target($target) {
    $this->link_target = $target ? ' target="' . $target . '"' : '';
}
| 1642 | |
/**
 * Stores the styles to use on important parts of the code.
 *
 * @param string The styles to use on important parts of the code
 * @since 1.0.2
 */
function set_important_styles($styles) {
    // Stored as given
    $this->important_styles = $styles;
}
| 1652 | |
/**
 * Sets whether context-important blocks are highlighted
 *
 * @param boolean Tells whether to enable or disable highlighting of
 *        important blocks
 * @todo REMOVE THIS SHIZ FROM GESHI!
 * @deprecated
 * @since 1.0.2
 */
function enable_important_blocks($flag) {
    // Stored as a strict boolean
    $this->enable_important_blocks = (bool) $flag;
}
| 1664 | |
/**
 * Sets whether CSS IDs should be added to each line
 *
 * @param boolean $flag If true (the default), IDs will be added to each line.
 * @since 1.0.2
 */
function enable_ids($flag = true) {
    // Normalise whatever was passed to a strict boolean
    $this->add_ids = (bool) $flag;
}
| 1674 | |
/**
 * Specifies which lines to highlight extra
 *
 * The extra style parameter was added in 1.0.7.21.
 *
 * @param mixed  $lines An array of line numbers to highlight, or just a line
 *                      number on its own. Arrays are processed element by
 *                      element (recursively); single values are converted
 *                      with intval().
 * @param string $style A string specifying the style to use for this line.
 *                      If null is specified, the default style is used.
 *                      If false is specified, the line will be removed from
 *                      special highlighting
 * @since 1.0.2
 * @todo Some data replication here that could be cut down on
 */
function highlight_lines_extra($lines, $style = null) {
    //A list of lines is handled one entry at a time
    if (is_array($lines)) {
        foreach ($lines as $entry) {
            $this->highlight_lines_extra($entry, $style);
        }
        return;
    }

    //Register the single line as specially highlighted
    $line_no = intval($lines);
    $this->highlight_extra_lines[$line_no] = $line_no;

    if ($style === false) {
        //false: take the line out of special highlighting again
        unset($this->highlight_extra_lines[$line_no]);
        unset($this->highlight_extra_lines_styles[$line_no]);
    } else if ($style === null) {
        //null: no individual style, so the default style applies
        unset($this->highlight_extra_lines_styles[$line_no]);
    } else {
        //Anything else is stored as this line's individual style
        $this->highlight_extra_lines_styles[$line_no] = $style;
    }
}
| 1711 | |
/**
 * Sets the default style for extra-highlighted lines
 *
 * @param string $styles The style for extra-highlighted lines
 * @since 1.0.2
 */
function set_highlight_lines_extra_style($styles) {
    // The string is stored as given, without validation
    $this->highlight_extra_lines_style = $styles;
}
| 1721 | |
/**
 * Sets the line-ending
 *
 * @param string $line_ending The new line-ending; it is converted to a string
 * @since 1.0.2
 */
function set_line_ending($line_ending) {
    $this->line_ending = strval($line_ending);
}
| 1731 | |
/**
 * Sets the number that line numbering should start at. The argument is
 * converted to an integer and its sign is dropped.
 *
 * <b>Warning:</b> Using this method will add the "start"
 * attribute to the <ol> that is used for line numbering.
 * This is <b>not</b> valid XHTML strict, so if that's what you
 * care about then don't use this method. Firefox is getting
 * support for the CSS method of doing this in 1.1 and Opera
 * has support for the CSS method, but (of course) IE doesn't
 * so it's not worth doing it the CSS way yet.
 *
 * @param int $number The number to start line numbers at
 * @since 1.0.2
 */
function start_line_numbers_at($number) {
    $as_integer = (int) $number;
    $this->line_numbers_start = abs($as_integer);
}
| 1750 | |
/**
 * Sets the encoding used for htmlspecialchars(), for international
 * support.
 *
 * NOTE: This is not needed for now because htmlspecialchars() is not
 * being used (it has a security hole in PHP4 that has not been patched).
 * Maybe in a future version it may make a return for speed reasons, but
 * I doubt it.
 *
 * @param string $encoding The encoding to use for the source. Values that
 *                         evaluate to false are ignored and leave the
 *                         current setting untouched.
 * @since 1.0.3
 */
function set_encoding($encoding) {
    if (!$encoding) {
        return;
    }

    //Encoding names are stored in lower case
    $this->encoding = strtolower($encoding);
}
| 1768 | |
/**
 * Turns linking of keywords on or off.
 *
 * @param boolean $enable If true (the default), links will be added to
 *                        keywords
 * @since 1.0.2
 */
function enable_keyword_links($enable = true) {
    // Normalise whatever was passed to a strict boolean
    $this->keyword_links = $enable ? true : false;
}
| 1778 | |
/**
 * Sets up the caches needed for styling. This is automatically called in
 * parse_code() and get_stylesheet() when appropriate. This function helps
 * stylesheet generators as they rely on some style information being
 * preprocessed.
 *
 * Only number highlighting is prepared here: the language's 'NUMBERS'
 * setting is turned into 'NUMBERS_CACHE', and number styles that are
 * indexed by flag value (1 << n) are moved to the bit position n.
 *
 * @since 1.0.8
 * @access private
 */
function build_style_cache() {
    //Nothing to prepare when number highlighting is switched off
    if (!$this->lexic_permissions['NUMBERS']) {
        return;
    }

    //A language without any number information gets 0, i.e. the defaults
    if (!isset($this->language_data['NUMBERS'])) {
        $this->language_data['NUMBERS'] = 0;
    }

    //An array is taken over as the cache without further processing
    if (is_array($this->language_data['NUMBERS'])) {
        $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
        return;
    }

    //Otherwise the setting is a bitfield of GESHI_NUMBER_* flags
    $this->language_data['NUMBERS_CACHE'] = array();
    if (!$this->language_data['NUMBERS']) {
        //Default: basic integers and non-scientific floats
        $this->language_data['NUMBERS'] =
            GESHI_NUMBER_INT_BASIC | GESHI_NUMBER_FLT_NONSCI;
    }

    //Walk the bitfield from the lowest bit upwards until no bits remain
    $bit = 0;
    $remaining = $this->language_data['NUMBERS'];
    while ($remaining > 0) {
        $flag = 1 << $bit;

        //Rearrange style indices if required: flag value -> bit position
        if (isset($this->language_data['STYLES']['NUMBERS'][$flag])) {
            $this->language_data['STYLES']['NUMBERS'][$bit] =
                $this->language_data['STYLES']['NUMBERS'][$flag];
            unset($this->language_data['STYLES']['NUMBERS'][$flag]);
        }

        //Is this flag requested for highlighting?
        if ($remaining & 1) {
            if (isset($this->language_data['STYLES']['NUMBERS'][$bit])) {
                //The flag has a style of its own, so it forms its own group
                $this->language_data['NUMBERS_CACHE'][$bit] = $flag;
            } else {
                //No own style: collect the flag in the shared group 0
                if (!isset($this->language_data['NUMBERS_CACHE'][0])) {
                    $this->language_data['NUMBERS_CACHE'][0] = 0;
                }
                $this->language_data['NUMBERS_CACHE'][0] |= $flag;
            }
        }

        ++$bit;
        $remaining >>= 1;
    }
}
| 1831 | |
/**
 * Sets up the caches needed for parsing. This is automatically called in
 * parse_code() when appropriate. Keeping these caches separate makes
 * stylesheet generators much faster, as they do not need them.
 *
 * Depending on the lexic permissions this prepares: the symbol lookup table
 * and search expression, the optimized keyword lists, the bracket
 * match/replace tables and the regular expressions used to find numbers.
 * Finally the parse cache is flagged as built.
 *
 * @since 1.0.8
 * @access private
 */
function build_parse_cache() {
    // cache symbol regexp
    //As this is a costly operation, it is done once for all symbols
    //instead of once per symbol group. For this to work the symbol data
    //has to be reorganized into one flat lookup table.
    if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
        $this->language_data['MULTIPLE_SYMBOL_GROUPS'] =
            count($this->language_data['STYLES']['SYMBOLS']) > 1;

        $this->language_data['SYMBOL_DATA'] = array();
        $multi_char = array();  // quoted symbols longer than one char
        $single_char = array(); // quoted symbols of exactly one char
        foreach ($this->language_data['SYMBOLS'] as $key => $symbols) {
            //A group is either an array of symbols (style taken from its
            //key) or one bare symbol (which always uses style 0)
            if (is_array($symbols)) {
                $style_group = $key;
                $members = $symbols;
            } else {
                $style_group = 0;
                $members = array($symbols);
            }

            foreach ($members as $member) {
                $member = $this->hsc($member);
                if (isset($this->language_data['SYMBOL_DATA'][$member])) {
                    //The first definition of a symbol wins
                    continue;
                }
                $this->language_data['SYMBOL_DATA'][$member] = $style_group;

                if (isset($member[1])) { // multiple chars
                    $multi_char[] = preg_quote($member, '/');
                } else if ($member == '-') {
                    // don't trigger range out of order error
                    $single_char[] = '\-';
                } else { // single char
                    $single_char[] = preg_quote($member, '/');
                }
            }
        }

        //SYMBOL_DATA now maps every symbol to its style, so the correct
        //style can be looked up at the moment of highlighting.
        //
        //What is left is the search expression: multi char symbols become
        //an alternation, single char symbols a character class.
        $alternatives = array();
        if ($multi_char) {
            rsort($multi_char);
            $alternatives[] = implode('|', $multi_char);
        }
        if ($single_char) {
            rsort($single_char);
            $alternatives[] = '[' . implode('', $single_char) . ']';
        }
        $this->language_data['SYMBOL_SEARCH'] = implode("|", $alternatives);
    }

    // cache optimized regexp for keyword matching
    // remove old cache
    $this->language_data['CACHED_KEYWORD_LISTS'] = array();
    foreach (array_keys($this->language_data['KEYWORDS']) as $group) {
        //Only groups that are explicitly switched off are skipped
        if (isset($this->lexic_permissions['KEYWORDS'][$group]) &&
            !$this->lexic_permissions['KEYWORDS'][$group]) {
            continue;
        }
        $this->optimize_keyword_group($group);
    }

    // brackets
    if ($this->lexic_permissions['BRACKETS']) {
        $brackets = array('[', ']', '(', ')', '{', '}');
        $this->language_data['CACHE_BRACKET_MATCH'] = $brackets;

        //Inline style when classes are off and a bracket style exists,
        //the class "br0" otherwise
        if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
            $open_tag = '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">';
        } else {
            $open_tag = '<| class="br0">';
        }

        //One replacement per bracket, in the same order as the match list
        $replacements = array();
        foreach ($brackets as $bracket) {
            $replacements[] = $open_tag . $bracket . '|>';
        }
        $this->language_data['CACHE_BRACKET_REPLACE'] = $replacements;
    }

    //Build the parse cache needed to highlight numbers appropriately
    if ($this->lexic_permissions['NUMBERS']) {
        //Check if the style rearrangements have been processed ...
        //This also does some preprocessing to check which style groups
        //are useable ...
        if (!isset($this->language_data['NUMBERS_CACHE'])) {
            $this->build_style_cache();
        }

        //Number format specification
        //All these formats are matched case-insensitively!
        static $flag_patterns = array(
            GESHI_NUMBER_INT_BASIC =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z\.])',
            GESHI_NUMBER_INT_CSTYLE =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z\.])',
            GESHI_NUMBER_BIN_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?b(?![0-9a-z\.])',
            GESHI_NUMBER_BIN_PREFIX_PERCENT =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z\.])',
            GESHI_NUMBER_BIN_PREFIX_0B =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z\.])',
            GESHI_NUMBER_OCT_PREFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z\.])',
            GESHI_NUMBER_OCT_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z\.])',
            GESHI_NUMBER_HEX_PREFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-f]+?(?![0-9a-z\.])',
            GESHI_NUMBER_HEX_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-f]*?h(?![0-9a-z\.])',
            GESHI_NUMBER_FLT_NONSCI =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z\.])',
            GESHI_NUMBER_FLT_NONSCI_F =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z\.])',
            GESHI_NUMBER_FLT_SCI_SHORT =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z\.])',
            GESHI_NUMBER_FLT_SCI_ZERO =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z\.])'
            );

        //Each NUMBERS_CACHE entry is either a bitfield of flags for one
        //style group or a string holding a ready-made regexp.
        $this->language_data['NUMBERS_RXCACHE'] = array();
        foreach ($this->language_data['NUMBERS_CACHE'] as $key => $rxdata) {
            if (is_string($rxdata)) {
                $regexp = $rxdata;
            } else {
                //This is a bitfield of number flags to highlight:
                //collect the pattern of every set flag and join them into
                //one alternation
                $selected = array();
                $flag = 1;
                while ($flag <= $rxdata) {
                    if ($rxdata & $flag) {
                        $selected[] = $flag_patterns[$flag];
                    }
                    $flag <<= 1;
                }
                $regexp = implode("|", $selected);
            }

            $this->language_data['NUMBERS_RXCACHE'][$key] =
                "/(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!\|>)/i";
        }
    }

    $this->parse_cache_built = true;
}
| 2000 | |
| 2001 /** | |
| 2002 * Returns the code in $this->source, highlighted and surrounded by the | |
| 2003 * necessary HTML. | |
| 2004 * | |
| 2005 * This should only be called ONCE, cos it's SLOW! If you want to highlight | |
| 2006 * the same source multiple times, you're better off doing a whole lot of | |
| 2007 * str_replaces to replace the <span>s | |
| 2008 * | |
| 2009 * @since 1.0.0 | |
| 2010 */ | |
| 2011 function parse_code () { | |
| 2012 // Start the timer | |
| 2013 $start_time = microtime(); | |
| 2014 | |
| 2015 // Firstly, if there is an error, we won't highlight | |
| 2016 if ($this->error) { | |
| 2017 //Escape the source for output | |
| 2018 $result = $this->hsc($this->source); | |
| 2019 | |
| 2020 //This fix is related to SF#1923020, but has to be applied regardles
s of | |
| 2021 //actually highlighting symbols. | |
| 2022 $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $r
esult); | |
| 2023 | |
| 2024 // Timing is irrelevant | |
| 2025 $this->set_time($start_time, $start_time); | |
| 2026 $this->finalise($result); | |
| 2027 return $result; | |
| 2028 } | |
| 2029 | |
| 2030 // make sure the parse cache is up2date | |
| 2031 if (!$this->parse_cache_built) { | |
| 2032 $this->build_parse_cache(); | |
| 2033 } | |
| 2034 | |
| 2035 // Replace all newlines to a common form. | |
| 2036 $code = str_replace("\r\n", "\n", $this->source); | |
| 2037 $code = str_replace("\r", "\n", $code); | |
| 2038 | |
| 2039 // Add spaces for regular expression matching and line numbers | |
| 2040 // $code = "\n" . $code . "\n"; | |
| 2041 | |
| 2042 // Initialise various stuff | |
| 2043 $length = strlen($code); | |
| 2044 $COMMENT_MATCHED = false; | |
| 2045 $stuff_to_parse = ''; | |
| 2046 $endresult = ''; | |
| 2047 | |
| 2048 // "Important" selections are handled like multiline comments | |
| 2049 // @todo GET RID OF THIS SHIZ | |
| 2050 if ($this->enable_important_blocks) { | |
| 2051 $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI
_END_IMPORTANT; | |
| 2052 } | |
| 2053 | |
| 2054 if ($this->strict_mode) { | |
| 2055 // Break the source into bits. Each bit will be a portion of the cod
e | |
| 2056 // within script delimiters - for example, HTML between < and > | |
| 2057 $k = 0; | |
| 2058 $parts = array(); | |
| 2059 $matches = array(); | |
| 2060 $next_match_pointer = null; | |
| 2061 // we use a copy to unset delimiters on demand (when they are not fo
und) | |
| 2062 $delim_copy = $this->language_data['SCRIPT_DELIMITERS']; | |
| 2063 $i = 0; | |
| 2064 while ($i < $length) { | |
| 2065 $next_match_pos = $length + 1; // never true | |
| 2066 foreach ($delim_copy as $dk => $delimiters) { | |
| 2067 if(is_array($delimiters)) { | |
| 2068 foreach ($delimiters as $open => $close) { | |
| 2069 // make sure the cache is setup properly | |
| 2070 if (!isset($matches[$dk][$open])) { | |
| 2071 $matches[$dk][$open] = array( | |
| 2072 'next_match' => -1, | |
| 2073 'dk' => $dk, | |
| 2074 | |
| 2075 'open' => $open, // needed for grouping of a
djacent code blocks (see below) | |
| 2076 'open_strlen' => strlen($open), | |
| 2077 | |
| 2078 'close' => $close, | |
| 2079 'close_strlen' => strlen($close), | |
| 2080 ); | |
| 2081 } | |
| 2082 // Get the next little bit for this opening string | |
| 2083 if ($matches[$dk][$open]['next_match'] < $i) { | |
| 2084 // only find the next pos if it was not already
cached | |
| 2085 $open_pos = strpos($code, $open, $i); | |
| 2086 if ($open_pos === false) { | |
| 2087 // no match for this delimiter ever | |
| 2088 unset($delim_copy[$dk][$open]); | |
| 2089 continue; | |
| 2090 } | |
| 2091 $matches[$dk][$open]['next_match'] = $open_pos; | |
| 2092 } | |
| 2093 if ($matches[$dk][$open]['next_match'] < $next_match
_pos) { | |
| 2094 //So we got a new match, update the close_pos | |
| 2095 $matches[$dk][$open]['close_pos'] = | |
| 2096 strpos($code, $close, $matches[$dk][$open]['
next_match']+1); | |
| 2097 | |
| 2098 $next_match_pointer =& $matches[$dk][$open]; | |
| 2099 $next_match_pos = $matches[$dk][$open]['next_mat
ch']; | |
| 2100 } | |
| 2101 } | |
| 2102 } else { | |
| 2103 //So we should match an RegExp as Strict Block ... | |
| 2104 /** | |
| 2105 * The value in $delimiters is expected to be an RegExp | |
| 2106 * containing exactly 2 matching groups: | |
| 2107 * - Group 1 is the opener | |
| 2108 * - Group 2 is the closer | |
| 2109 */ | |
| 2110 if(!GESHI_PHP_PRE_433 && //Needs proper rewrite to work
with PHP >=4.3.0; 4.3.3 is guaranteed to work. | |
| 2111 preg_match($delimiters, $code, $matches_rx, PREG_OFF
SET_CAPTURE, $i)) { | |
| 2112 //We got a match ... | |
| 2113 $matches[$dk] = array( | |
| 2114 'next_match' => $matches_rx[1][1], | |
| 2115 'dk' => $dk, | |
| 2116 | |
| 2117 'close_strlen' => strlen($matches_rx[2][0]), | |
| 2118 'close_pos' => $matches_rx[2][1], | |
| 2119 ); | |
| 2120 } else { | |
| 2121 // no match for this delimiter ever | |
| 2122 unset($delim_copy[$dk]); | |
| 2123 continue; | |
| 2124 } | |
| 2125 | |
| 2126 if ($matches[$dk]['next_match'] <= $next_match_pos) { | |
| 2127 $next_match_pointer =& $matches[$dk]; | |
| 2128 $next_match_pos = $matches[$dk]['next_match']; | |
| 2129 } | |
| 2130 } | |
| 2131 } | |
| 2132 // non-highlightable text | |
| 2133 $parts[$k] = array( | |
| 2134 1 => substr($code, $i, $next_match_pos - $i) | |
| 2135 ); | |
| 2136 ++$k; | |
| 2137 | |
| 2138 if ($next_match_pos > $length) { | |
| 2139 // out of bounds means no next match was found | |
| 2140 break; | |
| 2141 } | |
| 2142 | |
| 2143 // highlightable code | |
| 2144 $parts[$k][0] = $next_match_pointer['dk']; | |
| 2145 | |
| 2146 //Only combine for non-rx script blocks | |
| 2147 if(is_array($delim_copy[$next_match_pointer['dk']])) { | |
| 2148 // group adjacent script blocks, e.g. <foobar><asdf> should
be one block, not three! | |
| 2149 $i = $next_match_pos + $next_match_pointer['open_strlen']; | |
| 2150 while (true) { | |
| 2151 $close_pos = strpos($code, $next_match_pointer['close'],
$i); | |
| 2152 if ($close_pos == false) { | |
| 2153 break; | |
| 2154 } | |
| 2155 $i = $close_pos + $next_match_pointer['close_strlen']; | |
| 2156 if ($i == $length) { | |
| 2157 break; | |
| 2158 } | |
| 2159 if ($code[$i] == $next_match_pointer['open'][0] && ($nex
t_match_pointer['open_strlen'] == 1 || | |
| 2160 substr($code, $i, $next_match_pointer['open_strlen']
) == $next_match_pointer['open'])) { | |
| 2161 // merge adjacent but make sure we don't merge thing
s like <tag><!-- comment --> | |
| 2162 foreach ($matches as $submatches) { | |
| 2163 foreach ($submatches as $match) { | |
| 2164 if ($match['next_match'] == $i) { | |
| 2165 // a different block already matches her
e! | |
| 2166 break 3; | |
| 2167 } | |
| 2168 } | |
| 2169 } | |
| 2170 } else { | |
| 2171 break; | |
| 2172 } | |
| 2173 } | |
| 2174 } else { | |
| 2175 $close_pos = $next_match_pointer['close_pos'] + $next_match_
pointer['close_strlen']; | |
| 2176 $i = $close_pos; | |
| 2177 } | |
| 2178 | |
| 2179 if ($close_pos === false) { | |
| 2180 // no closing delimiter found! | |
| 2181 $parts[$k][1] = substr($code, $next_match_pos); | |
| 2182 ++$k; | |
| 2183 break; | |
| 2184 } else { | |
| 2185 $parts[$k][1] = substr($code, $next_match_pos, $i - $next_ma
tch_pos); | |
| 2186 ++$k; | |
| 2187 } | |
| 2188 } | |
| 2189 unset($delim_copy, $next_match_pointer, $next_match_pos, $matches); | |
| 2190 $num_parts = $k; | |
| 2191 | |
| 2192 if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) { | |
| 2193 // when we have only one part, we don't have anything to highlig
ht at all. | |
| 2194 // if we have a "maybe" strict language, this should be handled
as highlightable code | |
| 2195 $parts = array( | |
| 2196 0 => array( | |
| 2197 0 => '', | |
| 2198 1 => '' | |
| 2199 ), | |
| 2200 1 => array( | |
| 2201 0 => null, | |
| 2202 1 => $parts[0][1] | |
| 2203 ) | |
| 2204 ); | |
| 2205 $num_parts = 2; | |
| 2206 } | |
| 2207 | |
| 2208 } else { | |
| 2209 // Not strict mode - simply dump the source into | |
| 2210 // the array at index 1 (the first highlightable block) | |
| 2211 $parts = array( | |
| 2212 0 => array( | |
| 2213 0 => '', | |
| 2214 1 => '' | |
| 2215 ), | |
| 2216 1 => array( | |
| 2217 0 => null, | |
| 2218 1 => $code | |
| 2219 ) | |
| 2220 ); | |
| 2221 $num_parts = 2; | |
| 2222 } | |
| 2223 | |
| 2224 //Unset variables we won't need any longer | |
| 2225 unset($code); | |
| 2226 | |
| 2227 //Preload some repeatedly used values regarding hardquotes ... | |
| 2228 $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['H
ARDQUOTE'][0] : false; | |
| 2229 $hq_strlen = strlen($hq); | |
| 2230 | |
| 2231 //Preload if line numbers are to be generated afterwards | |
| 2232 //Added a check if line breaks should be forced even without line number
s, fixes SF#1727398 | |
| 2233 $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS || | |
| 2234 !empty($this->highlight_extra_lines) || !$this->allow_multiline_span
; | |
| 2235 | |
| 2236 //preload the escape char for faster checking ... | |
| 2237 $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']); | |
| 2238 | |
| 2239 // this is used for single-line comments | |
| 2240 $sc_disallowed_before = ""; | |
| 2241 $sc_disallowed_after = ""; | |
| 2242 | |
| 2243 if (isset($this->language_data['PARSER_CONTROL'])) { | |
| 2244 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) { | |
| 2245 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DI
SALLOWED_BEFORE'])) { | |
| 2246 $sc_disallowed_before = $this->language_data['PARSER_CONTROL
']['COMMENTS']['DISALLOWED_BEFORE']; | |
| 2247 } | |
| 2248 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DI
SALLOWED_AFTER'])) { | |
| 2249 $sc_disallowed_after = $this->language_data['PARSER_CONTROL'
]['COMMENTS']['DISALLOWED_AFTER']; | |
| 2250 } | |
| 2251 } | |
| 2252 } | |
| 2253 | |
| 2254 //Fix for SF#1932083: Multichar Quotemarks unsupported | |
| 2255 $is_string_starter = array(); | |
| 2256 if ($this->lexic_permissions['STRINGS']) { | |
| 2257 foreach ($this->language_data['QUOTEMARKS'] as $quotemark) { | |
| 2258 if (!isset($is_string_starter[$quotemark[0]])) { | |
| 2259 $is_string_starter[$quotemark[0]] = (string)$quotemark; | |
| 2260 } else if (is_string($is_string_starter[$quotemark[0]])) { | |
| 2261 $is_string_starter[$quotemark[0]] = array( | |
| 2262 $is_string_starter[$quotemark[0]], | |
| 2263 $quotemark); | |
| 2264 } else { | |
| 2265 $is_string_starter[$quotemark[0]][] = $quotemark; | |
| 2266 } | |
| 2267 } | |
| 2268 } | |
| 2269 | |
| 2270 // Now we go through each part. We know that even-indexed parts are | |
| 2271 // code that shouldn't be highlighted, and odd-indexed parts should | |
| 2272 // be highlighted | |
| 2273 for ($key = 0; $key < $num_parts; ++$key) { | |
| 2274 $STRICTATTRS = ''; | |
| 2275 | |
| 2276 // If this block should be highlighted... | |
| 2277 if (!($key & 1)) { | |
| 2278 // Else not a block to highlight | |
| 2279 $endresult .= $this->hsc($parts[$key][1]); | |
| 2280 unset($parts[$key]); | |
| 2281 continue; | |
| 2282 } | |
| 2283 | |
| 2284 $result = ''; | |
| 2285 $part = $parts[$key][1]; | |
| 2286 | |
| 2287 $highlight_part = true; | |
| 2288 if ($this->strict_mode && !is_null($parts[$key][0])) { | |
| 2289 // get the class key for this block of code | |
| 2290 $script_key = $parts[$key][0]; | |
| 2291 $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK']
[$script_key]; | |
| 2292 if ($this->language_data['STYLES']['SCRIPT'][$script_key] != ''
&& | |
| 2293 $this->lexic_permissions['SCRIPT']) { | |
| 2294 // Add a span element around the source to | |
| 2295 // highlight the overall source block | |
| 2296 if (!$this->use_classes && | |
| 2297 $this->language_data['STYLES']['SCRIPT'][$script_key] !=
'') { | |
| 2298 $attributes = ' style="' . $this->language_data['STYLES'
]['SCRIPT'][$script_key] . '"'; | |
| 2299 } else { | |
| 2300 $attributes = ' class="sc' . $script_key . '"'; | |
| 2301 } | |
| 2302 $result .= "<span$attributes>"; | |
| 2303 $STRICTATTRS = $attributes; | |
| 2304 } | |
| 2305 } | |
| 2306 | |
| 2307 if ($highlight_part) { | |
| 2308 // Now, highlight the code in this block. This code | |
| 2309 // is really the engine of GeSHi (along with the method | |
| 2310 // parse_non_string_part). | |
| 2311 | |
| 2312 // cache comment regexps incrementally | |
| 2313 $next_comment_regexp_key = ''; | |
| 2314 $next_comment_regexp_pos = -1; | |
| 2315 $next_comment_multi_pos = -1; | |
| 2316 $next_comment_single_pos = -1; | |
| 2317 $comment_regexp_cache_per_key = array(); | |
| 2318 $comment_multi_cache_per_key = array(); | |
| 2319 $comment_single_cache_per_key = array(); | |
| 2320 $next_open_comment_multi = ''; | |
| 2321 $next_comment_single_key = ''; | |
| 2322 $escape_regexp_cache_per_key = array(); | |
| 2323 $next_escape_regexp_key = ''; | |
| 2324 $next_escape_regexp_pos = -1; | |
| 2325 | |
| 2326 $length = strlen($part); | |
| 2327 for ($i = 0; $i < $length; ++$i) { | |
| 2328 // Get the next char | |
| 2329 $char = $part[$i]; | |
| 2330 $char_len = 1; | |
| 2331 | |
| 2332 // update regexp comment cache if needed | |
| 2333 if (isset($this->language_data['COMMENT_REGEXP']) && $next_c
omment_regexp_pos < $i) { | |
| 2334 $next_comment_regexp_pos = $length; | |
| 2335 foreach ($this->language_data['COMMENT_REGEXP'] as $comm
ent_key => $regexp) { | |
| 2336 $match_i = false; | |
| 2337 if (isset($comment_regexp_cache_per_key[$comment_key
]) && | |
| 2338 ($comment_regexp_cache_per_key[$comment_key]['po
s'] >= $i || | |
| 2339 $comment_regexp_cache_per_key[$comment_key]['po
s'] === false)) { | |
| 2340 // we have already matched something | |
| 2341 if ($comment_regexp_cache_per_key[$comment_key][
'pos'] === false) { | |
| 2342 // this comment is never matched | |
| 2343 continue; | |
| 2344 } | |
| 2345 $match_i = $comment_regexp_cache_per_key[$commen
t_key]['pos']; | |
| 2346 } else if ( | |
| 2347 //This is to allow use of the offset parameter i
n preg_match and stay as compatible with older PHP versions as possible | |
| 2348 (GESHI_PHP_PRE_433 && preg_match($regexp, substr
($part, $i), $match, PREG_OFFSET_CAPTURE)) || | |
| 2349 (!GESHI_PHP_PRE_433 && preg_match($regexp, $part
, $match, PREG_OFFSET_CAPTURE, $i)) | |
| 2350 ) { | |
| 2351 $match_i = $match[0][1]; | |
| 2352 if (GESHI_PHP_PRE_433) { | |
| 2353 $match_i += $i; | |
| 2354 } | |
| 2355 | |
| 2356 $comment_regexp_cache_per_key[$comment_key] = ar
ray( | |
| 2357 'key' => $comment_key, | |
| 2358 'length' => strlen($match[0][0]), | |
| 2359 'pos' => $match_i | |
| 2360 ); | |
| 2361 } else { | |
| 2362 $comment_regexp_cache_per_key[$comment_key]['pos
'] = false; | |
| 2363 continue; | |
| 2364 } | |
| 2365 | |
| 2366 if ($match_i !== false && $match_i < $next_comment_r
egexp_pos) { | |
| 2367 $next_comment_regexp_pos = $match_i; | |
| 2368 $next_comment_regexp_key = $comment_key; | |
| 2369 if ($match_i === $i) { | |
| 2370 break; | |
| 2371 } | |
| 2372 } | |
| 2373 } | |
| 2374 } | |
| 2375 | |
| 2376 $string_started = false; | |
| 2377 | |
| 2378 if (isset($is_string_starter[$char])) { | |
| 2379 // Possibly the start of a new string ... | |
| 2380 | |
| 2381 //Check which starter it was ... | |
| 2382 //Fix for SF#1932083: Multichar Quotemarks unsupported | |
| 2383 if (is_array($is_string_starter[$char])) { | |
| 2384 $char_new = ''; | |
| 2385 foreach ($is_string_starter[$char] as $testchar) { | |
| 2386 if ($testchar === substr($part, $i, strlen($test
char)) && | |
| 2387 strlen($testchar) > strlen($char_new)) { | |
| 2388 $char_new = $testchar; | |
| 2389 $string_started = true; | |
| 2390 } | |
| 2391 } | |
| 2392 if ($string_started) { | |
| 2393 $char = $char_new; | |
| 2394 } | |
| 2395 } else { | |
| 2396 $testchar = $is_string_starter[$char]; | |
| 2397 if ($testchar === substr($part, $i, strlen($testchar
))) { | |
| 2398 $char = $testchar; | |
| 2399 $string_started = true; | |
| 2400 } | |
| 2401 } | |
| 2402 $char_len = strlen($char); | |
| 2403 } | |
| 2404 | |
| 2405 if ($string_started && $i != $next_comment_regexp_pos) { | |
| 2406 // Hand out the correct style information for this strin
g | |
| 2407 $string_key = array_search($char, $this->language_data['
QUOTEMARKS']); | |
| 2408 if (!isset($this->language_data['STYLES']['STRINGS'][$st
ring_key]) || | |
| 2409 !isset($this->language_data['STYLES']['ESCAPE_CHAR']
[$string_key])) { | |
| 2410 $string_key = 0; | |
| 2411 } | |
| 2412 | |
| 2413 // parse the stuff before this | |
| 2414 $result .= $this->parse_non_string_part($stuff_to_parse)
; | |
| 2415 $stuff_to_parse = ''; | |
| 2416 | |
| 2417 if (!$this->use_classes) { | |
| 2418 $string_attributes = ' style="' . $this->language_da
ta['STYLES']['STRINGS'][$string_key] . '"'; | |
| 2419 } else { | |
| 2420 $string_attributes = ' class="st'.$string_key.'"'; | |
| 2421 } | |
| 2422 | |
| 2423 // now handle the string | |
| 2424 $string = "<span$string_attributes>" . GeSHi::hsc($char)
; | |
| 2425 $start = $i + $char_len; | |
| 2426 $string_open = true; | |
| 2427 | |
| 2428 if(empty($this->language_data['ESCAPE_REGEXP'])) { | |
| 2429 $next_escape_regexp_pos = $length; | |
| 2430 } | |
| 2431 | |
| 2432 do { | |
| 2433 //Get the regular ending pos ... | |
| 2434 $close_pos = strpos($part, $char, $start); | |
| 2435 if(false === $close_pos) { | |
| 2436 $close_pos = $length; | |
| 2437 } | |
| 2438 | |
| 2439 if($this->lexic_permissions['ESCAPE_CHAR']) { | |
| 2440 // update escape regexp cache if needed | |
| 2441 if (isset($this->language_data['ESCAPE_REGEXP'])
&& $next_escape_regexp_pos < $start) { | |
| 2442 $next_escape_regexp_pos = $length; | |
| 2443 foreach ($this->language_data['ESCAPE_REGEXP
'] as $escape_key => $regexp) { | |
| 2444 $match_i = false; | |
| 2445 if (isset($escape_regexp_cache_per_key[$
escape_key]) && | |
| 2446 ($escape_regexp_cache_per_key[$escap
e_key]['pos'] >= $start || | |
| 2447 $escape_regexp_cache_per_key[$escap
e_key]['pos'] === false)) { | |
| 2448 // we have already matched something | |
| 2449 if ($escape_regexp_cache_per_key[$es
cape_key]['pos'] === false) { | |
| 2450 // this comment is never matched | |
| 2451 continue; | |
| 2452 } | |
| 2453 $match_i = $escape_regexp_cache_per_
key[$escape_key]['pos']; | |
| 2454 } else if ( | |
| 2455 //This is to allow use of the offset
parameter in preg_match and stay as compatible with older PHP versions as possi
ble | |
| 2456 (GESHI_PHP_PRE_433 && preg_match($re
gexp, substr($part, $start), $match, PREG_OFFSET_CAPTURE)) || | |
| 2457 (!GESHI_PHP_PRE_433 && preg_match($r
egexp, $part, $match, PREG_OFFSET_CAPTURE, $start)) | |
| 2458 ) { | |
| 2459 $match_i = $match[0][1]; | |
| 2460 if (GESHI_PHP_PRE_433) { | |
| 2461 $match_i += $start; | |
| 2462 } | |
| 2463 | |
| 2464 $escape_regexp_cache_per_key[$escape
_key] = array( | |
| 2465 'key' => $escape_key, | |
| 2466 'length' => strlen($match[0][0])
, | |
| 2467 'pos' => $match_i | |
| 2468 ); | |
| 2469 } else { | |
| 2470 $escape_regexp_cache_per_key[$escape
_key]['pos'] = false; | |
| 2471 continue; | |
| 2472 } | |
| 2473 | |
| 2474 if ($match_i !== false && $match_i < $ne
xt_escape_regexp_pos) { | |
| 2475 $next_escape_regexp_pos = $match_i; | |
| 2476 $next_escape_regexp_key = $escape_ke
y; | |
| 2477 if ($match_i === $start) { | |
| 2478 break; | |
| 2479 } | |
| 2480 } | |
| 2481 } | |
| 2482 } | |
| 2483 | |
| 2484 //Find the next simple escape position | |
| 2485 if('' != $this->language_data['ESCAPE_CHAR']) { | |
| 2486 $simple_escape = strpos($part, $this->langua
ge_data['ESCAPE_CHAR'], $start); | |
| 2487 if(false === $simple_escape) { | |
| 2488 $simple_escape = $length; | |
| 2489 } | |
| 2490 } else { | |
| 2491 $simple_escape = $length; | |
| 2492 } | |
| 2493 } else { | |
| 2494 $next_escape_regexp_pos = $length; | |
| 2495 $simple_escape = $length; | |
| 2496 } | |
| 2497 | |
| 2498 if($simple_escape < $next_escape_regexp_pos && | |
| 2499 $simple_escape < $length && | |
| 2500 $simple_escape < $close_pos) { | |
| 2501 //The nexxt escape sequence is a simple one ... | |
| 2502 $es_pos = $simple_escape; | |
| 2503 | |
| 2504 //Add the stuff not in the string yet ... | |
| 2505 $string .= $this->hsc(substr($part, $start, $es_
pos - $start)); | |
| 2506 | |
| 2507 //Get the style for this escaped char ... | |
| 2508 if (!$this->use_classes) { | |
| 2509 $escape_char_attributes = ' style="' . $this
->language_data['STYLES']['ESCAPE_CHAR'][0] . '"'; | |
| 2510 } else { | |
| 2511 $escape_char_attributes = ' class="es0"'; | |
| 2512 } | |
| 2513 | |
| 2514 //Add the style for the escape char ... | |
| 2515 $string .= "<span$escape_char_attributes>" . | |
| 2516 GeSHi::hsc($this->language_data['ESCAPE_CHAR
']); | |
| 2517 | |
| 2518 //Get the byte AFTER the ESCAPE_CHAR we just fou
nd | |
| 2519 $es_char = $part[$es_pos + 1]; | |
| 2520 if ($es_char == "\n") { | |
| 2521 // don't put a newline around newlines | |
| 2522 $string .= "</span>\n"; | |
| 2523 $start = $es_pos + 2; | |
| 2524 } else if (ord($es_char) >= 128) { | |
| 2525 //This is an non-ASCII char (UTF8 or single
byte) | |
| 2526 //This code tries to work around SF#2037598
... | |
| 2527 if(function_exists('mb_substr')) { | |
| 2528 $es_char_m = mb_substr(substr($part, $es
_pos+1, 16), 0, 1, $this->encoding); | |
| 2529 $string .= $es_char_m . '</span>'; | |
| 2530 } else if (!GESHI_PHP_PRE_433 && 'utf-8' ==
$this->encoding) { | |
| 2531 if(preg_match("/[\xC2-\xDF][\x80-\xBF]". | |
| 2532 "|\xE0[\xA0-\xBF][\x80-\xBF]". | |
| 2533 "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}"
. | |
| 2534 "|\xED[\x80-\x9F][\x80-\xBF]". | |
| 2535 "|\xF0[\x90-\xBF][\x80-\xBF]{2}". | |
| 2536 "|[\xF1-\xF3][\x80-\xBF]{3}". | |
| 2537 "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s", | |
| 2538 $part, $es_char_m, null, $es_pos + 1
)) { | |
| 2539 $es_char_m = $es_char_m[0]; | |
| 2540 } else { | |
| 2541 $es_char_m = $es_char; | |
| 2542 } | |
| 2543 $string .= $this->hsc($es_char_m) . '</s
pan>'; | |
| 2544 } else { | |
| 2545 $es_char_m = $this->hsc($es_char); | |
| 2546 } | |
| 2547 $start = $es_pos + strlen($es_char_m) + 1; | |
| 2548 } else { | |
| 2549 $string .= $this->hsc($es_char) . '</span>'; | |
| 2550 $start = $es_pos + 2; | |
| 2551 } | |
| 2552 } else if ($next_escape_regexp_pos < $length && | |
| 2553 $next_escape_regexp_pos < $close_pos) { | |
| 2554 $es_pos = $next_escape_regexp_pos; | |
| 2555 //Add the stuff not in the string yet ... | |
| 2556 $string .= $this->hsc(substr($part, $start, $es_
pos - $start)); | |
| 2557 | |
| 2558 //Get the key and length of this match ... | |
| 2559 $escape = $escape_regexp_cache_per_key[$next_esc
ape_regexp_key]; | |
| 2560 $escape_str = substr($part, $es_pos, $escape['le
ngth']); | |
| 2561 $escape_key = $escape['key']; | |
| 2562 | |
| 2563 //Get the style for this escaped char ... | |
| 2564 if (!$this->use_classes) { | |
| 2565 $escape_char_attributes = ' style="' . $this
->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"'; | |
| 2566 } else { | |
| 2567 $escape_char_attributes = ' class="es' . $es
cape_key . '"'; | |
| 2568 } | |
| 2569 | |
| 2570 //Add the style for the escape char ... | |
| 2571 $string .= "<span$escape_char_attributes>" . | |
| 2572 $this->hsc($escape_str) . '</span>'; | |
| 2573 | |
| 2574 $start = $es_pos + $escape['length']; | |
| 2575 } else { | |
| 2576 //Copy the remainder of the string ... | |
| 2577 $string .= $this->hsc(substr($part, $start, $clo
se_pos - $start + $char_len)) . '</span>'; | |
| 2578 $start = $close_pos + $char_len; | |
| 2579 $string_open = false; | |
| 2580 } | |
| 2581 } while($string_open); | |
| 2582 | |
| 2583 if ($check_linenumbers) { | |
| 2584 // Are line numbers used? If, we should end the stri
ng before | |
| 2585 // the newline and begin it again (so when <li>s are
put in the source | |
| 2586 // remains XHTML compliant) | |
| 2587 // note to self: This opens up possibility of config
files specifying | |
| 2588 // that languages can/cannot have multiline strings?
?? | |
| 2589 $string = str_replace("\n", "</span>\n<span$string_a
ttributes>", $string); | |
| 2590 } | |
| 2591 | |
| 2592 $result .= $string; | |
| 2593 $string = ''; | |
| 2594 $i = $start - 1; | |
| 2595 continue; | |
| 2596 } else if ($this->lexic_permissions['STRINGS'] && $hq && $hq
[0] == $char && | |
| 2597 substr($part, $i, $hq_strlen) == $hq) { | |
| 2598 // The start of a hard quoted string | |
| 2599 if (!$this->use_classes) { | |
| 2600 $string_attributes = ' style="' . $this->language_da
ta['STYLES']['STRINGS']['HARD'] . '"'; | |
| 2601 $escape_char_attributes = ' style="' . $this->langua
ge_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"'; | |
| 2602 } else { | |
| 2603 $string_attributes = ' class="st_h"'; | |
| 2604 $escape_char_attributes = ' class="es_h"'; | |
| 2605 } | |
| 2606 // parse the stuff before this | |
| 2607 $result .= $this->parse_non_string_part($stuff_to_parse)
; | |
| 2608 $stuff_to_parse = ''; | |
| 2609 | |
| 2610 // now handle the string | |
| 2611 $string = ''; | |
| 2612 | |
| 2613 // look for closing quote | |
| 2614 $start = $i + $hq_strlen; | |
| 2615 while ($close_pos = strpos($part, $this->language_data['
HARDQUOTE'][1], $start)) { | |
| 2616 $start = $close_pos + 1; | |
| 2617 if ($this->lexic_permissions['ESCAPE_CHAR'] && $part
[$close_pos - 1] == $this->language_data['HARDCHAR']) { | |
| 2618 // make sure this quote is not escaped | |
| 2619 foreach ($this->language_data['HARDESCAPE'] as $
hardescape) { | |
| 2620 if (substr($part, $close_pos - 1, strlen($ha
rdescape)) == $hardescape) { | |
| 2621 // check wether this quote is escaped or
if it is something like '\\' | |
| 2622 $escape_char_pos = $close_pos - 1; | |
| 2623 while ($escape_char_pos > 0 | |
| 2624 && $part[$escape_char_pos - 1] =
= $this->language_data['HARDCHAR']) { | |
| 2625 --$escape_char_pos; | |
| 2626 } | |
| 2627 if (($close_pos - $escape_char_pos) & 1)
{ | |
| 2628 // uneven number of escape chars =>
this quote is escaped | |
| 2629 continue 2; | |
| 2630 } | |
| 2631 } | |
| 2632 } | |
| 2633 } | |
| 2634 | |
| 2635 // found closing quote | |
| 2636 break; | |
| 2637 } | |
| 2638 | |
| 2639 //Found the closing delimiter? | |
| 2640 if (!$close_pos) { | |
| 2641 // span till the end of this $part when no closing d
elimiter is found | |
| 2642 $close_pos = $length; | |
| 2643 } | |
| 2644 | |
| 2645 //Get the actual string | |
| 2646 $string = substr($part, $i, $close_pos - $i + 1); | |
| 2647 $i = $close_pos; | |
| 2648 | |
| 2649 // handle escape chars and encode html chars | |
| 2650 // (special because when we have escape chars within our
string they may not be escaped) | |
| 2651 if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->la
nguage_data['ESCAPE_CHAR']) { | |
| 2652 $start = 0; | |
| 2653 $new_string = ''; | |
| 2654 while ($es_pos = strpos($string, $this->language_dat
a['ESCAPE_CHAR'], $start)) { | |
| 2655 // hmtl escape stuff before | |
| 2656 $new_string .= $this->hsc(substr($string, $start
, $es_pos - $start)); | |
| 2657 // check if this is a hard escape | |
| 2658 foreach ($this->language_data['HARDESCAPE'] as $
hardescape) { | |
| 2659 if (substr($string, $es_pos, strlen($hardesc
ape)) == $hardescape) { | |
| 2660 // indeed, this is a hardescape | |
| 2661 $new_string .= "<span$escape_char_attrib
utes>" . | |
| 2662 $this->hsc($hardescape) . '</span>'; | |
| 2663 $start = $es_pos + strlen($hardescape); | |
| 2664 continue 2; | |
| 2665 } | |
| 2666 } | |
| 2667 // not a hard escape, but a normal escape | |
| 2668 // they come in pairs of two | |
| 2669 $c = 0; | |
| 2670 while (isset($string[$es_pos + $c]) && isset($st
ring[$es_pos + $c + 1]) | |
| 2671 && $string[$es_pos + $c] == $this->language_
data['ESCAPE_CHAR'] | |
| 2672 && $string[$es_pos + $c + 1] == $this->langu
age_data['ESCAPE_CHAR']) { | |
| 2673 $c += 2; | |
| 2674 } | |
| 2675 if ($c) { | |
| 2676 $new_string .= "<span$escape_char_attributes
>" . | |
| 2677 str_repeat($escaped_escape_char, $c) . | |
| 2678 '</span>'; | |
| 2679 $start = $es_pos + $c; | |
| 2680 } else { | |
| 2681 // this is just a single lonely escape char.
.. | |
| 2682 $new_string .= $escaped_escape_char; | |
| 2683 $start = $es_pos + 1; | |
| 2684 } | |
| 2685 } | |
| 2686 $string = $new_string . $this->hsc(substr($string, $
start)); | |
| 2687 } else { | |
| 2688 $string = $this->hsc($string); | |
| 2689 } | |
| 2690 | |
| 2691 if ($check_linenumbers) { | |
| 2692 // Are line numbers used? If, we should end the stri
ng before | |
| 2693 // the newline and begin it again (so when <li>s are
put in the source | |
| 2694 // remains XHTML compliant) | |
| 2695 // note to self: This opens up possibility of config
files specifying | |
| 2696 // that languages can/cannot have multiline strings?
?? | |
| 2697 $string = str_replace("\n", "</span>\n<span$string_a
ttributes>", $string); | |
| 2698 } | |
| 2699 | |
| 2700 $result .= "<span$string_attributes>" . $string . '</spa
n>'; | |
| 2701 $string = ''; | |
| 2702 continue; | |
| 2703 } else { | |
| 2704 //Have a look for regexp comments | |
| 2705 if ($i == $next_comment_regexp_pos) { | |
| 2706 $COMMENT_MATCHED = true; | |
| 2707 $comment = $comment_regexp_cache_per_key[$next_comme
nt_regexp_key]; | |
| 2708 $test_str = $this->hsc(substr($part, $i, $comment['l
ength'])); | |
| 2709 | |
| 2710 //@todo If remove important do remove here | |
| 2711 if ($this->lexic_permissions['COMMENTS']['MULTI']) { | |
| 2712 if (!$this->use_classes) { | |
| 2713 $attributes = ' style="' . $this->language_d
ata['STYLES']['COMMENTS'][$comment['key']] . '"'; | |
| 2714 } else { | |
| 2715 $attributes = ' class="co' . $comment['key']
. '"'; | |
| 2716 } | |
| 2717 | |
| 2718 $test_str = "<span$attributes>" . $test_str . "<
/span>"; | |
| 2719 | |
| 2720 // Short-cut through all the multiline code | |
| 2721 if ($check_linenumbers) { | |
| 2722 // strreplace to put close span and open spa
n around multiline newlines | |
| 2723 $test_str = str_replace( | |
| 2724 "\n", "</span>\n<span$attributes>", | |
| 2725 str_replace("\n ", "\n ", $test_str
) | |
| 2726 ); | |
| 2727 } | |
| 2728 } | |
| 2729 | |
| 2730 $i += $comment['length'] - 1; | |
| 2731 | |
| 2732 // parse the rest | |
| 2733 $result .= $this->parse_non_string_part($stuff_to_pa
rse); | |
| 2734 $stuff_to_parse = ''; | |
| 2735 } | |
| 2736 | |
| 2737 // If we haven't matched a regexp comment, try multi-lin
e comments | |
| 2738 if (!$COMMENT_MATCHED) { | |
| 2739 // Is this a multiline comment? | |
| 2740 if (!empty($this->language_data['COMMENT_MULTI']) &&
$next_comment_multi_pos < $i) { | |
| 2741 $next_comment_multi_pos = $length; | |
| 2742 foreach ($this->language_data['COMMENT_MULTI'] a
s $open => $close) { | |
| 2743 $match_i = false; | |
| 2744 if (isset($comment_multi_cache_per_key[$open
]) && | |
| 2745 ($comment_multi_cache_per_key[$open] >=
$i || | |
| 2746 $comment_multi_cache_per_key[$open] ===
false)) { | |
| 2747 // we have already matched something | |
| 2748 if ($comment_multi_cache_per_key[$open]
=== false) { | |
| 2749 // this comment is never matched | |
| 2750 continue; | |
| 2751 } | |
| 2752 $match_i = $comment_multi_cache_per_key[
$open]; | |
| 2753 } else if (($match_i = stripos($part, $open,
$i)) !== false) { | |
| 2754 $comment_multi_cache_per_key[$open] = $m
atch_i; | |
| 2755 } else { | |
| 2756 $comment_multi_cache_per_key[$open] = fa
lse; | |
| 2757 continue; | |
| 2758 } | |
| 2759 if ($match_i !== false && $match_i < $next_c
omment_multi_pos) { | |
| 2760 $next_comment_multi_pos = $match_i; | |
| 2761 $next_open_comment_multi = $open; | |
| 2762 if ($match_i === $i) { | |
| 2763 break; | |
| 2764 } | |
| 2765 } | |
| 2766 } | |
| 2767 } | |
| 2768 if ($i == $next_comment_multi_pos) { | |
| 2769 $open = $next_open_comment_multi; | |
| 2770 $close = $this->language_data['COMMENT_MULTI'][$
open]; | |
| 2771 $open_strlen = strlen($open); | |
| 2772 $close_strlen = strlen($close); | |
| 2773 $COMMENT_MATCHED = true; | |
| 2774 $test_str_match = $open; | |
| 2775 //@todo If remove important do remove here | |
| 2776 if ($this->lexic_permissions['COMMENTS']['MULTI'
] || | |
| 2777 $open == GESHI_START_IMPORTANT) { | |
| 2778 if ($open != GESHI_START_IMPORTANT) { | |
| 2779 if (!$this->use_classes) { | |
| 2780 $attributes = ' style="' . $this->la
nguage_data['STYLES']['COMMENTS']['MULTI'] . '"'; | |
| 2781 } else { | |
| 2782 $attributes = ' class="coMULTI"'; | |
| 2783 } | |
| 2784 $test_str = "<span$attributes>" . $this-
>hsc($open); | |
| 2785 } else { | |
| 2786 if (!$this->use_classes) { | |
| 2787 $attributes = ' style="' . $this->im
portant_styles . '"'; | |
| 2788 } else { | |
| 2789 $attributes = ' class="imp"'; | |
| 2790 } | |
| 2791 | |
| 2792 // We don't include the start of the com
ment if it's an | |
| 2793 // "important" part | |
| 2794 $test_str = "<span$attributes>"; | |
| 2795 } | |
| 2796 } else { | |
| 2797 $test_str = $this->hsc($open); | |
| 2798 } | |
| 2799 | |
| 2800 $close_pos = strpos( $part, $close, $i + $open_s
trlen ); | |
| 2801 | |
| 2802 if ($close_pos === false) { | |
| 2803 $close_pos = $length; | |
| 2804 } | |
| 2805 | |
| 2806 // Short-cut through all the multiline code | |
| 2807 $rest_of_comment = $this->hsc(substr($part, $i +
$open_strlen, $close_pos - $i - $open_strlen + $close_strlen)); | |
| 2808 if (($this->lexic_permissions['COMMENTS']['MULTI
'] || | |
| 2809 $test_str_match == GESHI_START_IMPORTANT) && | |
| 2810 $check_linenumbers) { | |
| 2811 | |
| 2812 // strreplace to put close span and open spa
n around multiline newlines | |
| 2813 $test_str .= str_replace( | |
| 2814 "\n", "</span>\n<span$attributes>", | |
| 2815 str_replace("\n ", "\n ", $rest_of_
comment) | |
| 2816 ); | |
| 2817 } else { | |
| 2818 $test_str .= $rest_of_comment; | |
| 2819 } | |
| 2820 | |
| 2821 if ($this->lexic_permissions['COMMENTS']['MULTI'
] || | |
| 2822 $test_str_match == GESHI_START_IMPORTANT) { | |
| 2823 $test_str .= '</span>'; | |
| 2824 } | |
| 2825 | |
| 2826 $i = $close_pos + $close_strlen - 1; | |
| 2827 | |
| 2828 // parse the rest | |
| 2829 $result .= $this->parse_non_string_part($stuff_t
o_parse); | |
| 2830 $stuff_to_parse = ''; | |
| 2831 } | |
| 2832 } | |
| 2833 | |
| 2834 // If we haven't matched a multiline comment, try single
-line comments | |
| 2835 if (!$COMMENT_MATCHED) { | |
| 2836 // cache potential single line comment occurances | |
| 2837 if (!empty($this->language_data['COMMENT_SINGLE']) &
& $next_comment_single_pos < $i) { | |
| 2838 $next_comment_single_pos = $length; | |
| 2839 foreach ($this->language_data['COMMENT_SINGLE']
as $comment_key => $comment_mark) { | |
| 2840 $match_i = false; | |
| 2841 if (isset($comment_single_cache_per_key[$com
ment_key]) && | |
| 2842 ($comment_single_cache_per_key[$comment_
key] >= $i || | |
| 2843 $comment_single_cache_per_key[$comment_
key] === false)) { | |
| 2844 // we have already matched something | |
| 2845 if ($comment_single_cache_per_key[$comme
nt_key] === false) { | |
| 2846 // this comment is never matched | |
| 2847 continue; | |
| 2848 } | |
| 2849 $match_i = $comment_single_cache_per_key
[$comment_key]; | |
| 2850 } else if ( | |
| 2851 // case sensitive comments | |
| 2852 ($this->language_data['CASE_SENSITIVE'][
GESHI_COMMENTS] && | |
| 2853 ($match_i = stripos($part, $comment_mark
, $i)) !== false) || | |
| 2854 // non case sensitive | |
| 2855 (!$this->language_data['CASE_SENSITIVE']
[GESHI_COMMENTS] && | |
| 2856 (($match_i = strpos($part, $comment_ma
rk, $i)) !== false))) { | |
| 2857 $comment_single_cache_per_key[$comment_k
ey] = $match_i; | |
| 2858 } else { | |
| 2859 $comment_single_cache_per_key[$comment_k
ey] = false; | |
| 2860 continue; | |
| 2861 } | |
| 2862 if ($match_i !== false && $match_i < $next_c
omment_single_pos) { | |
| 2863 $next_comment_single_pos = $match_i; | |
| 2864 $next_comment_single_key = $comment_key; | |
| 2865 if ($match_i === $i) { | |
| 2866 break; | |
| 2867 } | |
| 2868 } | |
| 2869 } | |
| 2870 } | |
| 2871 if ($next_comment_single_pos == $i) { | |
| 2872 $comment_key = $next_comment_single_key; | |
| 2873 $comment_mark = $this->language_data['COMMENT_SI
NGLE'][$comment_key]; | |
| 2874 $com_len = strlen($comment_mark); | |
| 2875 | |
| 2876 // This check will find special variables like $
# in bash | |
| 2877 // or compiler directives of Delphi beginning {$ | |
| 2878 if ((empty($sc_disallowed_before) || ($i == 0) |
| | |
| 2879 (false === strpos($sc_disallowed_before, $pa
rt[$i-1]))) && | |
| 2880 (empty($sc_disallowed_after) || ($length <=
$i + $com_len) || | |
| 2881 (false === strpos($sc_disallowed_after, $par
t[$i + $com_len])))) | |
| 2882 { | |
| 2883 // this is a valid comment | |
| 2884 $COMMENT_MATCHED = true; | |
| 2885 if ($this->lexic_permissions['COMMENTS'][$co
mment_key]) { | |
| 2886 if (!$this->use_classes) { | |
| 2887 $attributes = ' style="' . $this->la
nguage_data['STYLES']['COMMENTS'][$comment_key] . '"'; | |
| 2888 } else { | |
| 2889 $attributes = ' class="co' . $commen
t_key . '"'; | |
| 2890 } | |
| 2891 $test_str = "<span$attributes>" . $this-
>hsc($this->change_case($comment_mark)); | |
| 2892 } else { | |
| 2893 $test_str = $this->hsc($comment_mark); | |
| 2894 } | |
| 2895 | |
| 2896 //Check if this comment is the last in the s
ource | |
| 2897 $close_pos = strpos($part, "\n", $i); | |
| 2898 $oops = false; | |
| 2899 if ($close_pos === false) { | |
| 2900 $close_pos = $length; | |
| 2901 $oops = true; | |
| 2902 } | |
| 2903 $test_str .= $this->hsc(substr($part, $i + $
com_len, $close_pos - $i - $com_len)); | |
| 2904 if ($this->lexic_permissions['COMMENTS'][$co
mment_key]) { | |
| 2905 $test_str .= "</span>"; | |
| 2906 } | |
| 2907 | |
| 2908 // Take into account that the comment might
be the last in the source | |
| 2909 if (!$oops) { | |
| 2910 $test_str .= "\n"; | |
| 2911 } | |
| 2912 | |
| 2913 $i = $close_pos; | |
| 2914 | |
| 2915 // parse the rest | |
| 2916 $result .= $this->parse_non_string_part($stu
ff_to_parse); | |
| 2917 $stuff_to_parse = ''; | |
| 2918 } | |
| 2919 } | |
| 2920 } | |
| 2921 } | |
| 2922 | |
| 2923 // Where are we adding this char? | |
| 2924 if (!$COMMENT_MATCHED) { | |
| 2925 $stuff_to_parse .= $char; | |
| 2926 } else { | |
| 2927 $result .= $test_str; | |
| 2928 unset($test_str); | |
| 2929 $COMMENT_MATCHED = false; | |
| 2930 } | |
| 2931 } | |
| 2932 // Parse the last bit | |
| 2933 $result .= $this->parse_non_string_part($stuff_to_parse); | |
| 2934 $stuff_to_parse = ''; | |
| 2935 } else { | |
| 2936 $result .= $this->hsc($part); | |
| 2937 } | |
| 2938 // Close the <span> that surrounds the block | |
| 2939 if ($STRICTATTRS != '') { | |
| 2940 $result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $resu
lt); | |
| 2941 $result .= '</span>'; | |
| 2942 } | |
| 2943 | |
| 2944 $endresult .= $result; | |
| 2945 unset($part, $parts[$key], $result); | |
| 2946 } | |
| 2947 | |
| 2948 //This fix is related to SF#1923020, but has to be applied regardless of | |
| 2949 //actually highlighting symbols. | |
| 2950 /** NOTE: memorypeak #3 */ | |
| 2951 $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $en
dresult); | |
| 2952 | |
| 2953 // // Parse the last stuff (redundant?) | |
| 2954 // $result .= $this->parse_non_string_part($stuff_to_parse); | |
| 2955 | |
| 2956 // Lop off the very first and last spaces | |
| 2957 // $result = substr($result, 1, -1); | |
| 2958 | |
| 2959 // We're finished: stop timing | |
| 2960 $this->set_time($start_time, microtime()); | |
| 2961 | |
| 2962 $this->finalise($endresult); | |
| 2963 return $endresult; | |
| 2964 } | |
| 2965 | |
/**
 * Swaps out spaces and tabs for HTML indentation. Not needed if
 * the code is in a pre block...
 *
 * Tabs are expanded to the next tab stop (width taken from
 * get_real_tab_width()). Only visible characters advance the column
 * counter: everything between '<' and '>' is skipped and an HTML entity
 * counts as a single column. Afterwards a leading space on each line and
 * every second space of a run are turned into &nbsp; entities so that a
 * browser cannot collapse them. When line numbers are disabled, newlines
 * are finally replaced via nl2br() or by the configured line ending.
 *
 * @param string The source to indent (reference!)
 * @since 1.0.0
 * @access private
 */
function indent(&$result) {
    // Replace tabs with the correct number of spaces
    if (false !== strpos($result, "\t")) {
        $lines = explode("\n", $result);
        $result = null; // Save memory while we process the lines individually
        $tab_width = $this->get_real_tab_width();
        // Template that the tab branch below slices: one entity followed by
        // $tab_width plain spaces. The slice offsets used there (6 and +5)
        // are strlen('&nbsp;') and strlen('&nbsp;') - 1, so the entity has
        // to be spelled out literally here.
        $tab_string = '&nbsp;' . str_repeat(' ', $tab_width);

        for ($key = 0, $n = count($lines); $key < $n; $key++) {
            $line = $lines[$key];
            if (false === strpos($line, "\t")) {
                continue;
            }

            $pos = 0; // visible column reached so far on this line
            $length = strlen($line);
            $lines[$key] = ''; // reduce memory

            $IN_TAG = false;
            for ($i = 0; $i < $length; ++$i) {
                $char = $line[$i];
                // Simple engine to work out whether we're in a tag.
                // If we are we modify $pos. This is so we ignore HTML
                // in the line and only workout the tab replacement
                // via the actual content of the string
                // This test could be improved to include strings in the
                // html so that < or > would be allowed in user's styles
                // (e.g. quotes: '<' '>'; or similar)
                if ($IN_TAG) {
                    if ('>' == $char) {
                        $IN_TAG = false;
                    }
                    $lines[$key] .= $char;
                } else if ('<' == $char) {
                    $IN_TAG = true;
                    $lines[$key] .= '<';
                } else if ('&' == $char) {
                    // Start of an HTML entity. Look for its ';' starting
                    // three bytes ahead and pre-subtract the bytes that the
                    // following iterations will add, so that the whole
                    // entity advances $pos by exactly one column.
                    $substr = substr($line, $i + 3, 5);
                    $posi = strpos($substr, ';');
                    if (false === $posi) {
                        ++$pos;
                    } else {
                        $pos -= $posi+2;
                    }
                    $lines[$key] .= $char;
                } else if ("\t" == $char) {
                    $str = '';
                    // OPTIMISE - move $strs out. Make an array:
                    // $tabs = array(
                    //  1 => '&nbsp;',
                    //  2 => '&nbsp; ',
                    //  3 => '&nbsp; &nbsp;' etc etc
                    // to use instead of building a string every time
                    $tab_end_width = $tab_width - ($pos % $tab_width); // columns left until the next tab stop
                    if (($pos & 1) || 1 == $tab_end_width) {
                        // Skip the leading entity: plain spaces only
                        $str .= substr($tab_string, 6, $tab_end_width);
                    } else {
                        // Entity plus ($tab_end_width - 1) plain spaces
                        $str .= substr($tab_string, 0, $tab_end_width+5);
                    }
                    $lines[$key] .= $str;
                    $pos += $tab_end_width;

                    // No further tabs on this line: copy the rest verbatim
                    if (false === strpos($line, "\t", $i + 1)) {
                        $lines[$key] .= substr($line, $i + 1);
                        break;
                    }
                } else if (0 == $pos && ' ' == $char) {
                    // A space in the first column must survive HTML rendering
                    $lines[$key] .= '&nbsp;';
                    ++$pos;
                } else {
                    $lines[$key] .= $char;
                    ++$pos;
                }
            }
        }
        $result = implode("\n", $lines);
        unset($lines); // We don't need the lines separated beyond this --- free them!
    }
    // Other whitespace
    // BenBE: Fix to reduce the number of replacements to be done
    // A leading space becomes an entity; of every pair of spaces the second
    // one becomes an entity, so the result still wraps but does not collapse.
    $result = preg_replace('/^ /m', '&nbsp;', $result);
    $result = str_replace('  ', ' &nbsp;', $result);

    if ($this->line_numbers == GESHI_NO_LINE_NUMBERS) {
        if ($this->line_ending === null) {
            $result = nl2br($result);
        } else {
            $result = str_replace("\n", $this->line_ending, $result);
        }
    }
}
| 3065 | |
/**
 * Applies the language's keyword case policy to a keyword.
 *
 * The policy is read from $this->language_data['CASE_KEYWORDS']; any value
 * other than GESHI_CAPS_UPPER or GESHI_CAPS_LOWER leaves the text untouched.
 *
 * @param string The keyword to change the case of
 * @return string The keyword with its case changed
 * @since 1.0.0
 * @access private
 */
function change_case($instr) {
    $case_mode = $this->language_data['CASE_KEYWORDS'];

    // Loose comparison on purpose: it mirrors what a switch statement does
    if ($case_mode == GESHI_CAPS_UPPER) {
        return strtoupper($instr);
    }
    if ($case_mode == GESHI_CAPS_LOWER) {
        return strtolower($instr);
    }

    // No case change requested
    return $instr;
}
| 3084 | |
/**
 * Handles replacements of keywords to include markup and links if requested
 *
 * The keyword group is taken from $this->_kw_replace_group, which has to be
 * set before this method is invoked. The keyword is wrapped in the
 * intermediate '<|/GROUP/>...|>' marker and, when keyword links are enabled
 * and the group has a non-empty URL template, additionally in a '<|UR1|'
 * link marker.
 *
 * @param array The match data; index 0 holds the matched keyword text
 * @return string The HTML for the match found
 * @since 1.0.8
 * @access private
 *
 * @todo Get rid of ender in keyword links
 */
function handle_keyword_replace($match) {
    $k = $this->_kw_replace_group;
    $keyword = $match[0];

    $before = '';
    $after = '';

    if ($this->keyword_links) {
        // Keyword links have been enabled

        if (isset($this->language_data['URLS'][$k]) &&
            $this->language_data['URLS'][$k] != '') {
            // There is a base URL for this keyword group

            // Start from the text exactly as it was matched. For case
            // insensitive groups whose URL uses {FNAME}, prefer the spelling
            // from the language file so the link gets the correct case.
            // Falling back to the matched text (instead of relying on the
            // loop variable) keeps $word defined and meaningful even when no
            // entry of the group compares equal.
            $word = $keyword;
            if (!$this->language_data['CASE_SENSITIVE'][$k] &&
                strpos($this->language_data['URLS'][$k], '{FNAME}') !== false) {
                foreach ($this->language_data['KEYWORDS'][$k] as $candidate) {
                    if (strcasecmp($candidate, $keyword) == 0) {
                        $word = $candidate;
                        break;
                    }
                }
            }

            // Fill the URL template; '+' from urlencode() is turned into
            // '%20' and every '.' is masked as '<DOT>'.
            $before = '<|UR1|"' .
                str_replace(
                    array(
                        '{FNAME}',
                        '{FNAMEL}',
                        '{FNAMEU}',
                        '.'),
                    array(
                        str_replace('+', '%20', urlencode($this->hsc($word))),
                        str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))),
                        str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))),
                        '<DOT>'),
                    $this->language_data['URLS'][$k]
                ) . '">';
            $after = '</a>';
        }
    }

    return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after;
}
| 3143 | |
/**
 * Callback for regexp highlighting definitions whose style is computed by
 * a user supplied function.
 *
 * The style callback is looked up in
 * $this->language_data['STYLES']['REGEXPS'] under $this->_rx_key and is
 * called with the first captured group.
 *
 * @note this is a callback, don't use it directly
 *
 * @param array the matches array
 * @return string The highlighted string
 * @since 1.0.8
 * @access private
 */
function handle_regexps_callback($matches) {
    // Formerly done with an inline replacement expression:
    // "' style=\"' . call_user_func(\"$func\", '\\1') . '\"\\1|>'"
    $style_function = $this->language_data['STYLES']['REGEXPS'][$this->_rx_key];
    $captured = $matches[1];
    $css = call_user_func($style_function, $captured);

    return ' style="' . $css . '"' . $captured . '|>';
}
| 3158 | |
/**
 * handles newlines in REGEXPS matches. Set the _hmr_* vars before calling this
 *
 * Wraps the replacement text in '<|!REG3XP{key}!>...|>' markers and closes
 * and reopens the marker around every newline, with $this->_hmr_before in
 * front and $this->_hmr_after behind. When $this->_hmr_replace is set,
 * '\N' placeholders in the before/after/replace templates are filled in
 * from the corresponding entries of the matches array; otherwise the whole
 * match is used as the replacement text.
 *
 * @note this is a callback, don't use it directly
 *
 * @param array the matches array
 * @return string
 * @since 1.0.8
 * @access private
 */
function handle_multiline_regexps($matches) {
    $before = $this->_hmr_before;
    $after = $this->_hmr_after;
    if ($this->_hmr_replace) {
        // Map each placeholder ('\0', '\1', ...) to its captured text.
        $placeholders = array();
        foreach ($matches as $k => $captured) {
            $placeholders['\\' . $k] = $captured;
        }

        // strtr() substitutes in a single pass, longest placeholder first.
        // Unlike a chained str_replace() it therefore never re-scans text
        // that was just inserted (captured source code may itself contain
        // a literal "\1") and never mistakes the start of "\10" for "\1".
        $before = strtr($before, $placeholders);
        $after = strtr($after, $placeholders);
        $replace = strtr($this->_hmr_replace, $placeholders);
    } else {
        $replace = $matches[0];
    }
    return $before
                . '<|!REG3XP' . $this->_hmr_key .'!>'
                    . str_replace("\n", "|>\n<|!REG3XP" . $this->_hmr_key . '!>', $replace)
                . '|>'
          . $after;
}
| 3192 | |
/**
 * Takes a string that has no strings or comments in it, and highlights
 * stuff like keywords, numbers and methods.
 *
 * The input is first passed through hsc() and prefixed with one space.
 * Each pass below then wraps matches in the temporary markers "<|...>" and
 * "|>"; only at the very end are these markers turned into "<span...>" and
 * "</span>", and the leading padding character is stripped again.
 *
 * Passes run in this order: language regexps, numbers, keywords, keyword
 * and number styles, object methods, brackets, symbols, regexp styles,
 * URL placeholders, span conversion.
 *
 * @param string The string to parse for keyword, numbers etc.
 * @return string The highlighted string (markers converted to spans)
 * @since 1.0.0
 * @access private
 * @todo BUGGY! Why? Why not build string and return?
 */
function parse_non_string_part($stuff_to_parse) {
    // Pad with one leading character; it is removed by the substr() in the
    // return statement. (Presumably so the look-behind patterns below always
    // have a preceding character to test - confirm.)
    $stuff_to_parse = ' ' . $this->hsc($stuff_to_parse);

    // Regular expressions
    foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
        if ($this->lexic_permissions['REGEXPS'][$key]) {
            if (is_array($regexp)) {
                // Array form: search pattern, replacement, modifiers and
                // before/after text are given separately.
                if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                    // produce valid HTML when we match multiple lines
                    // The callback reads its settings from the _hmr_* members,
                    // so they are set before the call and reset afterwards.
                    $this->_hmr_replace = $regexp[GESHI_REPLACE];
                    $this->_hmr_before = $regexp[GESHI_BEFORE];
                    $this->_hmr_key = $key;
                    $this->_hmr_after = $regexp[GESHI_AFTER];
                    $stuff_to_parse = preg_replace_callback(
                        "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}",
                        array($this, 'handle_multiline_regexps'),
                        $stuff_to_parse);
                    $this->_hmr_replace = false;
                    $this->_hmr_before = '';
                    $this->_hmr_after = '';
                } else {
                    $stuff_to_parse = preg_replace(
                        '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS],
                        $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER],
                        $stuff_to_parse);
                }
            } else {
                // Plain string form: the whole match is wrapped.
                if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                    // produce valid HTML when we match multiple lines
                    $this->_hmr_key = $key;
                    $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/",
                                          array($this, 'handle_multiline_regexps'), $stuff_to_parse);
                    $this->_hmr_key = '';
                } else {
                    $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
                }
            }
        }
    }

    // Highlight numbers. As of 1.0.8 we support different types of numbers
    $numbers_found = false;
    // Only run the number patterns when the text contains at least one digit.
    if ($this->lexic_permissions['NUMBERS'] && preg_match('#\d#', $stuff_to_parse )) {
        $numbers_found = true;

        //For each of the formats ...
        foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
            //Check if it should be highlighted ...
            // "/NUM!<id>/" is a placeholder; it is swapped for the real
            // style/class attribute further down.
            $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse);
        }
    }

    // Highlight keywords
    // Default look-behind / look-ahead character classes; they are left open
    // here so the quotemarks can be appended before the closing "])".
    $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#;>|^&";
    $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;";
    if ($this->lexic_permissions['STRINGS']) {
        $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/');
        $disallowed_before .= $quotemarks;
        $disallowed_after .= $quotemarks;
    }
    $disallowed_before .= "])";
    $disallowed_after .= "])";

    $parser_control_pergroup = false;
    if (isset($this->language_data['PARSER_CONTROL'])) {
        if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
            $x = 0; // check whether per-keyword-group parser_control is enabled
            if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
                // Language-wide override replaces the default entirely.
                $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
                ++$x;
            }
            if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
                $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
                ++$x;
            }
            // Any entries besides the two global overrides counted in $x
            // are treated as per-group settings.
            $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
        }
    }

    // if this is changed, don't forget to change it below
//        if (!empty($disallowed_before)) {
//            $disallowed_before = "(?<![$disallowed_before])";
//        }
//        if (!empty($disallowed_after)) {
//            $disallowed_after = "(?![$disallowed_after])";
//        }

    foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
        // A group with no permission entry at all is treated as enabled.
        if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
            $this->lexic_permissions['KEYWORDS'][$k]) {

            $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
            $modifiers = $case_sensitive ? '' : 'i';

            // NEW in 1.0.8 - per-keyword-group parser control
            $disallowed_before_local = $disallowed_before;
            $disallowed_after_local = $disallowed_after;
            if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) {
                if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
                    $disallowed_before_local =
                        $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
                }

                if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
                    $disallowed_after_local =
                        $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
                }
            }

            // Tells the replace callback which keyword group is being handled.
            $this->_kw_replace_group = $k;

            //NEW in 1.0.8, the cached regexp list
            // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
            for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set < $set_length; ++$set) {
                $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set];
                // Might make a more unique string for putting the number in soon
                // Basically, we don't put the styles in yet because then the styles themselves will
                // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
                $stuff_to_parse = preg_replace_callback(
                    "/$disallowed_before_local({$keywordset})(?!\<DOT\>(?:htm|php))$disallowed_after_local/$modifiers",
                    array($this, 'handle_keyword_replace'),
                    $stuff_to_parse
                );
            }
        }
    }

    //
    // Now that's all done, replace /[number]/ with the correct styles
    //
    // (The "<|/k/>" markers are presumably emitted by handle_keyword_replace,
    // which is not part of this function - confirm there.)
    foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
        if (!$this->use_classes) {
            $attributes = ' style="' .
                (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
                $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
        } else {
            $attributes = ' class="kw' . $k . '"';
        }
        $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
    }

    if ($numbers_found) {
        // Put number styles in
        foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
//Commented out for now, as this needs some review ...
//                if ($numbers_permissions & $id) {
                //Get the appropriate style ...
                    //Checking for unset styles is done by the style cache builder ...
                if (!$this->use_classes) {
                    $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"';
                } else {
                    $attributes = ' class="nu'.$id.'"';
                }

                //Set in the correct styles ...
                $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
//                }
        }
    }

    // Highlight methods and fields in objects
    if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
        // Defaults, each overridable through PARSER_CONTROL/OOLANG below.
        $oolang_spaces = "[\s]*";
        $oolang_before = "";
        $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
        if (isset($this->language_data['PARSER_CONTROL'])) {
            if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) {
                if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
                    $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
                }
                if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
                    $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
                }
                if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
                    $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
                }
            }
        }

        foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
            // Cheap substring test first; the regexp only runs if the
            // splitter occurs at all.
            if (false !== strpos($stuff_to_parse, $splitter)) {
                if (!$this->use_classes) {
                    $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
                } else {
                    $attributes = ' class="me' . $key . '"';
                }
                // Only the name after the splitter (group 4) is wrapped.
                $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse);
            }
        }
    }

    //
    // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
    // You try it, and see what happens ;)
    // TODO: Fix lexic permissions not converting entities if shouldn't
    // be highlighting regardless
    //
    if ($this->lexic_permissions['BRACKETS']) {
        $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'],
                          $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
    }


    //FIX for symbol highlighting ...
    if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
        //Get all matches and throw away those within a block that is already highlighted... (i.e. matched by a regexp)
        // The pattern matches, in order of preference: a complete "<|...>...|>"
        // block, a closing "</a>", or a run of symbols.
        $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . ")+/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
        // Running difference between offsets in the original string (as
        // captured above) and offsets in the string being rewritten.
        $global_offset = 0;
        for ($s_id = 0; $s_id < $n_symbols; ++$s_id) {
            $symbol_match = $pot_symbols[$s_id][0][0];
            if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
                // already highlighted blocks _must_ include either < or >
                // so if this conditional applies, we have to skip this match
                // BenBE: UNLESS the block contains <SEMI> or <PIPE>
                if(strpos($symbol_match, '<SEMI>') === false &&
                    strpos($symbol_match, '<PIPE>') === false) {
                    continue;
                }
            }

            // if we reach this point, we have a valid match which needs to be highlighted

            $symbol_length = strlen($symbol_match);
            $symbol_offset = $pot_symbols[$s_id][0][1];
            // Release the processed match entry.
            unset($pot_symbols[$s_id]);
            $symbol_end = $symbol_length + $symbol_offset;
            $symbol_hl = "";

            // if we have multiple styles, we have to handle them properly
            if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) {
                // -1 means "no span opened yet".
                $old_sym = -1;
                // Split the current stuff to replace into its atomic symbols ...
                preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER);
                foreach ($sym_match_syms[0] as $sym_ms) {
                    //Check if consecutive symbols belong to the same group to save output ...
                    if (isset($this->language_data['SYMBOL_DATA'][$sym_ms])
                        && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
                        // Group changed: close the previous span (if any)
                        // and open one for the new group.
                        if (-1 != $old_sym) {
                            $symbol_hl .= "|>";
                        }
                        $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms];
                        if (!$this->use_classes) {
                            $symbol_hl .= '<| style="' . $this->language_data['STYLES']['SYMBOLS'][$old_sym] . '">';
                        } else {
                            $symbol_hl .= '<| class="sy' . $old_sym . '">';
                        }
                    }
                    $symbol_hl .= $sym_ms;
                }
                unset($sym_match_syms);

                //Close remaining tags and insert the replacement at the right position ...
                //Take caution if symbol_hl is empty to avoid doubled closing spans.
                if (-1 != $old_sym) {
                    $symbol_hl .= "|>";
                }
            } else {
                // Single symbol group: everything uses style/class 0.
                if (!$this->use_classes) {
                    $symbol_hl = '<| style="' . $this->language_data['STYLES']['SYMBOLS'][0] . '">';
                } else {
                    $symbol_hl = '<| class="sy0">';
                }
                $symbol_hl .= $symbol_match . '|>';
            }

            $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);

            // since we replace old text with something of different size,
            // we'll have to keep track of the differences
            $global_offset += strlen($symbol_hl) - $symbol_length;
        }
    }
    //FIX for symbol highlighting ...

    // Add class/style for regexps
    foreach (array_keys($this->language_data['REGEXPS']) as $key) {
        if ($this->lexic_permissions['REGEXPS'][$key]) {
            if (is_callable($this->language_data['STYLES']['REGEXPS'][$key])) {
                // The "style" is a callable: hand each marked match to the
                // callback, which reads the key from _rx_key.
                $this->_rx_key = $key;
                $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
                    array($this, 'handle_regexps_callback'),
                    $stuff_to_parse);
            } else {
                if (!$this->use_classes) {
                    $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
                } else {
                    // A regexp definition may carry its own class name.
                    if (is_array($this->language_data['REGEXPS'][$key]) &&
                        array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) {
                        $attributes = ' class="' .
                            $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
                    } else {
                        $attributes = ' class="re' . $key . '"';
                    }
                }
                $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
            }
        }
    }

    // Replace <DOT> with . for urls
    $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
    // Replace <|UR1| with <a href= for urls also
    if (isset($this->link_styles[GESHI_LINK])) {
        if ($this->use_classes) {
            $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
        } else {
            $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
        }
    } else {
        $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
    }

    //
    // NOW we add the span thingy ;)
    //

    $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
    $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
    // Drop the padding character added at the top of this function.
    return substr($stuff_to_parse, 1);
}
| 3522 | |
/**
 * Sets the time taken to parse the code
 *
 * Both arguments are split on a single space and the two parts of each are
 * combined, i.e. they are expected in the two-part "fraction seconds" form
 * (presumably the string returned by microtime() - confirm at the callers).
 *
 * @param microtime The time when parsing started
 * @param microtime The time when parsing ended
 * @since 1.0.2
 * @access private
 */
function set_time($start_time, $end_time) {
    $start = explode(' ', $start_time);
    $end = explode(' ', $end_time);

    // Subtract like parts first. Adding the (large) whole-second value to
    // the sub-second fraction before subtracting would round away the
    // low-order digits of the fraction in floating point.
    $this->time = ($end[1] - $start[1]) + ($end[0] - $start[0]);
}
| 3536 | |
/**
 * Returns the parse duration stored in the time member
 *
 * @return double The time taken to parse the code
 * @since 1.0.2
 */
function get_time() {
    $elapsed = $this->time;
    return $elapsed;
}
| 3546 | |
/**
 * Recursively merges any number of arrays; values from later arrays
 * overwrite values from earlier ones
 *
 * If any argument is not an array, an E_USER_WARNING is raised and false
 * is returned.
 *
 * @since 1.0.8
 * @access private
 */
function merge_arrays() {
    $inputs = func_get_args();

    // Validate the arguments up front.
    // Comment this out if more performance is necessary (the merge loop
    // below will then trigger a warning if an argument is not an array).
    foreach ($inputs as $position => $candidate) {
        if (is_array($candidate)) {
            continue;
        }
        // array_merge_recursive returns nothing in this case as well
        trigger_error('Argument #' . ($position+1) . ' is not an array - trying to merge array with scalar! Returning false!', E_USER_WARNING);
        return false;
    }

    // The first array always forms the base of the result
    $merged = $inputs[0];

    // Fold every remaining array into the result, left to right
    foreach (array_slice($inputs, 1) as $overlay) {
        foreach ($overlay as $name => $item) {
            // An array value landing on an existing key is merged recursively.
            // If the existing value is not an array the recursive call raises
            // the E_USER_WARNING above and the entry becomes false.
            $merged[$name] = (is_array($item) && isset($merged[$name]))
                ? $this->merge_arrays($merged[$name], $item)
                : $item;
        }
    }

    return $merged;
}
| 3585 | |
/**
 * Gets language information and stores it for later use
 *
 * Includes the given language file (which is expected to fill
 * $language_data), stores the result, resets the lexic permissions for
 * keywords, single-line comments and regexps, applies any
 * PARSER_CONTROL/ENABLE_FLAGS, and finally merges in styles from an
 * optional "<name>.style.php" file next to the language file.
 *
 * Does nothing if $file_name is the language file that is already loaded.
 *
 * @param string The filename of the language file you want to load
 * @since 1.0.0
 * @access private
 * @todo Needs to load keys for lexic permissions for keywords, regexps etc
 */
function load_language($file_name) {
    if ($file_name == $this->loaded_language) {
        // this file is already loaded!
        return;
    }

    //Prepare some stuff before actually loading the language file
    $this->loaded_language = $file_name;
    $this->parse_cache_built = false;
    $this->enable_highlighting();
    $language_data = array();

    //Load the language file
    require $file_name;

    // Perhaps some checking might be added here later to check that
    // $language data is a valid thing but maybe not
    $this->language_data = $language_data;

    // Set strict mode if should be set
    $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES'];

    // Set permissions for all lexics to true
    // so they'll be highlighted by default
    // (keyword groups that are empty are switched off instead)
    foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
        if (!empty($this->language_data['KEYWORDS'][$key])) {
            $this->lexic_permissions['KEYWORDS'][$key] = true;
        } else {
            $this->lexic_permissions['KEYWORDS'][$key] = false;
        }
    }

    foreach (array_keys($this->language_data['COMMENT_SINGLE']) as $key) {
        $this->lexic_permissions['COMMENTS'][$key] = true;
    }
    foreach (array_keys($this->language_data['REGEXPS']) as $key) {
        $this->lexic_permissions['REGEXPS'][$key] = true;
    }

    // for BenBE and future code reviews:
    // we can use empty here since we only check for existence and emptiness of an array
    // if it is not an array at all but rather false or null this will work as intended as well
    // even if $this->language_data['PARSER_CONTROL'] is undefined this won't trigger a notice
    if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) {
        foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) {
            // Everything except GESHI_NEVER enables the lexic
            $perm = $value !== GESHI_NEVER;
            // Strict comparison on purpose: with ==, an integer key such as 0
            // compares equal to 'ALL' on PHP versions before 8 and would
            // switch all highlighting on or off by accident.
            if ($flag === 'ALL') {
                $this->enable_highlighting($perm);
                continue;
            }
            if (!isset($this->lexic_permissions[$flag])) {
                // unknown lexic permission
                continue;
            }
            if (is_array($this->lexic_permissions[$flag])) {
                // Grouped permission: apply the flag to every group
                foreach ($this->lexic_permissions[$flag] as $key => $val) {
                    $this->lexic_permissions[$flag][$key] = $perm;
                }
            } else {
                $this->lexic_permissions[$flag] = $perm;
            }
        }
        // The flags have been applied; drop them from the language data
        unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
    }

    //Fix: Problem where hardescapes weren't handled if no ESCAPE_CHAR was given
    //You need to set one for HARDESCAPES only in this case.
    if(!isset($this->language_data['HARDCHAR'])) {
        $this->language_data['HARDCHAR'] = $this->language_data['ESCAPE_CHAR'];
    }

    //NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults
    // The last four characters of the file name (the extension) are replaced
    $style_filename = substr($file_name, 0, -4) . '.style.php';
    if (is_readable($style_filename)) {
        //Clear any style_data that could have been set before ...
        if (isset($style_data)) {
            unset($style_data);
        }

        //Read the Style Information from the style file
        include $style_filename;

        //Apply the new styles to our current language styles
        if (isset($style_data) && is_array($style_data)) {
            $this->language_data['STYLES'] =
                $this->merge_arrays($this->language_data['STYLES'], $style_data);
        }
    }
}
| 3684 | |
/**
 * Takes the parsed code and various options, and creates the HTML
 * surrounding it to make it look nice.
 *
 * The argument is modified in place: on return it holds header(), the
 * per-line markup and footer(). Nothing is returned.
 *
 * Two layouts are produced:
 *  - line numbers on (and not the table header type): every line becomes
 *    an <li> containing a <div> or <pre>;
 *  - otherwise: lines are emitted one after another, wrapped in <span>s
 *    where fancy or extra highlighting applies; for the table header type
 *    with line numbers a separate cell holding the numbers is emitted first.
 *
 * @param string The code already parsed (reference!)
 * @since 1.0.0
 * @access private
 */
function finalise(&$parsed_code) {
    // Remove end parts of important declarations
    // This is BUGGY!! My fault for bad code: fix coming in 1.2
    // @todo Remove this crap
    if ($this->enable_important_blocks &&
        (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) {
        $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code);
    }

    // Add HTML whitespace stuff if we're using the <div> header
    if ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) {
        $this->indent($parsed_code);
    }

    // purge some unnecessary stuff
    // (spans that contain nothing but whitespace are reduced to that whitespace)
    /** NOTE: memorypeak #1 */
    $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);

    // If we are using IDs for line numbers, there needs to be an overall
    // ID set to prevent collisions.
    if ($this->add_ids && !$this->overall_id) {
        $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
    }

    // Get code into lines
    /** NOTE: memorypeak #2 */
    $code = explode("\n", $parsed_code);
    // From here on $parsed_code is rebuilt from scratch, line by line
    $parsed_code = $this->header();

    // If we're using line numbers, we insert <li>s and appropriate
    // markup to style them (otherwise we don't need to do anything)
    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
        // If we're using the <pre> header, we shouldn't add newlines because
        // the <pre> will line-break them (and the <li>s already do this for us)
        $ls = ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) ? "\n" : '';

        // Set vars to defaults for following loop
        $i = 0;

        // Foreach line...
        // ($i is incremented inside the body, once the 0-based work is done)
        for ($i = 0, $n = count($code); $i < $n;) {
            //Reset the attributes for a new line ...
            $attrs = array();

            // Make lines have at least one space in them if they're empty
            // BenBE: Checking emptiness using trim instead of relying on blanks
            if ('' == trim($code[$i])) {
                $code[$i] = ' ';
            }

            // If this is a "special line"...
            if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
                // Set the attributes to style the line
                if ($this->use_classes) {
                    //$attr = ' class="li2"';
                    $attrs['class'][] = 'li2';
                    $def_attr = ' class="de2"';
                } else {
                    //$attr = ' style="' . $this->line_style2 . '"';
                    $attrs['style'][] = $this->line_style2;
                    // This style "covers up" the special styles set for special lines
                    // so that styles applied to special lines don't apply to the actual
                    // code on that line
                    $def_attr = ' style="' . $this->code_style . '"';
                }
            } else {
                if ($this->use_classes) {
                    //$attr = ' class="li1"';
                    $attrs['class'][] = 'li1';
                    $def_attr = ' class="de1"';
                } else {
                    //$attr = ' style="' . $this->line_style1 . '"';
                    $attrs['style'][] = $this->line_style1;
                    $def_attr = ' style="' . $this->code_style . '"';
                }
            }

            //Check which type of tag to insert for this line
            if ($this->header_type == GESHI_HEADER_PRE_VALID) {
                $start = "<pre$def_attr>";
                $end = '</pre>';
            } else {
                // Span or div?
                $start = "<div$def_attr>";
                $end = '</div>';
            }

            // From here on $i is the 1-based line number
            ++$i;

            // Are we supposed to use ids? If so, add them
            if ($this->add_ids) {
                $attrs['id'][] = "$this->overall_id-$i";
            }

            //Is this some line with extra styles???
            if (in_array($i, $this->highlight_extra_lines)) {
                if ($this->use_classes) {
                    if (isset($this->highlight_extra_lines_styles[$i])) {
                        $attrs['class'][] = "lx$i";
                    } else {
                        $attrs['class'][] = "ln-xtra";
                    }
                } else {
                    array_push($attrs['style'], $this->get_line_style($i));
                }
            }

            // Add in the line surrounded by appropriate list HTML
            $attr_string = '';
            foreach ($attrs as $key => $attr) {
                $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
            }

            $parsed_code .= "<li$attr_string>$start{$code[$i-1]}$end</li>$ls";
            // Free the line as soon as it has been emitted
            unset($code[$i - 1]);
        }
    } else {
        $n = count($code);
        if ($this->use_classes) {
            $attributes = ' class="de1"';
        } else {
            $attributes = ' style="'. $this->code_style .'"';
        }
        if ($this->header_type == GESHI_HEADER_PRE_VALID) {
            $parsed_code .= '<pre'. $attributes .'>';
        } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                if ($this->use_classes) {
                    $attrs = ' class="ln"';
                } else {
                    $attrs = ' style="'. $this->table_linenumber_style .'"';
                }
                $parsed_code .= '<td'.$attrs.'><pre'.$attributes.'>';
                // get linenumbers
                // we don't merge it with the for below, since it should be better for
                // memory consumption this way
                // @todo: but... actually it would still be somewhat nice to merge the two loops
                //        the mem peaks are at different positions
                for ($i = 0; $i < $n; ++$i) {
                    // 1-based line number. Extra-line membership, the style
                    // lookup and the "lx" class must all use this same number,
                    // exactly as the <li> branch above does.
                    $line = $i + 1;
                    $close = 0;
                    // fancy lines
                    if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                        $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
                        // Set the attributes to style the line
                        if ($this->use_classes) {
                            $parsed_code .= '<span class="xtra li2"><span class="de2">';
                        } else {
                            // This style "covers up" the special styles set for special lines
                            // so that styles applied to special lines don't apply to the actual
                            // code on that line
                            $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
                                              .'<span style="' . $this->code_style .'">';
                        }
                        $close += 2;
                    }
                    //Is this some line with extra styles???
                    if (in_array($line, $this->highlight_extra_lines)) {
                        if ($this->use_classes) {
                            if (isset($this->highlight_extra_lines_styles[$line])) {
                                $parsed_code .= "<span class=\"xtra lx$line\">";
                            } else {
                                $parsed_code .= "<span class=\"xtra ln-xtra\">";
                            }
                        } else {
                            $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($line) . "\">";
                        }
                        ++$close;
                    }
                    $parsed_code .= $this->line_numbers_start + $i;
                    if ($close) {
                        $parsed_code .= str_repeat('</span>', $close);
                    } else if ($i != $n) {
                        // NOTE(review): $i < $n always holds inside this loop,
                        // so this condition is always true and a newline also
                        // follows the last number (the code loop below tests
                        // $i + 1 < $n instead). Left unchanged - confirm
                        // whether the trailing newline is intended.
                        $parsed_code .= "\n";
                    }
                }
                $parsed_code .= '</pre></td><td'.$attributes.'>';
            }
            $parsed_code .= '<pre'. $attributes .'>';
        }
        // No line numbers, but still need to handle highlighting lines extra.
        // Have to use divs so the full width of the code is highlighted
        $close = 0;
        for ($i = 0; $i < $n; ++$i) {
            // 1-based line number, see the note in the line-number loop above
            $line = $i + 1;
            // Make lines have at least one space in them if they're empty
            // BenBE: Checking emptiness using trim instead of relying on blanks
            if ('' == trim($code[$i])) {
                $code[$i] = ' ';
            }
            // fancy lines
            if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
                // Set the attributes to style the line
                if ($this->use_classes) {
                    $parsed_code .= '<span class="xtra li2"><span class="de2">';
                } else {
                    // This style "covers up" the special styles set for special lines
                    // so that styles applied to special lines don't apply to the actual
                    // code on that line
                    $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
                                      .'<span style="' . $this->code_style .'">';
                }
                $close += 2;
            }
            //Is this some line with extra styles???
            if (in_array($line, $this->highlight_extra_lines)) {
                if ($this->use_classes) {
                    if (isset($this->highlight_extra_lines_styles[$line])) {
                        $parsed_code .= "<span class=\"xtra lx$line\">";
                    } else {
                        $parsed_code .= "<span class=\"xtra ln-xtra\">";
                    }
                } else {
                    $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($line) . "\">";
                }
                ++$close;
            }

            $parsed_code .= $code[$i];

            if ($close) {
                // The wrapping spans separate the lines, so no newline is added
                $parsed_code .= str_repeat('</span>', $close);
                $close = 0;
            }
            elseif ($i + 1 < $n) {
                $parsed_code .= "\n";
            }
            // Free the line as soon as it has been emitted
            unset($code[$i]);
        }

        if ($this->header_type == GESHI_HEADER_PRE_VALID || $this->header_type == GESHI_HEADER_PRE_TABLE) {
            $parsed_code .= '</pre>';
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            $parsed_code .= '</td>';
        }
    }

    $parsed_code .= $this->footer();
}
| 3933 | |
/**
 * Builds the opening markup for the highlighted code block.
 *
 * The container element and its attributes are chosen from the configured
 * header type and from whether line numbers are enabled. Custom header
 * content, if any, has its keywords replaced and is emitted right after
 * the opening container tag.
 *
 * @return string The header for the code block
 * @since 1.0.0
 * @access private
 */
function header() {
    $numbered = ($this->line_numbers != GESHI_NO_LINE_NUMBERS);
    $code_block_open = $this->force_code_block ? '<div>' : '';

    /**
     * @todo Document behaviour change - class is outputted regardless of whether
     * we're using classes or not. Same with style
     */
    $class_names = $this->language;
    if ($this->overall_class != '') {
        $class_names .= ' ' . $this->overall_class;
    }
    $attributes = ' class="' . $class_names . '"';
    if ($this->overall_id != '') {
        $attributes .= ' id="' . $this->overall_id . '"';
    }
    if ($this->overall_style != '') {
        $attributes .= " style=\"{$this->overall_style}\"";
    }

    // A start attribute is only emitted when numbering does not begin at 1
    $ol_attributes = ($this->line_numbers_start != 1)
        ? ' start="' . $this->line_numbers_start . '"'
        : '';

    // Render the custom header content, if there is any
    $header = $this->header_content;
    if ($header) {
        $strip_newlines = $this->header_type == GESHI_HEADER_PRE
            || $this->header_type == GESHI_HEADER_PRE_VALID;
        if ($strip_newlines) {
            $header = str_replace("\n", '', $header);
        }
        $header = $this->replace_keywords($header);

        $attr = $this->use_classes
            ? ' class="head"'
            : ' style="' . $this->header_content_style . '"';

        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $numbered) {
            $header = '<thead><tr><td colspan="2" ' . $attr . '>' . $header . '</td></tr></thead>';
        } else {
            $header = '<div' . $attr . '>' . $header . '</div>';
        }
    }

    // No container at all: the attributes go onto the list instead
    if (GESHI_HEADER_NONE == $this->header_type) {
        return $numbered
            ? $header . '<ol' . $attributes . $ol_attributes . '>'
            : $header . $code_block_open;
    }

    // Without line numbers everything except <pre> is wrapped in a <div>
    if (!$numbered) {
        $tag = ($this->header_type == GESHI_HEADER_PRE) ? 'pre' : 'div';
        return '<' . $tag . $attributes . '>' . $header . $code_block_open;
    }

    // Line numbers are enabled: pick the container for the header type
    if ($this->header_type == GESHI_HEADER_PRE) {
        return '<pre' . $attributes . '>' . $header . '<ol' . $ol_attributes . '>';
    }
    if ($this->header_type == GESHI_HEADER_DIV ||
        $this->header_type == GESHI_HEADER_PRE_VALID) {
        return '<div' . $attributes . '>' . $header . '<ol' . $ol_attributes . '>';
    }
    if ($this->header_type == GESHI_HEADER_PRE_TABLE) {
        return '<table' . $attributes . '>' . $header . '<tbody><tr class="li1">';
    }

    // Any other header type with line numbers yields no value
    return null;
}
| 4013 | |
/**
 * Returns the footer for the code block.
 *
 * Renders the optional custom footer content (with its keywords replaced)
 * and appends the closing tags that match the header type / line number
 * combination in use.
 *
 * @return string The footer for the code block
 * @since 1.0.0
 * @access private
 */
function footer() {
    $footer = $this->footer_content;
    if ($footer) {
        if ($this->header_type == GESHI_HEADER_PRE) {
            // Newlines are removed from the footer text for the <pre> header type
            $footer = str_replace("\n", '', $footer);
        }
        $footer = $this->replace_keywords($footer);

        // Either reference the "foot" class or inline the footer style
        if ($this->use_classes) {
            $attr = ' class="foot"';
        } else {
            $attr = " style=\"{$this->footer_content_style}\"";
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            // The footer cell carries the class/style just like the header
            // cell does; otherwise the ".foot" stylesheet rule and the
            // inline footer style would never apply in table mode.
            $footer = "<tfoot><tr><td colspan=\"2\"$attr>$footer</td></tr></tfoot>";
        } else {
            $footer = "<div$attr>$footer</div>";
        }
    }

    // No container: only the list (if any) has to be closed
    if (GESHI_HEADER_NONE == $this->header_type) {
        return ($this->line_numbers != GESHI_NO_LINE_NUMBERS) ? '</ol>' . $footer : $footer;
    }

    if ($this->header_type == GESHI_HEADER_DIV || $this->header_type == GESHI_HEADER_PRE_VALID) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</ol>$footer</div>";
        }
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</div>";
    }
    elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</tr></tbody>$footer</table>";
        }
        // Without line numbers the table header type falls back to a <div>
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</div>";
    }
    else {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</ol>$footer</pre>";
        }
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</pre>";
    }
}
| 4067 | |
/**
 * Substitutes the placeholders allowed in header and footer content.
 *
 * Each placeholder may be written either as <NAME> or as {NAME}:
 *  - TIME:     the value of get_time(), formatted with three decimals
 *  - LANGUAGE: the LANG_NAME entry of the language data
 *  - VERSION:  the GESHI_VERSION constant
 *  - SPEED:    source length divided by the time, shown in B/s or KB/s,
 *              or "N/A" when the time is not positive
 *
 * @param string The header or footer content to do replacement on
 * @return string The header or footer with replaced keywords
 * @since 1.0.2
 * @access private
 */
function replace_keywords($instr) {
    $elapsed = $this->get_time();
    $time_text = number_format($elapsed, 3);
    $language_name = $this->language_data['LANG_NAME'];

    // Throughput in bytes per second, scaled to KB/s from 1024 upwards
    if ($elapsed <= 0) {
        $speed_text = 'N/A';
    } else {
        $bytes_per_second = strlen($this->source) / $elapsed;
        $speed_text = ($bytes_per_second >= 1024)
            ? sprintf("%.2f KB/s", $bytes_per_second / 1024.0)
            : sprintf("%.0f B/s", $bytes_per_second);
    }

    // Both arrays are index-aligned; str_replace() applies them in order
    $search = array(
        '<TIME>', '{TIME}',
        '<LANGUAGE>', '{LANGUAGE}',
        '<VERSION>', '{VERSION}',
        '<SPEED>', '{SPEED}'
    );
    $replace = array(
        $time_text, $time_text,
        $language_name, $language_name,
        GESHI_VERSION, GESHI_VERSION,
        $speed_text, $speed_text
    );

    return str_replace($search, $replace, $instr);
}
| 4108 | |
/**
 * Secure replacement for PHP built-in function htmlspecialchars().
 *
 * See ticket #427 (http://wush.net/trac/wikka/ticket/427) for the rationale
 * for this replacement function.
 *
 * The INTERFACE for this function is almost the same as that for
 * htmlspecialchars(), with the same default for quote style; however, there
 * is no 'charset' parameter. The reason for this is as follows:
 *
 * The PHP docs say:
 *      "The third argument charset defines character set used in conversion."
 *
 * I suspect PHP's htmlspecialchars() is working at the byte-value level and
 * thus _needs_ to know (or assume) a character set because the special
 * characters to be replaced could exist at different code points in
 * different character sets. (If indeed htmlspecialchars() works at
 * byte-value level that goes some way towards explaining why the
 * vulnerability would exist in this function, too, and not only in
 * htmlentities() which certainly is working at byte-value level.)
 *
 * This replacement function however works at character level and should
 * therefore be "immune" to character set differences - so no charset
 * parameter is needed or provided. If a third parameter is passed, it will
 * be silently ignored.
 *
 * In the OUTPUT there is a minor difference in that we use '&#39;' instead
 * of PHP's '&#039;' for a single quote: this provides compatibility with
 *      get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES)
 * (see comment by mikiwoz at yahoo dot co dot uk on
 * http://php.net/htmlspecialchars); it also matches the entity definition
 * for XML 1.0
 * (http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters).
 * Like PHP we use a numeric character reference instead of '&apos;' for the
 * single quote. For the other special characters we use the named entity
 * references, as PHP is doing.
 *
 * In addition to the HTML special characters, ';' and '|' are replaced by
 * the internal markers '<SEMI>' and '<PIPE>' (see the notes in the body).
 *
 * @author      {@link http://wikkawiki.org/JavaWoman Marjolein Katsma}
 *
 * @license     http://www.gnu.org/copyleft/lgpl.html
 *              GNU Lesser General Public License
 * @copyright   Copyright 2007, {@link http://wikkawiki.org/CreditsPage
 *              Wikka Development Team}
 *
 * @access      private
 * @param       string  $string string to be converted
 * @param       integer $quote_style
 *                      - ENT_COMPAT:   escapes &, <, > and double quote (default)
 *                      - ENT_NOQUOTES: escapes only &, < and >
 *                      - ENT_QUOTES:   escapes &, <, >, double and single quotes
 * @return      string  converted string
 * @since       1.0.7.18
 */
function hsc($string, $quote_style = ENT_COMPAT) {
    // Base translation table (ENT_COMPAT set); built once and never modified
    static $aTransSpecchar = array(
        '&' => '&amp;',
        '"' => '&quot;',
        '<' => '&lt;',
        '>' => '&gt;',

        //This fix is related to SF#1923020, but has to be applied
        //regardless of actually highlighting symbols.

        //Circumvent a bug with symbol highlighting
        //This is required as ; would produce undesirable side-effects if it
        //was not to be processed as an entity.
        ';' => '<SEMI>', // Force ; to be processed as entity
        '|' => '<PIPE>' // Force | to be processed as entity
        );

    // Adjust a per-call copy: changing the static table itself would make
    // one ENT_NOQUOTES / ENT_QUOTES call alter the result of all later calls.
    $aTrans = $aTransSpecchar;
    switch ($quote_style) {
        case ENT_NOQUOTES: // don't convert double quotes
            unset($aTrans['"']);
            break;
        case ENT_QUOTES: // convert single quotes as well
            $aTrans["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
            break;
    }

    // return translated string
    return strtr($string, $aTrans);
}
| 4192 | |
/**
 * Builds the CSS stylesheet that belongs to the highlighted code.
 *
 * With economy mode enabled only the declarations that matter for the
 * current configuration (enabled lexics, active line numbering, ...) are
 * written; otherwise every non-empty style of the language is included.
 *
 * @param boolean Whether to use economy mode or not
 * @return string A stylesheet built on the data for the current language
 * @since 1.0.0
 */
function get_stylesheet($economy_mode = true) {
    // If there's an error, chances are that the language file
    // won't have populated the language data, so no stylesheet
    if ($this->error) {
        return '';
    }

    // Make sure the style rearrangements have been processed
    if (!isset($this->language_data['NUMBERS_CACHE'])) {
        $this->build_style_cache();
    }

    // Selector prefix: the id wins; otherwise language class plus the
    // optional overall class. The trailing space makes it a descendant
    // prefix for every rule below.
    if ($this->overall_id) {
        $selector = '#' . $this->overall_id;
    } else {
        $selector = '.' . $this->language;
        if ($this->overall_class) {
            $selector .= '.' . $this->overall_class;
        }
    }
    $selector .= ' ';

    // Stylesheet banner (short form in economy mode)
    $credits = " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n" .
        " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n";
    if ($economy_mode) {
        $stylesheet = "/**\n" . $credits . " */\n";
    } else {
        $stylesheet = "/**\n" .
            " * GeSHi Dynamically Generated Stylesheet\n" .
            " * --------------------------------------\n" .
            " * Dynamically generated stylesheet for {$this->language}\n" .
            " * CSS class: {$this->overall_class}, CSS id: {$this->overall_id}\n" .
            $credits .
            " * --------------------------------------\n" .
            " */\n";
    }

    // Rules tied to line numbering are always written outside economy mode
    $line_rules = !$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS;

    // Default styles for the code lines
    if ($line_rules) {
        $stylesheet .= $selector . '.de1, ' . $selector . '.de2 {' . $this->code_style . "}\n";
    }

    // Overall styles (empty styles are meaningless, economy mode or not)
    if ($this->overall_style != '') {
        $stylesheet .= $selector . ' {' . $this->overall_style . "}\n";
    }

    // Link styles: one rule per link state that has a style
    foreach ($this->link_styles as $key => $style) {
        if ($style == '') {
            continue;
        }
        $pseudo = null;
        switch ($key) {
            case GESHI_LINK:
                $pseudo = 'link';
                break;
            case GESHI_HOVER:
                $pseudo = 'hover';
                break;
            case GESHI_ACTIVE:
                $pseudo = 'active';
                break;
            case GESHI_VISITED:
                $pseudo = 'visited';
                break;
        }
        if ($pseudo !== null) {
            $stylesheet .= $selector . 'a:' . $pseudo . ' {' . $style . "}\n";
        }
    }

    // Header, footer and "important" styles
    if ($this->header_content_style != '') {
        $stylesheet .= $selector . '.head {' . $this->header_content_style . "}\n";
    }
    if ($this->footer_content_style != '') {
        $stylesheet .= $selector . '.foot {' . $this->footer_content_style . "}\n";
    }
    if ($this->important_styles != '') {
        $stylesheet .= $selector . '.imp {' . $this->important_styles . "}\n";
    }

    // Line number styles
    if ($line_rules && $this->line_style1 != '') {
        $stylesheet .= $selector . 'li, ' . $selector . '.li1 {' . $this->line_style1 . "}\n";
    }
    if ($line_rules && $this->table_linenumber_style != '') {
        $stylesheet .= $selector . '.ln {' . $this->table_linenumber_style . "}\n";
    }
    // The second line style is only relevant for fancy line numbers
    if ((!$economy_mode || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS)
        && $this->line_style2 != '') {
        $stylesheet .= $selector . '.li2 {' . $this->line_style2 . "}\n";
    }

    // Keyword groups: permissions are tracked per group
    foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
        if ($styles == '') {
            continue;
        }
        if ($economy_mode && empty($this->lexic_permissions['KEYWORDS'][$group])) {
            continue;
        }
        $stylesheet .= $selector . '.kw' . $group . ' {' . $styles . "}\n";
    }

    // Comment groups: also kept when a comment regexp exists for the group
    foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
        $wanted = !$economy_mode
            || !empty($this->lexic_permissions['COMMENTS'][$group])
            || (!empty($this->language_data['COMMENT_REGEXP'])
                && !empty($this->language_data['COMMENT_REGEXP'][$group]));
        if ($styles != '' && $wanted) {
            $stylesheet .= $selector . '.co' . $group . ' {' . $styles . "}\n";
        }
    }

    // Lexics with a single on/off permission.
    // STYLES key => array(CSS class prefix, whether the 'HARD' group is
    // written as '_h' - needed since 1.0.8 for hard escapes / hard quotes)
    $flat_lexics = array(
        'ESCAPE_CHAR' => array('es', true),
        'BRACKETS'    => array('br', false),
        'SYMBOLS'     => array('sy', false),
        'STRINGS'     => array('st', true),
        'NUMBERS'     => array('nu', false),
        'METHODS'     => array('me', false)
    );
    foreach ($flat_lexics as $lexic => $info) {
        list($prefix, $maps_hard_group) = $info;
        foreach ($this->language_data['STYLES'][$lexic] as $group => $styles) {
            if ($styles == '' || ($economy_mode && !$this->lexic_permissions[$lexic])) {
                continue;
            }
            if ($maps_hard_group && $group === 'HARD') {
                $group = '_h';
            }
            $stylesheet .= $selector . '.' . $prefix . $group . ' {' . $styles . "}\n";
        }
    }

    // Script styles are written whenever they are non-empty
    foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
        if ($styles != '') {
            $stylesheet .= $selector . '.sc' . $group . ' {' . $styles . "}\n";
        }
    }

    // Regexp groups: a regexp may name its own CSS class via GESHI_CLASS
    foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
        if ($styles == '') {
            continue;
        }
        if ($economy_mode && empty($this->lexic_permissions['REGEXPS'][$group])) {
            continue;
        }
        $regexp = $this->language_data['REGEXPS'][$group];
        if (is_array($regexp) && array_key_exists(GESHI_CLASS, $regexp)) {
            $class_name = $regexp[GESHI_CLASS];
        } else {
            $class_name = 're' . $group;
        }
        $stylesheet .= $selector . '.' . $class_name . ' {' . $styles . "}\n";
    }

    // Styles for lines being highlighted extra. In economy mode the shared
    // rule is skipped when the number of per-line styles equals the number
    // of extra lines.
    if (!$economy_mode
        || count($this->highlight_extra_lines) != count($this->highlight_extra_lines_styles)) {
        $stylesheet .= $selector . '.ln-xtra, ' . $selector . 'li.ln-xtra, ' .
            $selector . 'div.ln-xtra {' . $this->highlight_extra_lines_style . "}\n";
    }
    $stylesheet .= $selector . "span.xtra { display:block; }\n";
    foreach ($this->highlight_extra_lines_styles as $lineid => $linestyle) {
        $stylesheet .= $selector . '.lx' . $lineid . ', ' . $selector . 'li.lx' . $lineid . ', ' .
            $selector . 'div.lx' . $lineid . ' {' . $linestyle . "}\n";
    }

    return $stylesheet;
}
| 4398 | |
/**
 * Gets the style that is used for the specified extra-highlighted line.
 *
 * A style registered for that particular line takes precedence; otherwise
 * the general style for extra-highlighted lines is returned.
 *
 * @param int The line number information is requested for
 * @return string The style to use for that line
 * @access private
 * @since 1.0.7.21
 */
function get_line_style($line) {
    return isset($this->highlight_extra_lines_styles[$line])
        ? $this->highlight_extra_lines_styles[$line]
        : $this->highlight_extra_lines_style;
}
| 4418 /** | |
| 4419 * this functions creates an optimized regular expression list | |
| 4420 * of an array of strings. | |
| 4421 * | |
| 4422 * Example: | |
| 4423 * <code>$list = array('faa', 'foo', 'foobar'); | |
| 4424 * => string 'f(aa|oo(bar)?)'</code> | |
| 4425 * | |
| 4426 * @param $list array of (unquoted) strings | |
| 4427 * @param $regexp_delimiter your regular expression delimiter, @see preg_quot
e() | |
| 4428 * @return string for regular expression | |
| 4429 * @author Milian Wolff <mail@milianw.de> | |
| 4430 * @since 1.0.8 | |
| 4431 * @access private | |
| 4432 */ | |
| 4433 function optimize_regexp_list($list, $regexp_delimiter = '/') { | |
| 4434 $regex_chars = array('.', '\\', '+', '*', '?', '[', '^', ']', '$', | |
| 4435 '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', $regexp_delimiter)
; | |
| 4436 sort($list); | |
| 4437 $regexp_list = array(''); | |
| 4438 $num_subpatterns = 0; | |
| 4439 $list_key = 0; | |
| 4440 | |
| 4441 // the tokens which we will use to generate the regexp list | |
| 4442 $tokens = array(); | |
| 4443 $prev_keys = array(); | |
| 4444 // go through all entries of the list and generate the token list | |
| 4445 $cur_len = 0; | |
| 4446 for ($i = 0, $i_max = count($list); $i < $i_max; ++$i) { | |
| 4447 if ($cur_len > GESHI_MAX_PCRE_LENGTH) { | |
| 4448 // seems like the length of this pcre is growing exorbitantly | |
| 4449 $regexp_list[++$list_key] = $this->_optimize_regexp_list_tokens_
to_string($tokens); | |
| 4450 $num_subpatterns = substr_count($regexp_list[$list_key], '(?:'); | |
| 4451 $tokens = array(); | |
| 4452 $cur_len = 0; | |
| 4453 } | |
| 4454 $level = 0; | |
| 4455 $entry = preg_quote((string) $list[$i], $regexp_delimiter); | |
| 4456 $pointer = &$tokens; | |
| 4457 // properly assign the new entry to the correct position in the toke
n array | |
| 4458 // possibly generate smaller common denominator keys | |
| 4459 while (true) { | |
| 4460 // get the common denominator | |
| 4461 if (isset($prev_keys[$level])) { | |
| 4462 if ($prev_keys[$level] == $entry) { | |
| 4463 // this is a duplicate entry, skip it | |
| 4464 continue 2; | |
| 4465 } | |
| 4466 $char = 0; | |
| 4467 while (isset($entry[$char]) && isset($prev_keys[$level][$cha
r]) | |
| 4468 && $entry[$char] == $prev_keys[$level][$char]) { | |
| 4469 ++$char; | |
| 4470 } | |
| 4471 if ($char > 0) { | |
| 4472 // this entry has at least some chars in common with the
current key | |
| 4473 if ($char == strlen($prev_keys[$level])) { | |
| 4474 // current key is totally matched, i.e. this entry h
as just some bits appended | |
| 4475 $pointer = &$pointer[$prev_keys[$level]]; | |
| 4476 } else { | |
| 4477 // only part of the keys match | |
| 4478 $new_key_part1 = substr($prev_keys[$level], 0, $char
); | |
| 4479 $new_key_part2 = substr($prev_keys[$level], $char); | |
| 4480 | |
| 4481 if (in_array($new_key_part1[0], $regex_chars) | |
| 4482 || in_array($new_key_part2[0], $regex_chars)) { | |
| 4483 // this is bad, a regex char as first character | |
| 4484 $pointer[$entry] = array('' => true); | |
| 4485 array_splice($prev_keys, $level, count($prev_key
s), $entry); | |
| 4486 $cur_len += strlen($entry); | |
| 4487 continue; | |
| 4488 } else { | |
| 4489 // relocate previous tokens | |
| 4490 $pointer[$new_key_part1] = array($new_key_part2
=> $pointer[$prev_keys[$level]]); | |
| 4491 unset($pointer[$prev_keys[$level]]); | |
| 4492 $pointer = &$pointer[$new_key_part1]; | |
| 4493 // recreate key index | |
| 4494 array_splice($prev_keys, $level, count($prev_key
s), array($new_key_part1, $new_key_part2)); | |
| 4495 $cur_len += strlen($new_key_part2); | |
| 4496 } | |
| 4497 } | |
| 4498 ++$level; | |
| 4499 $entry = substr($entry, $char); | |
| 4500 continue; | |
| 4501 } | |
| 4502 // else: fall trough, i.e. no common denominator was found | |
| 4503 } | |
| 4504 if ($level == 0 && !empty($tokens)) { | |
| 4505 // we can dump current tokens into the string and throw them
away afterwards | |
| 4506 $new_entry = $this->_optimize_regexp_list_tokens_to_string($
tokens); | |
| 4507 $new_subpatterns = substr_count($new_entry, '(?:'); | |
| 4508 if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + $new_su
bpatterns > GESHI_MAX_PCRE_SUBPATTERNS) { | |
| 4509 $regexp_list[++$list_key] = $new_entry; | |
| 4510 $num_subpatterns = $new_subpatterns; | |
| 4511 } else { | |
| 4512 if (!empty($regexp_list[$list_key])) { | |
| 4513 $new_entry = '|' . $new_entry; | |
| 4514 } | |
| 4515 $regexp_list[$list_key] .= $new_entry; | |
| 4516 $num_subpatterns += $new_subpatterns; | |
| 4517 } | |
| 4518 $tokens = array(); | |
| 4519 $cur_len = 0; | |
| 4520 } | |
| 4521 // no further common denominator found | |
| 4522 $pointer[$entry] = array('' => true); | |
| 4523 array_splice($prev_keys, $level, count($prev_keys), $entry); | |
| 4524 | |
| 4525 $cur_len += strlen($entry); | |
| 4526 break; | |
| 4527 } | |
| 4528 unset($list[$i]); | |
| 4529 } | |
| 4530 // make sure the last tokens get converted as well | |
| 4531 $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens); | |
| 4532 if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + substr_count($new_e
ntry, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) { | |
| 4533 $regexp_list[++$list_key] = $new_entry; | |
| 4534 } else { | |
| 4535 if (!empty($regexp_list[$list_key])) { | |
| 4536 $new_entry = '|' . $new_entry; | |
| 4537 } | |
| 4538 $regexp_list[$list_key] .= $new_entry; | |
| 4539 } | |
| 4540 return $regexp_list; | |
| 4541 } | |
/**
 * This function creates the appropriate regexp string of a token array.
 * You should not call this function directly, @see $this->optimize_regexp_list().
 *
 * Every token becomes one alternative; its sub tokens are rendered
 * recursively inside a non-capturing group, which is made optional when
 * the token on its own is a complete entry (marked by the '' key).
 *
 * @param &$tokens array of tokens
 * @param $recursed bool to know whether we recursed or not
 * @return string
 * @author Milian Wolff <mail@milianw.de>
 * @since 1.0.8
 * @access private
 */
function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) {
    $list = '';
    foreach ($tokens as $token => $sub_tokens) {
        $list .= $token;
        // the '' key marks that the token itself ends an entry
        $close_entry = isset($sub_tokens['']);
        unset($sub_tokens['']);
        if (!empty($sub_tokens)) {
            $list .= '(?:' . $this->_optimize_regexp_list_tokens_to_string($sub_tokens, true) . ')';
            if ($close_entry) {
                // make sub_tokens optional
                $list .= '?';
            }
        }
        $list .= '|';
    }
    if (!$recursed) {
        // do some optimizations (top level only)
        // NOTE: merging common trailing strings was tried here as well, but
        // that optimization was buggy and is intentionally not performed.

        // (?:p)? => p?
        $list = preg_replace('#\(\?\:(.)\)\?#', '\1?', $list);
        // (?:a|b|c|d|...)? => [abcd...]?
        // TODO: a|bb|c => [ac]|bb
        // A method callback is used instead of create_function(), which is
        // deprecated as of PHP 7.2 and no longer exists in PHP 8.
        $list = preg_replace_callback('#\(\?\:((?:.\|)+.)\)#',
            array($this, '_optimize_regexp_list_chars_to_class'), $list);
    }
    // return $list without trailing pipe
    return substr($list, 0, -1);
}

/**
 * preg_replace_callback() helper for _optimize_regexp_list_tokens_to_string():
 * turns the captured alternation of single characters ("a|b|c") into a
 * character class ("[abc]").
 *
 * @param $matches array match data; index 1 holds the alternation
 * @return string the character class
 * @access private
 */
function _optimize_regexp_list_chars_to_class($matches) {
    return "[" . str_replace("|", "", $matches[1]) . "]";
}
| 4587 } // End Class GeSHi | |
| 4588 | |
| 4589 | |
if (!function_exists('geshi_highlight')) {
    /**
     * Easy way to highlight stuff. Behaves just like highlight_string
     *
     * The code is highlighted without a surrounding header element and
     * wrapped in a <code> tag.
     *
     * @param string The code to highlight
     * @param string The language to highlight the code in
     * @param string The path to the language files. You can leave this blank,
     *               as from version 1.0.7 the path should be automatically detected
     * @param boolean Whether to return the result or to echo
     * @return string The code highlighted (if $return is true); otherwise
     *                the markup is echoed and a boolean is returned: false
     *                if the highlighter reports an error, true if not
     * @since 1.0.2
     */
    function geshi_highlight($string, $language, $path = null, $return = false) {
        $highlighter = new GeSHi($string, $language, $path);
        $highlighter->set_header_type(GESHI_HEADER_NONE);

        $markup = '<code>' . $highlighter->parse_code() . '</code>';
        if ($return) {
            return $markup;
        }

        echo $markup;

        // Echo mode reports success or failure instead of the markup
        return !$highlighter->error();
    }
}
| 4618 | |
| 4619 ?> | |
| OLD | NEW |