| OLD | NEW |
| (Empty) |
| 1 /* This is JavaScriptCore's variant of the PCRE library. While this library | |
| 2 started out as a copy of PCRE, many of the features of PCRE have been | |
| 3 removed. This library now supports only the regular expression features | |
| 4 required by the JavaScript language specification, and has only the functions | |
| 5 needed by JavaScriptCore and the rest of WebKit. | |
| 6 | |
| 7 Originally written by Philip Hazel | |
| 8 Copyright (c) 1997-2006 University of Cambridge | |
| 9 Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved. | |
| 10 | |
| 11 ----------------------------------------------------------------------------- | |
| 12 Redistribution and use in source and binary forms, with or without | |
| 13 modification, are permitted provided that the following conditions are met: | |
| 14 | |
| 15 * Redistributions of source code must retain the above copyright notice, | |
| 16 this list of conditions and the following disclaimer. | |
| 17 | |
| 18 * Redistributions in binary form must reproduce the above copyright | |
| 19 notice, this list of conditions and the following disclaimer in the | |
| 20 documentation and/or other materials provided with the distribution. | |
| 21 | |
| 22 * Neither the name of the University of Cambridge nor the names of its | |
| 23 contributors may be used to endorse or promote products derived from | |
| 24 this software without specific prior written permission. | |
| 25 | |
| 26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
| 27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |
| 30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
| 31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
| 32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
| 33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
| 34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
| 35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
| 36 POSSIBILITY OF SUCH DAMAGE. | |
| 37 ----------------------------------------------------------------------------- | |
| 38 */ | |
| 39 | |
| 40 /************************************************* | |
| 41 * Unicode Property Table handler * | |
| 42 *************************************************/ | |
| 43 | |
| 44 /* Internal header file defining the layout of the bits in each pair of 32-bit | |
| 45 words that form a data item in the table. */ | |
| 46 | |
/* One entry of the property table: a pair of unsigned words. The f0_* and
   f1_* masks and shifts defined in this header pick the individual fields
   out of each word. */
typedef struct cnode cnode;

struct cnode {
    unsigned int f0; /* script number, range flag and code point */
    unsigned int f1; /* character type and range/"other case" offset */
};
| 51 | |
/* Things for the f0 field */

/* The range flag is the single 0x00800000 bit. It must not share any bit with
   f0_charmask: a wider value such as 0x00f00000 also covers bit 0x00100000,
   which is set in every code point from 0x100000 upwards, so single-character
   entries in that area would be mistaken for range entries. */

#define f0_scriptmask  0xff000000  /* Mask for script field */
#define f0_scriptshift 24          /* Shift for script value */
#define f0_rangeflag   0x00800000  /* Flag for a range item */
#define f0_charmask    0x001fffff  /* Mask for code point value */
| 58 | |
/* Field layout of the f1 word */

#define f1_typemask  0xfc000000  /* top 6 bits: character type number */
#define f1_typeshift 26          /* right shift that brings the type down to bit 0 */
#define f1_rangemask 0x0000ffff  /* low 16 bits, read as an unsigned range offset */
#define f1_casemask  0x0000ffff  /* low 16 bits, read as a signed "other case" offset */
#define f1_caseneg   0xffff8000  /* bit 15 (sign bit of the case offset) and all bits
                                    above it; presumably OR-ed in to sign-extend a
                                    negative offset -- confirm against the lookup code */
| 66 | |
| 67 /* The data consists of a vector of structures of type cnode. The two unsigned | |
| 68 32-bit integers are used as follows: | |
| 69 | |
| 70 (f0) (1) The most significant byte holds the script number. The numbers are | |
| 71 defined by the enum in ucp.h. | |
| 72 | |
| 73 (2) The 0x00800000 bit is set if this entry defines a range of characters. | |
| 74 It is not set if this entry defines a single character. | |
| 75 | |
| 76 (3) The 0x00600000 bits are spare. | |
| 77 | |
| 78 (4) The 0x001fffff bits contain the code point. No Unicode code point will | |
| 79 ever be greater than 0x0010ffff, so this should be OK for ever. | |
| 80 | |
| 81 (f1) (1) The 0xfc000000 bits contain the character type number. The numbers are | |
| 82 defined by an enum in ucp.h. | |
| 83 | |
| 84 (2) The 0x03ff0000 bits are spare. | |
| 85 | |
| 86 (3) The 0x0000ffff bits contain EITHER the unsigned offset to the top of | |
| 87 range if this entry defines a range, OR the *signed* offset to the | |
| 88 character's "other case" partner if this entry defines a single | |
| 89 character. There is no partner if the value is zero. | |
| 90 | |
| 91 ------------------------------------------------------------------------------- | |
| 92 | script (8) |.|.|.| codepoint (21) || type (6) |.|.| spare (8) | offset (16) | | |
| 93 ------------------------------------------------------------------------------- | |
| 94 | | | | | | |
| 95 | | |-> spare | |-> spare | |
| 96 | | | | |
| 97 | |-> spare |-> spare | |
| 98 | | |
| 99 |-> range flag | |
| 100 | |
| 101 The upper/lower casing information is set only for characters that come in | |
| 102 pairs. The non-one-to-one mappings in the Unicode data are ignored. | |
| 103 | |
| 104 When searching the data, proceed as follows: | |
| 105 | |
| 106 (1) Set up for a binary chop search. | |
| 107 | |
| 108 (2) If the top is not greater than the bottom, the character is not in the | |
| 109 table. Its type must therefore be "Cn" ("Undefined"). | |
| 110 | |
| 111 (3) Find the middle vector element. | |
| 112 | |
| 113 (4) Extract the code point and compare. If equal, we are done. | |
| 114 | |
| 115 (5) If the test character is smaller, set the top to the current point, and | |
| 116 goto (2). | |
| 117 | |
| 118 (6) If the current entry defines a range, compute the last character by adding | |
| 119 the offset, and see if the test character is within the range. If it is, | |
| 120 we are done. | |
| 121 | |
| 122 (7) Otherwise, set the bottom to one element past the current point and goto | |
| 123 (2). | |
| 124 */ | |
| 125 | |
| 126 /* End of ucpinternal.h */ | |
| OLD | NEW |