OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 /** | 5 /** |
6 * boilerpipe | 6 * boilerpipe |
7 * | 7 * |
8 * Copyright (c) 2009 Christian Kohlschütter | 8 * Copyright (c) 2009 Christian Kohlschütter |
9 * | 9 * |
10 * The author licenses this file to You under the Apache License, Version 2.0 | 10 * The author licenses this file to You under the Apache License, Version 2.0 |
11 * (the "License"); you may not use this file except in compliance with | 11 * (the "License"); you may not use this file except in compliance with |
12 * the License. You may obtain a copy of the License at | 12 * the License. You may obtain a copy of the License at |
13 * | 13 * |
14 * http://www.apache.org/licenses/LICENSE-2.0 | 14 * http://www.apache.org/licenses/LICENSE-2.0 |
15 * | 15 * |
16 * Unless required by applicable law or agreed to in writing, software | 16 * Unless required by applicable law or agreed to in writing, software |
17 * distributed under the License is distributed on an "AS IS" BASIS, | 17 * distributed under the License is distributed on an "AS IS" BASIS, |
18 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 18 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
19 * See the License for the specific language governing permissions and | 19 * See the License for the specific language governing permissions and |
20 * limitations under the License. | 20 * limitations under the License. |
21 */ | 21 */ |
22 package de.l3s.boilerpipe.sax; | 22 package de.l3s.boilerpipe.sax; |
23 | 23 |
| 24 import com.dom_distiller.client.DomUtil; |
24 import com.dom_distiller.client.StringUtil; | 25 import com.dom_distiller.client.StringUtil; |
| 26 import com.google.gwt.dom.client.Element; |
| 27 import com.google.gwt.dom.client.Style; |
25 | 28 |
26 import de.l3s.boilerpipe.document.TextBlock; | 29 import de.l3s.boilerpipe.document.TextBlock; |
27 import de.l3s.boilerpipe.document.TextDocument; | 30 import de.l3s.boilerpipe.document.TextDocument; |
28 import de.l3s.boilerpipe.labels.LabelAction; | 31 import de.l3s.boilerpipe.labels.LabelAction; |
29 import de.l3s.boilerpipe.util.UnicodeTokenizer; | 32 import de.l3s.boilerpipe.util.UnicodeTokenizer; |
30 | 33 |
31 import com.dom_distiller.client.sax.Attributes; | 34 import com.dom_distiller.client.sax.Attributes; |
32 import com.dom_distiller.client.sax.ContentHandler; | 35 import com.dom_distiller.client.sax.ContentHandler; |
33 | 36 |
34 import java.util.ArrayList; | 37 import java.util.ArrayList; |
| 38 import java.util.HashMap; |
35 import java.util.HashSet; | 39 import java.util.HashSet; |
36 import java.util.LinkedList; | 40 import java.util.LinkedList; |
37 import java.util.List; | 41 import java.util.List; |
38 import java.util.Map; | 42 import java.util.Map; |
39 import java.util.regex.Pattern; | 43 import java.util.regex.Pattern; |
40 | 44 |
41 /** | 45 /** |
42 * A simple SAX {@link ContentHandler}, used by {@link com.dom_distiller.client.
ContentExtractor}. | 46 * A simple SAX {@link ContentHandler}, used by {@link com.dom_distiller.client.
ContentExtractor}. |
43 * Can be used by different parser implementations, e.g. NekoHTML and TagSoup. | 47 * Can be used by different parser implementations, e.g. NekoHTML and TagSoup. |
44 * | 48 * |
(...skipping 30 matching lines...) Expand all Loading... |
75 private int offsetBlocks = 0; | 79 private int offsetBlocks = 0; |
76 private HashSet<Integer> currentContainedTextElements = new HashSet<Integer>
(); | 80 private HashSet<Integer> currentContainedTextElements = new HashSet<Integer>
(); |
77 | 81 |
78 private boolean flush = false; | 82 private boolean flush = false; |
79 boolean inAnchorText = false; | 83 boolean inAnchorText = false; |
80 | 84 |
81 LinkedList<LinkedList<LabelAction>> labelStacks = new LinkedList<LinkedList<
LabelAction>>(); | 85 LinkedList<LinkedList<LabelAction>> labelStacks = new LinkedList<LinkedList<
LabelAction>>(); |
82 LinkedList<Integer> fontSizeStack = new LinkedList<Integer>(); | 86 LinkedList<Integer> fontSizeStack = new LinkedList<Integer>(); |
83 | 87 |
84 /** | 88 /** |
| 89 * Contains the computed style of each element. |
| 90 */ |
| 91 private final Map<Element, Style> computedStyleCache = new HashMap<Element,
Style>(); |
| 92 |
| 93 private final Map<String, TagAction> displayStyleToTagAction = new HashMap<S
tring, TagAction>(); |
| 94 |
| 95 /** |
85 * Recycles this instance. | 96 * Recycles this instance. |
86 */ | 97 */ |
87 public void recycle() { | 98 public void recycle() { |
88 tokenBuffer.setLength(0); | 99 tokenBuffer.setLength(0); |
89 textBuffer.setLength(0); | 100 textBuffer.setLength(0); |
90 | 101 |
91 inBody = 0; | 102 inBody = 0; |
92 inAnchor = 0; | 103 inAnchor = 0; |
93 inIgnorableElement = 0; | 104 inIgnorableElement = 0; |
94 sbLastWasWhitespace = false; | 105 sbLastWasWhitespace = false; |
(...skipping 23 matching lines...) Expand all Loading... |
118 /** | 129 /** |
119 * Constructs a {@link BoilerpipeHTMLContentHandler} using the given | 130 * Constructs a {@link BoilerpipeHTMLContentHandler} using the given |
120 * {@link TagActionMap}. | 131 * {@link TagActionMap}. |
121 * | 132 * |
122 * @param tagActions | 133 * @param tagActions |
123 * The {@link TagActionMap} to use, e.g. | 134 * The {@link TagActionMap} to use, e.g. |
124 * {@link DefaultTagActionMap}. | 135 * {@link DefaultTagActionMap}. |
125 */ | 136 */ |
126 public BoilerpipeHTMLContentHandler(final TagActionMap tagActions) { | 137 public BoilerpipeHTMLContentHandler(final TagActionMap tagActions) { |
127 this.tagActions = tagActions; | 138 this.tagActions = tagActions; |
| 139 setupDisplayToTagActionMapping(); |
| 140 } |
| 141 |
| 142 // TODO(nyquist) Merge with FilteringDomVisitor for display: none when this
class goes away. |
| 143 private void setupDisplayToTagActionMapping() { |
| 144 // See http://www.w3.org/TR/CSS2/visuren.html#display-prop |
| 145 displayStyleToTagAction.put("block", CommonTagActions.TA_BLOCK_LEVEL); |
| 146 displayStyleToTagAction.put("inline-block", CommonTagActions.TA_INLINE_B
LOCK_LEVEL); |
| 147 displayStyleToTagAction.put("inline", CommonTagActions.TA_INLINE_NO_WHIT
ESPACE); |
| 148 displayStyleToTagAction.put("list-item", CommonTagActions.TA_BLOCK_LEVEL
); |
| 149 |
| 150 // See http://www.w3.org/TR/CSS2/tables.html#table-display |
| 151 displayStyleToTagAction.put("table", CommonTagActions.TA_BLOCK_LEVEL); |
| 152 displayStyleToTagAction.put("inline-table", CommonTagActions.TA_INLINE_B
LOCK_LEVEL); |
| 153 displayStyleToTagAction.put("table-row", CommonTagActions.TA_BLOCK_LEVEL
); |
| 154 displayStyleToTagAction.put("table-row-group", CommonTagActions.TA_BLOCK
_LEVEL); |
| 155 displayStyleToTagAction.put("table-header-group", CommonTagActions.TA_BL
OCK_LEVEL); |
| 156 displayStyleToTagAction.put("table-footer-group", CommonTagActions.TA_BL
OCK_LEVEL); |
| 157 displayStyleToTagAction.put("table-column", CommonTagActions.TA_BLOCK_LE
VEL); |
| 158 displayStyleToTagAction.put("table-column-group", CommonTagActions.TA_BL
OCK_LEVEL); |
| 159 displayStyleToTagAction.put("table-cell", CommonTagActions.TA_BLOCK_LEVE
L); |
| 160 displayStyleToTagAction.put("table-caption", CommonTagActions.TA_BLOCK_L
EVEL); |
| 161 |
| 162 // See http://www.w3.org/TR/css-flexbox-1/#flex-containers |
| 163 displayStyleToTagAction.put("flex", CommonTagActions.TA_BLOCK_LEVEL); |
| 164 displayStyleToTagAction.put("inline-flex", CommonTagActions.TA_INLINE_BL
OCK_LEVEL); |
128 } | 165 } |
129 | 166 |
130 @Override | 167 @Override |
131 public void endDocument() { | 168 public void endDocument() { |
132 flushBlock(); | 169 flushBlock(); |
133 } | 170 } |
134 | 171 |
135 @Override | 172 @Override |
136 public void ignorableWhitespace(char[] ch, int start, int length) { | 173 public void ignorableWhitespace(char[] ch, int start, int length) { |
137 if (!sbLastWasWhitespace) { | 174 if (!sbLastWasWhitespace) { |
138 textBuffer.append(' '); | 175 textBuffer.append(' '); |
139 tokenBuffer.append(' '); | 176 tokenBuffer.append(' '); |
140 } | 177 } |
141 sbLastWasWhitespace = true; | 178 sbLastWasWhitespace = true; |
142 } | 179 } |
143 | 180 |
144 @Override | 181 @Override |
145 public void startDocument() { | 182 public void startDocument() { |
146 } | 183 } |
147 | 184 |
148 @Override | 185 @Override |
149 public void startElement(String uri, String localName, String qName, Attribu
tes atts) { | 186 public void startElement(Element element, Attributes atts) { |
150 labelStacks.add(null); | 187 labelStacks.add(null); |
151 | 188 |
152 TagAction ta = tagActions.get(localName); | 189 TagAction ta = getComputedTagAction(element); |
| 190 if (tagActions.containsKey(element.getTagName())) { |
| 191 ta = tagActions.get(element.getTagName()); |
| 192 } |
| 193 |
153 if (ta != null) { | 194 if (ta != null) { |
154 if(ta.changesTagLevel()) { | 195 if(ta.changesTagLevel()) { |
155 tagLevel++; | 196 tagLevel++; |
156 } | 197 } |
157 flush = ta.start(this, localName, qName, atts) | flush; | 198 flush = ta.start(this, atts) | flush; |
158 } else { | 199 } else { |
159 tagLevel++; | 200 tagLevel++; |
160 flush = true; | 201 flush = true; |
161 } | 202 } |
162 | 203 |
163 lastEvent = Event.START_TAG; | 204 lastEvent = Event.START_TAG; |
164 lastStartTag = localName; | 205 lastStartTag = element.getTagName(); |
| 206 } |
| 207 |
| 208 private TagAction getComputedTagAction(Element element) { |
| 209 if (computedStyleCache.containsKey(element)) { |
| 210 return getComputedTagAction(computedStyleCache.get(element)); |
| 211 } |
| 212 Style computedStyle = DomUtil.getComputedStyle(element); |
| 213 computedStyleCache.put(element, computedStyle); |
| 214 return getComputedTagAction(computedStyle); |
| 215 } |
| 216 |
| 217 private TagAction getComputedTagAction(Style style) { |
| 218 if (displayStyleToTagAction.containsKey(style.getDisplay())) { |
| 219 return displayStyleToTagAction.get(style.getDisplay()); |
| 220 } |
| 221 return null; |
165 } | 222 } |
166 | 223 |
167 @Override | 224 @Override |
168 public void endElement(String uri, String localName, String qName) { | 225 public void endElement(Element element) { |
169 TagAction ta = tagActions.get(localName); | 226 TagAction ta = getComputedTagAction(element); |
| 227 if (tagActions.containsKey(element.getTagName())) { |
| 228 ta = tagActions.get(element.getTagName()); |
| 229 } |
| 230 |
170 if (ta != null) { | 231 if (ta != null) { |
171 flush = ta.end(this, localName, qName) | flush; | 232 flush = ta.end(this) | flush; |
172 } else { | 233 } else { |
173 flush = true; | 234 flush = true; |
174 } | 235 } |
175 | 236 |
176 if(ta == null || ta.changesTagLevel()) { | 237 if(ta == null || ta.changesTagLevel()) { |
177 tagLevel--; | 238 tagLevel--; |
178 } | 239 } |
179 | 240 |
180 if (flush) { | 241 if (flush) { |
181 flushBlock(); | 242 flushBlock(); |
182 } | 243 } |
183 | 244 |
184 lastEvent = Event.END_TAG; | 245 lastEvent = Event.END_TAG; |
185 lastEndTag = localName; | 246 lastEndTag = element.getTagName(); |
186 | 247 |
187 labelStacks.removeLast(); | 248 labelStacks.removeLast(); |
188 } | 249 } |
189 | 250 |
190 @Override | 251 @Override |
191 public void characters(char[] ch, int start, int length) { | 252 public void characters(char[] ch, int start, int length) { |
192 textElementIdx++; | 253 textElementIdx++; |
193 | 254 |
194 | 255 |
195 if (flush) { | 256 if (flush) { |
(...skipping 218 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
414 labelStack.add(la); | 475 labelStack.add(la); |
415 } | 476 } |
416 | 477 |
417 private static final Pattern PAT_VALID_WORD_CHARACTER = Pattern | 478 private static final Pattern PAT_VALID_WORD_CHARACTER = Pattern |
418 .compile( | 479 .compile( |
419 "[" + | 480 "[" + |
420 "\u0030-\u0039\u0041-\u005a\u0061-\u007a\u00aa\u
00b2-\u00b3\u00b5\u00b9-\u00ba\u00bc-\u00be\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02
36\u0250-\u02c1\u02c6-\u02d1\u02e0-\u02e4\u02ee\u037a\u0386\u0388-\u038a\u038c\u
038e-\u03a1\u03a3-\u03ce\u03d0-\u03f5\u03f7-\u03fb\u0400-\u0481\u048a-\u04ce\u04
d0-\u04f5\u04f8-\u04f9\u0500-\u050f\u0531-\u0556\u0559\u0561-\u0587\u05d0-\u05ea
\u05f0-\u05f2\u0621-\u063a\u0640-\u064a\u0660-\u0669\u066e-\u066f\u0671-\u06d3\u
06d5\u06e5-\u06e6\u06ee-\u06fc\u06ff\u0710\u0712-\u072f\u074d-\u074f\u0780-\u07a
5\u07b1\u0904-\u0939\u093d\u0950\u0958-\u0961\u0966-\u096f\u0985-\u098c\u098f-\u
0990\u0993-\u09a8\u09aa-\u09b0\u09b2\u09b6-\u09b9\u09bd\u09dc-\u09dd\u09df-\u09e
1\u09e6-\u09f1\u09f4-\u09f9\u0a05-\u0a0a\u0a0f-\u0a10\u0a13-\u0a28\u0a2a-\u0a30\
u0a32-\u0a33\u0a35-\u0a36\u0a38-\u0a39\u0a59-\u0a5c\u0a5e\u0a66-\u0a6f\u0a72-\u0
a74\u0a85-\u0a8d\u0a8f-\u0a91\u0a93-\u0aa8\u0aaa-\u0ab0\u0ab2-\u0ab3\u0ab5-\u0ab
9\u0abd\u0ad0\u0ae0-\u0ae1\u0ae6-\u0aef\u0b05-\u0b0c\u0b0f-\u0b10\u0b13-\u0b28\u
0b2a-\u0b30\u0b32-\u0b33\u0b35-\u0b39\u0b3d\u0b5c-\u0b5d\u0b5f-\u0b61\u0b66-\u0b
6f\u0b71\u0b83\u0b85-\u0b8a\u0b8e-\u0b90\u0b92-\u0b95\u0b99-\u0b9a\u0b9c\u0b9e-\
u0b9f\u0ba3-\u0ba4\u0ba8-\u0baa\u0bae-\u0bb5\u0bb7-\u0bb9\u0be7-\u0bf2\u0c05-\u0
c0c\u0c0e-\u0c10\u0c12-\u0c28\u0c2a-\u0c33\u0c35-\u0c39\u0c60-\u0c61\u0c66-\u0c6
f\u0c85-\u0c8c\u0c8e-\u0c90\u0c92-\u0ca8\u0caa-\u0cb3\u0cb5-\u0cb9\u0cbd\u0cde\u
0ce0-\u0ce1\u0ce6-\u0cef\u0d05-\u0d0c\u0d0e-\u0d10\u0d12-\u0d28\u0d2a-\u0d39\u0d
60-\u0d61\u0d66-\u0d6f\u0d85-\u0d96\u0d9a-\u0db1\u0db3-\u0dbb\u0dbd\u0dc0-\u0dc6
\u0e01-\u0e30\u0e32-\u0e33\u0e40-\u0e46\u0e50-\u0e59\u0e81-\u0e82\u0e84\u0e87-\u
0e88\u0e8a\u0e8d\u0e94-\u0e97\u0e99-\u0e9f\u0ea1-\u0ea3\u0ea5\u0ea7\u0eaa-\u0eab
\u0ead-\u0eb0\u0eb2-\u0eb3\u0ebd\u0ec0-\u0ec4\u0ec6\u0ed0-\u0ed9\u0edc-\u0edd\u0
f00\u0f20-\u0f33\u0f40-\u0f47\u0f49-\u0f6a\u0f88-\u0f8b\u1000-\u1021\u1023-\u102
7\u1029-\u102a\u1040-\u1049\u1050-\u1055\u10a0-\u10c5\u10d0-\u10f8\u1100-\u1159\
u115f-\u11a2\u11a8-\u11f9\u1200-\u1206\u1208-\u1246\u1248\u124a-\u124d\u1250-\u1
256\u1258\u125a-\u125d\u1260-\u1286\u1288\u128a-\u128d\u1290-\u12ae\u12b0\u12b2-
\u12b5\u12b8-\u12be\u12c0\u12c2-\u12c5\u12c8-\u12ce\u12d0-\u12d6\u12d8-\u12ee\u1
2f0-\u130e\u1310\u1312-\u1315\u1318-\u131e\u1320-\u1346\u1348-\u135a\u1369-\u137
c\u13a0-\u13f4\u1401-\u166c\u166f-\u1676\u1681-\u169a\u16a0-\u16ea\u16ee-\u16f0\
u1700-\u170c\u170e-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176c\u176e-\u1770\u1
780-\u17b3\u17d7\u17dc\u17e0-\u17e9\u17f0-\u17f9\u1810-\u1819\u1820-\u1877\u1880
-\u18a8\u1900-\u191c\u1946-\u196d\u1970-\u1974\u1d00-\u1d6b\u1e00-\u1e9b\u1ea0-\
u1ef9\u1f00-\u1f15\u1f18-\u1f1d\u1f20-\u1f45\u1f48-\u1f4d\u1f50-\u1f57\u1f59\u1f
5b\u1f5d\u1f5f-\u1f7d\u1f80-\u1fb4\u1fb6-\u1fbc\u1fbe\u1fc2-\u1fc4\u1fc6-\u1fcc\
u1fd0-\u1fd3\u1fd6-\u1fdb\u1fe0-\u1fec\u1ff2-\u1ff4\u1ff6-\u1ffc\u2070-\u2071\u2
074-\u2079\u207f-\u2089\u2102\u2107\u210a-\u2113\u2115\u2119-\u211d\u2124\u2126\
u2128\u212a-\u212d\u212f-\u2131\u2133-\u2139\u213d-\u213f\u2145-\u2149\u2153-\u2
183\u2460-\u249b\u24ea-\u24ff\u2776-\u2793\u3005-\u3007\u3021-\u3029\u3031-\u303
5\u3038-\u303c\u3041-\u3096\u309d-\u309f\u30a1-\u30fa\u30fc-\u30ff\u3105-\u312c\
u3131-\u318e\u3192-\u3195\u31a0-\u31b7\u31f0-\u31ff\u3220-\u3229\u3251-\u325f\u3
280-\u3289\u32b1-\u32bf\u3400-\u4db5\u4e00-\u9fa5\ua000-\ua48c\uac00-\ud7a3\uf90
0-\ufa2d\ufa30-\ufa6a\ufb00-\ufb06\ufb13-\ufb17\ufb1d\ufb1f-\ufb28\ufb2a-\ufb36\
ufb38-\ufb3c\ufb3e\ufb40-\ufb41\ufb43-\ufb44\ufb46-\ufbb1\ufbd3-\ufd3d\ufd50-\uf
d8f\ufd92-\ufdc7\ufdf0-\ufdfb\ufe70-\ufe74\ufe76-\ufefc\uff10-\uff19\uff21-\uff3
a\uff41-\uff5a\uff66-\uffbe\uffc2-\uffc7\uffca-\uffcf\uffd2-\uffd7\uffda-\uffdc" | 481 "\u0030-\u0039\u0041-\u005a\u0061-\u007a\u00aa\u
00b2-\u00b3\u00b5\u00b9-\u00ba\u00bc-\u00be\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02
36\u0250-\u02c1\u02c6-\u02d1\u02e0-\u02e4\u02ee\u037a\u0386\u0388-\u038a\u038c\u
038e-\u03a1\u03a3-\u03ce\u03d0-\u03f5\u03f7-\u03fb\u0400-\u0481\u048a-\u04ce\u04
d0-\u04f5\u04f8-\u04f9\u0500-\u050f\u0531-\u0556\u0559\u0561-\u0587\u05d0-\u05ea
\u05f0-\u05f2\u0621-\u063a\u0640-\u064a\u0660-\u0669\u066e-\u066f\u0671-\u06d3\u
06d5\u06e5-\u06e6\u06ee-\u06fc\u06ff\u0710\u0712-\u072f\u074d-\u074f\u0780-\u07a
5\u07b1\u0904-\u0939\u093d\u0950\u0958-\u0961\u0966-\u096f\u0985-\u098c\u098f-\u
0990\u0993-\u09a8\u09aa-\u09b0\u09b2\u09b6-\u09b9\u09bd\u09dc-\u09dd\u09df-\u09e
1\u09e6-\u09f1\u09f4-\u09f9\u0a05-\u0a0a\u0a0f-\u0a10\u0a13-\u0a28\u0a2a-\u0a30\
u0a32-\u0a33\u0a35-\u0a36\u0a38-\u0a39\u0a59-\u0a5c\u0a5e\u0a66-\u0a6f\u0a72-\u0
a74\u0a85-\u0a8d\u0a8f-\u0a91\u0a93-\u0aa8\u0aaa-\u0ab0\u0ab2-\u0ab3\u0ab5-\u0ab
9\u0abd\u0ad0\u0ae0-\u0ae1\u0ae6-\u0aef\u0b05-\u0b0c\u0b0f-\u0b10\u0b13-\u0b28\u
0b2a-\u0b30\u0b32-\u0b33\u0b35-\u0b39\u0b3d\u0b5c-\u0b5d\u0b5f-\u0b61\u0b66-\u0b
6f\u0b71\u0b83\u0b85-\u0b8a\u0b8e-\u0b90\u0b92-\u0b95\u0b99-\u0b9a\u0b9c\u0b9e-\
u0b9f\u0ba3-\u0ba4\u0ba8-\u0baa\u0bae-\u0bb5\u0bb7-\u0bb9\u0be7-\u0bf2\u0c05-\u0
c0c\u0c0e-\u0c10\u0c12-\u0c28\u0c2a-\u0c33\u0c35-\u0c39\u0c60-\u0c61\u0c66-\u0c6
f\u0c85-\u0c8c\u0c8e-\u0c90\u0c92-\u0ca8\u0caa-\u0cb3\u0cb5-\u0cb9\u0cbd\u0cde\u
0ce0-\u0ce1\u0ce6-\u0cef\u0d05-\u0d0c\u0d0e-\u0d10\u0d12-\u0d28\u0d2a-\u0d39\u0d
60-\u0d61\u0d66-\u0d6f\u0d85-\u0d96\u0d9a-\u0db1\u0db3-\u0dbb\u0dbd\u0dc0-\u0dc6
\u0e01-\u0e30\u0e32-\u0e33\u0e40-\u0e46\u0e50-\u0e59\u0e81-\u0e82\u0e84\u0e87-\u
0e88\u0e8a\u0e8d\u0e94-\u0e97\u0e99-\u0e9f\u0ea1-\u0ea3\u0ea5\u0ea7\u0eaa-\u0eab
\u0ead-\u0eb0\u0eb2-\u0eb3\u0ebd\u0ec0-\u0ec4\u0ec6\u0ed0-\u0ed9\u0edc-\u0edd\u0
f00\u0f20-\u0f33\u0f40-\u0f47\u0f49-\u0f6a\u0f88-\u0f8b\u1000-\u1021\u1023-\u102
7\u1029-\u102a\u1040-\u1049\u1050-\u1055\u10a0-\u10c5\u10d0-\u10f8\u1100-\u1159\
u115f-\u11a2\u11a8-\u11f9\u1200-\u1206\u1208-\u1246\u1248\u124a-\u124d\u1250-\u1
256\u1258\u125a-\u125d\u1260-\u1286\u1288\u128a-\u128d\u1290-\u12ae\u12b0\u12b2-
\u12b5\u12b8-\u12be\u12c0\u12c2-\u12c5\u12c8-\u12ce\u12d0-\u12d6\u12d8-\u12ee\u1
2f0-\u130e\u1310\u1312-\u1315\u1318-\u131e\u1320-\u1346\u1348-\u135a\u1369-\u137
c\u13a0-\u13f4\u1401-\u166c\u166f-\u1676\u1681-\u169a\u16a0-\u16ea\u16ee-\u16f0\
u1700-\u170c\u170e-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176c\u176e-\u1770\u1
780-\u17b3\u17d7\u17dc\u17e0-\u17e9\u17f0-\u17f9\u1810-\u1819\u1820-\u1877\u1880
-\u18a8\u1900-\u191c\u1946-\u196d\u1970-\u1974\u1d00-\u1d6b\u1e00-\u1e9b\u1ea0-\
u1ef9\u1f00-\u1f15\u1f18-\u1f1d\u1f20-\u1f45\u1f48-\u1f4d\u1f50-\u1f57\u1f59\u1f
5b\u1f5d\u1f5f-\u1f7d\u1f80-\u1fb4\u1fb6-\u1fbc\u1fbe\u1fc2-\u1fc4\u1fc6-\u1fcc\
u1fd0-\u1fd3\u1fd6-\u1fdb\u1fe0-\u1fec\u1ff2-\u1ff4\u1ff6-\u1ffc\u2070-\u2071\u2
074-\u2079\u207f-\u2089\u2102\u2107\u210a-\u2113\u2115\u2119-\u211d\u2124\u2126\
u2128\u212a-\u212d\u212f-\u2131\u2133-\u2139\u213d-\u213f\u2145-\u2149\u2153-\u2
183\u2460-\u249b\u24ea-\u24ff\u2776-\u2793\u3005-\u3007\u3021-\u3029\u3031-\u303
5\u3038-\u303c\u3041-\u3096\u309d-\u309f\u30a1-\u30fa\u30fc-\u30ff\u3105-\u312c\
u3131-\u318e\u3192-\u3195\u31a0-\u31b7\u31f0-\u31ff\u3220-\u3229\u3251-\u325f\u3
280-\u3289\u32b1-\u32bf\u3400-\u4db5\u4e00-\u9fa5\ua000-\ua48c\uac00-\ud7a3\uf90
0-\ufa2d\ufa30-\ufa6a\ufb00-\ufb06\ufb13-\ufb17\ufb1d\ufb1f-\ufb28\ufb2a-\ufb36\
ufb38-\ufb3c\ufb3e\ufb40-\ufb41\ufb43-\ufb44\ufb46-\ufbb1\ufbd3-\ufd3d\ufd50-\uf
d8f\ufd92-\ufdc7\ufdf0-\ufdfb\ufe70-\ufe74\ufe76-\ufefc\uff10-\uff19\uff21-\uff3
a\uff41-\uff5a\uff66-\uffbe\uffc2-\uffc7\uffca-\uffcf\uffd2-\uffd7\uffda-\uffdc" |
421 + "]"); | 482 + "]"); |
422 | 483 |
423 } | 484 } |
OLD | NEW |