OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (C) 2013 Google Inc. All rights reserved. | |
3 * | |
4 * Redistribution and use in source and binary forms, with or without | |
5 * modification, are permitted provided that the following conditions are | |
6 * met: | |
7 * | |
8 * * Redistributions of source code must retain the above copyright | |
9 * notice, this list of conditions and the following disclaimer. | |
10 * * Redistributions in binary form must reproduce the above | |
11 * copyright notice, this list of conditions and the following disclaimer | |
12 * in the documentation and/or other materials provided with the | |
13 * distribution. | |
14 * * Neither the name of Google Inc. nor the names of its | |
15 * contributors may be used to endorse or promote products derived from | |
16 * this software without specific prior written permission. | |
17 * | |
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 */ | |
Common.TextUtils = {
  /**
   * Reports whether |char| is a printable ASCII punctuation character other
   * than '_': '!'..'/', ':'..'@', '['..'^', '`' and '{'..'~'.
   * The check is a plain string comparison, so it is only meaningful for
   * single-character strings.
   * @param {string} char
   * @return {boolean}
   */
  isStopChar(char) {
    return (char > ' ' && char < '0') || (char > '9' && char < 'A') || (char > 'Z' && char < '_') ||
        (char > '_' && char < 'a') || (char > 'z' && char <= '~');
  },

  /**
   * Reports whether |char| is neither a stop character nor whitespace.
   * @param {string} char
   * @return {boolean}
   */
  isWordChar(char) {
    return !Common.TextUtils.isStopChar(char) && !Common.TextUtils.isSpaceChar(char);
  },

  /**
   * Reports whether |char| contains whitespace, as matched by
   * Common.TextUtils._SpaceCharRegex.
   * @param {string} char
   * @return {boolean}
   */
  isSpaceChar(char) {
    return Common.TextUtils._SpaceCharRegex.test(char);
  },

  /**
   * Reports whether every UTF-16 code unit of |word| is a word character.
   * The empty string yields true because there is nothing to reject.
   * @param {string} word
   * @return {boolean}
   */
  isWord(word) {
    for (let i = 0; i < word.length; ++i) {
      if (!Common.TextUtils.isWordChar(word.charAt(i)))
        return false;
    }
    return true;
  },

  /**
   * Reports whether |char| is '(' or '{'.
   * @param {string} char
   * @return {boolean}
   */
  isOpeningBraceChar(char) {
    return char === '(' || char === '{';
  },

  /**
   * Reports whether |char| is ')' or '}'.
   * @param {string} char
   * @return {boolean}
   */
  isClosingBraceChar(char) {
    return char === ')' || char === '}';
  },

  /**
   * Reports whether |char| is one of '(', ')', '{' or '}'.
   * @param {string} char
   * @return {boolean}
   */
  isBraceChar(char) {
    return Common.TextUtils.isOpeningBraceChar(char) || Common.TextUtils.isClosingBraceChar(char);
  },

  /**
   * Calls |wordCallback| once, in text order, for every maximal run of
   * consecutive characters accepted by |isWordChar|.
   * @param {string} text
   * @param {function(string):boolean} isWordChar
   * @param {function(string)} wordCallback
   */
  textToWords(text, isWordChar, wordCallback) {
    // Index where the current word began, or -1 while between words.
    let startWord = -1;
    for (let i = 0; i < text.length; ++i) {
      if (!isWordChar(text.charAt(i))) {
        if (startWord !== -1)
          wordCallback(text.substring(startWord, i));
        startWord = -1;
      } else if (startWord === -1) {
        startWord = i;
      }
    }
    // Flush a word that runs up to the end of the text.
    if (startWord !== -1)
      wordCallback(text.substring(startWord));
  },

  /**
   * Returns the leading whitespace prefix of |line| (possibly empty).
   * @param {string} line
   * @return {string}
   */
  lineIndent(line) {
    let indentation = 0;
    while (indentation < line.length && Common.TextUtils.isSpaceChar(line.charAt(indentation)))
      ++indentation;
    // substring() instead of the deprecated substr(); identical for (0, n).
    return line.substring(0, indentation);
  },

  /**
   * Reports whether upper-casing |text| leaves it unchanged. Strings without
   * any cased letters (including '') therefore yield true.
   * @param {string} text
   * @return {boolean}
   */
  isUpperCase(text) {
    return text === text.toUpperCase();
  },

  /**
   * Reports whether lower-casing |text| leaves it unchanged. Strings without
   * any cased letters (including '') therefore yield true.
   * @param {string} text
   * @return {boolean}
   */
  isLowerCase(text) {
    return text === text.toLowerCase();
  },

  /**
   * Splits |text| into consecutive pieces using |regexes| in priority order:
   * the text is first split around matches of regexes[0]; every stretch
   * between those matches is then split with regexes[1], and so on. A piece
   * matched by no regex is reported with regexIndex -1. |position| is the
   * offset of the piece in the original |text|; pieces are returned in text
   * order.
   *
   * Regexes that are already global are used as-is, so their lastIndex is
   * reset and advanced by this call; non-global ones are cloned with the 'g'
   * flag added. Zero-length matches are skipped.
   *
   * @param {string} text
   * @param {!Array<!RegExp>} regexes
   * @return {!Array<{value: string, position: number, regexIndex: number}>}
   */
  splitStringByRegexes(text, regexes) {
    const matches = [];
    const globalRegexes =
        regexes.map(regex => regex.global ? regex : new RegExp(regex.source, (regex.flags || '') + 'g'));
    doSplit(text, 0, 0);
    return matches;

    /**
     * Splits |text| with globalRegexes[regexIndex], delegating unmatched
     * stretches to the next regex.
     * @param {string} text
     * @param {number} regexIndex
     * @param {number} startIndex offset of |text| within the original input
     */
    function doSplit(text, regexIndex, startIndex) {
      if (regexIndex >= globalRegexes.length) {
        // Set regexIndex as -1 if text did not match with any regular expression
        matches.push({value: text, position: startIndex, regexIndex: -1});
        return;
      }
      const regex = globalRegexes[regexIndex];
      let currentIndex = 0;
      let result;
      regex.lastIndex = 0;
      while ((result = regex.exec(text)) !== null) {
        const match = result[0];
        if (!match) {
          // exec() does not advance lastIndex on a zero-length match; step
          // over it manually, otherwise this loop would never terminate.
          ++regex.lastIndex;
          continue;
        }
        const stringBeforeMatch = text.substring(currentIndex, result.index);
        if (stringBeforeMatch)
          doSplit(stringBeforeMatch, regexIndex + 1, startIndex + currentIndex);
        matches.push({value: match, position: startIndex + result.index, regexIndex: regexIndex});
        currentIndex = result.index + match.length;
      }
      const stringAfterMatches = text.substring(currentIndex);
      if (stringAfterMatches)
        doSplit(stringAfterMatches, regexIndex + 1, startIndex + currentIndex);
    }
  }
};
186 | |
/**
 * Matches one whitespace character; used by Common.TextUtils.isSpaceChar.
 * The regex is not global, so test() keeps no lastIndex state between calls.
 */
Common.TextUtils._SpaceCharRegex = /\s/;

/**
 * Indentation unit strings. Each value is the literal text of one indent
 * level: the named number of space characters, or a single tab.
 * @enum {string}
 */
Common.TextUtils.Indent = {
  TwoSpaces: '  ',
  FourSpaces: '    ',
  EightSpaces: '        ',
  TabCharacter: '\t'
};
198 | |
199 /** | |
200 * @unrestricted | |
201 */ | |
Common.TextUtils.BalancedJSONTokenizer = class {
  /**
   * Incrementally scans text fed through write() and hands every prefix of
   * the buffer that ends on a brace-balanced '}' to |callback|. Only '{' and
   * '}' outside double-quoted strings are counted; '[' is not tracked.
   *
   * @param {function(string)} callback receives each balanced chunk
   * @param {boolean=} findMultiple when falsy, scanning of a chunk stops
   *     after the first balanced object
   */
  constructor(callback, findMultiple) {
    this._callback = callback;
    // Position in _buffer where the next write() resumes scanning.
    this._index = 0;
    // Number of currently unclosed '{'.
    this._balance = 0;
    this._buffer = '';
    this._findMultiple = findMultiple || false;
    // Finds an unescaped closing quote: a non-backslash character followed
    // by an even number of backslashes and then '"'.
    this._closingDoubleQuoteRegex = /[^\\](?:\\\\)*"/g;
    // End offset (exclusive) of the last balanced object seen; 0 means
    // there is nothing to report yet.
    this._lastBalancedIndex = 0;
  }

  /**
   * Appends |chunk| to the buffer and continues scanning from where the
   * previous call stopped.
   *
   * @param {string} chunk
   * @return {boolean} false when a '}' without a matching '{' is found, or
   *     a ']' is found while no object is open; true otherwise.
   */
  write(chunk) {
    this._buffer += chunk;
    const buffer = this._buffer;
    const lastIndex = buffer.length;
    let index = this._index;
    for (; index < lastIndex; ++index) {
      const character = buffer[index];
      if (character === '"') {
        // Jump over the whole string literal. If its closing quote has not
        // arrived yet, stop here so the next write() retries from this quote.
        this._closingDoubleQuoteRegex.lastIndex = index;
        if (!this._closingDoubleQuoteRegex.test(buffer))
          break;
        index = this._closingDoubleQuoteRegex.lastIndex - 1;
      } else if (character === '{') {
        ++this._balance;
      } else if (character === '}') {
        --this._balance;
        if (this._balance < 0) {
          this._reportBalanced();
          return false;
        }
        if (!this._balance) {
          this._lastBalancedIndex = index + 1;
          if (!this._findMultiple)
            break;
        }
      } else if (character === ']' && !this._balance) {
        this._reportBalanced();
        return false;
      }
    }
    this._index = index;
    this._reportBalanced();
    return true;
  }

  /**
   * Passes the buffered text up to the last balanced '}' to the callback and
   * drops it from the buffer. Does nothing when no balanced object is pending.
   */
  _reportBalanced() {
    if (!this._lastBalancedIndex)
      return;
    this._callback(this._buffer.slice(0, this._lastBalancedIndex));
    this._buffer = this._buffer.slice(this._lastBalancedIndex);
    // Rebase the scan position onto the shortened buffer. This can become
    // negative (e.g. -1 when write() stopped on the closing brace itself);
    // write() then reads undefined at those indices and moves on.
    this._index -= this._lastBalancedIndex;
    this._lastBalancedIndex = 0;
  }

  /**
   * Returns the buffered text that has not been reported to the callback.
   * @return {string}
   */
  remainder() {
    return this._buffer;
  }
};
270 | |
271 /** | |
 * Interface for objects that create tokenizer functions for a MIME type.
 * The constructor and the method below have empty bodies; this block only
 * declares the shape that implementations are expected to provide.
272 * @interface | |
273 */ | |
274 Common.TokenizerFactory = function() {}; | |
275 | |
276 Common.TokenizerFactory.prototype = { | |
277 /** | |
 * Returns a tokenizer function for |mimeType|. Per the annotation below,
 * the tokenizer takes a string and a callback that receives
 * (string, ?string, number, number).
 * NOTE(review): presumably (token text, token type, start offset, end
 * offset) - confirm against an implementation.
278 * @param {string} mimeType | |
279 * @return {function(string, function(string, ?string, number, number))} | |
280 */ | |
281 createTokenizer(mimeType) {} | |
282 }; | |
OLD | NEW |