OLD | NEW |
(Empty) | |
| 1 // Copyright 2014 The ChromeOS IME Authors. All Rights Reserved. |
| 2 // limitations under the License. |
| 3 // See the License for the specific language governing permissions and |
| 4 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 5 // distributed under the License is distributed on an "AS-IS" BASIS, |
| 6 // Unless required by applicable law or agreed to in writing, software |
| 7 // |
| 8 // http://www.apache.org/licenses/LICENSE-2.0 |
| 9 // |
| 10 // You may obtain a copy of the License at |
| 11 // you may not use this file except in compliance with the License. |
| 12 // Licensed under the Apache License, Version 2.0 (the "License"); |
| 13 // |
| 14 // Copyright 2013 The ChromeOS VK Authors. All Rights Reserved. |
| 15 // |
| 16 // Licensed under the Apache License, Version 2.0 (the "License"); |
| 17 // you may not use this file except in compliance with the License. |
| 18 // You may obtain a copy of the License at |
| 19 // |
| 20 // http://www.apache.org/licenses/LICENSE-2.0 |
| 21 // |
| 22 // Unless required by applicable law or agreed to in writing, software |
| 23 // distributed under the License is distributed on an "AS-IS" BASIS, |
| 24 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 25 // See the License for the specific language governing permissions and |
| 26 // limitations under the License. |
| 27 |
| 28 /** |
| 29 * @fileoverview Defines the parsed layout object which will do layout parsing |
| 30 * and expose the keymappings and the transforms to Model. |
| 31 */ |
| 32 |
| 33 goog.provide('i18n.input.chrome.vk.ParsedLayout'); |
| 34 |
| 35 goog.require('goog.object'); |
| 36 goog.require('i18n.input.chrome.vk.KeyCode'); |
| 37 |
| 38 |
| 39 |
/**
 * Builds the parsed layout object from the raw layout info.
 *
 * The constructor copies the identity and display attributes of the raw
 * layout into the view object, reserves one (initially null) mapping slot per
 * keyboard mode, and then parses the key mappings and the transforms.
 *
 * @param {!Object} layout The raw layout object defined in the
 *     xxx_layout.js.
 * @constructor
 */
i18n.input.chrome.vk.ParsedLayout = function(layout) {
  var layoutId = layout['id'];

  /**
   * The layout code (a.k.a. id).
   *
   * @type {string}
   */
  this.id = layoutId;

  /**
   * The view object needed by UI rendering, including the key mappings.
   * 'mappings' holds one slot per keyboard mode; the mode names are
   * '', 's', 'c', 'l', 'sc', 'cl', 'sl' and 'scl', built from:
   *   '' means normal;
   *   's' means SHIFT;
   *   'l' means CAPSLOCK;
   *   'c' means CTRL+ALT.
   * Every slot starts as null and is filled in when the raw layout is
   * parsed. A mode that the raw layout does not define keeps its null slot.
   * The format of a filled mode is: {
   *   '<keyChar>': ['<disp type(S|P)>', '<disp chars>', '<commit chars>']
   * }.
   *
   * @type {!Object}
   */
  this.view = {
    'id': layoutId,
    'title': layout['title'],
    'isRTL': layout['direction'] == 'rtl',
    'is102': !!layout['is102Keyboard'],
    'mappings': goog.object.create(
        '', null,
        's', null,
        'c', null,
        'l', null,
        'sc', null,
        'cl', null,
        'sl', null,
        'scl', null)
  };

  /**
   * The parsed layout transforms, or null if the layout defines none.
   * When set, the array has 4 elements:
   *   1st is the long regexp which matches any of the transform rules;
   *   2nd is the map of <match location>: [<regexp>, <replacement>];
   *   3rd/4th are the regexps used for prefix matches.
   *
   * @type {Array.<!Object>}
   */
  this.transforms = null;

  /**
   * The regexp matching the ambiguous chars of the layout, or null if the
   * layout defines none.
   *
   * @type {Object}
   * @private
   */
  this.ambiRegex_ = null;

  // The two fields above must exist before parsing, because the parse
  // methods fill them in.
  this.parseKeyMappings_(layout);
  this.parseTransforms_(layout);
};
| 112 |
| 113 |
/**
 * Parses the raw key mappings of the layout and stores the result, per
 * keyboard mode, into this.view['mappings'].
 *
 * @param {!Object} layout The raw layout object. Its format is:
 *     id: <layout id> in {string}
 *     title: <layout title> in {string}
 *     direction: 'rtl' or 'ltr'
 *     is102Keyboard: True if vk is 102, False/undefined for 101
 *     mappings: key map in {Object.<string,string>}
 *       '': keycodes (each char's charCode represents keycode) in normal state
 *       s: keycodes in SHIFT state
 *       c: keycodes in ALTGR state
 *       l: keycodes in CAPSLOCK state
 *       <the states could be combined, e.g. ',s,sc,sl,scl'>
 *     transform: in {Object.<string,string>}
 *       <regexp>: <replacement>
 *     historyPruneRegex: <regexp string to represent the ambiguities>.
 * @private
 */
i18n.input.chrome.vk.ParsedLayout.prototype.parseKeyMappings_ = function(
    layout) {
  var is102 = this.view['is102'];
  var allKeyCodes = is102 ? i18n.input.chrome.vk.KeyCode.CODES102 :
      i18n.input.chrome.vk.KeyCode.CODES101;
  var modeSlots = this.view['mappings'];
  var rawMappings = layout['mappings'];

  for (var modeList in rawMappings) {
    var rawMap = rawMappings[modeList];

    // A single entry may serve several modes, separated by commas.
    var modes = modeList.split(/,/);
    if (modes.join(',') != modeList) {
      modes.push('');  // IE splits 'a,b,' into ['a','b']
    }

    // Examples of what rawMap can look like:
    //   1) {'': '\u00c0123456...', ...}
    //   2) {'QWERT': 'QWERT', ...}
    //   3) {'A': 'aa', ...}
    //   4) {'BCD': '{{bb}}cd', ...}
    //   5) {'EFG': '{{S||e||ee}}FG', ...}
    //   6) {'HI': '{{P||12||H}}i', ...}
    var parsed = {};
    for (var rawKeys in rawMap) {
      // In example #1, rawKeys is '' and chars is '\u00c0123456...'.
      // In example #3, rawKeys is 'A' and chars is 'aa'.
      var chars = rawMap[rawKeys];
      var keys = rawKeys;
      if (rawKeys === '') {
        // An empty key list stands for every key code of the keyboard.
        keys = allKeyCodes;
        if (is102) {
          // For 102 keyboards, stay compatible with the old vk: the char at
          // index 25 is moved so that it follows the char that was at
          // index 36.
          chars = chars.slice(0, 25) + chars.slice(26, 37) +
              chars.charAt(25) + chars.slice(37);
        }
      }

      // Replaces (the first occurrence of) some chars for backward
      // compatibility with old layout definitions.
      keys = keys.replace('m', '\u00bd')
          .replace('=', '\u00bb')
          .replace(';', '\u00ba');

      if (keys.length == 1) {
        // Example #3: a single key maps to the whole chars value.
        parsed[keys] = ['S', chars, chars];
        continue;
      }

      // Walks keys and chars in parallel. One key consumes either a single
      // char or a whole '{{...}}' group of chars.
      var pos = 0;
      for (var keyIndex = 0; keyIndex < keys.length; ++keyIndex) {
        var key = keys.charAt(keyIndex);
        var first = chars.charAt(pos);
        pos += 1;

        if (first != '{' || chars.charAt(pos) != '{') {
          // Normal case: a single key maps to the according single char.
          parsed[key] = ['S', first, first];
          continue;
        }

        // Examples #4/5/6: '{{...}}' defines a single key mapping to chars.
        var close = chars.indexOf('}}', pos);
        if (close < pos) {
          // Unterminated group: the rest of this entry is not parsed.
          break;
        }
        var group = chars.slice(pos + 1, close);
        var fields = group.split('||');
        if (fields.length == 3) {
          // Examples #5/6: display type, button chars and commit chars are
          // given separately.
          parsed[key] = fields;
        } else if (fields.length == 1) {
          // Example #4.
          parsed[key] = ['S', group, group];
        }
        pos = close + 2;
      }
    }

    // Every mode named by this entry shares the same parsed mapping object.
    for (var modeIndex = 0; modeIndex < modes.length; ++modeIndex) {
      modeSlots[modes[modeIndex]] = parsed;
    }
  }
};
| 207 |
| 208 |
/**
 * Prefixalizes the regexp string, i.e. rewrites it so that it also matches
 * any prefix of what the original regexp matches.
 *
 * Every atom (an escaped char, a [...] character class or a plain char) is
 * wrapped as (?:<atom>|$), so the match may stop at the end of the input
 * after any atom. For example 'abc' becomes '(?:a|$)(?:b|$)(?:c|$)'.
 * Quantifier braces such as {2} or {1,3} are kept as they are, and the chars
 * | ( ) * + ? are left untouched.
 *
 * @param {string} re_str The original regexp string.
 * @return {string} The prefixalized regexp string.
 * @private
 */
i18n.input.chrome.vk.ParsedLayout.prototype.prefixalizeRegexString_ = function(
    re_str) {
  // Temporarily hides the escaped brackets \[ and \] behind placeholder
  // chars, so that [...\[\]...] won't impact the later replaces.
  re_str = re_str.replace(/\\./g, function(m) {
    if (m == '\\[') {
      return '\u0001';
    }
    if (m == '\\]') {
      return '\u0002';
    }
    return m;
  });
  // Prefixalizes. The brace alternative matches exactly one {...} group: a
  // greedy '.*' would run from the first '{' to the last '}' and leave every
  // atom in between (e.g. the 'b' in 'a{2}b{3}') un-prefixalized.
  re_str = re_str.replace(
      /\\.|\[[^\[\]]*\]|\{[^\{\}]*\}|[^\|\\\(\)\[\]\{\}\*\+\?]/g,
      function(m) {
        if (m.charAt(0) == '{') {
          // Quantifier braces are not atoms; keep them unchanged.
          return m;
        }
        return '(?:' + m + '|$)';
      });
  // Restores the \[\].
  re_str = re_str.replace(/\u0001/g, '\\[');
  re_str = re_str.replace(/\u0002/g, '\\]');
  return re_str;
};
| 241 |
| 242 |
/**
 * Parses the transforms and the history prune regexp of the given layout,
 * and stores them into this.transforms and this.ambiRegex_.
 *
 * this.transforms is left untouched (null after construction) when the
 * layout has no 'transform' entry or when that entry contains no rule.
 *
 * @param {!Object} layout The raw layout object. Its format is:
 *     id: <layout id> in {string}
 *     title: <layout title> in {string}
 *     direction: 'rtl' or 'ltr'
 *     is102Keyboard: True if vk is 102, False/undefined for 101
 *     mappings: key map in {Object.<string,string>}
 *       '': keycodes (each char's charCode represents keycode) in normal state
 *       s: keycodes in SHIFT state
 *       c: keycodes in ALTGR state
 *       l: keycodes in CAPSLOCK state
 *       <the states could be combined, e.g. ',s,sc,sl,scl'>
 *     transform: in {Object.<string,string>}
 *       <regexp>: <replacement>
 *     historyPruneRegex: <regexp string to represent the ambiguities>.
 * @private
 */
i18n.input.chrome.vk.ParsedLayout.prototype.parseTransforms_ = function(
    layout) {
  var transforms = layout['transform'];
  if (transforms) {
    // regobjs holds the RegExp object and the replacement of each rule.
    // regexesalone is used to build the long regexp which concats all the
    // transform regexps as (...$)|(...$)|...
    // The long regexp is needed because it is inefficient to match each
    // regexp one by one. Instead, the long regexp is matched only once. But
    // then we need to know where the match happened and which replacement to
    // use, so regobjs is indexed by the capture group number that the rule's
    // wrapping parentheses get inside the long regexp.
    var regobjs = [], regexesalone = [], partialRegexs = [];
    // sum_numgrps is the group number of the current rule in the long
    // regexp. It starts at 1 because index 0 of a match array is the whole
    // matched string.
    var sum_numgrps = 1;
    for (var regex in transforms) {
      var regobj = new RegExp(regex + '$');
      var repl = transforms[regex];
      regobjs[sum_numgrps] = [regobj, repl];
      regexesalone.push('(' + regex + '$)');
      partialRegexs.push('^(' + this.prefixalizeRegexString_(regex) + ')');
      // Counts the capture groups of the rule. The extra '|.*' alternative
      // guarantees that exec('') matches, so a match array is returned.
      var grpCountRegexp = new RegExp(regex + '|.*');
      // The length of the match array also counts the whole string.
      // However, that extra 1 is exactly the group added by the wrapping
      // parentheses in the long regexp.
      var numgrps = grpCountRegexp.exec('').length;
      sum_numgrps += numgrps;
    }
    // With no rule at all the long regexp would be built from an empty
    // string, which matches every input without identifying any rule, and
    // transform() would then dereference a missing rule. Such a layout is
    // treated as having no transforms.
    if (regexesalone.length > 0) {
      var longregobj = new RegExp(regexesalone.join('|'));
      // Saves 2 regexp objects for the later prefix matching: one joins the
      // prefixalized rules in their original order, the other joins them in
      // reverse order. How the two are used together is explained in
      // predictTransform().
      var prefixregobj = new RegExp(partialRegexs.join('|'));
      var prefixregobj2 =
          new RegExp(partialRegexs.slice().reverse().join('|'));
      this.transforms = [longregobj, regobjs, prefixregobj, prefixregobj2];
    }
  }

  var hisPruReg = layout['historyPruneRegex'];
  if (hisPruReg) {
    // Anchored on both sides: the ambiguous chars must match as a whole.
    this.ambiRegex_ = new RegExp('^(' + hisPruReg + ')$');
  }
};
| 314 |
| 315 |
/**
 * Predicts whether there would be future transforms for the given string.
 *
 * Each tail of the text, from the longest to the shortest, is matched
 * against the two prefix regexps built by parseTransforms_(). The start
 * position of the first tail which qualifies is returned.
 *
 * @param {string} text The given string.
 * @return {number} The matched position in the string. Returns -1 for no match.
 */
i18n.input.chrome.vk.ParsedLayout.prototype.predictTransform = function(text) {
  if (!this.transforms || !text) {
    return -1;
  }
  var rules = this.transforms[1];
  var forwardPrefixRe = this.transforms[2];
  var reversedPrefixRe = this.transforms[3];

  // Returns the index of the first non-empty capture group of a match array,
  // or the array length if there is none.
  var firstFilledGroup = function(groups) {
    var index = 1;
    while (index < groups.length && !groups[index]) {
      index++;
    }
    return index;
  };

  for (var start = 0; start < text.length; start++) {
    var tail = text.slice(start - text.length);

    // The tail must be matched, non-emptily, by the prefix regexp.
    var forward = tail.match(forwardPrefixRe);
    if (!forward || !forward[0]) {
      continue;
    }
    var matchedIndex = firstFilledGroup(forward);

    // Matches the reverse-ordered regexp as well, to see whether both
    // matches point to the same rule. This should always match.
    var backward = tail.match(reversedPrefixRe);
    if (!backward || !backward[0]) {
      continue;
    }

    // NOTE(review): comparing against length - index looks like it only
    // lines the two orders up when rules have no capture groups of their
    // own -- confirm.
    if (matchedIndex != backward.length - firstFilledGroup(backward)) {
      // Different rules matched in the two orders, so the tail is taken to
      // be a prefix, because the layout shouldn't have duplicated
      // transforms.
      return start;
    }

    // Same rule in both orders: appends a never-typed char to both the rule
    // regexp and the tail. If that still matches, the tail is a full match
    // instead of a prefix match.
    // NOTE(review): the rule source is built as <regexp> + '$' in
    // parseTransforms_(), so the char is appended after the '$'; such a
    // pattern looks unable to match, which would make this branch always
    // return -- confirm before relying on it.
    var ruleSource = rules[matchedIndex][0].toString().match(/\/(.*)\//)[1];
    var fullMatchRe = new RegExp(ruleSource + '\u0001');
    if (!(tail + '\u0001').match(fullMatchRe)) {
      return start;
    }
  }
  return -1;
};
| 360 |
| 361 |
/**
 * Applies the layout transform and gets the result.
 *
 * @param {string} prevstr The previous text.
 * @param {number} transat The position of previous transform. If it's -1,
 *     it means no transform happened.
 * @param {string} ch The new chars currently added to prevstr.
 * @return {Object} The transform result. Its format is:
 *     {back: <the number of chars to be deleted in the end of the prevstr>,
 *     chars: <the chars to add at the tail after the deletion>}.
 *     If no transform applies, returns null.
 */
i18n.input.chrome.vk.ParsedLayout.prototype.transform = function(
    prevstr, transat, ch) {
  if (!this.transforms) {
    return null;
  }

  // Marks the position of the previous transform with the separator char
  // '\u001d'. No separator is inserted unless transat is positive.
  var str = transat > 0 ?
      prevstr.slice(0, transat) + '\u001d' + prevstr.slice(transat) + ch :
      prevstr + ch;

  // One match of the long regexp tells whether any rule applies.
  var matchArr = this.transforms[0].exec(str);
  if (!matchArr) {
    return null;
  }

  // The first non-empty capture group identifies the matched rule.
  var matchGroup = 1;
  while (matchGroup < matchArr.length && !matchArr[matchGroup]) {
    matchGroup++;
  }

  // No rule is registered at this group when no group was filled (e.g. the
  // rule matched the empty string). Treat it as "no transform applies"
  // instead of failing on the missing entry.
  var rule = this.transforms[1][matchGroup];
  if (!rule) {
    return null;
  }
  var regobj = rule[0];
  var repl = rule[1];

  // Re-matches with the rule's own regexp to learn where the match starts.
  var m = regobj.exec(str);
  if (!m) {
    return null;
  }

  // The string visible to the user does not contain the separator nor the
  // new chars, so they must be discounted from the backspace count.
  var rmstr = str.slice(m.index);
  var numseps = rmstr.indexOf('\u001d') > -1 ? 1 : 0;
  var backlen = rmstr.length - numseps - ch.length;

  // Applies the replacement and keeps only the part from the match start,
  // without the separator.
  var newstr = str.replace(regobj, repl);
  var replstr = newstr.slice(m.index).replace('\u001d', '');

  return {back: backlen, chars: replstr};
};
| 412 |
| 413 |
/**
 * Tells whether the given chars are ambiguous chars, i.e. whether they are
 * matched by the ambiguity regexp of the layout.
 *
 * @param {string} chars The chars to be judged.
 * @return {boolean} True if the given chars are ambiguous chars, false
 *     otherwise (including when the layout defines no ambiguity regexp).
 */
i18n.input.chrome.vk.ParsedLayout.prototype.isAmbiChars = function(chars) {
  var ambiRegex = this.ambiRegex_;
  if (!ambiRegex) {
    return false;
  }
  return Boolean(ambiRegex.exec(chars));
};
OLD | NEW |