| OLD | NEW |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 /** | 5 /** |
| 6 * Parser for a simple grammar that describes a tree structure using a function- | 6 * Parser for a simple grammar that describes a tree structure using a function- |
| 7 * like "a(b(c,d))" syntax. Original intended usage: to have browsertests | 7 * like "a(b(c,d))" syntax. Original intended usage: to have browsertests |
| 8 * specify an arbitrary tree of iframes, loaded from various sites, without | 8 * specify an arbitrary tree of iframes, loaded from various sites, without |
| 9 * having to write a .html page for each level or do crazy feats of data: url | 9 * having to write a .html page for each level or do crazy feats of data: url |
| 10 * escaping. But there's nothing really iframe-specific here. See below for some | 10 * escaping. But there's nothing really iframe-specific here. See below for some |
| 11 * examples of the grammar and the parser output. | 11 * examples of the grammar and the parser output. |
| 12 * | 12 * |
| 13 * @example <caption>Basic syntax: an identifier followed by arg list.</caption> | 13 * @example <caption>Basic syntax: an identifier, optionally followed by a list |
| 14 * TreeParserUtil.parse('abc ()'); // returns { value: 'abc', children: [] } | 14 * of attributes, optionally followed by a list of children.</caption> |
| 15 * // returns { value: 'abc', attributes: [], children: [] } |
| 16 * TreeParserUtil.parse('abc {} ()'); |
| 15 * | 17 * |
| 16 * @example <caption>The arg list is optional. Dots are legal in ids.</caption> | 18 * @example <caption>Both the attribute and child lists are optional. Dots and |
| 17 * TreeParserUtil.parse('b.com'); // returns { value: 'b.com', children: [] } | 19 * hyphens are legal in ids.</caption> |
| 20 * // returns { value: 'example-b.com', attributes: [], children: [] } |
| 21 * TreeParserUtil.parse('example-b.com'); |
| 18 * | 22 * |
| 19 * @example <caption>Commas separate children in the arg list.</caption> | 23 * @example <caption>Attributes are identifiers as well, separated by commas. |
| 20 * // returns { value: 'b', children: [ | 24 * </caption> |
| 21 * // { value: 'c', children: [] }, | 25 * // returns { value: 'abc', attributes: ['attr-1', 'attr-2'], children: [] } |
| 22 * // { value: 'd', children: [] } | 26 * TreeParserUtil.parse('abc {attr-1, attr-2}'); |
| 27 * |
| 28 * @example <caption>Commas separate children in the child list.</caption> |
| 29 * // returns { value: 'b', attributes: [], children: [ |
| 30 * // { value: 'c', attributes: [], children: [] }, |
| 31 * // { value: 'd', attributes: [], children: [] } |
| 23 * // ]} | 32 * // ]} |
| 24 * TreeParserUtil.parse('b (c, d)'); | 33 * TreeParserUtil.parse('b (c, d)'); |
| 25 * | 34 * |
| 26 * @example <caption>Children can have children, and so on.</caption> | 35 * @example <caption>Children can have children, and so on.</caption> |
| 27 * // returns { value: 'e', children: [ | 36 * // returns { value: 'e', attributes: [], children: [ |
| 28 * // { value: 'f', children: [ | 37 * // { value: 'f', attributes: [], children: [ |
| 29 * // { value: 'g', children: [ | 38 * // { value: 'g', attributes: [], children: [ |
| 30 * // { value: 'h', children: [] }, | 39 * // { value: 'h', attributes: [], children: [] }, |
| 31 * // { value: 'i', children: [ | 40 * // { value: 'i', attributes: [], children: [ |
| 32 * // { value: 'j', children: [] } | 41 * // { value: 'j', attributes: [], children: [] } |
| 33 * // ]}, | 42 * // ]}, |
| 34 * // ]} | 43 * // ]} |
| 35 * // ]} | 44 * // ]} |
| 36 * // ]} | 45 * // ]} |
| 37 * TreeParserUtil.parse('e(f(g(h(),i(j))))'); | 46 * TreeParserUtil.parse('e(f(g(h(),i(j))))'); |
| 38 * | 47 * |
| 48 * @example <caption>Attributes can be applied to children at any level of |
| 49 * nesting.</caption> |
| 50 * // returns { value: 'b', attributes: ['red', 'blue'], children: [ |
| 51 * // { value: 'c', attributes: [], children: [] }, |
| 52 * // { value: 'd', attributes: ['green'], children: [] } |
| 53 * // ]} |
| 54 * TreeParserUtil.parse('b{red,blue}(c,d{green})'); |
| 55 * |
| 39 * @example <caption>flatten() converts a [sub]tree back to a string.</caption> | 56 * @example <caption>flatten() converts a [sub]tree back to a string.</caption> |
| 40 * var tree = TreeParserUtil.parse('b.com (c.com(e.com), d.com)'); | 57 * var tree = TreeParserUtil.parse('b.com (c.com(e.com), d.com)'); |
| 41 * TreeParserUtil.flatten(tree.children[0]); // returns 'c.com(e.com())' | 58 * TreeParserUtil.flatten(tree.children[0]); // returns 'c.com(e.com())' |
| 42 */ | 59 */ |
| 43 var TreeParserUtil = (function() { | 60 var TreeParserUtil = (function() { |
| 44 'use strict'; | 61 'use strict'; |
| 45 | 62 |
/**
 * Parses an input string into a tree. The grammar is an identifier,
 * optionally followed by a '{...}' attribute list, optionally followed by a
 * '(...)' child list whose entries are themselves trees.
 *
 * @param {string} input The textual tree description, e.g. 'a{x}(b,c)'.
 * @returns {{value: string, attributes: Array.<string>, children: Array}}
 *     The root node. Each entry of |children| is a node of the same form.
 * @throws {Error} If tokens remain after the root node has been parsed.
 *     Errors thrown by the helper parsers (for example on a missing
 *     identifier) propagate unchanged.
 */
function parse(input) {
  var tokenStream = lex(input);

  // The helper pops the tokens it consumes off the front of |tokenStream|,
  // so anything still present afterwards is trailing garbage.
  var result = takeIdAndChild(tokenStream);
  if (tokenStream.length !== 0) {
    throw new Error('Expected end of stream, but found "' +
                    tokenStream[0] + '".');
  }
  return result;
}
| 59 | 76 |
/**
 * Inverse of |parse|. Converts a parsed tree object into a string. Can be
 * used to forward a subtree as an argument to a nested document.
 *
 * The attribute list is emitted only when it is non-empty, while the child
 * list is always emitted (a leaf becomes 'name()'). Nodes that lack an
 * |attributes| or |children| property are treated as having empty lists, so
 * hand-built or older-format trees can be flattened too.
 *
 * @param {{value: string, attributes: (Array.<string>|undefined),
 *     children: (Array|undefined)}} tree The [sub]tree to serialize.
 * @return {string} The serialized form, e.g. 'b{red}(c(),d())'.
 */
function flatten(tree) {
  var attributes = tree.attributes || [];
  var children = tree.children || [];

  var result = tree.value;
  if (attributes.length) {
    result += '{' + attributes.join(',') + '}';
  }

  var flattenedChildren = children.map(function(child) {
    return flatten(child);
  });
  return result + '(' + flattenedChildren.join(',') + ')';
}
| 67 | 87 |
/**
 * Lexer function to convert an input string into a token stream. Splits the
 * input along whitespace, parens, braces and commas. Whitespace is removed,
 * while parens, braces and commas are preserved as standalone tokens.
 *
 * @param {string} input The input string.
 * @return {Array.<string>} The resulting token stream.
 */
function lex(input) {
  // The capturing group makes split() keep the delimiters in its output.
  // The punctuation is listed in a character class so the braces need no
  // escaping.
  var pieces = input.split(/(\s+|[(){},])/);

  var tokens = [];
  for (var i = 0; i < pieces.length; i++) {
    // Whitespace runs and the empty strings found between adjacent
    // delimiters both trim down to '' and are dropped here.
    var trimmed = pieces[i].trim();
    if (trimmed) {
      tokens.push(trimmed);
    }
  }
  return tokens;
}
| 86 | 106 |
| 87 | |
/**
 * Parses one node from the front of the token stream: an identifier, then an
 * optional attribute list, then an optional child list.
 *
 * @param {Array.<string>} tokenStream The remaining tokens; consumed tokens
 *     are removed from it.
 * @return {{value: string, attributes: Array.<string>, children: Array}}
 *     The parsed node.
 */
function takeIdAndChild(tokenStream) {
  // Each helper pops its own tokens off the front of the stream, so the calls
  // must run in grammar order: identifier, attributes, children.
  var id = takeIdentifier(tokenStream);
  var attributeList = takeAttributeList(tokenStream);
  var childList = takeChildList(tokenStream);

  return { value: id, attributes: attributeList, children: childList };
}
| 96 | 116 |
/**
 * Consumes from the stream an identifier, returning its value (as a string).
 * An identifier is a non-empty run of ASCII letters, digits, dots and hyphens.
 *
 * @param {Array.<string>} tokenStream The remaining tokens. The first token
 *     is removed from the stream even when it turns out not to be a valid
 *     identifier.
 * @return {string} The identifier token.
 * @throws {Error} If the stream is empty or its first token is not an
 *     identifier.
 */
function takeIdentifier(tokenStream) {
  if (tokenStream.length === 0) {
    throw new Error('Expected an identifier, but found end-of-stream.');
  }

  var token = tokenStream.shift();
  // Anchored at both ends so the whole token must be identifier characters,
  // not merely contain some.
  var isIdentifier = /^[a-zA-Z0-9.-]+$/.test(token);
  if (!isIdentifier) {
    throw new Error('Expected an identifier, but found "' + token + '".');
  }
  return token;
}
| 108 | 128 |
/**
 * Consumes an optional '{...}' attribute list from the front of the token
 * stream and returns the attribute identifiers it contains.
 *
 * @param {Array.<string>} tokenStream The remaining tokens; consumed tokens
 *     are removed from it.
 * @return {Array.<string>} The attribute identifiers, in source order. Empty
 *     when there is no list or the list is '{}'.
 * @throws {Error} If an opened list is not terminated by '}'.
 */
function takeAttributeList(tokenStream) {
  // Pops the head of the stream when it equals |expected| and reports
  // whether it did so.
  function consumeIf(expected) {
    var matched = tokenStream[0] == expected;
    if (matched)
      tokenStream.shift();
    return matched;
  }

  var attributes = [];

  // No opening brace: the node has no attribute list, as for 'b' in the
  // input 'a (b, c)'. Nothing is consumed.
  if (!consumeIf('{'))
    return attributes;

  // Immediately closed list, as for 'b' in the input 'a (b {}, c)'.
  if (consumeIf('}'))
    return attributes;

  // At least one identifier, with further ones each introduced by a comma.
  do {
    attributes.push(takeIdentifier(tokenStream));
  } while (consumeIf(','));

  // The list must now be closed.
  if (consumeIf('}'))
    return attributes;

  if (tokenStream.length == 0)
    throw new Error('Expected "}" or ",", but found end-of-stream.');
  throw new Error('Expected "}" or ",", but found "' + tokenStream[0] + '".');
}
| 168 |
| 169 /** |
| 110 * Consumes an optional child list from the token stream, returning a list of | 170 * Consumes an optional child list from the token stream, returning a list of |
| 111 * the parsed children. | 171 * the parsed children. |
| 112 */ | 172 */ |
| 113 function takeChildList(tokenStream) { | 173 function takeChildList(tokenStream) { |
| 114 // Remove the next token from the stream if it matches |token|. | 174 // Remove the next token from the stream if it matches |token|. |
| 115 function tryToEatA(token) { | 175 function tryToEatA(token) { |
| 116 if (tokenStream[0] == token) { | 176 if (tokenStream[0] == token) { |
| 117 tokenStream.shift(); | 177 tokenStream.shift(); |
| 118 return true; | 178 return true; |
| 119 } | 179 } |
| (...skipping 24 matching lines...) Expand all Loading... |
| 144 if (tokenStream.length == 0) | 204 if (tokenStream.length == 0) |
| 145 throw new Error('Expected ")" or ",", but found end-of-stream.'); | 205 throw new Error('Expected ")" or ",", but found end-of-stream.'); |
| 146 throw new Error('Expected ")" or ",", but found "' + tokenStream[0] + '".'); | 206 throw new Error('Expected ")" or ",", but found "' + tokenStream[0] + '".'); |
| 147 } | 207 } |
| 148 | 208 |
| 149 return { | 209 return { |
| 150 parse: parse, | 210 parse: parse, |
| 151 flatten: flatten | 211 flatten: flatten |
| 152 }; | 212 }; |
| 153 })(); | 213 })(); |
| OLD | NEW |