Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 // | 4 // |
| 5 // ---------------------------------------------------------------------- | 5 // ---------------------------------------------------------------------- |
| 6 // This is a very specialized tool which was created in order to support | 6 // This is a very specialized tool which was created in order to support |
| 7 // adding hash values used as location markers in the LaTeX source of the | 7 // adding hash values used as location markers in the LaTeX source of the |
| 8 // language specification. It is intended to take its input file as the | 8 // language specification. It is intended to take its input file as the |
| 9 // first argument and the output file name as the second argument. From | 9 // first argument, an output file name as the second argument, and a |
| 10 // docs/language a typical usage would be as follows: | 10 // hash listing file name as the third argument. From docs/language a |
| 11 // typical usage would be as follows: | |
| 11 // | 12 // |
| 12 // dart ../../tools/addlatexhash.dart dartLangSpec.tex tmp.tex | 13 // dart ../../tools/addlatexhash.dart dartLangSpec.tex out.tex hash.txt |
|
ricow1
2014/10/27 10:08:03
hash.txt - what is this, windows 3.11? :-)
eernst
2014/11/03 14:17:46
What's wrong with that? ;-) The short file names
| |
| 13 // | 14 // |
| 14 // This will yield a normalized variant tmp.tex of the language | 15 // This will produce a normalized variant out.tex of the language |
| 15 // specification with hash values filled in. For more details, please | 16 // specification with hash values filled in, and a listing hash.txt of |
| 16 // check the language specification source itself. | 17 // all the hash values along with the label of their textual context |
| 18 // (section, subsection, subsubsection, paragraph). For more details, | |
| 19 // please check the language specification source itself. | |
| 17 // | 20 // |
| 18 // NB: This utility assumes UN*X style line endings, \n, in the LaTeX | 21 // NB: This utility assumes UN*X style line endings, \n, in the LaTeX |
| 19 // source file received as input; it will not work with other styles. | 22 // source file received as input; it will not work with other styles. |
| 20 // | |
| 21 // TODO: The current version does not fill in hash values, it only | |
| 22 // standardizes the LaTeX source by removing comments and normalizing | |
| 23 // white space. | |
| 24 | 23 |
| 25 import 'dart:io'; | 24 import 'dart:io'; |
| 26 import 'dart:convert'; | 25 import 'dart:convert'; |
| 26 import '../pkg/utf/lib/utf.dart'; | |
| 27 import '../pkg/crypto/lib/crypto.dart'; | 27 import '../pkg/crypto/lib/crypto.dart'; |
| 28 | 28 |
| 29 // Normalization of the text, i.e., removal or normalization | 29 // ---------------------------------------------------------------------- |
| 30 // of elements that do not affect the output from latex | 30 // Normalization of the text: removal or normalization of parts that |
| 31 // do not affect the output from latex, such as white space | |
| 31 | 32 |
| 32 final commentRE = new RegExp(r"[^\\]%.*"); // NB: . does not match \n | 33 final commentRE = new RegExp(r"[^\\]%.*"); // NB: . does not match \n |
| 33 final whitespaceAllRE = new RegExp(r"^\s+$"); | 34 final whitespaceAllRE = new RegExp(r"^\s+$"); |
| 34 final whitespaceRE = new RegExp(r"[ \t]{2,}"); | 35 final whitespaceRE = new RegExp(r"[ \t]{2,}"); |
| 35 | 36 |
| 36 // normalization steps | 37 // Remove 'match'ing part of 'line', possibly with given offsets |
| 37 | 38 // and inserting the given 'glue' to replace the match |
| 38 cutMatch(line, match, {startOffset: 0, endOffset: 0, glue: ""}) { | 39 cutMatch(line, match, {startOffset: 0, endOffset: 0, glue: ""}) { |
| 39 if (match == null) return line; | 40 if (match == null) return line; |
| 40 var start = match.start + startOffset; | 41 var start = match.start + startOffset; |
| 41 var end = match.end + endOffset; | 42 var end = match.end + endOffset; |
| 42 var len = line.length; | 43 var len = line.length; |
| 43 if (start < 0) start = 0; | 44 if (start < 0) start = 0; |
| 44 if (end > len) end = len; | 45 if (end > len) end = len; |
| 45 return line.substring(0, start) + glue + line.substring(end); | 46 return line.substring(0, start) + glue + line.substring(end); |
| 46 } | 47 } |
| 47 | 48 |
| (...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 142 // no wsOnly or commentOnly lines precede 'line' | 143 // no wsOnly or commentOnly lines precede 'line' |
| 143 afterBlankLines = isWsOnly(line); | 144 afterBlankLines = isWsOnly(line); |
| 144 afterCommentLines = isCommentOnly(line); | 145 afterCommentLines = isCommentOnly(line); |
| 145 if (!afterCommentLines) newLines.add(line); | 146 if (!afterCommentLines) newLines.add(line); |
| 146 // else skipping commentOnly line after nonWs, nonComment text | 147 // else skipping commentOnly line after nonWs, nonComment text |
| 147 } | 148 } |
| 148 } | 149 } |
| 149 return newLines; | 150 return newLines; |
| 150 } | 151 } |
| 151 | 152 |
| 152 // Selecting the elements in the pipeline | 153 // Select the elements in the pipeline |
| 153 | 154 |
| 154 normalize(line) => normalizeWhitespace(stripComment(line)); | 155 normalize(line) => normalizeWhitespace(stripComment(line)); |
| 155 sispNormalize(line) => stripComment(line); | 156 sispNormalize(line) => stripComment(line); |
| 156 | 157 |
| 157 // Managing fragments with significant spacing | 158 // Manage fragments with significant spacing |
| 158 | 159 |
| 159 final dartCodeBeginRE = new RegExp(r"^\s*\\begin\{dartCode\}"); | 160 final dartCodeBeginRE = new RegExp(r"^\s*\\begin\{dartCode\}"); |
| 160 final dartCodeEndRE = new RegExp (r"^\s*\\end\{dartCode\}"); | 161 final dartCodeEndRE = new RegExp (r"^\s*\\end\{dartCode\}"); |
| 161 | 162 |
| 162 sispIs(line, targetRE) { | 163 // Recognize begin/end line of a Dart code block |
| 163 return targetRE.firstMatch(line) != null; | 164 |
| 164 } | 165 sispIs(line, targetRE) => targetRE.firstMatch(line) != null; |
| 165 | |
| 166 sispIsDartBegin(line) => sispIs(line, dartCodeBeginRE); | 166 sispIsDartBegin(line) => sispIs(line, dartCodeBeginRE); |
| 167 sispIsDartEnd(line) => sispIs(line, dartCodeEndRE); | 167 sispIsDartEnd(line) => sispIs(line, dartCodeEndRE); |
| 168 | 168 |
| 169 // Transform input file into output file | 169 // ---------------------------------------------------------------------- |
| 170 // Removal of non-normative elements of the text (rationale, commentary) | |
| 171 | |
| 172 final hashMarkRE = new RegExp(r"^\\LMHash{.*}\s*$"); | |
| 173 final hashBlockTerminatorRE = new RegExp(r"\\((|sub(|sub))section|paragraph)"); | |
| 174 | |
| 175 // Recognize begin/end line of each block of lines getting a hash value | |
| 176 isArg(argRE, line) => argRE.firstMatch(line) != null; | |
|
Lasse Reichstein Nielsen
2014/10/28 10:12:12
=> line.contains(argRE);
Probably reduces to the
eernst
2014/11/03 14:17:46
Done.
| |
| 177 isHashMarker(line) => isArg(hashMarkRE, line); | |
| 178 isHashBlockTerminator(line) => isArg(hashBlockTerminatorRE, line); | |
| 179 | |
| 180 // Return the indices of lines satisfying the given test | |
| 181 findLineNumbers(lines, test()) { | |
|
Lasse Reichstein Nielsen
2014/10/28 10:12:13
Type of `test` is incorrect, it's typed to be null
eernst
2014/11/03 14:17:46
Interesting! In fact, having worked with types fo
| |
| 182 var lineNumbers = new List(); | |
| 183 var lineNumber = 0; | |
| 184 for (var line in lines) { | |
| 185 if (test(line)) lineNumbers.add(lineNumber); | |
| 186 lineNumber++; | |
| 187 } | |
| 188 return lineNumbers; | |
| 189 } | |
| 190 | |
| 191 findHashLineNumbers(lines) => findLineNumbers(lines, isHashMarker); | |
| 192 | |
| 193 // Return 'line' without the "\\cmdName\s*{..}" command starting at | |
|
ricow1
2014/10/27 10:08:03
use doc style comments for method comments (///) s
eernst
2014/11/03 14:17:46
Done.
| |
| 194 // 'startIndex'; note that it is assumed but not checked that 'line' | |
| 195 // contains "\\cmdName\s*{..", and note that the end of the {..} block | |
| 196 // is found via brace matching (i.e., nested {..} blocks are handled), | |
| 197 // but it may break if '{' is made an active character etc.etc. | |
| 198 removeCommand(line, cmdName, startIndex) { | |
| 199 const BACKSLASH = 92; // char code for '\\' | |
| 200 const BRACE_BEGIN = 123; // char code for '{' | |
| 201 const BRACE_END = 125; // char code for '}' | |
|
Lasse Reichstein Nielsen
2014/10/28 10:12:12
Pedantry: Two spaces before '//' comments, and you
Lasse Reichstein Nielsen
2014/11/03 11:34:35
Obviously, to align it, it needs to be two *or mor
eernst
2014/11/03 14:17:46
Done.
| |
| 202 | |
| 203 var blockStartIndex = startIndex + cmdName.length + 1; | |
| 204 while (blockStartIndex < line.length && | |
| 205 line.codeUnitAt(blockStartIndex) != BRACE_BEGIN) { | |
| 206 blockStartIndex++; | |
| 207 } | |
| 208 blockStartIndex++; | |
| 209 if (blockStartIndex > line.length) { | |
| 210 // caller's fault | |
| 211 throw "Bug, please report to eernst@"; | |
| 212 } | |
| 213 // blockStartIndex just after '{' | |
| 214 | |
| 215 var afterEscape = false; // actually after '{' | |
|
Lasse Reichstein Nielsen
2014/10/28 10:12:11
Two space before '//'. I think that is a general s
eernst
2014/11/03 14:17:47
Couldn't find it in the style guide, but done anyw
| |
| 216 var braceLevel = 1; // number of '{' minus number of '}' seen | |
| 217 | |
| 218 for (var index = blockStartIndex; index < line.length; index++) { | |
| 219 switch (line.codeUnitAt(index)) { | |
| 220 case BRACE_BEGIN: | |
| 221 if (afterEscape) afterEscape = false; else braceLevel++; break; | |
|
Lasse Reichstein Nielsen
2014/10/28 10:12:13
An if-with-an-else should always be put on multipl
eernst
2014/11/03 14:17:46
Done.
| |
| 222 case BRACE_END: | |
| 223 if (afterEscape) afterEscape = false; else braceLevel--; break; | |
|
Lasse Reichstein Nielsen
2014/10/28 10:12:13
Consider checking if braceLevel goes negative.
Oth
eernst
2014/11/03 14:17:47
Actually, the latex command will be used on the so
| |
| 224 case BACKSLASH: | |
| 225 afterEscape = true; break; | |
|
Lasse Reichstein Nielsen
2014/10/28 10:12:11
Newline after ';'
eernst
2014/11/03 14:17:46
Done. Presumably this does not apply to 'for'?
Lasse Reichstein Nielsen
2014/11/03 16:32:04
Not to 'for' no. It's a matter of "only one statem
| |
| 226 default: | |
| 227 afterEscape = false; | |
| 228 } | |
| 229 if (braceLevel == 0) { | |
| 230 return line.substring(0, startIndex) + line.substring(index + 1); | |
| 231 } | |
| 232 } | |
| 233 // removal failed; we consider this to mean that the input is ill-formed | |
| 234 throw "Unmatched braces"; | |
| 235 } | |
| 236 | |
| 237 final commentaryRE = new RegExp(r"\\commentary\s*{"); | |
| 238 final rationaleRE = new RegExp(r"\\rationale\s*{"); | |
| 239 | |
| 240 removeCommentary(line) { | |
|
Lasse Reichstein Nielsen
2014/10/28 10:12:13
You are actively removing commentary code from the
eernst
2014/11/03 14:17:46
That wouldn't work in this case, because the strin
| |
| 241 var match = commentaryRE.firstMatch(line); | |
| 242 if (match == null) return line; | |
| 243 return removeCommentary(removeCommand(line, r"commentary", match.start)); | |
| 244 } | |
| 245 | |
| 246 removeRationale(line) { | |
| 247 var match = rationaleRE.firstMatch(line); | |
| 248 if (match == null) return line; | |
| 249 return removeRationale(removeCommand(line, r"rationale", match.start)); | |
| 250 } | |
| 251 | |
| 252 // Remove commentary and rationale from 'line' | |
| 253 simplifyLine(line) { | |
| 254 var simplerLine = removeCommentary(line); | |
| 255 simplerLine = removeRationale(simplerLine); | |
| 256 simplerLine = normalizeWhitespace(simplerLine); | |
| 257 return simplerLine; | |
| 258 } | |
| 259 | |
| 260 // ---------------------------------------------------------------------- | |
| 261 // Recognition of line blocks, insertion of block hash into \LMHash{} | |
| 262 | |
| 263 final hashMarkArgumentRE = new RegExp(r"{.*}"); | |
|
Lasse Reichstein Nielsen
2014/10/28 10:12:13
Escape '{' characters.
eernst
2014/11/03 14:17:46
Done, here and in several other similar locations.
| |
| 264 | |
| 265 cleanupLine(line) => cutRegexp(line, commentRE, startOffset: 1).trimRight(); | |
| 266 | |
| 267 gatherLines(lines, startIndex, nextIndex) { | |
| 268 var gatheredLine = ""; | |
| 269 var isFirst = true; | |
| 270 for (var index = startIndex; index < nextIndex; index++) { | |
| 271 var line = lines[index]; | |
| 272 if (isHashBlockTerminator(line)) break; | |
| 273 if (isFirst) { | |
| 274 gatheredLine += cleanupLine(line); | |
| 275 isFirst = false; | |
| 276 } else { | |
| 277 gatheredLine += " " + cleanupLine(line); | |
|
Lasse Reichstein Nielsen
2014/10/28 10:12:13
This will take time O(lines * chars-in-lines), so
Lasse Reichstein Nielsen
2014/10/31 13:52:39
Even more "functional":
lines.getRange(startInde
eernst
2014/11/03 14:17:46
Done, with some adjustments. Nice! ;)
| |
| 278 } | |
| 279 } | |
| 280 return gatheredLine; | |
| 281 } | |
| 282 | |
| 283 // Compute the hash value for the line block starting at 'startIndex' | |
| 284 // in 'lines' and stopping just before 'nextIndex'; SIDE EFFECT: | |
| 285 // output the simplified text and its hash value to 'listSink' | |
| 286 computeHashValue(lines, startIndex, nextIndex, listSink) { | |
| 287 final hashEncoder = new SHA1(); | |
| 288 final gatheredLine = gatherLines(lines, startIndex, nextIndex); | |
| 289 final simplifiedLine = simplifyLine(gatheredLine); | |
| 290 listSink.write(" % $simplifiedLine\n"); | |
| 291 hashEncoder.add(encodeUtf8(simplifiedLine)); | |
| 292 return hashEncoder.close(); | |
| 293 } | |
| 294 | |
| 295 computeHashString(lines, startIndex, nextIndex, listSink) => | |
| 296 CryptoUtils.bytesToHex(computeHashValue(lines, | |
| 297 startIndex, | |
| 298 nextIndex, | |
| 299 listSink)); | |
| 300 | |
| 301 // Compute and add hashes to \LMHash{} lines (which must be on the | |
| 302 // indices 'hashLineNumbers' of 'lines'), and emit the simplified | |
| 303 // text and hash values to 'listSink' | |
| 304 addHashMarks(lines, hashLineNumbers, listSink) { | |
| 305 if (hashLineNumbers.length == 0) return lines; // noop | |
| 306 for (var n = 0; n < hashLineNumbers.length - 1; n++) { | |
| 307 final hashIndex = hashLineNumbers[n]; | |
| 308 final nextIndex = hashLineNumbers[n + 1]; | |
| 309 final hashValue = computeHashString(lines, | |
| 310 hashIndex + 1, | |
| 311 nextIndex, | |
| 312 listSink); | |
| 313 lines[hashIndex] = | |
| 314 lines[hashIndex].replaceAll(hashMarkArgumentRE, "{" + hashValue + "}"); | |
| 315 listSink.write(" $hashValue\n"); | |
| 316 } | |
| 317 | |
| 318 final hashIndex = hashLineNumbers[hashLineNumbers.length - 1]; | |
| 319 final nextIndex = lines.length; | |
| 320 final hashValue = computeHashString(lines, | |
| 321 hashIndex + 1, | |
| 322 nextIndex, | |
| 323 listSink); | |
| 324 lines[hashIndex] = | |
| 325 lines[hashIndex].replaceAll(hashMarkArgumentRE, "{" + hashValue + "}"); | |
| 326 listSink.write(" $hashValue\n"); | |
| 327 return lines; | |
| 328 } | |
| 329 | |
| 330 // ---------------------------------------------------------------------- | |
| 331 // Transformation of input file to output file | |
| 170 | 332 |
| 171 main ([args]) { | 333 main ([args]) { |
| 172 if (args.length != 2) { | 334 if (args.length != 3) { |
| 173 print("Usage: addlatexhash.dart <input-file> <output-file>"); | 335 print("Usage: addlatexhash.dart <input-file> <output-file> <list-file>"); |
| 174 throw "Received ${args.length} arguments, expected two"; | 336 throw "Received ${args.length} arguments, expected three"; |
| 175 } | 337 } |
| 176 | 338 |
| 339 // latex source | |
|
Lasse Reichstein Nielsen
2014/10/28 10:12:12
Pedantry: It's "LaTeX" :)
(Yes, I also insist on A
eernst
2014/11/03 14:17:47
Done. ;-)
| |
| 177 var inputFile = new File(args[0]); | 340 var inputFile = new File(args[0]); |
| 341 assert(inputFile.existsSync()); | |
| 342 var lines = inputFile.readAsLinesSync(); | |
| 343 | |
| 344 // latex source with 'normalized' spacing etc., and with hash values | |
| 178 var outputFile = new File(args[1]); | 345 var outputFile = new File(args[1]); |
| 179 assert(inputFile.existsSync()); | 346 |
| 180 | 347 // hierarchical list of hash values |
| 181 var lines = inputFile.readAsLinesSync(); | 348 var listFile = new File(args[2]); |
| 349 var listSink = listFile.openWrite(); | |
| 350 | |
| 182 // single-line normalization | 351 // single-line normalization |
| 183 var inDartCode = false; | 352 var inDartCode = false; |
| 184 var newLines = new List(); | 353 var normalizedLines = new List(); |
| 185 | 354 |
| 186 for (var line in lines) { | 355 for (var line in lines) { |
| 187 if (sispIsDartBegin(line)) { | 356 if (sispIsDartBegin(line)) { |
| 188 inDartCode = true; | 357 inDartCode = true; |
| 189 } else if (sispIsDartEnd(line)) { | 358 } else if (sispIsDartEnd(line)) { |
| 190 inDartCode = false; | 359 inDartCode = false; |
| 191 } | 360 } |
| 192 if (inDartCode) { | 361 if (inDartCode) { |
| 193 newLines.add(sispNormalize(line + "\n")); | 362 normalizedLines.add(sispNormalize(line + "\n")); |
| 194 } else { | 363 } else { |
| 195 newLines.add(normalize(line + "\n")); | 364 normalizedLines.add(normalize(line + "\n")); |
| 196 } | 365 } |
| 197 } | 366 } |
| 198 | 367 |
| 199 // multi-line normalization | 368 // multi-line normalization |
| 200 newLines = multilineNormalize(newLines); | 369 normalizedLines = multilineNormalize(normalizedLines); |
| 201 | 370 |
| 202 // output result | 371 // insertion of hash values |
| 203 outputFile.writeAsStringSync(newLines.join()); | 372 var hashLineNumbers = findHashLineNumbers(normalizedLines); |
| 373 var hashMarkedLines = addHashMarks(normalizedLines,hashLineNumbers,listSink); | |
| 374 | |
| 375 // output | |
| 376 outputFile.writeAsStringSync(hashMarkedLines.join()); | |
| 377 listSink.close(); | |
| 204 } | 378 } |
| OLD | NEW |