OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. |
| 4 // |
| 5 // ---------------------------------------------------------------------- |
| 6 // This is a very specialized tool which was created in order to support |
| 7 // adding hash values used as location markers in the LaTeX source of the |
| 8 // language specification. It is intended to take its input file as the |
| 9 // first argument and the output file name as the second argument. From |
| 10 // docs/language a typical usage would be as follows: |
| 11 // |
| 12 // dart ../../tools/addlatexhash.dart dartLangSpec.tex tmp.tex |
| 13 // |
| 14 // This will yield a normalized variant tmp.tex of the language |
| 15 // specification with hash values filled in. For more details, please |
| 16 // check the language specification source itself. |
| 17 // |
| 18 // NB: This utility assumes UN*X style line endings, \n, in the LaTeX |
| 19 // source file received as input; it will not work with other styles. |
| 20 // |
| 21 // TODO: The current version does not fill in hash values, it only |
| 22 // standardizes the LaTeX source by removing comments and normalizing |
| 23 // white space. |
| 24 |
| 25 import 'dart:io'; |
| 26 import 'dart:convert'; |
| 27 import '../pkg/crypto/lib/crypto.dart'; |
| 28 |
| 29 // Normalization of the text, i.e., removal or normalization |
| 30 // of elements that do not affect the output from latex |
| 31 |
// Matches a TeX comment that is preceded by at least one character: any
// character other than a backslash, then '%', then the rest of the line.
// NB: '.' does not match '\n', so a line terminator is never part of a match.
final commentRE = RegExp(r"[^\\]%.*");

// Matches a non-empty string that consists of whitespace only.
final whitespaceAllRE = RegExp(r"^\s+$");

// Matches a run of two or more spaces and/or tabs.
final whitespaceRE = RegExp(r"[ \t]{2,}");
| 35 |
| 36 // normalization steps |
| 37 |
// Returns [line] with the span covered by [match] cut out and [glue]
// inserted in its place; returns [line] unchanged when [match] is null.
//
// [startOffset] and [endOffset] are added to the start and end of the match
// before cutting, so a positive [startOffset] preserves the first characters
// of the match and a negative [endOffset] preserves the last ones.
//
// The adjusted span is clamped to the bounds of [line], and an end that falls
// before the start is moved up to the start. Hence, offsets that are out of
// range never throw and never duplicate text; in the degenerate case nothing
// is removed and [glue] is inserted at the (clamped) start.
cutMatch(line, match, {startOffset = 0, endOffset = 0, glue = ""}) {
  if (match == null) return line;
  var len = line.length;
  var start = match.start + startOffset;
  var end = match.end + endOffset;
  // keep 0 <= start <= end <= len, otherwise substring() throws or the
  // two pieces overlap
  if (start < 0) start = 0;
  if (start > len) start = len;
  if (end > len) end = len;
  if (end < start) end = start;
  return line.substring(0, start) + glue + line.substring(end);
}
| 47 |
// Returns [line] with the first match of [re] cut out; [startOffset],
// [endOffset] and [glue] are passed on to cutMatch unchanged. Returns [line]
// as is when [re] does not match.
cutRegexp(line, re, {startOffset = 0, endOffset = 0, glue = ""}) {
  var match = re.firstMatch(line);
  return cutMatch(line, match,
      startOffset: startOffset, endOffset: endOffset, glue: glue);
}
| 54 |
// Returns the part of [line] that precedes [match], followed by [glue];
// returns [line] unchanged when [match] is null.
//
// [offset] is added to the start of the match before cutting, so a positive
// [offset] preserves the first characters of the match. The resulting cut
// position is clamped to the bounds of [line], so an [offset] that is out of
// range never throws.
cutFromMatch(line, match, {offset = 0, glue = ""}) {
  if (match == null) return line;
  var len = line.length;
  var cut = match.start + offset;
  // keep 0 <= cut <= len, otherwise substring() throws
  if (cut < 0) cut = 0;
  if (cut > len) cut = len;
  return line.substring(0, cut) + glue;
}
| 59 |
// Returns [line] cut off at the first match of [re]; [offset] and [glue] are
// passed on to cutFromMatch unchanged. Returns [line] as is when [re] does
// not match.
cutFromRegexp(line, re, {offset = 0, glue = ""}) {
  var match = re.firstMatch(line);
  return cutFromMatch(line, match, offset: offset, glue: glue);
}
| 63 |
// Whether [line] is non-empty and consists of whitespace only.
isWsOnly(line) {
  return whitespaceAllRE.hasMatch(line);
}
// Whether [line] has a '%' as its very first character, i.e., the entire
// line is a TeX comment.
isCommentOnly(line) {
  return line.startsWith("%");
}
| 66 |
// Returns the line terminator of [line]: "\n" when [line] ends with one,
// otherwise the empty string.
justEol(line) {
  if (line.endsWith("\n")) return "\n";
  return "";
}
| 70 |
// Removes the content of a TeX comment from [line] while keeping the '%'
// that introduces it.
//
// A commentOnly line is reduced to "%\n"; in any other line the text
// following the first '%' that is not preceded by a backslash is removed, up
// to but not including the line terminator.
//
// NB: it is tempting to drop the '%' and the final newline as well, but this
// does not work. TeX itself discards exactly that, but then it adds back a
// character that depends on its state (S, M, or N), and it is tricky to
// maintain a similar state that matches the state of TeX faithfully. By
// leaving the '%' at the end of the line, TeX manages its states exactly as
// it did for the file from before stripComment.
stripComment(line) {
  return isCommentOnly(line)
      ? "%\n"
      // commentRE matches from the character before the '%'; skipping two
      // characters keeps both that character and the '%' itself
      : cutRegexp(line, commentRE, startOffset: 2);
}
| 85 |
// Normalizes the whitespace in [line]: a line that is empty once leading
// whitespace has been removed is reduced to its eol (if any); otherwise
// leading whitespace is removed entirely and every run of two or more
// spaces/tabs is replaced by a single space.
normalizeWhitespace(line) {
  var body = line.trimLeft();
  return body.isEmpty ? justEol(line) : body.replaceAll(whitespaceRE, " ");
}
| 93 |
// Removes redundant wsOnly and commentOnly lines from [lines] and returns
// the remaining lines, in their original order, as a new list:
//
//  - every commentOnly line is dropped;
//  - of each block of wsOnly lines only the first one is kept, and
//    commentOnly lines occurring inside such a block do not end it, i.e.,
//    they are treated as wsOnly there;
//  - every other line is kept.
//
// NB (from the notes of the original author): it seems to be safe to remove
// commentOnly lines after wsOnly lines, so the TeX state must be predictably
// right at that point; however, almost all variants of these rules will
// break the output, so the set of lines that is kept must not be changed
// without checking the result carefully.
multilineNormalize(lines) {
  // does 'line' succeed >0 wsOnly lines, ignoring any commentOnly lines
  // in between?
  var inBlankBlock = false;
  var newLines = [];
  for (var line in lines) {
    if (isCommentOnly(line)) {
      // never added, and it neither starts nor ends a block of wsOnly lines
      continue;
    }
    var blank = isWsOnly(line);
    // a wsOnly line is only added when it starts a new block
    if (!(blank && inBlankBlock)) newLines.add(line);
    inBlankBlock = blank;
  }
  return newLines;
}
| 151 |
| 152 // Selecting the elements in the pipeline |
| 153 |
// Single-line normalization of ordinary text: the content of comments is
// stripped first, then whitespace is normalized.
normalize(line) {
  var uncommented = stripComment(line);
  return normalizeWhitespace(uncommented);
}
// Single-line normalization that leaves whitespace untouched: only the
// content of comments is stripped.
sispNormalize(line) {
  return stripComment(line);
}
| 156 |
| 157 // Managing fragments with significant spacing |
| 158 |
// Matches a line that starts, after optional whitespace, with
// '\begin{dartCode}'.
final dartCodeBeginRE = RegExp(r"^\s*\\begin\{dartCode\}");

// Matches a line that starts, after optional whitespace, with
// '\end{dartCode}'.
final dartCodeEndRE = RegExp(r"^\s*\\end\{dartCode\}");
| 161 |
// Whether [targetRE] has a match in [line].
sispIs(line, targetRE) => targetRE.hasMatch(line);
| 165 |
// Whether [line] is matched by dartCodeBeginRE.
sispIsDartBegin(line) {
  return sispIs(line, dartCodeBeginRE);
}
// Whether [line] is matched by dartCodeEndRE.
sispIsDartEnd(line) {
  return sispIs(line, dartCodeEndRE);
}
| 168 |
| 169 // Transform input file into output file |
| 170 |
// Reads the file named by args[0], normalizes it line by line, applies the
// multi-line normalization to the result, and writes the outcome to the
// file named by args[1].
//
// Prints a usage message and throws a String unless exactly two arguments
// are given (a missing argument list counts as zero arguments). Throws a
// FileSystemException when the input file does not exist.
main([args]) {
  var argCount = args == null ? 0 : args.length;
  if (argCount != 2) {
    print("Usage: addlatexhash.dart <input-file> <output-file>");
    throw "Received $argCount arguments, expected two";
  }

  var inputFile = File(args[0]);
  var outputFile = File(args[1]);
  // checked explicitly rather than with an assert, such that the outcome
  // does not depend on whether assertions are enabled
  if (!inputFile.existsSync()) {
    throw FileSystemException("Input file does not exist", args[0]);
  }

  // single-line normalization
  var inDartCode = false;
  var newLines = [];
  for (var line in inputFile.readAsLinesSync()) {
    // the flag is updated before 'line' is processed, so the line with
    // \begin{dartCode} is handled as dartCode and the one with
    // \end{dartCode} as ordinary text
    if (sispIsDartBegin(line)) {
      inDartCode = true;
    } else if (sispIsDartEnd(line)) {
      inDartCode = false;
    }
    // readAsLinesSync strips the line terminators, the normalization steps
    // expect them to be present
    var withEol = line + "\n";
    newLines.add(inDartCode ? sispNormalize(withEol) : normalize(withEol));
  }

  // multi-line normalization
  newLines = multilineNormalize(newLines);

  // output result
  outputFile.writeAsStringSync(newLines.join());
}
OLD | NEW |