Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(266)

Side by Side Diff: tools/addlatexhash.dart

Issue 652993005: Working insertion of hash values; added a few labels in spec (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tests/standalone/io/addlatexhash_test_src.tex ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 // 4 //
5 // ---------------------------------------------------------------------- 5 // ----------------------------------------------------------------------
6 // This is a very specialized tool which was created in order to support 6 // This is a very specialized tool which was created in order to support
7 // adding hash values used as location markers in the LaTeX source of the 7 // adding hash values used as location markers in the LaTeX source of the
8 // language specification. It is intended to take its input file as the 8 // language specification. It is intended to take its input file as the
9 // first argument and the output file name as the second argument. From 9 // first argument, an output file name as the second argument, and a
10 // docs/language a typical usage would be as follows: 10 // hash listing file name as the third argument. From docs/language a
11 // typical usage would be as follows:
11 // 12 //
12 // dart ../../tools/addlatexhash.dart dartLangSpec.tex tmp.tex 13 // dart ../../tools/addlatexhash.dart dartLangSpec.tex out.tex hash.txt
ricow1 2014/10/27 10:08:03 hash.txt - what is this, windows 3.11? :-)
eernst 2014/11/03 14:17:46 What's wrong with that? ;-) The short file names
13 // 14 //
14 // This will yield a normalized variant tmp.tex of the language 15 // This will produce a normalized variant out.tex of the language
15 // specification with hash values filled in. For more details, please 16 // specification with hash values filled in, and a listing hash.txt of
16 // check the language specification source itself. 17 // all the hash values along with the label of their textual context
18 // (section, subsection, subsubsection, paragraph). For more details,
19 // please check the language specification source itself.
17 // 20 //
18 // NB: This utility assumes UN*X style line endings, \n, in the LaTeX 21 // NB: This utility assumes UN*X style line endings, \n, in the LaTeX
19 // source file received as input; it will not work with other styles. 22 // source file received as input; it will not work with other styles.
20 //
21 // TODO: The current version does not fill in hash values, it only
22 // standardizes the LaTeX source by removing comments and normalizing
23 // white space.
24 23
25 import 'dart:io'; 24 import 'dart:io';
26 import 'dart:convert'; 25 import 'dart:convert';
26 import '../pkg/utf/lib/utf.dart';
27 import '../pkg/crypto/lib/crypto.dart'; 27 import '../pkg/crypto/lib/crypto.dart';
28 28
29 // Normalization of the text, i.e., removal or normalization 29 // ----------------------------------------------------------------------
30 // of elements that do not affect the output from latex 30 // Normalization of the text: removal or normalization of parts that
31 // do not affect the output from latex, such as white space
31 32
32 final commentRE = new RegExp(r"[^\\]%.*"); // NB: . does not match \n 33 final commentRE = new RegExp(r"[^\\]%.*"); // NB: . does not match \n
33 final whitespaceAllRE = new RegExp(r"^\s+$"); 34 final whitespaceAllRE = new RegExp(r"^\s+$");
34 final whitespaceRE = new RegExp(r"[ \t]{2,}"); 35 final whitespaceRE = new RegExp(r"[ \t]{2,}");
35 36
36 // normalization steps 37 // Remove 'match'ing part of 'line', possibly with given offsets
37 38 // and inserting the given 'glue' to replace the match
38 cutMatch(line, match, {startOffset: 0, endOffset: 0, glue: ""}) { 39 cutMatch(line, match, {startOffset: 0, endOffset: 0, glue: ""}) {
39 if (match == null) return line; 40 if (match == null) return line;
40 var start = match.start + startOffset; 41 var start = match.start + startOffset;
41 var end = match.end + endOffset; 42 var end = match.end + endOffset;
42 var len = line.length; 43 var len = line.length;
43 if (start < 0) start = 0; 44 if (start < 0) start = 0;
44 if (end > len) end = len; 45 if (end > len) end = len;
45 return line.substring(0, start) + glue + line.substring(end); 46 return line.substring(0, start) + glue + line.substring(end);
46 } 47 }
47 48
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
142 // no wsOnly or commentOnly lines precede 'line' 143 // no wsOnly or commentOnly lines precede 'line'
143 afterBlankLines = isWsOnly(line); 144 afterBlankLines = isWsOnly(line);
144 afterCommentLines = isCommentOnly(line); 145 afterCommentLines = isCommentOnly(line);
145 if (!afterCommentLines) newLines.add(line); 146 if (!afterCommentLines) newLines.add(line);
146 // else skipping commentOnly line after nonWs, nonComment text 147 // else skipping commentOnly line after nonWs, nonComment text
147 } 148 }
148 } 149 }
149 return newLines; 150 return newLines;
150 } 151 }
151 152
152 // Selecting the elements in the pipeline 153 // Select the elements in the pipeline
153 154
154 normalize(line) => normalizeWhitespace(stripComment(line)); 155 normalize(line) => normalizeWhitespace(stripComment(line));
155 sispNormalize(line) => stripComment(line); 156 sispNormalize(line) => stripComment(line);
156 157
157 // Managing fragments with significant spacing 158 // Manage fragments with significant spacing
158 159
159 final dartCodeBeginRE = new RegExp(r"^\s*\\begin\{dartCode\}"); 160 final dartCodeBeginRE = new RegExp(r"^\s*\\begin\{dartCode\}");
160 final dartCodeEndRE = new RegExp (r"^\s*\\end\{dartCode\}"); 161 final dartCodeEndRE = new RegExp (r"^\s*\\end\{dartCode\}");
161 162
162 sispIs(line, targetRE) { 163 // Recognize begin/end line of a Dart code block
163 return targetRE.firstMatch(line) != null; 164
164 } 165 sispIs(line, targetRE) => targetRE.firstMatch(line) != null;
165
166 sispIsDartBegin(line) => sispIs(line, dartCodeBeginRE); 166 sispIsDartBegin(line) => sispIs(line, dartCodeBeginRE);
167 sispIsDartEnd(line) => sispIs(line, dartCodeEndRE); 167 sispIsDartEnd(line) => sispIs(line, dartCodeEndRE);
168 168
169 // Transform input file into output file 169 // ----------------------------------------------------------------------
170 // Removal of non-normative elements of the text (rationale, commentary)
171
172 final hashMarkRE = new RegExp(r"^\\LMHash{.*}\s*$");
173 final hashBlockTerminatorRE = new RegExp(r"\\((|sub(|sub))section|paragraph)");
174
175 // Recognize begin/end line of each block of lines getting a hash value
176 isArg(argRE, line) => argRE.firstMatch(line) != null;
Lasse Reichstein Nielsen 2014/10/28 10:12:12 => line.contains(argRE); Probably reduces to the
eernst 2014/11/03 14:17:46 Done.
177 isHashMarker(line) => isArg(hashMarkRE, line);
178 isHashBlockTerminator(line) => isArg(hashBlockTerminatorRE, line);
179
180 // Return the indices of lines satisfying the given test
181 findLineNumbers(lines, test()) {
Lasse Reichstein Nielsen 2014/10/28 10:12:13 Type of `test` is incorrect, it's typed to be null
eernst 2014/11/03 14:17:46 Interesting! In fact, having worked with types fo
182 var lineNumbers = new List();
183 var lineNumber = 0;
184 for (var line in lines) {
185 if (test(line)) lineNumbers.add(lineNumber);
186 lineNumber++;
187 }
188 return lineNumbers;
189 }
190
191 findHashLineNumbers(lines) => findLineNumbers(lines, isHashMarker);
192
193 // Return 'line' without the "\\cmdName\s*{..}" command starting at
ricow1 2014/10/27 10:08:03 use doc style comments for method comments (///) s
eernst 2014/11/03 14:17:46 Done.
194 // 'startIndex'; note that it is assumed but not checked that 'line'
195 // contains "\\cmdName\s*{..", and note that the end of the {..} block
196 // is found via brace matching (i.e., nested {..} blocks are handled),
197 // but it may break if '{' is made an active character etc.etc.
198 removeCommand(line, cmdName, startIndex) {
199 const BACKSLASH = 92; // char code for '\\'
200 const BRACE_BEGIN = 123; // char code for '{'
201 const BRACE_END = 125; // char code for '}'
Lasse Reichstein Nielsen 2014/10/28 10:12:12 Pedantry: Two spaces before '//' comments, and you
Lasse Reichstein Nielsen 2014/11/03 11:34:35 Obviously, to align it, it needs to be two *or mor
eernst 2014/11/03 14:17:46 Done.
202
203 var blockStartIndex = startIndex + cmdName.length + 1;
204 while (blockStartIndex < line.length &&
205 line.codeUnitAt(blockStartIndex) != BRACE_BEGIN) {
206 blockStartIndex++;
207 }
208 blockStartIndex++;
209 if (blockStartIndex > line.length) {
210 // caller's fault
211 throw "Bug, please report to eernst@";
212 }
213 // blockStartIndex just after '{'
214
215 var afterEscape = false; // actually after '{'
Lasse Reichstein Nielsen 2014/10/28 10:12:11 Two space before '//'. I think that is a general s
eernst 2014/11/03 14:17:47 Couldn't find it in the style guide, but done anyw
216 var braceLevel = 1; // number of '{' minus number of '}' seen
217
218 for (var index = blockStartIndex; index < line.length; index++) {
219 switch (line.codeUnitAt(index)) {
220 case BRACE_BEGIN:
221 if (afterEscape) afterEscape = false; else braceLevel++; break;
Lasse Reichstein Nielsen 2014/10/28 10:12:13 An if-with-an-else should always be put on multipl
eernst 2014/11/03 14:17:46 Done.
222 case BRACE_END:
223 if (afterEscape) afterEscape = false; else braceLevel--; break;
Lasse Reichstein Nielsen 2014/10/28 10:12:13 Consider checking if braceLevel goes negative. Oth
eernst 2014/11/03 14:17:47 Actually, the latex command will be used on the so
224 case BACKSLASH:
225 afterEscape = true; break;
Lasse Reichstein Nielsen 2014/10/28 10:12:11 Newline after ';'
eernst 2014/11/03 14:17:46 Done. Presumably this does not apply to 'for'?
Lasse Reichstein Nielsen 2014/11/03 16:32:04 Not to 'for' no. It's a matter of "only one statem
226 default:
227 afterEscape = false;
228 }
229 if (braceLevel == 0) {
230 return line.substring(0, startIndex) + line.substring(index + 1);
231 }
232 }
233 // removal failed; we consider this to mean that the input is ill-formed
234 throw "Unmatched braces";
235 }
236
237 final commentaryRE = new RegExp(r"\\commentary\s*{");
238 final rationaleRE = new RegExp(r"\\rationale\s*{");
239
240 removeCommentary(line) {
Lasse Reichstein Nielsen 2014/10/28 10:12:13 You are actively removing commentary code from the
eernst 2014/11/03 14:17:46 That wouldn't work in this case, because the strin
241 var match = commentaryRE.firstMatch(line);
242 if (match == null) return line;
243 return removeCommentary(removeCommand(line, r"commentary", match.start));
244 }
245
246 removeRationale(line) {
247 var match = rationaleRE.firstMatch(line);
248 if (match == null) return line;
249 return removeRationale(removeCommand(line, r"rationale", match.start));
250 }
251
252 // Remove commentary and rationale from 'line'
253 simplifyLine(line) {
254 var simplerLine = removeCommentary(line);
255 simplerLine = removeRationale(simplerLine);
256 simplerLine = normalizeWhitespace(simplerLine);
257 return simplerLine;
258 }
259
260 // ----------------------------------------------------------------------
261 // Recognition of line blocks, insertion of block hash into \LMHash{}
262
263 final hashMarkArgumentRE = new RegExp(r"{.*}");
Lasse Reichstein Nielsen 2014/10/28 10:12:13 Escape '{' characters.
eernst 2014/11/03 14:17:46 Done, here and in several other similar locations.
264
265 cleanupLine(line) => cutRegexp(line, commentRE, startOffset: 1).trimRight();
266
267 gatherLines(lines, startIndex, nextIndex) {
268 var gatheredLine = "";
269 var isFirst = true;
270 for (var index = startIndex; index < nextIndex; index++) {
271 var line = lines[index];
272 if (isHashBlockTerminator(line)) break;
273 if (isFirst) {
274 gatheredLine += cleanupLine(line);
275 isFirst = false;
276 } else {
277 gatheredLine += " " + cleanupLine(line);
Lasse Reichstein Nielsen 2014/10/28 10:12:13 This will take time O(lines * chars-in-lines), so
Lasse Reichstein Nielsen 2014/10/31 13:52:39 Even more "functional": lines.getRange(startInde
eernst 2014/11/03 14:17:46 Done, with some adjustments. Nice! ;)
278 }
279 }
280 return gatheredLine;
281 }
282
283 // Compute the hash value for the line block starting at 'startIndex'
284 // in 'lines' and stopping just before 'nextIndex'; SIDE EFFECT:
285 // output the simplified text and its hash value to 'listSink'
286 computeHashValue(lines, startIndex, nextIndex, listSink) {
287 final hashEncoder = new SHA1();
288 final gatheredLine = gatherLines(lines, startIndex, nextIndex);
289 final simplifiedLine = simplifyLine(gatheredLine);
290 listSink.write(" % $simplifiedLine\n");
291 hashEncoder.add(encodeUtf8(simplifiedLine));
292 return hashEncoder.close();
293 }
294
295 computeHashString(lines, startIndex, nextIndex, listSink) =>
296 CryptoUtils.bytesToHex(computeHashValue(lines,
297 startIndex,
298 nextIndex,
299 listSink));
300
301 // Compute and add hashes to \LMHash{} lines (which must be on the
302 // indices 'hashLineNumbers' of 'lines'), and emit the simplified
303 // text and hash values to 'listSink'
304 addHashMarks(lines, hashLineNumbers, listSink) {
305 if (hashLineNumbers.length == 0) return lines; // noop
306 for (var n = 0; n < hashLineNumbers.length - 1; n++) {
307 final hashIndex = hashLineNumbers[n];
308 final nextIndex = hashLineNumbers[n + 1];
309 final hashValue = computeHashString(lines,
310 hashIndex + 1,
311 nextIndex,
312 listSink);
313 lines[hashIndex] =
314 lines[hashIndex].replaceAll(hashMarkArgumentRE, "{" + hashValue + "}");
315 listSink.write(" $hashValue\n");
316 }
317
318 final hashIndex = hashLineNumbers[hashLineNumbers.length - 1];
319 final nextIndex = lines.length;
320 final hashValue = computeHashString(lines,
321 hashIndex + 1,
322 nextIndex,
323 listSink);
324 lines[hashIndex] =
325 lines[hashIndex].replaceAll(hashMarkArgumentRE, "{" + hashValue + "}");
326 listSink.write(" $hashValue\n");
327 return lines;
328 }
329
330 // ----------------------------------------------------------------------
331 // Transformation of input file to output file
170 332
171 main ([args]) { 333 main ([args]) {
172 if (args.length != 2) { 334 if (args.length != 3) {
173 print("Usage: addlatexhash.dart <input-file> <output-file>"); 335 print("Usage: addlatexhash.dart <input-file> <output-file> <list-file>");
174 throw "Received ${args.length} arguments, expected two"; 336 throw "Received ${args.length} arguments, expected three";
175 } 337 }
176 338
339 // latex source
Lasse Reichstein Nielsen 2014/10/28 10:12:12 Pedantry: It's "LaTeX" :) (Yes, I also insist on A
eernst 2014/11/03 14:17:47 Done. ;-)
177 var inputFile = new File(args[0]); 340 var inputFile = new File(args[0]);
341 assert(inputFile.existsSync());
342 var lines = inputFile.readAsLinesSync();
343
344 // latex source with 'normalized' spacing etc., and with hash values
178 var outputFile = new File(args[1]); 345 var outputFile = new File(args[1]);
179 assert(inputFile.existsSync()); 346
180 347 // hierarchical list of hash values
181 var lines = inputFile.readAsLinesSync(); 348 var listFile = new File(args[2]);
349 var listSink = listFile.openWrite();
350
182 // single-line normalization 351 // single-line normalization
183 var inDartCode = false; 352 var inDartCode = false;
184 var newLines = new List(); 353 var normalizedLines = new List();
185 354
186 for (var line in lines) { 355 for (var line in lines) {
187 if (sispIsDartBegin(line)) { 356 if (sispIsDartBegin(line)) {
188 inDartCode = true; 357 inDartCode = true;
189 } else if (sispIsDartEnd(line)) { 358 } else if (sispIsDartEnd(line)) {
190 inDartCode = false; 359 inDartCode = false;
191 } 360 }
192 if (inDartCode) { 361 if (inDartCode) {
193 newLines.add(sispNormalize(line + "\n")); 362 normalizedLines.add(sispNormalize(line + "\n"));
194 } else { 363 } else {
195 newLines.add(normalize(line + "\n")); 364 normalizedLines.add(normalize(line + "\n"));
196 } 365 }
197 } 366 }
198 367
199 // multi-line normalization 368 // multi-line normalization
200 newLines = multilineNormalize(newLines); 369 normalizedLines = multilineNormalize(normalizedLines);
201 370
202 // output result 371 // insertion of hash values
203 outputFile.writeAsStringSync(newLines.join()); 372 var hashLineNumbers = findHashLineNumbers(normalizedLines);
373 var hashMarkedLines = addHashMarks(normalizedLines,hashLineNumbers,listSink);
374
375 // output
376 outputFile.writeAsStringSync(hashMarkedLines.join());
377 listSink.close();
204 } 378 }
OLDNEW
« no previous file with comments | « tests/standalone/io/addlatexhash_test_src.tex ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698