Index: tools/addlatexhash.dart |
diff --git a/tools/addlatexhash.dart b/tools/addlatexhash.dart |
new file mode 100644 |
index 0000000000000000000000000000000000000000..6622f2ed50c71942e244219c12c09e6131e90cfe |
--- /dev/null |
+++ b/tools/addlatexhash.dart |
@@ -0,0 +1,211 @@ |
+// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
+// for details. All rights reserved. Use of this source code is governed by a |
+// BSD-style license that can be found in the LICENSE file. |
+// |
+// ---------------------------------------------------------------------- |
+// This is a very specialized tool which was created in order to support |
+// adding hash values used as location markers in the LaTeX source of the |
+// language specification. It is intended to be run from |
+// the directory ../docs/language, in commands like the following: |
+// |
+// dart ../../tools/addlatexhash.dart dartLangSpec.tex tmp.tex |
ricow1
2014/10/14 06:09:12
outdated comment, input and output is now taken as
eernst
2014/10/14 15:53:25
Done.
|
+// |
+// This will yield a variant tmp.tex of the language specification with |
ricow1
2014/10/14 06:09:11
tmp.tex -> whatever you call the output file above
eernst
2014/10/14 15:53:25
Done.
|
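+// hash values filled in. TODO: computing and inserting the hash values |
+// is not implemented yet; so far the tool only normalizes the text. |
+// For more details, please check the language |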
ricow1
2014/10/14 06:09:12
well, not yet, add a todo. Maybe also state that w
eernst
2014/10/14 15:53:25
Done.
|
+// specification source itself. |
+// |
+// NB: This utility assumes UN*X style line endings, \n; it will not |
+// work with other styles. |
+ |
+import 'dart:io'; |
+import 'dart:convert'; |
+import '../pkg/crypto/lib/crypto.dart'; |
+ |
+// ---------------------------------------------------------------------- |
+// Normalization of the text, i.e., removal or normalization |
+// of elements that do not affect the output from latex |
+ |
+// regexps |
ricow1
2014/10/14 06:09:12
obvious, remove comment
eernst
2014/10/14 15:53:24
Done.
|
+ |
+// Matches a string whose first character is '%' (a comment-only line). |
+final commentAllRe = new RegExp("^%"); |
+// Matches one character other than backslash, a following '%', and the |
+// rest of the text up to (not including) a newline. Since a preceding |
+// character is required, a '%' in the very first column is not matched. |
+final commentRe = new RegExp("[^\\\\]%[^\\n]*"); |
+// Matches a string that consists of one or more whitespace characters |
+// and nothing else. |
+final whitespaceAllRe = new RegExp("^\\s+\$"); |
+// Matches one or more leading whitespace characters followed by one |
+// character other than newline. |
+final whitespaceLeadingRe = new RegExp("^\\s+[^\\n]"); |
+// Matches a run of two or more spaces and/or tabs. |
+final whitespaceRe = new RegExp("[ \\t][ \\t]+"); |
+ |
+// normalization steps |
+ |
+// Returns [line] with the range covered by [match] removed and [glue] |
+// inserted in its place; returns [line] unchanged when [match] is null. |
+// The removed range runs from match.start + startOffset (inclusive) to |
+// match.end + endOffset (exclusive), clamped such that |
+// 0 <= start <= end <= line.length, so out-of-range offsets can neither |
+// raise a RangeError nor duplicate text. |
+cutMatch(line, match, {startOffset: 0, endOffset: 0, glue: ""}) { |
ricow1
2014/10/14 06:09:12
space after :
eernst
2014/10/14 15:53:24
Done.
|
+  if (match == null) return line; |
+  var len = line.length; |
+  var start = match.start + startOffset; |
+  var end = match.end + endOffset; |
+  // Keep 'start' inside the line. |
+  if (start < 0) start = 0; |
+  if (start > len) start = len; |
+  // Keep 'end' inside the line and never before 'start'. |
+  if (end > len) end = len; |
+  if (end < start) end = start; |
+  return line.substring(0, start) + glue + line.substring(end); |
+} |
+ |
+// Applies cutMatch to the first match of [re] in [line], passing the |
+// named arguments through unchanged. When [re] does not match, the |
+// match is null and cutMatch returns [line] as it is. |
+cutRegexp(line, re, {startOffset: 0, endOffset: 0, glue: ""}) { |
ricow1
2014/10/14 06:09:11
space after :
eernst
2014/10/14 15:53:25
Done.
|
+  var firstHit = re.firstMatch(line); |
+  return cutMatch(line, firstHit, |
+      startOffset: startOffset, endOffset: endOffset, glue: glue); |
ricow1
2014/10/14 06:09:11
indentation
ricow1
2014/10/14 06:09:12
indendtation
eernst
2014/10/14 15:53:25
Done.
eernst
2014/10/14 15:53:25
Done.
|
+} |
+ |
+// Returns the part of [line] before position match.start + offset, |
+// followed by [glue]; returns [line] unchanged when [match] is null. |
+// The cut position is clamped to 0..line.length (as cutMatch does for |
+// its range), so an out-of-range [offset] cannot raise a RangeError. |
+cutFromMatch(line, match, {offset: 0, glue: ""}) { |
ricow1
2014/10/14 06:09:12
space after :
eernst
2014/10/14 15:53:24
Done.
|
+  if (match == null) return line; |
+  var cut = match.start + offset; |
+  if (cut < 0) cut = 0; |
+  if (cut > line.length) cut = line.length; |
+  return line.substring(0, cut) + glue; |
+} |
+ |
+// Applies cutFromMatch to the first match of [re] in [line], passing |
+// [offset] and [glue] through unchanged. |
+cutFromRegexp(line, re, {offset: 0, glue: ""}) { |
ricow1
2014/10/14 06:09:12
space after :
eernst
2014/10/14 15:53:25
Done.
|
+  var firstHit = re.firstMatch(line); |
+  return cutFromMatch(line, firstHit, offset: offset, glue: glue); |
ricow1
2014/10/14 06:09:12
space after :
eernst
2014/10/14 15:53:25
Done.
|
+} |
+ |
+// Whether [line] consists of whitespace only (see whitespaceAllRe). |
+isWsOnly(line) => whitespaceAllRe.hasMatch(line); |
+// Whether [line] starts with '%' (see commentAllRe). |
+isCommentOnly(line) => commentAllRe.hasMatch(line); |
+ |
+// Reduces [line] to its line ending: returns "\n" when [line] ends with |
+// a newline and "" otherwise; an empty [line] is returned as it is. |
+justEol(line) { |
+  if (line.isEmpty) return line; |
+  return line.endsWith("\n") ? "\n" : ""; |
ricow1
2014/10/14 06:09:11
be consistent in using either ' or " for strings i
eernst
2014/10/14 15:53:25
Using '"' for strings, "'" for imports.
|
+} |
+ |
+// Removes the text of a TeX comment from [line] but keeps the '%' that |
+// introduces it. A comment-only line is replaced by "%\n". |
+stripComment(line) { |
+  // NB: removing everything from the '%' onwards, including the final |
+  // newline, looks tempting but does not work. TeX does exactly that, |
+  // but then adds back a character that depends on its state (S, M, or |
+  // N), and it is tricky to track that state faithfully here. So only |
+  // the content of the comment is removed; the '%' stays at the end of |
+  // the line, and TeX manages its states just as it did for the file |
+  // from before stripComment. |
+  // |
+  // startOffset 2 skips the character matched before the '%' and the |
+  // '%' itself, so both are kept. |
+  return isCommentOnly(line) |
+      ? "%\n" |
+      : cutRegexp(line, commentRe, startOffset: 2); |
ricow1
2014/10/14 06:09:11
space after :
eernst
2014/10/14 15:53:25
Done.
|
+} |
+ |
+// Reduce a wsOnly line to its eol, remove leading ws |
+// entirely, and reduce each run of two or more spaces/tabs |
+// to a single space. |
+normalizeWhitespace(line) { |
+  if (isWsOnly(line)) return justEol(line); |
+  // endOffset -1 keeps the first character after the leading ws, which |
+  // whitespaceLeadingRe includes in its match. |
+  line = cutRegexp(line, whitespaceLeadingRe, endOffset: -1); |
ricow1
2014/10/14 06:09:12
space after :
eernst
2014/10/14 15:53:25
Done.
|
+  // Each match of whitespaceRe is a maximal run of spaces/tabs, so a |
+  // single replaceAll pass gives the same result as cutting the first |
+  // match repeatedly, without rescanning the line for every run. |
+  return line.replaceAll(whitespaceRe, " "); |
ricow1
2014/10/14 06:09:12
space after :
eernst
2014/10/14 15:53:25
Done.
|
+} |
+ |
+// Reduce sequences of >1 wsOnly lines to 1, and drop commentOnly |
+// lines: no branch below adds a commentOnly line to the result. |
+// A commentOnly line directly after wsOnly lines is treated as part |
+// of the block of wsOnly lines. Returns a new list; the list passed |
+// in as 'lines' is only read. |
+multilineNormalize(lines) { |
+ var oldlines = lines; |
ricow1
2014/10/14 06:09:11
why are we doing this name mangling:
oldlines line
eernst
2014/10/14 15:53:25
Was thinking "transforming lines several times" (w
|
+ var afterBlankLines = false; // does 'line' succeed >0 empty lines? |
+ var afterCommentLines = false; // .. succeed >0 commentOnly lines? |
+ lines = new List(); |
+ for (var line in oldlines) { |
+ if (afterBlankLines && afterCommentLines) { |
+ // can never happen |
+ throw "Bug, please report to eernst@"; |
+ } else if (afterBlankLines && !afterCommentLines) { |
+ // at least one line before 'line' is wsOnly |
+ if (!isWsOnly(line)) { |
+ // blank line block ended |
+ afterCommentLines = isCommentOnly(line); |
+ // special case: it seems to be safe to remove commentOnly lines |
+ // after wsOnly lines, so the TeX state must be predictably right; |
+ // next line will then be afterCommentLines and be dropped, so |
+ // we drop the entire comment block---which is very useful; we can |
+ // also consider this comment line to be an empty line, such that |
+ // subsequent empty lines can be considered to be in a block of |
+ // empty lines; note that almost all variants of this will break.. |
+ if (afterCommentLines) { |
+ // _current_ 'line' a commentOnly here |
+ afterBlankLines = true; |
+ afterCommentLines = false; |
+ // and do not add 'line' |
+ } else { |
+ // after blanks, but current 'line' is neither blank nor comment |
+ afterBlankLines = false; |
+ lines.add(line); |
+ } |
+ } else { |
+ // blank line block continues, do not add 'line' |
ricow1
2014/10/14 06:09:12
do we really want an else clause that does nothing
eernst
2014/10/14 15:53:25
Worried about performance? Not sure about the cos
ricow1
2014/10/15 08:29:05
I would just have it as a comment, but this is fin
|
+ } |
+ } else if (!afterBlankLines && afterCommentLines) { |
+ // at least one line before 'line' is commentOnly |
+ if (!isCommentOnly(line)) { |
+ // comment line block ended |
+ afterBlankLines = isWsOnly(line); |
+ afterCommentLines = false; |
+ lines.add(line); |
+ } else { |
+ // comment line block continues, do not add 'line' |
ricow1
2014/10/14 06:09:12
same as above
eernst
2014/10/14 15:53:25
Same issue, to be resolved together.
|
+ } |
+ } else /* !afterBlankLines && !afterCommentLines */ { |
ricow1
2014/10/14 06:09:12
you have comments after the line in all other plac
eernst
2014/10/14 15:53:25
This is actually a bit different, because this com
ricow1
2014/10/15 08:29:05
Acknowledged.
|
+ // no wsOnly or commentOnly lines precede 'line' |
+ if (isWsOnly(line)) afterBlankLines = true; |
+ if (isCommentOnly(line)) afterCommentLines = true; |
+ if (!afterCommentLines) lines.add(line); |
+ // else skipping commentOnly line after nonWs, nonComment text |
+ } |
+ } |
+ return lines; |
+} |
+ |
+// select the elements in the pipeline |
+ |
+// Pipeline for ordinary text: strip the comment content first, then |
+// normalize the whitespace of what is left. |
+normalize(line) { |
+  var uncommented = stripComment(line); |
+  return normalizeWhitespace(uncommented); |
+} |
+ |
+// Pipeline for fragments with significant spacing: only the comment |
+// content is stripped, whitespace is left untouched. |
+sispNormalize(line) { |
+  return stripComment(line); |
+} |
+ |
+// ---------------------------------------------------------------------- |
+// Managing fragments with significant spacing |
+ |
+// Matches text that starts, after optional whitespace, with |
+// \begin{dartCode}. |
+final dartCodeBeginRe = new RegExp("^\\s*\\\\begin{dartCode}"); |
+// Matches text that starts, after optional whitespace, with |
+// \end{dartCode}. |
+final dartCodeEndRe = new RegExp ("^\\s*\\\\end{dartCode}"); |
+ |
+// Whether [targetRe] has a match in [line]. |
+sispIs(line, targetRe) => targetRe.hasMatch(line); |
+ |
+// Whether [line] opens a dartCode environment (see dartCodeBeginRe). |
+sispIsDartBegin(line) { |
+  return sispIs(line, dartCodeBeginRe); |
+} |
+// Whether [line] closes a dartCode environment (see dartCodeEndRe). |
+sispIsDartEnd(line) { |
+  return sispIs(line, dartCodeEndRe); |
+} |
+ |
+// ---------------------------------------------------------------------- |
+// main |
ricow1
2014/10/14 06:09:12
obvious, remove comment
eernst
2014/10/14 15:53:24
It wasn't really meant to be unobvious, it should
ricow1
2014/10/15 08:29:05
I never do, if I want to structure something toget
|
+ |
+// Reads the file named by args[0], normalizes it line by line and then |
+// across lines, and writes the result to the file named by args[1]. |
+// Throws when the number of arguments is not two, or when the input |
+// file does not exist. |
+main([args]) { |
+  // 'args' is an optional parameter, so it is null when main is invoked |
+  // without an argument list; count that as zero arguments. |
+  var argCount = args == null ? 0 : args.length; |
+  if (argCount != 2) { |
+    print("Usage: addlatexhash.dart <input-file> <output-file>"); |
+    throw "Received $argCount arguments, expected two"; |
+  } |
+ |
+  var inputFile = new File(args[0]); |
+  var outputFile = new File(args[1]); |
+  // An assert is only evaluated in checked mode, so test explicitly. |
+  if (!inputFile.existsSync()) { |
+    throw new FileSystemException( |
+        "Input file does not exist", inputFile.path); |
+  } |
+ |
+  var lines = inputFile.readAsLinesSync(); |
+  // single-line normalization |
+  var inDartCode = false; |
+  var newLines = new List(); |
+ |
+  for (var line in lines) { |
+    if (sispIsDartBegin(line)) { |
+      inDartCode = true; |
+    } else if (sispIsDartEnd(line)) { |
+      inDartCode = false; |
+    } |
+    // readAsLinesSync removes the line endings; the normalization steps |
+    // expect each line to end in "\n", so it is added back here. |
+    if (inDartCode) { |
+      newLines.add(sispNormalize(line + "\n")); |
ricow1
2014/10/14 06:09:12
maybe remove the "\n" here and join on it when wri
eernst
2014/10/14 15:53:24
Would look better, but I would need to change many
ricow1
2014/10/15 08:29:05
Acknowledged.
|
+    } else { |
+      newLines.add(normalize(line + "\n")); |
+    } |
+  } |
+ |
+  // multi-line normalization |
+  newLines = multilineNormalize(newLines); |
+ |
+  // output result |
+  outputFile.writeAsStringSync(newLines.join()); |
+} |