Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Unified Diff: tools/addlatexhash.dart

Issue 646003002: Introduced hash valued location markers in the spec (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Revised after 2nd review Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tests/standalone/io/addlatexhash_test.dart ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/addlatexhash.dart
diff --git a/tools/addlatexhash.dart b/tools/addlatexhash.dart
new file mode 100644
index 0000000000000000000000000000000000000000..f79a0b32296cffdb983eeb279a2930d41389b341
--- /dev/null
+++ b/tools/addlatexhash.dart
@@ -0,0 +1,204 @@
+// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+//
+// ----------------------------------------------------------------------
+// This is a very specialized tool which was created in order to support
+// adding hash values used as location markers in the LaTeX source of the
+// language specification. It is intended to take its input file as the
+// first argument and the output file name as the second argument. From
+// docs/language a typical usage would be as follows:
+//
+// dart ../../tools/addlatexhash.dart dartLangSpec.tex tmp.tex
+//
+// This will yield a normalized variant tmp.tex of the language
+// specification with hash values filled in. For more details, please
+// check the language specification source itself.
+//
+// NB: This utility assumes UN*X style line endings, \n, in the LaTeX
+// source file received as input; it will not work with other styles.
+//
+// TODO: The current version does not fill in hash values, it only
+// standardizes the LaTeX source by removing comments and normalizing
+// white space.
+
+import 'dart:io';
+import 'dart:convert';
+import '../pkg/crypto/lib/crypto.dart';
+
+// Normalization of the text, i.e., removal or normalization
+// of elements that do not affect the output from latex
+
+// Matches a comment that is not escaped: one character other than a
+// backslash, followed by '%' and the rest of the line. The match starts
+// at the character before the '%', so a '%' that is the very first
+// character of the string is not matched by this expression.
+final commentRE = new RegExp(r"[^\\]%.*"); // NB: . does not match \n
+// Matches a string consisting of whitespace only (at least one character).
+final whitespaceAllRE = new RegExp(r"^\s+$");
+// Matches a run of two or more spaces and/or tabs.
+final whitespaceRE = new RegExp(r"[ \t]{2,}");
+
+// normalization steps
+
+// Returns [line] with the region described by [match] removed and [glue]
+// inserted in its place; returns [line] unchanged when [match] is null.
+// The removed region runs from match.start + startOffset (clamped to be
+// at least 0) to match.end + endOffset (clamped to at most line.length).
+cutMatch(line, match, {startOffset: 0, endOffset: 0, glue: ""}) {
+  if (match != null) {
+    var cutBegin = match.start + startOffset;
+    var cutEnd = match.end + endOffset;
+    var head = line.substring(0, cutBegin < 0 ? 0 : cutBegin);
+    var tail = line.substring(cutEnd > line.length ? line.length : cutEnd);
+    return head + glue + tail;
+  }
+  return line;
+}
+
+// Removes the first match of [re] from [line] by handing it to cutMatch;
+// the named arguments are passed through unchanged. When [re] does not
+// match, cutMatch receives null and returns [line] as it is.
+cutRegexp(line, re, {startOffset: 0, endOffset: 0, glue: ""}) {
+  var firstHit = re.firstMatch(line);
+  return cutMatch(line, firstHit,
+      startOffset: startOffset, endOffset: endOffset, glue: glue);
+}
+
+// Returns the part of [line] before match.start + offset, followed by
+// [glue]; returns [line] unchanged when [match] is null.
+cutFromMatch(line, match, {offset: 0, glue: ""}) {
+  if (match != null) {
+    var keepUntil = match.start + offset;
+    return line.substring(0, keepUntil) + glue;
+  }
+  return line;
+}
+
+// Truncates [line] at the first match of [re] by handing that match to
+// cutFromMatch; [offset] and [glue] are passed through unchanged.
+cutFromRegexp(line, re, {offset: 0, glue: ""}) {
+  var firstHit = re.firstMatch(line);
+  return cutFromMatch(line, firstHit, offset: offset, glue: glue);
+}
+
+// Whether [line] is matched by whitespaceAllRE.
+isWsOnly(line) {
+  return whitespaceAllRE.hasMatch(line);
+}
+// Whether [line] has '%' as its very first character.
+isCommentOnly(line) {
+  return line.startsWith("%");
+}
+
+// Returns "\n" when [line] ends with a newline, otherwise the empty string.
+justEol(line) {
+  if (line.endsWith("\n")) return "\n";
+  return "";
+}
+
+// Empties the comment, if any, in [line] but keeps the '%' itself.
+//
+// NB: deleting the whole comment, including the '%' and the final
+// newline, looks tempting but does not work. TeX does exactly that, but
+// then adds back a character that depends on its state (S, M, or N), and
+// it is tricky to maintain a state here that matches the state of TeX
+// faithfully. Hence only the content of the comment is removed: the '%'
+// is left at the end of the line, and TeX manages its states in a way
+// that does not differ from the file before stripComment.
+stripComment(line) {
+  // A line with '%' as its first character is reduced to "%\n". For other
+  // lines the commentRE match starts one character before the '%', hence
+  // startOffset 2 makes the cut begin right after the '%'.
+  return isCommentOnly(line)
+      ? "%\n"
+      : cutRegexp(line, commentRE, startOffset: 2);
+}
+
+// Normalizes the whitespace of a single line: a line that is empty after
+// trimLeft() is reduced to its eol (if it has one); for any other line the
+// leading whitespace is removed and every match of whitespaceRE is
+// replaced by a single space.
+normalizeWhitespace(line) {
+  var withoutIndent = line.trimLeft();
+  return withoutIndent.isEmpty
+      ? justEol(line)
+      : withoutIndent.replaceAll(whitespaceRE, " ");
+}
+
+// Collapses blocks of wsOnly and commentOnly lines in [lines] and returns
+// the result as a new list; [lines] itself is not modified.
+//
+// Each element of [lines] is classified with isWsOnly and isCommentOnly:
+//  - of a block of consecutive wsOnly lines, only the first one is kept;
+//  - a commentOnly line occurring in a block of wsOnly lines is treated
+//    as one more wsOnly line of that block;
+//  - every line that is neither wsOnly nor commentOnly is kept unchanged.
+//
+// NOTE(review): this was described as reducing sequences of >1
+// commentOnly lines to 1, but no branch below ever adds a commentOnly
+// line to the result, so such lines are dropped entirely -- confirm that
+// this is the intended behavior.
+multilineNormalize(lines) {
+ // State of the scan; the two flags are never true at the same time.
+ var afterBlankLines = false; // does 'line' succeed >0 empty lines?
+ var afterCommentLines = false; // .. succeed >0 commentOnly lines?
+ var newLines = new List();
+ for (var line in lines) {
+ if (afterBlankLines && afterCommentLines) {
+ // can never happen
+ throw "Bug, please report to eernst@";
+ } else if (afterBlankLines && !afterCommentLines) {
+ // at least one line before 'line' is wsOnly
+ if (!isWsOnly(line)) {
+ // blank line block ended
+ afterCommentLines = isCommentOnly(line);
+ // special case: it seems to be safe to remove commentOnly lines
+ // after wsOnly lines, so the TeX state must be predictably right;
+ // next line will then be afterCommentLines and be dropped, so
+ // we drop the entire comment block---which is very useful; we can
+ // also consider this comment line to be an empty line, such that
+ // subsequent empty lines can be considered to be in a block of
+ // empty lines; note that almost all variants of this will break..
+ if (afterCommentLines) {
+ // _current_ 'line' a commentOnly here
+ // stay in the "after blank lines" state, i.e., count this
+ // comment line as part of the blank line block
+ afterBlankLines = true;
+ afterCommentLines = false;
+ // and do not add 'line'
+ } else {
+ // after blanks, but current 'line' is neither blank nor comment
+ afterBlankLines = false;
+ newLines.add(line);
+ }
+ } else {
+ // blank line block continues, do not add 'line'
+ }
+ } else if (!afterBlankLines && afterCommentLines) {
+ // at least one line before 'line' is commentOnly
+ if (!isCommentOnly(line)) {
+ // comment line block ended
+ // 'line' is added even when it is wsOnly; in that case it starts
+ // a new blank line block
+ afterBlankLines = isWsOnly(line);
+ afterCommentLines = false;
+ newLines.add(line);
+ } else {
+ // comment line block continues, do not add 'line'
+ }
+ } else {
+ assert(!afterBlankLines && !afterCommentLines);
+ // no wsOnly or commentOnly lines precede 'line'
+ afterBlankLines = isWsOnly(line);
+ afterCommentLines = isCommentOnly(line);
+ // a wsOnly line is added here, as the first line of its block
+ if (!afterCommentLines) newLines.add(line);
+ // else skipping commentOnly line after nonWs, nonComment text
+ }
+ }
+ return newLines;
+}
+
+// Selecting the elements in the pipeline
+
+// Default per-line pipeline: strip the comment, then normalize whitespace.
+normalize(line) {
+  var uncommented = stripComment(line);
+  return normalizeWhitespace(uncommented);
+}
+// Per-line pipeline for fragments with significant spacing: the comment is
+// stripped, but whitespace is left untouched.
+sispNormalize(line) {
+  return stripComment(line);
+}
+
+// Managing fragments with significant spacing
+
+// Matches input that starts, after optional whitespace, with
+// \begin{dartCode}.
+final dartCodeBeginRE = new RegExp(r"^\s*\\begin\{dartCode\}");
+// Matches input that starts, after optional whitespace, with
+// \end{dartCode}.
+final dartCodeEndRE = new RegExp (r"^\s*\\end\{dartCode\}");
+
+// Whether [targetRE] has a match in [line].
+sispIs(line, targetRE) => targetRE.hasMatch(line);
+
+// Whether [line] is matched by dartCodeBeginRE.
+sispIsDartBegin(line) {
+  return sispIs(line, dartCodeBeginRE);
+}
+// Whether [line] is matched by dartCodeEndRE.
+sispIsDartEnd(line) {
+  return sispIs(line, dartCodeEndRE);
+}
+
+// Transform input file into output file
+
+// Reads the LaTeX file named by args[0], normalizes it, and writes the
+// result to the file named by args[1].
+//
+// Every input line is first normalized on its own: lines from a
+// \begin{dartCode} line up to, but not including, the next \end{dartCode}
+// line only have their comments stripped, because spacing is significant
+// there; all other lines also have their whitespace normalized. After
+// that, multilineNormalize is applied to the list of lines as a whole.
+//
+// Throws if the number of arguments is not two, and throws a
+// FileSystemException if the input file does not exist.
+main([args]) {
+  // 'args' is optional in the signature, so treat an omitted list as empty
+  // rather than failing on a null dereference
+  var argCount = args == null ? 0 : args.length;
+  if (argCount != 2) {
+    print("Usage: addlatexhash.dart <input-file> <output-file>");
+    throw "Received $argCount arguments, expected two";
+  }
+
+  var inputFile = new File(args[0]);
+  var outputFile = new File(args[1]);
+  // Explicit check rather than assert(): assertions are only evaluated in
+  // checked mode, so they cannot be relied upon to validate user input
+  if (!inputFile.existsSync()) {
+    throw new FileSystemException("Input file does not exist", args[0]);
+  }
+
+  // single-line normalization
+  var inDartCode = false;
+  var newLines = new List();
+  for (var line in inputFile.readAsLinesSync()) {
+    if (sispIsDartBegin(line)) {
+      inDartCode = true;
+    } else if (sispIsDartEnd(line)) {
+      // the \end{dartCode} line itself is normalized as ordinary text
+      inDartCode = false;
+    }
+    // readAsLinesSync() yields lines without terminators; the normalization
+    // steps expect the "\n" to be present, so it is added back here
+    var lineWithEol = line + "\n";
+    newLines.add(
+        inDartCode ? sispNormalize(lineWithEol) : normalize(lineWithEol));
+  }
+
+  // multi-line normalization
+  newLines = multilineNormalize(newLines);
+
+  // output result
+  outputFile.writeAsStringSync(newLines.join());
+}
« no previous file with comments | « tests/standalone/io/addlatexhash_test.dart ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698