| Index: tools/addlatexhash.dart
|
| diff --git a/tools/addlatexhash.dart b/tools/addlatexhash.dart
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..f79a0b32296cffdb983eeb279a2930d41389b341
|
| --- /dev/null
|
| +++ b/tools/addlatexhash.dart
|
| @@ -0,0 +1,204 @@
|
| +// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
|
| +// for details. All rights reserved. Use of this source code is governed by a
|
| +// BSD-style license that can be found in the LICENSE file.
|
| +//
|
| +// ----------------------------------------------------------------------
|
| +// This is a very specialized tool which was created in order to support
|
| +// adding hash values used as location markers in the LaTeX source of the
|
| +// language specification. It is intended to take its input file as the
|
| +// first argument and the output file name as the second argument. From
|
| +// docs/language a typical usage would be as follows:
|
| +//
|
| +// dart ../../tools/addlatexhash.dart dartLangSpec.tex tmp.tex
|
| +//
|
| +// This will yield a normalized variant tmp.tex of the language
|
| +// specification with hash values filled in. For more details, please
|
| +// check the language specification source itself.
|
| +//
|
| +// NB: This utility assumes UN*X style line endings, \n, in the LaTeX
|
| +// source file received as input; it will not work with other styles.
|
| +//
|
| +// TODO: The current version does not fill in hash values, it only
|
| +// standardizes the LaTeX source by removing comments and normalizing
|
| +// white space.
|
| +
|
| +import 'dart:io';
|
| +import 'dart:convert';
|
| +import '../pkg/crypto/lib/crypto.dart';
|
| +
|
| +// Normalization of the text, i.e., removal or normalization
|
| +// of elements that do not affect the output from latex
|
| +
|
| +// Matches a '%' that is not directly preceded by a backslash, together
|
| +// with the single character before it and the rest of the line. Since a
|
| +// preceding character is required, a '%' in the first column of a string
|
| +// is never matched by this expression.
|
| +final commentRE = new RegExp(r"[^\\]%.*"); // NB: . does not match \n
|
| +// Matches a string that consists of one or more whitespace characters
|
| +// and nothing else; the empty string does not match.
|
| +final whitespaceAllRE = new RegExp(r"^\s+$");
|
| +// Matches a run of two or more spaces and/or tabs.
|
| +final whitespaceRE = new RegExp(r"[ \t]{2,}");
|
| +
|
| +// normalization steps
|
| +
|
| +// Returns [line] with the range covered by [match] removed and [glue]
|
| +// inserted in its place; returns [line] unchanged if [match] is null.
|
| +//
|
| +// [startOffset] and [endOffset] shift the start and the end of the removed
|
| +// range relative to the match. The shifted range is clamped to the bounds
|
| +// of [line], and an inverted range (start after end) is treated as an
|
| +// empty range at the start position, so no combination of offsets can
|
| +// cause a RangeError or make part of the line appear twice.
|
| +cutMatch(line, match, {startOffset: 0, endOffset: 0, glue: ""}) {
|
| +  if (match == null) return line;
|
| +  var len = line.length;
|
| +  var start = match.start + startOffset;
|
| +  var end = match.end + endOffset;
|
| +  // Clamp the start into [0, len].
|
| +  if (start < 0) start = 0;
|
| +  if (start > len) start = len;
|
| +  // Clamp the end into [start, len]; this also covers a negative end.
|
| +  if (end > len) end = len;
|
| +  if (end < start) end = start;
|
| +  return line.substring(0, start) + glue + line.substring(end);
|
| +}
|
| +
|
| +// Applies cutMatch to the first match of [re] in [line] (or to null when
|
| +// [re] does not match), passing [startOffset], [endOffset] and [glue]
|
| +// through unchanged.
|
| +cutRegexp(line, re, {startOffset: 0, endOffset: 0, glue: ""}) {
|
| +  var firstHit = re.firstMatch(line);
|
| +  return cutMatch(line, firstHit,
|
| +      startOffset: startOffset, endOffset: endOffset, glue: glue);
|
| +}
|
| +
|
| +// Returns the part of [line] that precedes the cut position, followed by
|
| +// [glue]; everything from the cut position to the end of [line] is
|
| +// dropped. Returns [line] unchanged if [match] is null.
|
| +//
|
| +// The cut position is the start of [match] shifted by [offset]. It is
|
| +// clamped to the bounds of [line], so that an offset pointing outside the
|
| +// line cannot cause a RangeError.
|
| +cutFromMatch(line, match, {offset: 0, glue: ""}) {
|
| +  if (match == null) return line;
|
| +  var cut = match.start + offset;
|
| +  if (cut < 0) cut = 0;
|
| +  if (cut > line.length) cut = line.length;
|
| +  return line.substring(0, cut) + glue;
|
| +}
|
| +
|
| +// Applies cutFromMatch to the first match of [re] in [line] (or to null
|
| +// when [re] does not match), passing [offset] and [glue] through unchanged.
|
| +cutFromRegexp(line, re, {offset: 0, glue: ""}) {
|
| +  var firstHit = re.firstMatch(line);
|
| +  return cutFromMatch(line, firstHit, offset: offset, glue: glue);
|
| +}
|
| +
|
| +// Whether [line] is matched by whitespaceAllRE, i.e., whether it consists
|
| +// of whitespace only; note that the empty string does not qualify.
|
| +isWsOnly(line) {
|
| +  return whitespaceAllRE.hasMatch(line);
|
| +}
|
| +// Whether [line] has a '%' as its very first character, such that the
|
| +// whole line is a comment; leading whitespace before the '%' does not count.
|
| +isCommentOnly(line) {
|
| +  return line.startsWith("%");
|
| +}
|
| +
|
| +// Returns "\n" if [line] ends with a newline, and the empty string
|
| +// otherwise; all other content of [line] is discarded.
|
| +justEol(line) {
|
| +  if (line.endsWith("\n")) return "\n";
|
| +  return "";
|
| +}
|
| +
|
| +// Removes the text of a TeX comment from [line] but keeps the '%' itself.
|
| +//
|
| +// A line that starts with '%' is reduced to "%\n"; on any other line the
|
| +// text following the first '%' matched by commentRE is removed.
|
| +//
|
| +// NB: dropping the '%' and the final newline as well looks attractive,
|
| +// because that is what TeX itself does with a comment. However, TeX then
|
| +// re-inserts a character which depends on its current state (S, M, or N),
|
| +// and mirroring that state faithfully here is tricky. Leaving the '%' at
|
| +// the end of the line lets TeX go through exactly the same states as it
|
| +// did for the file before comments were stripped.
|
| +stripComment(line) {
|
| +  if (!isCommentOnly(line)) {
|
| +    // commentRE matches the character before the '%' and the '%' itself
|
| +    // first; a startOffset of 2 keeps both and cuts only the comment text.
|
| +    return cutRegexp(line, commentRE, startOffset: 2);
|
| +  }
|
| +  return "%\n";
|
| +}
|
| +
|
| +// Normalizes the whitespace of a single [line]: leading whitespace is
|
| +// removed, and each run of two or more spaces/tabs is replaced by one
|
| +// space. A line with nothing but whitespace is reduced to its end-of-line
|
| +// (as computed by justEol).
|
| +normalizeWhitespace(line) {
|
| +  var body = line.trimLeft();
|
| +  return body.isEmpty ? justEol(line) : body.replaceAll(whitespaceRE, " ");
|
| +}
|
| +
|
| +// Collapses blocks of blank lines and comment lines in [lines] and returns
|
| +// the result as a new list; [lines] itself is only iterated over.
|
| +//
|
| +// Each line is classified using isWsOnly and isCommentOnly:
|
| +//  - commentOnly lines are never copied to the result;
|
| +//  - the first wsOnly line of a block is kept, and the wsOnly lines which
|
| +//    follow it are dropped, also when commentOnly lines occur in between;
|
| +//  - every other line is kept as it is.
|
| +multilineNormalize(lines) {
|
| +  // Scanner states, named after the kind of line(s) seen most recently.
|
| +  const inText = 0; // no blank block and no comment block is open
|
| +  const inBlankBlock = 1; // a blank block is open (a wsOnly line was kept)
|
| +  const inCommentBlock = 2; // one or more commentOnly lines were skipped
|
| +
|
| +  var state = inText;
|
| +  var kept = [];
|
| +  for (var current in lines) {
|
| +    var blank = isWsOnly(current);
|
| +    var comment = isCommentOnly(current);
|
| +    switch (state) {
|
| +      case inBlankBlock:
|
| +        // It seems to be safe to remove commentOnly lines after wsOnly
|
| +        // lines, so an open blank block absorbs both further wsOnly lines
|
| +        // and commentOnly lines; only other text ends the block.
|
| +        if (!blank && !comment) {
|
| +          state = inText;
|
| +          kept.add(current);
|
| +        }
|
| +        break;
|
| +      case inCommentBlock:
|
| +      case inText:
|
| +        if (comment) {
|
| +          // skip this commentOnly line; the block may continue
|
| +          state = inCommentBlock;
|
| +        } else {
|
| +          // a wsOnly line opens a blank block, other text opens nothing
|
| +          state = blank ? inBlankBlock : inText;
|
| +          kept.add(current);
|
| +        }
|
| +        break;
|
| +      default:
|
| +        // can never happen
|
| +        throw "Bug, please report to eernst@";
|
| +    }
|
| +  }
|
| +  return kept;
|
| +}
|
| +
|
| +// Selecting the elements in the pipeline
|
| +
|
| +// Single-line normalization for ordinary text: first the comment text is
|
| +// stripped, then the whitespace of the result is normalized.
|
| +normalize(line) {
|
| +  var withoutComment = stripComment(line);
|
| +  return normalizeWhitespace(withoutComment);
|
| +}
|
| +// Single-line normalization which only strips comment text and leaves all
|
| +// whitespace untouched ('sisp' presumably abbreviates 'significant
|
| +// spacing', cf. the section on fragments with significant spacing).
|
| +sispNormalize(line) {
|
| +  return stripComment(line);
|
| +}
|
| +
|
| +// Managing fragments with significant spacing
|
| +
|
| +// Matches a line which starts with \begin{dartCode}, optionally preceded
|
| +// by whitespace.
|
| +final dartCodeBeginRE = new RegExp(r"^\s*\\begin\{dartCode\}");
|
| +
|
| +// Matches a line which starts with \end{dartCode}, optionally preceded
|
| +// by whitespace.
|
| +final dartCodeEndRE = new RegExp(r"^\s*\\end\{dartCode\}");
|
| +
|
| +// Whether [targetRE] has a match in [line].
|
| +sispIs(line, targetRE) => targetRE.hasMatch(line);
|
| +
|
| +// Whether [line] is matched by dartCodeBeginRE, i.e., whether it opens a
|
| +// dartCode environment.
|
| +sispIsDartBegin(line) {
|
| +  return sispIs(line, dartCodeBeginRE);
|
| +}
|
| +// Whether [line] is matched by dartCodeEndRE, i.e., whether it closes a
|
| +// dartCode environment.
|
| +sispIsDartEnd(line) {
|
| +  return sispIs(line, dartCodeEndRE);
|
| +}
|
| +
|
| +// Transform input file into output file
|
| +
|
| +// Entry point: reads the LaTeX file named by the first argument,
|
| +// normalizes it, and writes the result to the file named by the second
|
| +// argument.
|
| +//
|
| +// Throws a string describing the problem if the number of arguments is
|
| +// not two, or if the input file does not exist.
|
| +main([args]) {
|
| +  if (args.length != 2) {
|
| +    print("Usage: addlatexhash.dart <input-file> <output-file>");
|
| +    throw "Received ${args.length} arguments, expected two";
|
| +  }
|
| +
|
| +  var inputFile = new File(args[0]);
|
| +  var outputFile = new File(args[1]);
|
| +  // Checked explicitly rather than with an assert, because asserts are
|
| +  // skipped when the program runs with assertions disabled.
|
| +  if (!inputFile.existsSync()) {
|
| +    throw "Input file '${args[0]}' does not exist";
|
| +  }
|
| +
|
| +  var lines = inputFile.readAsLinesSync();
|
| +  // single-line normalization
|
| +  var inDartCode = false;
|
| +  var newLines = new List();
|
| +
|
| +  for (var line in lines) {
|
| +    // The flag is updated before the line is processed: the line with
|
| +    // \begin{dartCode} is treated as part of the code fragment, whereas
|
| +    // the line with \end{dartCode} is treated as ordinary text.
|
| +    if (sispIsDartBegin(line)) {
|
| +      inDartCode = true;
|
| +    } else if (sispIsDartEnd(line)) {
|
| +      inDartCode = false;
|
| +    }
|
| +    // The normalization steps work on lines ending in "\n", so it is
|
| +    // appended to each line read from the file.
|
| +    if (inDartCode) {
|
| +      newLines.add(sispNormalize(line + "\n"));
|
| +    } else {
|
| +      newLines.add(normalize(line + "\n"));
|
| +    }
|
| +  }
|
| +
|
| +  // multi-line normalization
|
| +  newLines = multilineNormalize(newLines);
|
| +
|
| +  // output result
|
| +  outputFile.writeAsStringSync(newLines.join());
|
| +}
|
|
|