tools/addlatexhash.dart - Issue 646003002: Introduced hash valued location markers in the spec

Unified Diff: tools/addlatexhash.dart

Issue 646003002: Introduced hash valued location markers in the spec (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Created spec location marker test, adjusted filter Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: tools/addlatexhash.dart

diff --git a/tools/addlatexhash.dart b/tools/addlatexhash.dart

new file mode 100644

index 0000000000000000000000000000000000000000..6622f2ed50c71942e244219c12c09e6131e90cfe

--- /dev/null

+++ b/tools/addlatexhash.dart

@@ -0,0 +1,211 @@

+// BSD-style license that can be found in the LICENSE file.

+//

+// ----------------------------------------------------------------------

+// This is a very specialized tool which was created in order to support

+// adding hash values used as location markers in the LaTeX source of the

+// language specification. It is intended to be used as a filter from

+// the directory ../docs/language, in commands like the following:

+//

+// dart ../../tools/addlatexhash.dart < dartLangSpec.tex >tmp.tex

ricow1 2014/10/14 06:09:12 outdated comment, input and output is now taken as

eernst 2014/10/14 15:53:25 Done.

+//

+// This will yield a variant tmp.tex of the language specification with

ricow1 2014/10/14 06:09:11 tmp.tex -> whatever you call the output file above

eernst 2014/10/14 15:53:25 Done.

+// hash values filled in. For more details, please check the language

ricow1 2014/10/14 06:09:12 well, not yet, add a todo. Maybe also state that w

eernst 2014/10/14 15:53:25 Done.

+// specification source itself.

+//

+// NB: This utility assumes UN*X style line endings, \n; it will not

+// work with other styles.

+import 'dart:io';

+import 'dart:convert';

+import '../pkg/crypto/lib/crypto.dart';

+// ----------------------------------------------------------------------

+// Normalization of the text, i.e., removal or normalization

+// of elements that do not affect the output from latex

+// regexps

ricow1 2014/10/14 06:09:12 obvious, remove comment

eernst 2014/10/14 15:53:24 Done.

+final commentAllRe = new RegExp("^%");

+final commentRe = new RegExp("[^\\\\]%[^\\n]*");

+final whitespaceAllRe = new RegExp("^\\s+\$");

+final whitespaceLeadingRe = new RegExp("^\\s+[^\\n]");

+final whitespaceRe = new RegExp("[ \\t][ \\t]+");

+// normalization steps

+cutMatch(line, match, {startOffset:0, endOffset:0, glue:""}) {

ricow1 2014/10/14 06:09:12 space after :

eernst 2014/10/14 15:53:24 Done.

+ if (match == null) return line;

+ var start = match.start + startOffset;

+ var end = match.end + endOffset;

+ var len = line.length;

+ if (start < 0) start = 0;

+ if (end > len) end = len;

+ return line.substring(0, start) + glue + line.substring(end);

+cutRegexp(line, re, {startOffset:0, endOffset:0, glue:""}) {

ricow1 2014/10/14 06:09:11 space after :

eernst 2014/10/14 15:53:25 Done.

+ return cutMatch(line, re.firstMatch(line),

+ startOffset: startOffset,

+ endOffset: endOffset,

+ glue: glue);

ricow1 2014/10/14 06:09:11 indentation

ricow1 2014/10/14 06:09:12 indentation

eernst 2014/10/14 15:53:25 Done.

+cutFromMatch(line, match, {offset:0, glue:""}) {

ricow1 2014/10/14 06:09:12 space after :

eernst 2014/10/14 15:53:24 Done.

+ if (match == null) return line;

+ return line.substring(0, match.start + offset) + glue;

+cutFromRegexp(line, re, {offset:0, glue:""}) {

ricow1 2014/10/14 06:09:12 space after :

eernst 2014/10/14 15:53:25 Done.

+ return cutFromMatch(line, re.firstMatch(line), offset:offset, glue:glue);

ricow1 2014/10/14 06:09:12 space after :

eernst 2014/10/14 15:53:25 Done.

+isWsOnly(line) => whitespaceAllRe.firstMatch(line) != null;

+isCommentOnly(line) => commentAllRe.firstMatch(line) != null;

+justEol(line) {

+ if (line.length == 0) return line;

+ return line[line.length-1] == '\n' ? "\n" : "";

ricow1 2014/10/14 06:09:11 be consistent in using either ' or " for strings i

eernst 2014/10/14 15:53:25 Using '"' for strings, "'" for imports.

+stripComment(line) {

+ // NB: it is tempting to remove everything from the '%' and out,

+ // including the final newline, if any, but this does not work.

+ // The problem is that TeX will do exactly this, but then it will

+ // add back a character that depends on its state (S, M, or N),

+ // and it is tricky to maintain a similar state that matches the

+ // state of TeX faithfully. Hence, we remove the content of

+ // comments but do not remove the comments themselves, we just

+ // leave the '%' at the end of the line and let TeX manage its

+ // states in a way that does not differ from the file from before

+ // stripComment

+ if (isCommentOnly(line)) return "%\n";

+ return cutRegexp(line, commentRe, startOffset:2);

ricow1 2014/10/14 06:09:11 space after :

eernst 2014/10/14 15:53:25 Done.

+// Reduce a wsOnly line to its eol, remove leading ws

+// entirely, and reduce multiple ws chars to one

+normalizeWhitespace(line) {

+ if (isWsOnly(line)) return justEol(line);

+ line = cutRegexp(line, whitespaceLeadingRe, endOffset:-1);

ricow1 2014/10/14 06:09:12 space after :

eernst 2014/10/14 15:53:25 Done.

+ var match;

+ while ((match = whitespaceRe.firstMatch(line)) != null) {

+ line = cutMatch(line, match, glue:" ");

ricow1 2014/10/14 06:09:12 space after :

eernst 2014/10/14 15:53:25 Done.

+ }

+ return line;

+// Reduce sequences of >1 wsOnly lines to 1,

+// and sequences of >1 commentOnly lines to 1

+multilineNormalize(lines) {

+ var oldlines = lines;

ricow1 2014/10/14 06:09:11 why are we doing this name mangling: oldlines line

eernst 2014/10/14 15:53:25 Was thinking "transforming lines several times" (w

+ var afterBlankLines = false; // does 'line' succeed >0 empty lines?

+ var afterCommentLines = false; // .. succeed >0 commentOnly lines?

+ lines = new List();

+ for (var line in oldlines) {

+ if (afterBlankLines && afterCommentLines) {

+ // can never happen

+ throw "Bug, please report to eernst@";

+ } else if (afterBlankLines && !afterCommentLines) {

+ // at least one line before 'line' is wsOnly

+ if (!isWsOnly(line)) {

+ // blank line block ended

+ afterCommentLines = isCommentOnly(line);

+ // special case: it seems to be safe to remove commentOnly lines

+ // after wsOnly lines, so the TeX state must be predictably right;

+ // next line will then be afterCommentLines and be dropped, so

+ // we drop the entire comment block---which is very useful; we can

+ // also consider this comment line to be an empty line, such that

+ // subsequent empty lines can be considered to be in a block of

+ // empty lines; note that almost all variants of this will break..

+ if (afterCommentLines) {

+ // _current_ 'line' a commentOnly here

+ afterBlankLines = true;

+ afterCommentLines = false;

+ // and do not add 'line'

+ } else {

+ // after blanks, but current 'line' is neither blank nor comment

+ afterBlankLines = false;

+ lines.add(line);

+ }

+ } else {

+ // blank line block continues, do not add 'line'

ricow1 2014/10/14 06:09:12 do we really want an else clause that does nothing

eernst 2014/10/14 15:53:25 Worried about performance? Not sure about the cos

ricow1 2014/10/15 08:29:05 I would just have it as a comment, but this is fin

+ }

+ } else if (!afterBlankLines && afterCommentLines) {

+ // at least one line before 'line' is commentOnly

+ if (!isCommentOnly(line)) {

+ // comment line block ended

+ afterBlankLines = isWsOnly(line);

+ afterCommentLines = false;

+ lines.add(line);

+ } else {

+ // comment line block continues, do not add 'line'

ricow1 2014/10/14 06:09:12 same as above

eernst 2014/10/14 15:53:25 Same issue, to be resolved together.

+ }

+ } else /* !afterBlankLines && !afterCommentLines */ {

ricow1 2014/10/14 06:09:12 you have comments after the line in all other plac

eernst 2014/10/14 15:53:25 This is actually a bit different, because this com

ricow1 2014/10/15 08:29:05 Acknowledged.

+ // no wsOnly or commentOnly lines precede 'line'

+ if (isWsOnly(line)) afterBlankLines = true;

+ if (isCommentOnly(line)) afterCommentLines = true;

+ if (!afterCommentLines) lines.add(line);

+ // else skipping commentOnly line after nonWs, nonComment text

+ }

+ return lines;

+// select the elements in the pipeline

+normalize(line) => normalizeWhitespace(stripComment(line));

+sispNormalize(line) => stripComment(line);

+// ----------------------------------------------------------------------

+// Managing fragments with significant spacing

+final dartCodeBeginRe = new RegExp("^\\s*\\\\begin{dartCode}");

+final dartCodeEndRe = new RegExp ("^\\s*\\\\end{dartCode}");

+sispIs(line, targetRe) {

+ return targetRe.firstMatch(line) != null;

+sispIsDartBegin(line) => sispIs(line, dartCodeBeginRe);

+sispIsDartEnd(line) => sispIs(line, dartCodeEndRe);

+// ----------------------------------------------------------------------

+// main

ricow1 2014/10/14 06:09:12 obvious, remove comment

eernst 2014/10/14 15:53:24 It wasn't really meant to be unobvious, it should

ricow1 2014/10/15 08:29:05 I never do, if I want to structure something toget

+main ([args]) {

+ if (args.length != 2) {

+ print("Usage: addlatexhash.dart <input-file> <output-file>");

+ throw "Received ${args.length} arguments, expected two";

+ }

+ var inputFile = new File(args[0]);

+ var outputFile = new File(args[1]);

+ assert(inputFile.existsSync());

+ var lines = inputFile.readAsLinesSync();

+ // single-line normalization

+ var inDartCode = false;

+ var newLines = new List();

+ for (var line in lines) {

+ if (sispIsDartBegin(line)) {

+ inDartCode = true;

+ } else if (sispIsDartEnd(line)) {

+ inDartCode = false;

+ }

+ if (inDartCode) {

+ newLines.add(sispNormalize(line + "\n"));

ricow1 2014/10/14 06:09:12 maybe remove the "\n" here and join on it when wri

eernst 2014/10/14 15:53:24 Would look better, but I would need to change many

ricow1 2014/10/15 08:29:05 Acknowledged.

+ } else {

+ newLines.add(normalize(line + "\n"));

+ }

+ // multi-line normalization

+ newLines = multilineNormalize(newLines);

+ // output result

+ outputFile.writeAsStringSync(newLines.join());

« tests/standalone/io/addlatexhash_test.dart ('K') | « tests/standalone/io/addlatexhash_test.dart ('k') | no next file » | no next file with comments »