tools/addlatexhash.dart - Issue 646003002: Introduced hash valued location markers in the spec

Side by Side Diff: tools/addlatexhash.dart

Issue 646003002: Introduced hash valued location markers in the spec (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Created spec location marker test, adjusted filter Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file

	2 // for details. All rights reserved. Use of this source code is governed by a

	3 // BSD-style license that can be found in the LICENSE file.

	4 //

	5 // ----------------------------------------------------------------------

	6 // This is a very specialized tool which was created in order to support

	7 // adding hash values used as location markers in the LaTeX source of the

	8 // language specification. It is intended to be used as a filter from

	9 // the directory ../docs/language, in commands like the following:

	10 //

	11 // dart ../../tools/addlatexhash.dart dartLangSpec.tex tmp.tex
	ricow1 2014/10/14 06:09:12 outdated comment, input and output is now taken as outdated comment, input and output is now taken as parameters eernst 2014/10/14 15:53:25 Done. Show quoted text On 2014/10/14 06:09:12, ricow1 wrote: > outdated comment, input and output is now taken as parameters Done.
	12 //

	13 // This will yield a variant tmp.tex of the language specification with
	ricow1 2014/10/14 06:09:11 tmp.tex -> whatever you call the output file above tmp.tex -> whatever you call the output file above eernst 2014/10/14 15:53:25 Done. Show quoted text On 2014/10/14 06:09:11, ricow1 wrote: > tmp.tex -> whatever you call the output file above Done.
	14 // hash values filled in. For more details, please check the language
	ricow1 2014/10/14 06:09:12 well, not yet, add a todo. Maybe also state that w well, not yet, add a todo. Maybe also state that we remove the comments we can eernst 2014/10/14 15:53:25 Done. Show quoted text On 2014/10/14 06:09:12, ricow1 wrote: > well, not yet, add a todo. Maybe also state that we remove the comments we can Done.
	15 // specification source itself.

	16 //

	17 // NB: This utility assumes UN*X style line endings, \n; it will not

	18 // work with other styles.

	19

	20 import 'dart:io';

	21 import 'dart:convert';

	22 import '../pkg/crypto/lib/crypto.dart';

	23

	24 // ----------------------------------------------------------------------

	25 // Normalization of the text, i.e., removal or normalization

	26 // of elements that do not affect the output from latex

	27

	28 // regexps
	ricow1 2014/10/14 06:09:12 obvious, remove comment obvious, remove comment eernst 2014/10/14 15:53:24 Done. Show quoted text On 2014/10/14 06:09:12, ricow1 wrote: > obvious, remove comment Done.
	29

	/// Matches a line whose first character is `%`, i.e. a comment-only line.
	final commentAllRe = new RegExp(r"^%");
	/// Matches a `%` comment up to (but not including) the newline, together
	/// with the single character before the `%`. That character must not be a
	/// backslash, so an escaped percent sign (`\%`) does not start a match.
	final commentRe = new RegExp(r"[^\\]%[^\n]*");
	/// Matches a non-empty line that consists of whitespace only.
	final whitespaceAllRe = new RegExp(r"^\s+$");
	/// Matches the leading whitespace of a line plus one following character
	/// that is not a newline.
	final whitespaceLeadingRe = new RegExp(r"^\s+[^\n]");
	/// Matches a run of two or more spaces and/or tabs.
	final whitespaceRe = new RegExp(r"[ \t][ \t]+");

	35

	36 // normalization steps

	37

	/// Returns [line] with the text covered by [match] removed and [glue]
	/// inserted in its place; returns [line] unchanged when [match] is null.
	///
	/// [startOffset] and [endOffset] are added to `match.start` and `match.end`
	/// to move the edges of the removed range. The adjusted range is clamped to
	/// the bounds of [line], and an adjusted end that falls before the adjusted
	/// start is treated as an empty range, so no combination of offsets makes
	/// `substring` throw or duplicates text.
	cutMatch(line, match, {startOffset = 0, endOffset = 0, glue = ""}) {
	  if (match == null) return line;
	  var len = line.length;
	  var start = match.start + startOffset;
	  var end = match.end + endOffset;
	  if (start < 0) start = 0;
	  if (start > len) start = len;
	  if (end > len) end = len;
	  // Also covers a negative end, because start is already >= 0 here.
	  if (end < start) end = start;
	  return line.substring(0, start) + glue + line.substring(end);
	}

	47

	/// Applies [cutMatch] to the first match of [re] in [line], forwarding
	/// [startOffset], [endOffset] and [glue] unchanged.
	///
	/// `re.firstMatch` yields null when [re] does not match; that null is passed
	/// straight on to [cutMatch].
	cutRegexp(line, re, {startOffset = 0, endOffset = 0, glue = ""}) {
	  var firstHit = re.firstMatch(line);
	  return cutMatch(line, firstHit,
	      startOffset: startOffset, endOffset: endOffset, glue: glue);
	}

	54

	/// Returns the part of [line] that precedes [match], followed by [glue];
	/// returns [line] unchanged when [match] is null.
	///
	/// [offset] is added to `match.start` to move the cut position. The position
	/// is clamped to the bounds of [line] so that an out-of-range offset cannot
	/// make `substring` throw.
	cutFromMatch(line, match, {offset = 0, glue = ""}) {
	  if (match == null) return line;
	  var cutAt = match.start + offset;
	  if (cutAt < 0) cutAt = 0;
	  if (cutAt > line.length) cutAt = line.length;
	  return line.substring(0, cutAt) + glue;
	}

	59

	/// Applies [cutFromMatch] to the first match of [re] in [line], forwarding
	/// [offset] and [glue] unchanged.
	///
	/// `re.firstMatch` yields null when [re] does not match; that null is passed
	/// straight on to [cutFromMatch].
	cutFromRegexp(line, re, {offset = 0, glue = ""}) {
	  var firstHit = re.firstMatch(line);
	  return cutFromMatch(line, firstHit, offset: offset, glue: glue);
	}

	63

	/// Whether [line] matches [whitespaceAllRe].
	isWsOnly(line) => whitespaceAllRe.hasMatch(line);

	/// Whether [line] matches [commentAllRe].
	isCommentOnly(line) => commentAllRe.hasMatch(line);

	66

	/// Returns a single newline if [line] ends with one, and the empty string
	/// otherwise (including when [line] is itself empty).
	justEol(line) => line.endsWith("\n") ? "\n" : "";

	71

	/// Empties the TeX comment on [line], if there is one, but keeps its `%`.
	///
	/// Deleting everything from the `%` onwards, newline included, looks
	/// tempting but does not work: TeX does exactly that and then adds back a
	/// character that depends on its internal state (S, M, or N), and tracking
	/// that state faithfully here is tricky. Leaving a bare `%` at the end of
	/// the line lets TeX manage its states just as it did for the unstripped
	/// file.
	stripComment(line) {
	  // A line that starts with '%' is all comment: reduce it to the marker.
	  if (commentAllRe.hasMatch(line)) return "%\n";
	  // The match starts one character before the '%', so skipping two
	  // characters keeps both that character and the '%' itself.
	  return cutMatch(line, commentRe.firstMatch(line), startOffset: 2);
	}

	86

	/// Normalizes the whitespace of a single [line].
	///
	/// A whitespace-only line is reduced to its end-of-line (see [justEol]).
	/// Otherwise leading whitespace is removed and every run of two or more
	/// spaces/tabs is replaced by a single space.
	normalizeWhitespace(line) {
	  if (isWsOnly(line)) return justEol(line);
	  // The leading-whitespace match includes the first following character;
	  // endOffset -1 keeps that character.
	  var trimmed = cutRegexp(line, whitespaceLeadingRe, endOffset: -1);
	  // A match is a maximal run, so its neighbours are never space/tab and one
	  // pass yields the same text as repeatedly cutting the first match.
	  return trimmed.replaceAll(whitespaceRe, " ");
	}

	98

	/// Returns a new list with the blank and comment-only lines of [lines]
	/// thinned out; [lines] itself is left untouched.
	///
	/// - Every comment-only line (see [isCommentOnly]) is dropped.
	/// - Of each run of whitespace-only lines (see [isWsOnly]) only the first is
	///   kept. Comment-only lines inside such a run do not end it.
	/// - Every other line is kept.
	///
	/// NB: which lines get dropped is deliberate and TeX-sensitive; re-check the
	/// generated output before changing these rules.
	multilineNormalize(lines) {
	  var newLines = [];
	  // Whether a whitespace-only line has been seen since the last line that
	  // was neither whitespace-only nor comment-only.
	  var afterBlankLines = false;
	  for (var line in lines) {
	    // Comment-only lines are never emitted and never change the state.
	    if (isCommentOnly(line)) continue;
	    if (isWsOnly(line)) {
	      if (!afterBlankLines) newLines.add(line);
	      afterBlankLines = true;
	    } else {
	      newLines.add(line);
	      afterBlankLines = false;
	    }
	  }
	  return newLines;
	}

	155

	156 // select the elements in the pipeline

	157

	/// Normalizes one line of ordinary text: first the comment is emptied
	/// ([stripComment]), then the whitespace is normalized
	/// ([normalizeWhitespace]).
	normalize(line) {
	  var withoutComment = stripComment(line);
	  return normalizeWhitespace(withoutComment);
	}

	159

	/// Normalizes one line of a fragment with significant spacing: only the
	/// comment is emptied ([stripComment]); whitespace is left as it is.
	sispNormalize(line) {
	  return stripComment(line);
	}

	161

	162 // ----------------------------------------------------------------------

	163 // Managing fragments with significant spacing

	164

	/// Matches a line that starts, after optional whitespace, with
	/// `\begin{dartCode}`.
	final dartCodeBeginRe = new RegExp(r"^\s*\\begin{dartCode}");
	/// Matches a line that starts, after optional whitespace, with
	/// `\end{dartCode}`.
	final dartCodeEndRe = new RegExp(r"^\s*\\end{dartCode}");

	167

	/// Whether [targetRe] matches somewhere in [line].
	sispIs(line, targetRe) => targetRe.hasMatch(line);

	171

	/// Whether [line] matches [dartCodeBeginRe].
	sispIsDartBegin(line) => dartCodeBeginRe.firstMatch(line) != null;

	/// Whether [line] matches [dartCodeEndRe].
	sispIsDartEnd(line) => dartCodeEndRe.firstMatch(line) != null;

	174

	175 // ----------------------------------------------------------------------

	176 // main
	ricow1 2014/10/14 06:09:12 obvious, remove comment obvious, remove comment eernst 2014/10/14 15:53:24 It wasn't really meant to be unobvious, it should Show quoted text On 2014/10/14 06:09:12, ricow1 wrote: > obvious, remove comment It wasn't really meant to be unobvious, it should make the file as a whole more navigable, because of "section" dividers like the line here. Don't we use any such thing? ricow1 2014/10/15 08:29:05 I never do, if I want to structure something toget Show quoted text On 2014/10/14 15:53:24, eernst wrote: > On 2014/10/14 06:09:12, ricow1 wrote: > > obvious, remove comment > > It wasn't really meant to be unobvious, it should make the file as a whole more > navigable, because of "section" dividers like the line here. Don't we use any > such thing? I never do; if I want to structure something together logically, I will put it in a class. The problem with doing it like this is that over time people will add code in sections where it does not belong, the comments will be outdated/wrong, and people don't use a comment syntax, so it is hard to do anything with it from the command line. If the code is self-explanatory I never add a comment. There are a few cases where section headers like this make sense, e.g., if you have code that is preprocessed before compilation.
	177

	/// Entry point: reads the file named by `args[0]`, normalizes it, and
	/// writes the result to the file named by `args[1]`.
	///
	/// Prints a usage line and throws unless exactly two arguments are given.
	main([args]) {
	  if (args.length != 2) {
	    print("Usage: addlatexhash.dart <input-file> <output-file>");
	    throw "Received ${args.length} arguments, expected two";
	  }
	  var source = new File(args[0]);
	  var target = new File(args[1]);
	  assert(source.existsSync());
	  // Single-line normalization. Lines from \begin{dartCode} up to, but not
	  // including, \end{dartCode} only get their comments emptied.
	  var insideDartCode = false;
	  var normalized = [];
	  for (var rawLine in source.readAsLinesSync()) {
	    if (sispIsDartBegin(rawLine)) {
	      insideDartCode = true;
	    } else if (sispIsDartEnd(rawLine)) {
	      insideDartCode = false;
	    }
	    // readAsLinesSync drops line terminators; the normalization steps work
	    // on lines that carry their "\n", so it is put back first.
	    var withEol = rawLine + "\n";
	    normalized
	        .add(insideDartCode ? sispNormalize(withEol) : normalize(withEol));
	  }
	  // Multi-line normalization, then write everything out in one go.
	  var result = multilineNormalize(normalized);
	  target.writeAsStringSync(result.join());
	}

OLD	NEW

« tests/standalone/io/addlatexhash_test.dart ('K') | « tests/standalone/io/addlatexhash_test.dart ('k') | no next file » | no next file with comments »