tools/addlatexhash.dart - Issue 646003002: Introduced hash valued location markers in the spec

Side by Side Diff: tools/addlatexhash.dart

Issue 646003002: Introduced hash valued location markers in the spec (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Revised after 2nd review Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file

	2 // for details. All rights reserved. Use of this source code is governed by a

	3 // BSD-style license that can be found in the LICENSE file.

	4 //

	5 // ----------------------------------------------------------------------

	6 // This is a very specialized tool which was created in order to support

	7 // adding hash values used as location markers in the LaTeX source of the

	8 // language specification. It is intended to take its input file as the

	9 // first argument and the output file name as the second argument. From

	10 // docs/language a typical usage would be as follows:

	11 //

	12 // dart ../../tools/addlatexhash.dart dartLangSpec.tex tmp.tex

	13 //

	14 // This will yield a normalized variant tmp.tex of the language

	15 // specification with hash values filled in. For more details, please

	16 // check the language specification source itself.

	17 //

	18 // NB: This utility assumes UN*X style line endings, \n, in the LaTeX

	19 // source file received as input; it will not work with other styles.

	20 //

	21 // TODO: The current version does not fill in hash values, it only

	22 // standardizes the LaTeX source by removing comments and normalizing

	23 // white space.

	24

	25 import 'dart:io';

	26 import 'dart:convert';

	27 import '../pkg/crypto/lib/crypto.dart';

	28

	29 // Normalization of the text, i.e., removal or normalization

	30 // of elements that do not affect the output from latex

	31

	// A LaTeX comment plus the single character in front of it: any character
	// except a backslash, then '%', then the rest of the line. NB: '.' does
	// not match \n, so a trailing newline is never part of the match.
	final RegExp commentRE = new RegExp(r"[^\\]%.*");

	// A string made up of one or more whitespace characters and nothing else.
	final RegExp whitespaceAllRE = new RegExp(r"^\s+$");

	// A run of two or more spaces and/or tabs.
	final RegExp whitespaceRE = new RegExp(r"[ \t]{2,}");

	35

	36 // normalization steps

	37

	// Returns 'line' with the region covered by 'match' removed and 'glue'
	// inserted in its place; returns 'line' unchanged when 'match' is null.
	//
	// 'startOffset' and 'endOffset' are added to match.start and match.end
	// before cutting. A start that ends up negative is clamped to 0, and an
	// end that ends up past the end of 'line' is clamped to line.length.
	cutMatch(line, match, {startOffset = 0, endOffset = 0, glue = ""}) {
	  if (match == null) return line;
	  var start = match.start + startOffset;
	  var end = match.end + endOffset;
	  var len = line.length;
	  if (start < 0) start = 0;
	  if (end > len) end = len;
	  return line.substring(0, start) + glue + line.substring(end);
	}

	47

	// Removes the first match of the regular expression 're' from 'line' by
	// delegating to cutMatch, passing 'startOffset', 'endOffset' and 'glue'
	// through unchanged. Returns 'line' as is when 're' does not match.
	cutRegexp(line, re, {startOffset = 0, endOffset = 0, glue = ""}) {
	  return cutMatch(line, re.firstMatch(line),
	      startOffset: startOffset, endOffset: endOffset, glue: glue);
	}

	54

	// Returns the part of 'line' that precedes position match.start + offset,
	// followed by 'glue'; i.e., everything from that position to the end of
	// the line is dropped. Returns 'line' unchanged when 'match' is null.
	cutFromMatch(line, match, {offset = 0, glue = ""}) {
	  if (match == null) return line;
	  return line.substring(0, match.start + offset) + glue;
	}

	59

	// Truncates 'line' at the first match of the regular expression 're' by
	// delegating to cutFromMatch, passing 'offset' and 'glue' through
	// unchanged. Returns 'line' as is when 're' does not match.
	cutFromRegexp(line, re, {offset = 0, glue = ""}) {
	  return cutFromMatch(line, re.firstMatch(line), offset: offset, glue: glue);
	}

	63

	// True iff whitespaceAllRE matches 'line'.
	isWsOnly(line) {
	  return whitespaceAllRE.hasMatch(line);
	}

	// True iff the very first character of 'line' is '%'.
	isCommentOnly(line) {
	  return line.startsWith("%");
	}

	66

	// Yields "\n" when 'line' ends with a newline, and "" otherwise.
	justEol(line) {
	  if (line.endsWith("\n")) return "\n";
	  return "";
	}

	70

	// Empties the comment on 'line', if there is one, but keeps its '%'.
	//
	// NB: cutting everything from the '%' onwards, final newline included,
	// looks attractive but does not work. TeX itself does exactly that, but
	// it then adds back a character that depends on its state (S, M, or N),
	// and faithfully tracking that state here is tricky. So only the text of
	// the comment is removed; the '%' stays at the end of the line and TeX
	// manages its states just as it did for the file before stripComment.
	stripComment(line) {
	  // A line that starts with '%' becomes a bare comment marker.
	  if (isCommentOnly(line)) {
	    return "%\n";
	  }
	  // The match begins one character before the '%'; startOffset 2 keeps
	  // that character and the '%' itself, so just the comment text is cut.
	  var commentMatch = commentRE.firstMatch(line);
	  return cutMatch(line, commentMatch, startOffset: 2);
	}

	85

	// Normalizes the horizontal white space of 'line': a line that is empty
	// once its leading white space is trimmed is reduced to its eol (see
	// justEol); otherwise the leading white space is removed entirely and
	// each match of whitespaceRE is replaced by a single space.
	normalizeWhitespace(line) {
	  var withoutIndent = line.trimLeft();
	  return withoutIndent.isEmpty
	      ? justEol(line)
	      : withoutIndent.replaceAll(whitespaceRE, " ");
	}

	93

	// Multi-line normalization: returns a new list holding the elements of
	// 'lines' with
	//   - every commentOnly line dropped, and
	//   - every sequence of wsOnly lines reduced to its first line, where
	//     commentOnly lines occurring inside such a sequence do not
	//     interrupt it.
	// All other lines are kept, in their original order.
	//
	// This is the same result the earlier three-state formulation
	// (after-blank / after-comment / neither) produced: in each of its states
	// a commentOnly line was skipped without ending a block of blank lines,
	// and a wsOnly line was kept only when the previous non-comment line was
	// not wsOnly.
	multilineNormalize(lines) {
	  // Is the most recent non-comment line a wsOnly line?
	  var afterBlankLine = false;
	  var newLines = [];
	  for (var line in lines) {
	    // commentOnly lines never reach the output and leave the state alone.
	    if (isCommentOnly(line)) continue;
	    var blank = isWsOnly(line);
	    // Only the first line of a block of blank lines is kept.
	    if (!(blank && afterBlankLine)) newLines.add(line);
	    afterBlankLine = blank;
	  }
	  return newLines;
	}

	151

	152 // Selecting the elements in the pipeline

	153

	// Full single-line pipeline: strip the comment first, then normalize the
	// white space of what is left.
	normalize(line) {
	  var uncommented = stripComment(line);
	  return normalizeWhitespace(uncommented);
	}

	// Single-line pipeline for fragments with significant spacing: only the
	// comment is stripped, white space is left untouched.
	sispNormalize(line) {
	  return stripComment(line);
	}

	156

	157 // Managing fragments with significant spacing

	158

	// A line that opens a dartCode environment: optional leading white space
	// followed by \begin{dartCode}.
	final RegExp dartCodeBeginRE = new RegExp(r"^\s*\\begin\{dartCode\}");

	// A line that closes a dartCode environment: optional leading white space
	// followed by \end{dartCode}.
	final RegExp dartCodeEndRE = new RegExp(r"^\s*\\end\{dartCode\}");

	161

	// True iff the regular expression 'targetRE' has a match in 'line'.
	sispIs(line, targetRE) => targetRE.hasMatch(line);

	165

	// True iff 'line' matches dartCodeBeginRE.
	sispIsDartBegin(line) {
	  return sispIs(line, dartCodeBeginRE);
	}

	// True iff 'line' matches dartCodeEndRE.
	sispIsDartEnd(line) {
	  return sispIs(line, dartCodeEndRE);
	}

	168

	169 // Transform input file into output file

	170

	// Reads the file named by args[0], normalizes it, and writes the result
	// to the file named by args[1].
	//
	// Every input line gets a trailing "\n" and is normalized on its own:
	// from a \begin{dartCode} line up to, but not including, the following
	// \end{dartCode} line, spacing is significant and only comments are
	// stripped (sispNormalize); every other line goes through normalize.
	// The resulting lines are then passed through multilineNormalize and
	// concatenated into the output file.
	//
	// Prints a usage message and throws a string when the number of
	// arguments is not two (a missing argument list counts as zero), and
	// throws a FileSystemException when the input file does not exist.
	main([args]) {
	  var argCount = args == null ? 0 : args.length;
	  if (argCount != 2) {
	    print("Usage: addlatexhash.dart <input-file> <output-file>");
	    throw "Received $argCount arguments, expected two";
	  }

	  var inputFile = new File(args[0]);
	  var outputFile = new File(args[1]);
	  // Checked explicitly rather than with assert(), because assertions are
	  // skipped unless they have been enabled.
	  if (!inputFile.existsSync()) {
	    throw new FileSystemException("Input file does not exist", args[0]);
	  }

	  // single-line normalization
	  var inDartCode = false;
	  var newLines = [];
	  for (var line in inputFile.readAsLinesSync()) {
	    if (sispIsDartBegin(line)) {
	      inDartCode = true;
	    } else if (sispIsDartEnd(line)) {
	      inDartCode = false;
	    }
	    var terminated = line + "\n";
	    newLines.add(
	        inDartCode ? sispNormalize(terminated) : normalize(terminated));
	  }

	  // multi-line normalization
	  newLines = multilineNormalize(newLines);

	  // output result
	  outputFile.writeAsStringSync(newLines.join());
	}

OLD	NEW

« no previous file with comments | « tests/standalone/io/addlatexhash_test.dart ('k') | no next file » | no next file with comments »