Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(316)

Side by Side Diff: tools/addlatexhash.dart

Issue 646003002: Introduced hash valued location markers in the spec (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Adjusted after review Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4 //
5 // ----------------------------------------------------------------------
6 // This is a very specialized tool which was created in order to support
7 // adding hash values used as location markers in the LaTeX source of the
8 // language specification. It is intended to take its input file as the
9 // first argument and the output file name as the second argument. From
10 // docs/language a typical usage would be as follows:
11 //
12 // dart ../../tools/addlatexhash.dart dartLangSpec.tex tmp.tex
13 //
14 // This will yield a normalized variant tmp.tex of the language
15 // specification with hash values filled in. For more details, please
16 // check the language specification source itself.
17 //
18 // NB: This utility assumes UN*X style line endings, \n, in the LaTeX
19 // source file received as input; it will not work with other styles.
20 //
21 // TODO: The current version does not fill in hash values, it only
22 // standardizes the LaTeX source by removing comments and normalizing
23 // white space.
24
25 import 'dart:io';
26 import 'dart:convert';
27 import '../pkg/crypto/lib/crypto.dart';
28
29 // ----------------------------------------------------------------------
30 // Normalization of the text, i.e., removal or normalization
31 // of elements that do not affect the output from latex
32
// Matches any line whose very first character is '%', i.e. a line that
// consists of a LaTeX comment only.
final commentAllRe = new RegExp(r"^%");
Lasse Reichstein Nielsen 2014/10/15 09:13:17 Using a RegExp for this is overkill, just do strin
eernst 2014/10/15 13:19:27 Looks more meaningful with "RE". Done.
// Matches a '%' that is preceded by any character other than a backslash,
// together with that preceding character and everything after the '%' up
// to (but not including) the next newline.
final commentRe = new RegExp(r"[^\\]%[^\n]*");
Lasse Reichstein Nielsen 2014/10/15 09:13:17 I recommend using raw strings for RegExp sources:
eernst 2014/10/15 13:19:27 Indeed; added the final "*", done.
Lasse Reichstein Nielsen 2014/10/15 14:09:57 Also just noticed that as a regexp, "[^n]" is equi
eernst 2014/10/15 14:26:41 Cool! Done + Added a comment, just in case someon
// Matches a non-empty string that consists of whitespace only.
final whitespaceAllRe = new RegExp(r"^\s+$");
// Matches leading whitespace plus the first following character that is
// not a newline.
final whitespaceLeadingRe = new RegExp(r"^\s+[^\n]");
// Matches a run of two or more spaces and/or tabs.
final whitespaceRe = new RegExp(r"[ \t][ \t]+");
Lasse Reichstein Nielsen 2014/10/15 09:13:17 Shorter regexp possible: final whitespaceRE = ne
eernst 2014/10/15 13:19:27 Done.
38
39 // normalization steps
40
// Returns [line] with the region covered by [match] cut out and [glue]
// inserted in its place; returns [line] unchanged when [match] is null.
//
// [startOffset] is added to the start of the match and [endOffset] to its
// end before cutting, so the removed region can be shrunk or widened.
// The adjusted start is clamped to 0 and the adjusted end to the length
// of [line].
cutMatch(line, match, {startOffset = 0, endOffset = 0, glue = ""}) {
  if (match == null) return line;
  var start = match.start + startOffset;
  var end = match.end + endOffset;
  var len = line.length;
  // Keep the cut region within the bounds of the line.
  if (start < 0) start = 0;
  if (end > len) end = len;
  return line.substring(0, start) + glue + line.substring(end);
}
50
// Cuts the first match of the regular expression [re] out of [line] by
// delegating to cutMatch; [startOffset], [endOffset] and [glue] are passed
// through unchanged. Returns [line] unchanged when [re] does not match.
cutRegexp(line, re, {startOffset = 0, endOffset = 0, glue = ""}) {
  return cutMatch(line, re.firstMatch(line),
      startOffset: startOffset, endOffset: endOffset, glue: glue);
}
57
// Returns the part of [line] that precedes the start of [match], shifted
// by [offset], followed by [glue]; everything from that point on is
// dropped. Returns [line] unchanged when [match] is null.
cutFromMatch(line, match, {offset = 0, glue = ""}) {
  if (match == null) return line;
  return line.substring(0, match.start + offset) + glue;
}
62
// Truncates [line] at the first match of the regular expression [re] by
// delegating to cutFromMatch; [offset] and [glue] are passed through
// unchanged. Returns [line] unchanged when [re] does not match.
cutFromRegexp(line, re, {offset = 0, glue = ""}) {
  return cutFromMatch(line, re.firstMatch(line), offset: offset, glue: glue);
}
66
// Whether [line] is non-empty and consists of whitespace only.
isWsOnly(line) => whitespaceAllRe.hasMatch(line);
// Whether [line] starts with '%', i.e. is a comment-only line.
isCommentOnly(line) => commentAllRe.hasMatch(line);
Lasse Reichstein Nielsen 2014/10/15 09:13:17 This would be the non-regexp version: => line.star
eernst 2014/10/15 13:19:27 Arg null is treated also as an error by firstMatch
69
// Reduces [line] to its line terminator: "\n" when [line] ends with a
// newline, the empty string otherwise. An empty [line] is returned as is.
justEol(line) {
  if (line.isEmpty) return line;
  return line.endsWith("\n") ? "\n" : "";
}
74
// Removes the text of a LaTeX comment from [line] while keeping the '%'
// that introduces it.
//
// NB: it is tempting to remove everything from the '%' onwards, including
// the final newline, if any, but this does not work. TeX does exactly
// that, but then adds back a character that depends on its state (S, M,
// or N), and it is tricky to maintain a state here that matches the state
// of TeX faithfully. Hence only the content of a comment is removed: the
// '%' stays at the end of the line, and TeX manages its states exactly as
// it would for the file before stripComment.
stripComment(line) {
  // A comment-only line collapses to a bare '%' line; otherwise keep the
  // first two characters of the match (the character before the '%' and
  // the '%' itself) and cut the rest of the comment.
  return isCommentOnly(line)
      ? "%\n"
      : cutRegexp(line, commentRe, startOffset: 2);
}
89
// Normalizes the whitespace of a single [line]: a whitespace-only line is
// reduced to its end-of-line, leading whitespace is removed entirely, and
// every run of two or more spaces/tabs is replaced by a single space.
normalizeWhitespace(line) {
  if (isWsOnly(line)) return justEol(line);
  // endOffset: -1 keeps the first non-whitespace character, which
  // whitespaceLeadingRe includes in its match.
  line = cutRegexp(line, whitespaceLeadingRe, endOffset: -1);
  // Each match is a maximal run of spaces/tabs, so one replaceAll pass
  // yields the same result as repeatedly cutting the first match.
  return line.replaceAll(whitespaceRe, " ");
}
101
// Reduces sequences of >1 wsOnly lines to 1, and sequences of >1
// commentOnly lines to 1; moreover, treats commentOnly lines as
// wsOnly when occurring in wsOnly line blocks.
//
// Takes the iterable [lines] and returns a new list holding the lines
// that are kept, in their original order; [lines] itself is not modified.
multilineNormalize(lines) {
  var afterBlankLines = false; // does 'line' succeed >0 empty lines?
  var afterCommentLines = false; // .. succeed >0 commentOnly lines?
  var newLines = [];
  for (var line in lines) {
    if (afterBlankLines && afterCommentLines) {
      // can never happen
      throw "Bug, please report to eernst@";
    } else if (afterBlankLines && !afterCommentLines) {
      // at least one line before 'line' is wsOnly
      if (!isWsOnly(line)) {
        // blank line block ended
        afterCommentLines = isCommentOnly(line);
        // special case: it seems to be safe to remove commentOnly lines
        // after wsOnly lines, so the TeX state must be predictably right;
        // next line will then be afterCommentLines and be dropped, so
        // we drop the entire comment block---which is very useful; we can
        // also consider this comment line to be an empty line, such that
        // subsequent empty lines can be considered to be in a block of
        // empty lines; note that almost all variants of this will break..
        if (afterCommentLines) {
          // _current_ 'line' is commentOnly here
          afterBlankLines = true;
          afterCommentLines = false;
          // and do not add 'line'
        } else {
          // after blanks, but current 'line' is neither blank nor comment
          afterBlankLines = false;
          newLines.add(line);
        }
      } else {
        // blank line block continues, do not add 'line'
      }
    } else if (!afterBlankLines && afterCommentLines) {
      // at least one line before 'line' is commentOnly
      if (!isCommentOnly(line)) {
        // comment line block ended
        afterBlankLines = isWsOnly(line);
        afterCommentLines = false;
        newLines.add(line);
      } else {
        // comment line block continues, do not add 'line'
      }
    } else {
      assert(!afterBlankLines && !afterCommentLines);
      // no wsOnly or commentOnly lines precede 'line'
      if (isWsOnly(line)) afterBlankLines = true;
      if (isCommentOnly(line)) afterCommentLines = true;
      if (!afterCommentLines) newLines.add(line);
      // else skipping commentOnly line after nonWs, nonComment text
    }
  }
  return newLines;
}
159
160 // select the elements in the pipeline
161
// Normalization pipeline for ordinary lines: strips the comment text
// first and then normalizes the whitespace of what remains.
normalize(line) {
  var withoutComment = stripComment(line);
  return normalizeWhitespace(withoutComment);
}
163
// Normalization pipeline applied by main to lines inside dartCode
// fragments: only the comment text is stripped, whitespace is left
// untouched ("sisp" presumably abbreviates "significant spacing").
sispNormalize(line) {
  return stripComment(line);
}
165
166 // ----------------------------------------------------------------------
167 // Managing fragments with significant spacing
168
// Matches a line that starts, after optional whitespace, with
// \begin{dartCode}. The braces are escaped so that the pattern does not
// rely on lenient parsing of an unescaped '{' in the regexp source.
final dartCodeBeginRe = new RegExp(r"^\s*\\begin\{dartCode\}");
Lasse Reichstein Nielsen 2014/10/15 09:13:17 Need to escape '{' and '}' in RegExp: new RegEx
eernst 2014/10/15 13:19:27 OK. Note that we get no exceptions for this one,
Lasse Reichstein Nielsen 2014/10/15 14:09:57 RegExp in browsers have traditionally been very fo
eernst 2014/10/15 14:26:41 What's the smart way to install a wakeup call to a
// Matches a line that starts, after optional whitespace, with
// \end{dartCode}. The braces are escaped so that the pattern does not
// rely on lenient parsing of an unescaped '{' in the regexp source.
final dartCodeEndRe = new RegExp(r"^\s*\\end\{dartCode\}");
171
// Whether the regular expression [targetRe] matches somewhere in [line].
sispIs(line, targetRe) => targetRe.hasMatch(line);
175
// Whether [line] opens a dartCode fragment, i.e. matches dartCodeBeginRe.
sispIsDartBegin(line) {
  return sispIs(line, dartCodeBeginRe);
}
// Whether [line] closes a dartCode fragment, i.e. matches dartCodeEndRe.
sispIsDartEnd(line) {
  return sispIs(line, dartCodeEndRe);
}
178
179 // ----------------------------------------------------------------------
180 // main
181
// Entry point: reads the LaTeX file named by the first argument,
// normalizes it, and writes the result to the file named by the second
// argument.
//
// Prints a usage message and throws when the number of arguments is not
// exactly two; throws a FileSystemException when the input file does not
// exist.
main([args]) {
  // Treat a missing argument list like an empty one so that the usage
  // message is still produced.
  var argCount = args == null ? 0 : args.length;
  if (argCount != 2) {
    print("Usage: addlatexhash.dart <input-file> <output-file>");
    throw "Received ${argCount} arguments, expected two";
  }

  var inputFile = new File(args[0]);
  var outputFile = new File(args[1]);
  // Checked explicitly rather than with assert, because assertions are
  // skipped unless they have been enabled.
  if (!inputFile.existsSync()) {
    throw new FileSystemException("Input file does not exist", args[0]);
  }

  var lines = inputFile.readAsLinesSync();
  // single-line normalization
  var inDartCode = false;
  var newLines = [];

  for (var line in lines) {
    // The \begin{dartCode} line itself is handled as part of the fragment,
    // the \end{dartCode} line as ordinary text.
    if (sispIsDartBegin(line)) {
      inDartCode = true;
    } else if (sispIsDartEnd(line)) {
      inDartCode = false;
    }
    // readAsLinesSync drops the line terminators, so add "\n" back before
    // normalizing.
    if (inDartCode) {
      newLines.add(sispNormalize(line + "\n"));
    } else {
      newLines.add(normalize(line + "\n"));
    }
  }

  // multi-line normalization
  newLines = multilineNormalize(newLines);

  // output result
  outputFile.writeAsStringSync(newLines.join());
}
OLDNEW
« tests/standalone/io/addlatexhash_test.dart ('K') | « tests/standalone/io/addlatexhash_test.dart ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698