Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(289)

Side by Side Diff: tools/addlatexhash.dart

Issue 652993005: Working insertion of hash values; added a few labels in spec (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Adjusted according to review Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tests/standalone/io/addlatexhash_test_src.tex ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env dart
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file 2 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 3 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 4 // BSD-style license that can be found in the LICENSE file.
4 // 5 //
5 // ---------------------------------------------------------------------- 6 // ----------------------------------------------------------------------
6 // This is a very specialized tool which was created in order to support 7 // This is a very specialized tool which was created in order to support
7 // adding hash values used as location markers in the LaTeX source of the 8 // adding hash values used as location markers in the LaTeX source of the
8 // language specification. It is intended to take its input file as the 9 // language specification. It is intended to take its input file as the
9 // first argument and the output file name as the second argument. From 10 // first argument, an output file name as the second argument, and a
10 // docs/language a typical usage would be as follows: 11 // hash listing file name as the third argument. From docs/language a
12 // typical usage would be as follows:
11 // 13 //
12 // dart ../../tools/addlatexhash.dart dartLangSpec.tex tmp.tex 14 // dart ../../tools/addlatexhash.dart dartLangSpec.tex out.tex hash.txt
13 // 15 //
14 // This will yield a normalized variant tmp.tex of the language 16 // This will produce a normalized variant out.tex of the language
15 // specification with hash values filled in. For more details, please 17 // specification with hash values filled in, and a listing hash.txt of
16 // check the language specification source itself. 18 // all the hash values along with the label of their textual context
19 // (section, subsection, subsubsection, paragraph). For more details,
20 // please check the language specification source itself.
17 // 21 //
18 // NB: This utility assumes UN*X style line endings, \n, in the LaTeX 22 // NB: This utility assumes UN*X style line endings, \n, in the LaTeX
19 // source file received as input; it will not work with other styles. 23 // source file received as input; it will not work with other styles.
20 //
21 // TODO: The current version does not fill in hash values, it only
22 // standardizes the LaTeX source by removing comments and normalizing
23 // white space.
24 24
25 import 'dart:io'; 25 import 'dart:io';
26 import 'dart:convert'; 26 import 'dart:convert';
27 import '../pkg/utf/lib/utf.dart';
27 import '../pkg/crypto/lib/crypto.dart'; 28 import '../pkg/crypto/lib/crypto.dart';
28 29
29 // Normalization of the text, i.e., removal or normalization 30 // ----------------------------------------------------------------------
30 // of elements that do not affect the output from latex 31 // Normalization of the text: removal or normalization of parts that
32 // do not affect the output from latex, such as white space.
31 33
32 final commentRE = new RegExp(r"[^\\]%.*"); // NB: . does not match \n 34 final commentRE = new RegExp(r"[^\\]%.*"); // NB: . does not match \n.
33 final whitespaceAllRE = new RegExp(r"^\s+$"); 35 final whitespaceAllRE = new RegExp(r"^\s+$");
34 final whitespaceRE = new RegExp(r"[ \t]{2,}"); 36 final whitespaceRE = new RegExp(r"[ \t]{2,}");
Lasse Reichstein Nielsen 2014/11/11 08:13:48 Why is one "whitespace" using \s and the other [ \
eernst 2014/11/11 09:04:54 That's indeed wrong --- the intention is to match
Lasse Reichstein Nielsen 2014/11/11 09:13:23 There is no special recommendation in a RegExp con
35 37
36 // normalization steps 38 /// Removes [match]ing part of [line], adjusting that part with the
37 39 /// given [startOffset] and [endOffset], bounded to be valid indices
40 /// into the string if needed, then inserts [glue] where text was
41 /// removed. If there is no match then [line] is returned.
38 cutMatch(line, match, {startOffset: 0, endOffset: 0, glue: ""}) { 42 cutMatch(line, match, {startOffset: 0, endOffset: 0, glue: ""}) {
39 if (match == null) return line; 43 if (match == null) return line;
40 var start = match.start + startOffset; 44 var start = match.start + startOffset;
41 var end = match.end + endOffset; 45 var end = match.end + endOffset;
42 var len = line.length; 46 var len = line.length;
43 if (start < 0) start = 0; 47 if (start < 0) start = 0;
44 if (end > len) end = len; 48 if (end > len) end = len;
45 return line.substring(0, start) + glue + line.substring(end); 49 return line.substring(0, start) + glue + line.substring(end);
46 } 50 }
47 51
48 cutRegexp(line, re, {startOffset: 0, endOffset: 0, glue: ""}) { 52 cutRegexp(line, re, {startOffset: 0, endOffset: 0, glue: ""}) {
49 return cutMatch(line, re.firstMatch(line), 53 return cutMatch(line, re.firstMatch(line),
50 startOffset: startOffset, 54 startOffset: startOffset,
51 endOffset: endOffset, 55 endOffset: endOffset,
52 glue: glue); 56 glue: glue);
53 } 57 }
54 58
59 /// Removes the rest of [line] starting from the beginning of the
60 /// given [match], and adjusting with the given [offset]. If there
61 /// is no match then [line] is returned.
55 cutFromMatch(line, match, {offset: 0, glue: ""}) { 62 cutFromMatch(line, match, {offset: 0, glue: ""}) {
56 if (match == null) return line; 63 if (match == null) return line;
57 return line.substring(0, match.start + offset) + glue; 64 return line.substring(0, match.start + offset) + glue;
58 } 65 }
59 66
60 cutFromRegexp(line, re, {offset: 0, glue: ""}) { 67 cutFromRegexp(line, re, {offset: 0, glue: ""}) {
61 return cutFromMatch(line, re.firstMatch(line), offset: offset, glue: glue); 68 return cutFromMatch(line, re.firstMatch(line), offset: offset, glue: glue);
62 } 69 }
63 70
64 isWsOnly(line) => whitespaceAllRE.firstMatch(line) != null; 71 isWsOnly(line) => whitespaceAllRE.firstMatch(line) != null;
65 isCommentOnly(line) => line.startsWith("%"); 72 isCommentOnly(line) => line.startsWith("%");
66 73
74 /// Returns the end-of-line character at the end of [line], if any,
75 /// otherwise returns the empty string.
67 justEol(line) { 76 justEol(line) {
68 return line.endsWith("\n") ? "\n" : ""; 77 return line.endsWith("\n") ? "\n" : "";
69 } 78 }
70 79
80 /// Removes the contents of the comment at the end of [line],
81 /// leaving the "%" in place. If no comment is present,
82 /// return [line].
83 ///
84 /// NB: it is tempting to remove everything from the '%' and out,
85 /// including the final newline, if any, but this does not work.
86 /// The problem is that TeX will do exactly this, but then it will
87 /// add back a character that depends on its state (S, M, or N),
88 /// and it is tricky to maintain a similar state that matches the
89 /// state of TeX faithfully. Hence, we remove the content of
90 /// comments but do not remove the comments themselves, we just
91 /// leave the '%' at the end of the line and let TeX manage its
92 /// states in a way that does not differ from the file from before
93 /// stripComment.
71 stripComment(line) { 94 stripComment(line) {
72 // NB: it is tempting to remove everything from the '%' and out,
73 // including the final newline, if any, but this does not work.
74 // The problem is that TeX will do exactly this, but then it will
75 // add back a character that depends on its state (S, M, or N),
76 // and it is tricky to maintain a similar state that matches the
77 // state of TeX faithfully. Hence, we remove the content of
78 // comments but do not remove the comments themselves, we just
79 // leave the '%' at the end of the line and let TeX manage its
80 // states in a way that does not differ from the file from before
81 // stripComment
82 if (isCommentOnly(line)) return "%\n"; 95 if (isCommentOnly(line)) return "%\n";
83 return cutRegexp(line, commentRE, startOffset: 2); 96 return cutRegexp(line, commentRE, startOffset: 2);
84 } 97 }
85 98
86 // Reduce a wsOnly line to its eol, remove leading ws 99 /// Reduces a white-space-only [line] to its eol character,
87 // entirely, and reduce multiple ws chars to one 100 /// removes leading ws entirely, and reduces multiple
101 /// white-space chars to one.
88 normalizeWhitespace(line) { 102 normalizeWhitespace(line) {
89 var trimLine = line.trimLeft(); 103 var trimLine = line.trimLeft();
90 if (trimLine.isEmpty) return justEol(line); 104 if (trimLine.isEmpty) return justEol(line);
91 return trimLine.replaceAll(whitespaceRE, " "); 105 return trimLine.replaceAll(whitespaceRE, " ");
92 } 106 }
93 107
94 // Reduce sequences of >1 wsOnly lines to 1, and sequences of >1 108 /// Reduces sequences of >1 white-space-only lines in [lines] to 1,
95 // commentOnly lines to 1; moreover, treat commentOnly lines as 109 /// and sequences of >1 comment-only lines to 1. Treats comment-only
96 // wsOnly when occurring in wsOnly line blocks 110 /// lines as white-space-only when they occur in white-space-only
111 /// line blocks.
97 multilineNormalize(lines) { 112 multilineNormalize(lines) {
98 var afterBlankLines = false; // does 'line' succeed >0 empty lines? 113 var afterBlankLines = false; // Does [line] succeed >0 empty lines?
99 var afterCommentLines = false; // .. succeed >0 commentOnly lines? 114 var afterCommentLines = false; // Does [line] succeed >0 commentOnly lines?
100 var newLines = new List(); 115 var newLines = new List();
101 for (var line in lines) { 116 for (var line in lines) {
102 if (afterBlankLines && afterCommentLines) { 117 if (afterBlankLines && afterCommentLines) {
103 // can never happen 118 // Previous line was both blank and a comment: not possible.
104 throw "Bug, please report to eernst@"; 119 throw "Bug, please report to eernst@";
105 } else if (afterBlankLines && !afterCommentLines) { 120 } else if (afterBlankLines && !afterCommentLines) {
106 // at least one line before 'line' is wsOnly 121 // At least one line before [line] is wsOnly.
107 if (!isWsOnly(line)) { 122 if (!isWsOnly(line)) {
108 // blank line block ended 123 // Blank line block ended.
109 afterCommentLines = isCommentOnly(line); 124 afterCommentLines = isCommentOnly(line);
110 // special case: it seems to be safe to remove commentOnly lines 125 // Special case: It seems to be safe to remove commentOnly lines
111 // after wsOnly lines, so the TeX state must be predictably right; 126 // after wsOnly lines, so the TeX state must be predictably right;
112 // next line will then be afterCommentLines and be dropped, so 127 // next line will then be afterCommentLines and be dropped, so
113 // we drop the entire comment block---which is very useful; we can 128 // we drop the entire comment block---which is very useful. We can
114 // also consider this comment line to be an empty line, such that 129 // also consider this comment line to be an empty line, such that
115 // subsequent empty lines can be considered to be in a block of 130 // subsequent empty lines can be considered to be in a block of
116 // empty lines; note that almost all variants of this will break.. 131 // empty lines. Note that almost all variants of this break.
117 if (afterCommentLines) { 132 if (afterCommentLines) {
118 // _current_ 'line' a commentOnly here 133 // _Current_ 'line' is a commentOnly here.
119 afterBlankLines = true; 134 afterBlankLines = true;
120 afterCommentLines = false; 135 afterCommentLines = false;
121 // and do not add 'line' 136 // Omit addition of [line].
122 } else { 137 } else {
123 // after blanks, but current 'line' is neither blank nor comment 138 // After blanks, but current 'line' is neither blank nor comment.
124 afterBlankLines = false; 139 afterBlankLines = false;
125 newLines.add(line); 140 newLines.add(line);
126 } 141 }
127 } else { 142 } else {
128 // blank line block continues, do not add 'line' 143 // Blank line block continues, omit addition of [line].
129 } 144 }
130 } else if (!afterBlankLines && afterCommentLines) { 145 } else if (!afterBlankLines && afterCommentLines) {
131 // at least one line before 'line' is commentOnly 146 // At least one line before [line] is commentOnly.
132 if (!isCommentOnly(line)) { 147 if (!isCommentOnly(line)) {
133 // comment line block ended 148 // Comment block ended.
134 afterBlankLines = isWsOnly(line); 149 afterBlankLines = isWsOnly(line);
135 afterCommentLines = false; 150 afterCommentLines = false;
136 newLines.add(line); 151 newLines.add(line);
137 } else { 152 } else {
138 // comment line block continues, do not add 'line' 153 // Comment block continues, do not add [line].
139 } 154 }
140 } else { 155 } else {
141 assert(!afterBlankLines && !afterCommentLines); 156 assert(!afterBlankLines && !afterCommentLines);
142 // no wsOnly or commentOnly lines precede 'line' 157 // No wsOnly or commentOnly lines precede [line].
143 afterBlankLines = isWsOnly(line); 158 afterBlankLines = isWsOnly(line);
144 afterCommentLines = isCommentOnly(line); 159 afterCommentLines = isCommentOnly(line);
145 if (!afterCommentLines) newLines.add(line); 160 if (!afterCommentLines) {
146 // else skipping commentOnly line after nonWs, nonComment text 161 newLines.add(line);
162 } else {
163 // skip commentOnly line after nonWs/nonComment text.
164 }
147 } 165 }
148 } 166 }
149 return newLines; 167 return newLines;
150 } 168 }
151 169
152 // Selecting the elements in the pipeline 170 /// Selects the elements in the normalization pipeline.
153
154 normalize(line) => normalizeWhitespace(stripComment(line)); 171 normalize(line) => normalizeWhitespace(stripComment(line));
172
173 /// Selects the elements in the significant-spacing block
174 /// normalization pipeline.
155 sispNormalize(line) => stripComment(line); 175 sispNormalize(line) => stripComment(line);
156 176
157 // Managing fragments with significant spacing 177 // Managing fragments with significant spacing.
158 178
159 final dartCodeBeginRE = new RegExp(r"^\s*\\begin\{dartCode\}"); 179 final dartCodeBeginRE = new RegExp(r"^\s*\\begin\s*\{dartCode\}");
160 final dartCodeEndRE = new RegExp (r"^\s*\\end\{dartCode\}"); 180 final dartCodeEndRE = new RegExp (r"^\s*\\end\s*\{dartCode\}");
161 181
162 sispIs(line, targetRE) { 182 /// Recognizes beginning of dartCode block.
163 return targetRE.firstMatch(line) != null; 183 sispIsDartBegin(line) => line.contains(dartCodeBeginRE);
164 } 184
165 185 /// Recognizes end of dartCode block.
166 sispIsDartBegin(line) => sispIs(line, dartCodeBeginRE); 186 sispIsDartEnd(line) => line.contains(dartCodeEndRE);
167 sispIsDartEnd(line) => sispIs(line, dartCodeEndRE); 187
168 188 // ----------------------------------------------------------------------
169 // Transform input file into output file 189 // Analyzing the input to point out "interesting" lines
170 190
/// Collects the events that [analyzer] reports for [lines].
///
/// Each line is passed, in order and exactly once, to
/// [analyzer.analyze]. A null result means that the line is
/// "uninteresting" (no event); any other result is an event object
/// characterizing the line, and it is added to the returned list.
findEvents(lines, analyzer) {
  return lines
      .map((line) => analyzer.analyze(line))
      .where((event) => event != null)
      .toList();
}
204
/// Builds a RegExp that recognizes a LaTeX command occupying a line of
/// its own: optional leading white space, a backslash, the command
/// name, optional white space, a `{..}` argument, and optional
/// trailing white space up to the end of the input. [cmdNameRE] is
/// the RegExp source text that recognizes the command name.
lineCommandRE(cmdNameRE) {
  final pattern = r"^\s*\\" + cmdNameRE + r"\s*\{.*\}\s*$";
  return new RegExp(pattern);
}
210
// Delimiters of the label text inside an \LMLabel{..} line: the first
// matches everything up to and including the '{', the second matches
// the final '}' and any trailing white space.
final hashLabelStartRE = new RegExp(r"^\s*\\LMLabel\s*\{");
final hashLabelEndRE = new RegExp(r"\}\s*$");

// Recognizers for commands that occupy a line of their own.
final hashMarkRE = lineCommandRE("LMHash");
final hashLabelRE = lineCommandRE("LMLabel");
final sectioningRE = lineCommandRE("((|sub(|sub))section|paragraph)");
final sectionRE = lineCommandRE("section");
final subsectionRE = lineCommandRE("subsection");
final subsubsectionRE = lineCommandRE("subsubsection");
final paragraphRE = lineCommandRE("paragraph");

/// Tells whether [line] is an \LMHash{..} line, i.e., whether it
/// begins a block of lines that gets a hash value.
isHashMarker(line) => hashMarkRE.hasMatch(line);

/// Tells whether [line] is an \LMLabel{..} line, i.e., whether it
/// defines a sectioning label.
isHashLabel(line) => hashLabelRE.hasMatch(line);

/// Tells whether [line] is any sectioning command (\section,
/// \subsection, \subsubsection, \paragraph); the functions below
/// recognize the individual forms. It is assumed that sectioning
/// commands do not contain a newline between the command name and
/// the '{'.
isSectioningCommand(line) => sectioningRE.hasMatch(line);
isSectionCommand(line) => sectionRE.hasMatch(line);
isSubsectionCommand(line) => subsectionRE.hasMatch(line);
isSubsubsectionCommand(line) => subsubsectionRE.hasMatch(line);
isParagraphCommand(line) => paragraphRE.hasMatch(line);

/// Tells whether a block of lines that gets a hash value may continue
/// past [line]; it may not if [line] is a sectioning command.
isntHashBlockTerminator(line) => !isSectioningCommand(line);
240
/// Extracts the label text from [line], i.e., the text between the
/// opening "\LMLabel{" and the closing "}" at the end of the line.
/// It is assumed that isHashLabel(line) is true; this is only
/// verified by an assertion.
extractHashLabel(line) {
  final opening = hashLabelStartRE.firstMatch(line);
  final closing = hashLabelEndRE.firstMatch(line);
  assert(opening != null && closing != null);
  final labelBegin = opening.end;
  final labelEnd = closing.start;
  return line.substring(labelBegin, labelEnd);
}
249
250 // Event classes: Keep track of relevant information about the LaTeX
251 // source code lines, such as where \LMHash and \LMLabel commands are
252 // used, and how they are embedded in the sectioning structure.
253
/// Common base of all events. It supplies do-nothing defaults such
/// that [setEndLineNumber] and [getStartLineNumber] can be invoked on
/// every event, whatever its kind.
abstract class HashEvent {
  /// Records [n] as the end line number of this event. Event kinds
  /// that are concerned with a block of lines rather than a single
  /// line override this in order to store where their block ends;
  /// this default ignores the call.
  setEndLineNumber(n) {}

  /// Returns the start line number for \LMHash{} events, and null
  /// (this default) for all other events. The number serves as a
  /// boundary: the preceding \LMHash{} block must stop before the
  /// line of this \LMHash{} command. Note that hash blocks may stop
  /// earlier, because they cannot contain sectioning commands.
  getStartLineNumber() {
    return null;
  }
}
269
/// Event for an \LMHash{} line, together with the extent of the block
/// of lines whose hash value is inserted on that line.
class HashMarkerEvent extends HashEvent {
  // Line number of the \LMHash{} command itself; the lines that get
  // hashed start on the line after it.
  var startLineNumber;

  // Upper bound on the number of the first line after the hashed
  // block (the line of the next \LMHash{}, if any). It is unknown
  // when the event is created, because that line has not yet been
  // reached, so it is filled in by a separate scan via
  // [setEndLineNumber]. The block may end earlier than this, because
  // it never includes a sectioning command.
  var endLineNumber;

  HashMarkerEvent(this.startLineNumber);

  setEndLineNumber(n) {
    endLineNumber = n;
  }

  getStartLineNumber() {
    return startLineNumber;
  }
}
288
/// Event for an \LMLabel{..} line. It has no block of lines, so it
/// keeps the inherited do-nothing line number handling.
class HashLabelEvent extends HashEvent {
  // The text to be listed for this label.
  var labelText;

  HashLabelEvent(this.labelText);
}
293
/// Line-by-line analyzer that emits a [HashMarkerEvent] for each
/// \LMHash{} line and a [HashLabelEvent] for each \LMLabel{..} line.
/// It remembers the kind of the most recently seen sectioning command
/// in order to give each label the matching prefix.
class HashAnalyzer {
  // Kinds of pending (= most recently seen) sectioning command. The
  // values must be pairwise distinct, because [sectioningPrefix]
  // switches on them. When updating this list, also update
  // [sectioningPrefix] below.
  static const PENDING_IS_NONE = 0;
  static const PENDING_IS_SECTION = 1;
  static const PENDING_IS_SUBSECTION = 2;
  static const PENDING_IS_SUBSUBSECTION = 3;
  static const PENDING_IS_PARAGRAPH = 4;

  // Zero-based number of the next line to be passed to [analyze].
  var lineNumber = 0;

  // Kind of the most recently seen sectioning command.
  var pendingSectioning = PENDING_IS_NONE;

  HashAnalyzer();

  setPendingToSection() {
    pendingSectioning = PENDING_IS_SECTION;
  }

  setPendingToSubsection() {
    pendingSectioning = PENDING_IS_SUBSECTION;
  }

  setPendingToSubsubsection() {
    pendingSectioning = PENDING_IS_SUBSUBSECTION;
  }

  setPendingToParagraph() {
    pendingSectioning = PENDING_IS_PARAGRAPH;
  }

  clearPending() {
    pendingSectioning = PENDING_IS_NONE;
  }

  /// Returns the label prefix for the pending sectioning command.
  /// Throws if no sectioning command has been seen yet.
  sectioningPrefix() {
    switch (pendingSectioning) {
      case PENDING_IS_SECTION: return "sec:";
      case PENDING_IS_SUBSECTION: return "subsec:";
      case PENDING_IS_SUBSUBSECTION: return "subsubsec:";
      case PENDING_IS_PARAGRAPH: return "par:";
      case PENDING_IS_NONE:
        throw
            "\\LMHash{..} should only be used after a sectioning command " +
            "(\\section, \\subsection, \\subsubsection, \\paragraph)";
      default:
        // Set of PENDING_IS_.. was extended, but updates here omitted.
        throw "Bug, please report to eernst@";
    }
  }

  /// Analyzes [line], which must be the line following the one given
  /// in the previous call. Returns a [HashMarkerEvent] holding the
  /// number of [line] if it is an \LMHash{} line, a [HashLabelEvent]
  /// holding the prefixed label text if it is an \LMLabel{..} line,
  /// and null otherwise. Sectioning commands yield no event, but they
  /// update the pending sectioning kind.
  analyze(line) {
    var currentLineNumber = lineNumber++;
    if (isHashMarker(line)) {
      return new HashMarkerEvent(currentLineNumber);
    } else if (isHashLabel(line)) {
      var labelText = sectioningPrefix() + extractHashLabel(line);
      return new HashLabelEvent(labelText);
    } else {
      // No events to emit, but we may need to note state changes.
      if (isSectionCommand(line)) {
        setPendingToSection();
      } else if (isSubsectionCommand(line)) {
        setPendingToSubsection();
      } else if (isSubsubsectionCommand(line)) {
        setPendingToSubsubsection();
      } else if (isParagraphCommand(line)) {
        setPendingToParagraph();
      } else {
        // No state changes.
      }
      return null;
    }
  }
}
368
/// Returns the hash events for [lines], with end line numbers set.
///
/// The events are first found using a fresh [HashAnalyzer]. They are
/// then visited from last to first: each event receives the current
/// boundary as its end line number, and whenever an event reports a
/// start line number, that number becomes the boundary for the events
/// before it. The initial boundary is the number of lines.
findHashEvents(lines) {
  final events = findEvents(lines, new HashAnalyzer());
  var boundary = lines.length;
  for (var i = events.length - 1; i >= 0; i--) {
    final event = events[i];
    event.setEndLineNumber(boundary);
    final start = event.getStartLineNumber();
    if (start != null) boundary = start;
  }
  return events;
}
381
382 // ----------------------------------------------------------------------
383 // Removal of non-normative elements of the text (rationale, commentary).
384
/// Returns [line] without the command [cmdName] (based on a match
/// on "\\cmdName\s*{..}") starting at [startIndex], which must be the
/// index of the backslash of the command. It is assumed but not
/// checked that [line] contains "\\cmdName\s*{.." at that position.
///
/// The end of the {..} block is found via brace matching, i.e.,
/// nested {..} blocks are handled. A brace just after a backslash is
/// considered escaped and is not counted, unless that backslash is
/// itself escaped ("\\"). This may break if '{' is made an active
/// character etc.
///
/// Throws if no '{' follows the command name, or if the braces are
/// not balanced within [line].
removeCommand(line, cmdName, startIndex) {
  const BACKSLASH = 92; // char code for '\\'.
  const BRACE_BEGIN = 123; // char code for '{'.
  const BRACE_END = 125; // char code for '}'.

  // Skip the backslash and the command name, then search for '{'.
  var blockStartIndex = startIndex + cmdName.length + 1;
  while (blockStartIndex < line.length &&
      line.codeUnitAt(blockStartIndex) != BRACE_BEGIN) {
    blockStartIndex++;
  }
  blockStartIndex++;
  if (blockStartIndex > line.length) {
    throw "Bug, please report to eernst@";
  }
  // [blockStartIndex] has index just after '{'.

  // Is true iff [index] is just after a backslash that is not itself
  // escaped.
  var afterEscape = false;
  var braceLevel = 1; // Have seen so many '{'s minus so many '}'s.

  for (var index = blockStartIndex; index < line.length; index++) {
    switch (line.codeUnitAt(index)) {
      case BRACE_BEGIN:
        if (afterEscape) {
          afterEscape = false;
        } else {
          braceLevel++;
        }
        break;
      case BRACE_END:
        if (afterEscape) {
          afterEscape = false;
        } else {
          braceLevel--;
        }
        break;
      case BACKSLASH:
        // An escaped backslash ("\\") does not escape what follows.
        afterEscape = !afterEscape;
        break;
      default:
        afterEscape = false;
    }
    if (braceLevel == 0) {
      return line.substring(0, startIndex) + line.substring(index + 1);
    }
  }
  // Removal failed; we consider this to mean that the input is ill-formed.
  throw "Unmatched braces";
}
439
440 final commentaryRE = new RegExp(r"\\commentary\s*\{");
441 final rationaleRE = new RegExp(r"\\rationale\s*\{");
442
/// Removes all {}-balanced '\commentary{..}' commands from [line],
/// one at a time from the left, until none is found.
removeCommentary(line) {
  var result = line;
  var match = commentaryRE.firstMatch(result);
  while (match != null) {
    result = removeCommand(result, r"commentary", match.start);
    match = commentaryRE.firstMatch(result);
  }
  return result;
}
449
/// Removes all {}-balanced '\rationale{..}' commands from [line],
/// one at a time from the left, until none is found.
removeRationale(line) {
  var result = line;
  var match = rationaleRE.firstMatch(result);
  while (match != null) {
    result = removeCommand(result, r"rationale", match.start);
    match = rationaleRE.firstMatch(result);
  }
  return result;
}
456
/// Simplifies [line] in three steps: removes its {}-balanced
/// '\commentary{..}' commands, then its {}-balanced
/// '\rationale{..}' commands, and finally normalizes the white-space
/// of what is left.
simplifyLine(line) =>
    normalizeWhitespace(removeRationale(removeCommentary(line)));
465
466 // ----------------------------------------------------------------------
467 // Recognition of line blocks, insertion of block hash into \LMHash{}.
468
469 final latexArgumentRE = new RegExp(r"\{.*\}");
470
/// Returns [line] with the first match of [commentRE] cut away,
/// except for the character in front of the '%', and with trailing
/// white space (including the end-of-line character) removed.
cleanupLine(line) {
  final uncommented = cutRegexp(line, commentRE, startOffset: 1);
  return uncommented.trimRight();
}
472
/// Joins a block of [lines] into one string, with " " between the
/// parts. The block starts at [startIndex] and ends just before the
/// first hash block terminator, or just before [nextIndex] if no
/// terminator occurs earlier. Each line is cleaned up using
/// [cleanupLine] before it is joined.
gatherLines(lines, startIndex, nextIndex) {
  final candidates = lines.getRange(startIndex, nextIndex);
  final block = candidates.takeWhile(isntHashBlockTerminator);
  return block.map(cleanupLine).join(" ");
}
482
/// Computes the SHA1 hash value of the line block in [lines] that
/// starts at [startIndex] and stops just before [nextIndex]. The
/// block is gathered into one line and simplified before its UTF-8
/// encoding is hashed. SIDE EFFECT: Writes the simplified text to
/// [listSink] as a ' % <text>' line.
computeHashValue(lines, startIndex, nextIndex, listSink) {
  final simplifiedLine =
      simplifyLine(gatherLines(lines, startIndex, nextIndex));
  listSink.write(" % $simplifiedLine\n");
  final hashEncoder = new SHA1();
  hashEncoder.add(encodeUtf8(simplifiedLine));
  return hashEncoder.close();
}
494
/// Returns the hash value computed by [computeHashValue] for the
/// given block of [lines], rendered as a hexadecimal string. The
/// side effect on [listSink] is that of [computeHashValue].
computeHashString(lines, startIndex, nextIndex, listSink) {
  final digest = computeHashValue(lines, startIndex, nextIndex, listSink);
  return CryptoUtils.bytesToHex(digest);
}
500
/// Fills in the hash values of the \LMHash{} lines in [lines] and
/// writes the hash listing to [listSink].
///
/// For each [HashMarkerEvent] in [hashEvents], the hash of the block
/// following the marker line is computed, the {..} argument on the
/// marker line (which must be at the line number given by the event)
/// is replaced by that hash, and the hash is written to [listSink].
/// The simplified text of the block is written just before it as a
/// "comment" of the form ' % <text>', one long line for easy grepping
/// etc. For each [HashLabelEvent], its label text is written.
addHashMarks(lines, hashEvents, listSink) {
  for (var event in hashEvents) {
    if (event is HashLabelEvent) {
      listSink.write("${event.labelText}\n");
      continue;
    }
    if (event is! HashMarkerEvent) continue;
    final markerLine = event.startLineNumber;
    final blockEnd = event.endLineNumber;
    final hash = computeHashString(lines, markerLine + 1, blockEnd, listSink);
    lines[markerLine] =
        lines[markerLine].replaceAll(latexArgumentRE, "{" + hash + "}");
    listSink.write(" $hash\n");
  }
}
521
522 /// Transforms LaTeX input to LaTeX output plus hash value list file.
171 main ([args]) { 523 main ([args]) {
172 if (args.length != 2) { 524 if (args.length != 3) {
173 print("Usage: addlatexhash.dart <input-file> <output-file>"); 525 print("Usage: addlatexhash.dart <input-file> <output-file> <list-file>");
174 throw "Received ${args.length} arguments, expected two"; 526 throw "Received ${args.length} arguments, expected three";
175 } 527 }
176 528
529 // Get LaTeX source.
177 var inputFile = new File(args[0]); 530 var inputFile = new File(args[0]);
531 assert(inputFile.existsSync());
532 var lines = inputFile.readAsLinesSync();
533
534 // Will hold LaTeX source with normalized spacing etc., plus hash values.
178 var outputFile = new File(args[1]); 535 var outputFile = new File(args[1]);
179 assert(inputFile.existsSync()); 536
180 537 // Will hold hierarchical list of hash values.
181 var lines = inputFile.readAsLinesSync(); 538 var listFile = new File(args[2]);
182 // single-line normalization 539 var listSink = listFile.openWrite();
540
541 // Perform single-line normalization.
183 var inDartCode = false; 542 var inDartCode = false;
184 var newLines = new List(); 543 var normalizedLines = new List();
185 544
186 for (var line in lines) { 545 for (var line in lines) {
187 if (sispIsDartBegin(line)) { 546 if (sispIsDartBegin(line)) {
188 inDartCode = true; 547 inDartCode = true;
189 } else if (sispIsDartEnd(line)) { 548 } else if (sispIsDartEnd(line)) {
190 inDartCode = false; 549 inDartCode = false;
191 } 550 }
192 if (inDartCode) { 551 if (inDartCode) {
193 newLines.add(sispNormalize(line + "\n")); 552 normalizedLines.add(sispNormalize(line + "\n"));
194 } else { 553 } else {
195 newLines.add(normalize(line + "\n")); 554 normalizedLines.add(normalize(line + "\n"));
196 } 555 }
197 } 556 }
198 557
199 // multi-line normalization 558 // Perform multi-line normalization.
200 newLines = multilineNormalize(newLines); 559 normalizedLines = multilineNormalize(normalizedLines);
201 560
202 // output result 561 // Insert hash values.
203 outputFile.writeAsStringSync(newLines.join()); 562 var hashEvents = findHashEvents(normalizedLines);
563 addHashMarks(normalizedLines, hashEvents, listSink);
564
565 // Produce/finalize output.
566 outputFile.writeAsStringSync(normalizedLines.join());
567 listSink.close();
204 } 568 }
OLDNEW
« no previous file with comments | « tests/standalone/io/addlatexhash_test_src.tex ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698