Index: tools/addlatexhash.dart |
diff --git a/tools/addlatexhash.dart b/tools/addlatexhash.dart |
old mode 100644 |
new mode 100755 |
index f79a0b32296cffdb983eeb279a2930d41389b341..28d194da65bdbd5701b38c05b49edaa43dd7d991 |
--- a/tools/addlatexhash.dart |
+++ b/tools/addlatexhash.dart |
@@ -1,3 +1,4 @@ |
+#!/usr/bin/env dart |
// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
// for details. All rights reserved. Use of this source code is governed by a |
// BSD-style license that can be found in the LICENSE file. |
@@ -6,35 +7,38 @@ |
// This is a very specialized tool which was created in order to support |
// adding hash values used as location markers in the LaTeX source of the |
// language specification. It is intended to take its input file as the |
-// first argument and the output file name as the second argument. From |
-// docs/language a typical usage would be as follows: |
+// first argument, an output file name as the second argument, and a |
+// hash listing file name as the third argument. From docs/language a |
+// typical usage would be as follows: |
// |
-// dart ../../tools/addlatexhash.dart dartLangSpec.tex tmp.tex |
+// dart ../../tools/addlatexhash.dart dartLangSpec.tex out.tex hash.txt |
// |
-// This will yield a normalized variant tmp.tex of the language |
-// specification with hash values filled in. For more details, please |
-// check the language specification source itself. |
+// This will produce a normalized variant out.tex of the language |
+// specification with hash values filled in, and a listing hash.txt of |
+// all the hash values along with the label of their textual context |
+// (section, subsection, subsubsection, paragraph). For more details, |
+// please check the language specification source itself. |
// |
// NB: This utility assumes UN*X style line endings, \n, in the LaTeX |
// source file received as input; it will not work with other styles. |
-// |
-// TODO: The current version does not fill in hash values, it only |
-// standardizes the LaTeX source by removing comments and normalizing |
-// white space. |
import 'dart:io'; |
import 'dart:convert'; |
+import '../pkg/utf/lib/utf.dart'; |
import '../pkg/crypto/lib/crypto.dart'; |
-// Normalization of the text, i.e., removal or normalization |
-// of elements that do not affect the output from latex |
+// ---------------------------------------------------------------------- |
+// Normalization of the text: removal or normalization of parts that |
+// do not affect the output from latex, such as white space. |
-final commentRE = new RegExp(r"[^\\]%.*"); // NB: . does not match \n |
+final commentRE = new RegExp(r"[^\\]%.*"); // NB: . does not match \n. |
final whitespaceAllRE = new RegExp(r"^\s+$"); |
-final whitespaceRE = new RegExp(r"[ \t]{2,}"); |
- |
-// normalization steps |
+final whitespaceRE = new RegExp(r"(?:(?=\s).){2,}"); // \s except end-of-line |
+/// Removes [match]ing part of [line], adjusting that part with the |
+/// given [startOffset] and [endOffset], bounded to be valid indices |
+/// into the string if needed, then inserts [glue] where text was |
+/// removed. If there is no match then [line] is returned. |
cutMatch(line, match, {startOffset: 0, endOffset: 0, glue: ""}) { |
if (match == null) return line; |
var start = match.start + startOffset; |
@@ -52,6 +56,9 @@ cutRegexp(line, re, {startOffset: 0, endOffset: 0, glue: ""}) { |
glue: glue); |
} |
+/// Removes the rest of [line] starting from the beginning of the |
+/// given [match], and adjusting with the given [offset]. If there |
+/// is no match then [line] is returned. |
cutFromMatch(line, match, {offset: 0, glue: ""}) { |
if (match == null) return line; |
return line.substring(0, match.start + offset) + glue; |
@@ -61,127 +68,479 @@ cutFromRegexp(line, re, {offset: 0, glue: ""}) { |
return cutFromMatch(line, re.firstMatch(line), offset: offset, glue: glue); |
} |
-isWsOnly(line) => whitespaceAllRE.firstMatch(line) != null; |
+isWsOnly(line) => line.contains(whitespaceAllRE); |
isCommentOnly(line) => line.startsWith("%"); |
+/// Returns the end-of-line character at the end of [line], if any, |
+/// otherwise returns the empty string. |
justEol(line) { |
return line.endsWith("\n") ? "\n" : ""; |
} |
+/// Removes the contents of the comment at the end of [line], |
+/// leaving the "%" in place. If no comment is present, |
+/// returns [line]. |
+/// |
+/// NB: it is tempting to remove everything from the '%' and out, |
+/// including the final newline, if any, but this does not work. |
+/// The problem is that TeX will do exactly this, but then it will |
+/// add back a character that depends on its state (S, M, or N), |
+/// and it is tricky to maintain a similar state that matches the |
+/// state of TeX faithfully. Hence, we remove the content of |
+/// comments but do not remove the comments themselves, we just |
+/// leave the '%' at the end of the line and let TeX manage its |
+/// states in a way that does not differ from the file from before |
+/// stripComment. |
stripComment(line) { |
- // NB: it is tempting to remove everything from the '%' and out, |
- // including the final newline, if any, but this does not work. |
- // The problem is that TeX will do exactly this, but then it will |
- // add back a character that depends on its state (S, M, or N), |
- // and it is tricky to maintain a similar state that matches the |
- // state of TeX faithfully. Hence, we remove the content of |
- // comments but do not remove the comments themselves, we just |
- // leave the '%' at the end of the line and let TeX manage its |
- // states in a way that does not differ from the file from before |
- // stripComment |
if (isCommentOnly(line)) return "%\n"; |
return cutRegexp(line, commentRE, startOffset: 2); |
} |
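To make the behavior above concrete, here is a minimal sketch (illustrative only) of what stripComment does to the two kinds of comment lines; it assumes the definitions above are in scope and that asserts/checked mode are enabled:

    // Illustrative sketch; relies on stripComment and commentRE above.
    void checkStripComment() {
      // A comment-only line collapses to a bare '%'.
      assert(stripComment("% only a comment\n") == "%\n");
      // A trailing comment loses its content but keeps its '%'.
      assert(stripComment("text % comment\n") == "text %\n");
    }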
-// Reduce a wsOnly line to its eol, remove leading ws |
-// entirely, and reduce multiple ws chars to one |
+/// Reduces a white-space-only [line] to its eol character, |
+/// removes leading ws entirely, and reduces multiple |
+/// white-space chars to one. |
normalizeWhitespace(line) { |
var trimLine = line.trimLeft(); |
if (trimLine.isEmpty) return justEol(line); |
return trimLine.replaceAll(whitespaceRE, " "); |
} |
-// Reduce sequences of >1 wsOnly lines to 1, and sequences of >1 |
-// commentOnly lines to 1; moreover, treat commentOnly lines as |
-// wsOnly when occurring in wsOnly line blocks |
+/// Reduces sequences of >1 white-space-only lines in [lines] to 1, |
+/// and sequences of >1 comment-only lines to 1. Treats comment-only |
+/// lines as white-space-only when they occur in white-space-only |
+/// line blocks. |
multilineNormalize(lines) { |
- var afterBlankLines = false; // does 'line' succeed >0 empty lines? |
- var afterCommentLines = false; // .. succeed >0 commentOnly lines? |
+ var afterBlankLines = false; // Does [line] succeed >0 empty lines? |
+ var afterCommentLines = false; // Does [line] succeed >0 commentOnly lines? |
var newLines = new List(); |
for (var line in lines) { |
if (afterBlankLines && afterCommentLines) { |
- // can never happen |
+ // Previous line was both blank and a comment: not possible. |
throw "Bug, please report to eernst@"; |
} else if (afterBlankLines && !afterCommentLines) { |
- // at least one line before 'line' is wsOnly |
+ // At least one line before [line] is wsOnly. |
if (!isWsOnly(line)) { |
- // blank line block ended |
+ // Blank line block ended. |
afterCommentLines = isCommentOnly(line); |
- // special case: it seems to be safe to remove commentOnly lines |
+ // Special case: It seems to be safe to remove commentOnly lines |
// after wsOnly lines, so the TeX state must be predictably right; |
// next line will then be afterCommentLines and be dropped, so |
- // we drop the entire comment block---which is very useful; we can |
+ // we drop the entire comment block---which is very useful. We can |
// also consider this comment line to be an empty line, such that |
// subsequent empty lines can be considered to be in a block of |
- // empty lines; note that almost all variants of this will break.. |
+// empty lines. Note that almost all variants of this break. |
if (afterCommentLines) { |
- // _current_ 'line' a commentOnly here |
+        // The _current_ [line] is commentOnly here. |
afterBlankLines = true; |
afterCommentLines = false; |
- // and do not add 'line' |
+ // Omit addition of [line]. |
} else { |
- // after blanks, but current 'line' is neither blank nor comment |
+ // After blanks, but current 'line' is neither blank nor comment. |
afterBlankLines = false; |
newLines.add(line); |
} |
} else { |
- // blank line block continues, do not add 'line' |
+ // Blank line block continues, omit addition of [line]. |
} |
} else if (!afterBlankLines && afterCommentLines) { |
- // at least one line before 'line' is commentOnly |
+ // At least one line before [line] is commentOnly. |
if (!isCommentOnly(line)) { |
- // comment line block ended |
+ // Comment block ended. |
afterBlankLines = isWsOnly(line); |
afterCommentLines = false; |
newLines.add(line); |
} else { |
- // comment line block continues, do not add 'line' |
+ // Comment block continues, do not add [line]. |
} |
} else { |
assert(!afterBlankLines && !afterCommentLines); |
- // no wsOnly or commentOnly lines preceed 'line' |
+      // No wsOnly or commentOnly lines precede [line]. |
afterBlankLines = isWsOnly(line); |
afterCommentLines = isCommentOnly(line); |
- if (!afterCommentLines) newLines.add(line); |
- // else skipping commentOnly line after nonWs, nonComment text |
+ if (!afterCommentLines) { |
+ newLines.add(line); |
+ } else { |
+        // Skip commentOnly line after nonWs/nonComment text. |
+ } |
} |
} |
return newLines; |
} |
-// Selecting the elements in the pipeline |
- |
+/// Selects the elements in the normalization pipeline. |
normalize(line) => normalizeWhitespace(stripComment(line)); |
+ |
+/// Selects the elements in the significant-spacing block |
+/// normalization pipeline. |
sispNormalize(line) => stripComment(line); |
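As a quick sanity check on the two pipelines, the following sketch (illustrative only, assuming the functions above are in scope and asserts are enabled) shows their effect on tiny inputs:

    // Illustrative sketch; relies on normalize and multilineNormalize above.
    void checkNormalization() {
      // Single-line pipeline: comment content dropped, blank runs folded.
      assert(normalize("  a   b  % c\n") == "a b %\n");
      // Multi-line pipeline: a run of blank lines is reduced to one.
      assert(multilineNormalize(["x\n", "\n", "\n", "y\n"]).join() ==
             "x\n\ny\n");
    }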
-// Managing fragments with significant spacing |
+// Managing fragments with significant spacing. |
+ |
+final dartCodeBeginRE = new RegExp(r"^\s*\\begin\s*\{dartCode\}"); |
+final dartCodeEndRE = new RegExp(r"^\s*\\end\s*\{dartCode\}"); |
+ |
+/// Recognizes beginning of dartCode block. |
+sispIsDartBegin(line) => line.contains(dartCodeBeginRE); |
+ |
+/// Recognizes end of dartCode block. |
+sispIsDartEnd(line) => line.contains(dartCodeEndRE); |
+ |
+// ---------------------------------------------------------------------- |
+// Analyzing the input to point out "interesting" lines |
+ |
+/// Returns the event information for [lines] as determined by the |
+/// given [analyzer]. The method [analyzer.analyze] indicates that a |
+/// line is "uninteresting" by returning null (i.e., no events here), |
+/// and characterizes "interesting" lines via the event object that |
+/// it returns. |
+findEvents(lines, analyzer) { |
+ var events = new List(); |
+ for (var line in lines) { |
+ var event = analyzer.analyze(line); |
+ if (event != null) events.add(event); |
+ } |
+ return events; |
+} |
+ |
+/// Returns a RegExp that recognizes a command occupying a line of |
+/// its own, given the part of the RegExp source that recognizes the |
+/// command name, [cmdNameRE]. |
+lineCommandRE(cmdNameRE) => |
+ new RegExp(r"^\s*\\" + cmdNameRE + r"\s*\{.*\}\s*$"); |
+ |
+final hashLabelStartRE = new RegExp(r"^\s*\\LMLabel\s*\{"); |
+final hashLabelEndRE = new RegExp(r"\}\s*$"); |
+ |
+final hashMarkRE = lineCommandRE("LMHash"); |
+final hashLabelRE = lineCommandRE("LMLabel"); |
+final sectioningRE = lineCommandRE("((|sub(|sub))section|paragraph)"); |
+final sectionRE = lineCommandRE("section"); |
+final subsectionRE = lineCommandRE("subsection"); |
+final subsubsectionRE = lineCommandRE("subsubsection"); |
+final paragraphRE = lineCommandRE("paragraph"); |
+ |
+/// Returns true iff [line] begins a block of lines that gets a hash value. |
+isHashMarker(line) => line.contains(hashMarkRE); |
+ |
+/// Returns true iff [line] defines a sectioning label. |
+isHashLabel(line) => line.contains(hashLabelRE); |
+ |
+/// Returns true iff [line] is a sectioning command, or one of its |
+/// more specific forms below; note that sectioning commands are |
+/// assumed not to contain a newline between the command name and |
+/// the '{'. |
+isSectioningCommand(line) => line.contains(sectioningRE); |
+isSectionCommand(line) => line.contains(sectionRE); |
+isSubsectionCommand(line) => line.contains(subsectionRE); |
+isSubsubsectionCommand(line) => line.contains(subsubsectionRE); |
+isParagraphCommand(line) => line.contains(paragraphRE); |
+ |
+/// Returns true iff [line] does not end a block of lines that gets |
+/// a hash value. |
+isntHashBlockTerminator(line) => !isSectioningCommand(line); |
+ |
+/// Returns the label text part from [line], based on the assumption |
+/// that isHashLabel(line) returns true. |
+extractHashLabel(line) { |
+ var startMatch = hashLabelStartRE.firstMatch(line); |
+ var endMatch = hashLabelEndRE.firstMatch(line); |
+ assert(startMatch != null && endMatch != null); |
+ return line.substring(startMatch.end, endMatch.start); |
+} |
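A small sketch (illustrative only, assuming the regexes and helpers above and enabled asserts) of how these line commands are recognized and how label text is extracted:

    // Illustrative sketch; relies on the LMHash/LMLabel helpers above.
    void checkLineCommands() {
      assert(isHashMarker(r"\LMHash{}"));
      assert(isHashLabel(r"\LMLabel{variables}"));
      assert(extractHashLabel(r"\LMLabel{variables}") == "variables");
      // Sectioning commands terminate the block of lines that gets hashed.
      assert(!isntHashBlockTerminator(r"\subsection{Local Variables}"));
    }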
+ |
+// Event classes: Keep track of relevant information about the LaTeX |
+// source code lines, such as where \LMHash and \LMLabel commands are |
+// used, and how they are embedded in the sectioning structure. |
+ |
+/// Abstract event class, enabling us to call [setEndLineNumber] on all events. |
+abstract class HashEvent { |
+ /// For events that have an endLineNumber, set it; otherwise ignore. |
+ /// The endLineNumber specifies the end of the block of lines |
+ /// associated with a given event, for event types concerned with |
+ /// blocks of lines rather than single lines. |
+ setEndLineNumber(n) {} |
+ |
+ /// Returns null except for \LMHash{} events, where it returns |
+ /// the startLineNumber. This serves to specify a boundary because |
+ /// the preceding \LMHash{} block should stop before the line of |
+ /// this \LMHash{} command. Note that hash blocks may stop earlier, |
+ /// because they cannot contain sectioning commands. |
+ getStartLineNumber() => null; |
+} |
+ |
+class HashMarkerEvent extends HashEvent { |
+ |
+ // Line number of first line in block that gets hashed. |
+ var startLineNumber; |
+ |
+ // Highest possible number of first line after block that gets |
+ // hashed (where the next \LMHash{} occurs). Note that this value |
+ // is not known initially (because that line has not yet been |
+ // reached), so [endLineNumber] will be initialized in a separate |
+ // scan. Also note that the block may end earlier, because a block |
+ // ends if it would otherwise include a sectioning command. |
+ var endLineNumber; |
+ |
+ HashMarkerEvent(this.startLineNumber); |
+ |
+ setEndLineNumber(n) { endLineNumber = n; } |
+ getStartLineNumber() => startLineNumber; |
+} |
+ |
+class HashLabelEvent extends HashEvent { |
+ var labelText; |
+ HashLabelEvent(this.labelText); |
+} |
+ |
+class HashAnalyzer { |
+  // The kinds of pending (= most recently seen) sectioning command. |
+  // When extending this set, also update sectioningPrefix below. |
+ static const PENDING_IS_NONE = 0; |
+ static const PENDING_IS_SECTION = 1; |
+ static const PENDING_IS_SUBSECTION = 2; |
+ static const PENDING_IS_SUBSUBSECTION = 3; |
+  static const PENDING_IS_PARAGRAPH = 4; |
+ |
+ var lineNumber = 0; |
+ var pendingSectioning = PENDING_IS_NONE; |
+ |
+ HashAnalyzer(); |
+ |
+ setPendingToSection() { |
+ pendingSectioning = PENDING_IS_SECTION; |
+ } |
+ |
+ setPendingToSubsection() { |
+ pendingSectioning = PENDING_IS_SUBSECTION; |
+ } |
+ |
+ setPendingToSubsubsection() { |
+ pendingSectioning = PENDING_IS_SUBSUBSECTION; |
+ } |
+ |
+ setPendingToParagraph() { |
+ pendingSectioning = PENDING_IS_PARAGRAPH; |
+ } |
+ |
+ clearPending() { |
+ pendingSectioning = PENDING_IS_NONE; |
+ } |
+ |
+ sectioningPrefix() { |
+ switch (pendingSectioning) { |
+ case PENDING_IS_SECTION: return "sec:"; |
+ case PENDING_IS_SUBSECTION: return "subsec:"; |
+ case PENDING_IS_SUBSUBSECTION: return "subsubsec:"; |
+ case PENDING_IS_PARAGRAPH: return "par:"; |
+ case PENDING_IS_NONE: |
+ throw |
+ "\\LMHash{..} should only be used after a sectioning command " + |
+ "(\\section, \\subsection, \\subsubsection, \\paragraph)"; |
+ default: |
+      // The set of PENDING_IS_.. values was extended without updating this switch. |
+ throw "Bug, please report to eernst@"; |
+ } |
+ } |
+ |
+ analyze(line) { |
+ var currentLineNumber = lineNumber++; |
+ if (isHashMarker(line)) { |
+ return new HashMarkerEvent(currentLineNumber); |
+ } else if (isHashLabel(line)) { |
+ var labelText = sectioningPrefix() + extractHashLabel(line); |
+ return new HashLabelEvent(labelText); |
+ } else { |
+      // No events to emit, but we may need to note state changes. |
+ if (isSectionCommand(line)) { |
+ setPendingToSection(); |
+ } else if (isSubsectionCommand(line)) { |
+ setPendingToSubsection(); |
+ } else if (isSubsubsectionCommand(line)) { |
+ setPendingToSubsubsection(); |
+ } else if (isParagraphCommand(line)) { |
+ setPendingToParagraph(); |
+ } else { |
+ // No state changes. |
+ } |
+ return null; |
+ } |
+ } |
+} |
+ |
+findHashEvents(lines) { |
+ // Create the list of events, omitting endLineNumbers. |
+ var events = findEvents(lines, new HashAnalyzer()); |
+ // Set the endLineNumbers. |
+ var currentEndLineNumber = lines.length; |
+ for (var event in events.reversed) { |
+ event.setEndLineNumber(currentEndLineNumber); |
+ var nextEndLineNumber = event.getStartLineNumber(); |
+ if (nextEndLineNumber != null) currentEndLineNumber = nextEndLineNumber; |
+ } |
+ return events; |
+} |
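To illustrate how the events fit together (sketch only; it assumes the classes and functions above and enabled asserts): the \LMLabel line yields a label event carrying its sectioning prefix, and the \LMHash line yields a marker whose end is bounded by the next \LMHash{} or the end of input, while the hashed block itself still stops at the first sectioning command:

    // Illustrative sketch; relies on findHashEvents and the event classes.
    void checkFindHashEvents() {
      var lines = [
        r"\section{Variables}",   // line 0: sets the pending prefix
        r"\LMLabel{variables}",   // line 1: label event "sec:variables"
        r"\LMHash{}",             // line 2: marker event starts here
        "Some normative text.",   // line 3: part of the hashed block
        r"\subsection{Getters}"   // line 4: terminates the hashed block
      ];
      var events = findHashEvents(lines);
      assert(events[0].labelText == "sec:variables");
      assert(events[1].startLineNumber == 2);
      assert(events[1].endLineNumber == 5); // Bounded by end of input.
    }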
+ |
+// ---------------------------------------------------------------------- |
+// Removal of non-normative elements of the text (rationale, commentary). |
+ |
+/// Returns [line] without the command [cmdName] (based on a match |
+/// on "\\cmdName\s*{..}") starting at [startIndex]. It is assumed, |
+/// but not checked, that [line] contains "\\cmdName\s*{.." at that |
+/// index. The end of the {..} block is found via brace matching |
+/// (i.e., nested {..} blocks are handled), but this may break if |
+/// '{' is made an active character, etc. |
+removeCommand(line, cmdName, startIndex) { |
+ const BACKSLASH = 92; // char code for '\\'. |
+ const BRACE_BEGIN = 123; // char code for '{'. |
+ const BRACE_END = 125; // char code for '}'. |
+ |
+ var blockStartIndex = startIndex + cmdName.length + 1; |
+ while (blockStartIndex < line.length && |
+ line.codeUnitAt(blockStartIndex) != BRACE_BEGIN) { |
+ blockStartIndex++; |
+ } |
+ blockStartIndex++; |
+ if (blockStartIndex > line.length) { |
+ throw "Bug, please report to eernst@"; |
+ } |
+ // [blockStartIndex] has index just after '{'. |
+ |
+  var afterEscape = false; // True iff [index] is just after a '\' escape. |
+  var braceLevel = 1; // Number of '{' seen so far minus number of '}'. |
+ |
+ for (var index = blockStartIndex; index < line.length; index++) { |
+ switch (line.codeUnitAt(index)) { |
+ case BRACE_BEGIN: |
+ if (afterEscape) { |
+ afterEscape = false; |
+ } else { |
+ braceLevel++; |
+ } |
+ break; |
+ case BRACE_END: |
+ if (afterEscape) { |
+ afterEscape = false; |
+ } else { |
+ braceLevel--; |
+ } |
+ break; |
+ case BACKSLASH: |
+ afterEscape = true; |
+ break; |
+ default: |
+ afterEscape = false; |
+ } |
+ if (braceLevel == 0) { |
+ return line.substring(0, startIndex) + line.substring(index + 1); |
+ } |
+ } |
+ // Removal failed; we consider this to mean that the input is ill-formed. |
+ throw "Unmatched braces"; |
+} |
+ |
+final commentaryRE = new RegExp(r"\\commentary\s*\{"); |
+final rationaleRE = new RegExp(r"\\rationale\s*\{"); |
+ |
+/// Removes {}-balanced '\commentary{..}' commands from [line]. |
+removeCommentary(line) { |
+ var match = commentaryRE.firstMatch(line); |
+ if (match == null) return line; |
+ return removeCommentary(removeCommand(line, r"commentary", match.start)); |
+} |
+ |
+/// Removes {}-balanced '\rationale{..}' commands from [line]. |
+removeRationale(line) { |
+ var match = rationaleRE.firstMatch(line); |
+ if (match == null) return line; |
+ return removeRationale(removeCommand(line, r"rationale", match.start)); |
+} |
+ |
+/// Removes {}-balanced '\commentary{..}' and '\rationale{..}' |
+/// commands from [line], then normalizes its white-space. |
+simplifyLine(line) { |
+ var simplerLine = removeCommentary(line); |
+ simplerLine = removeRationale(simplerLine); |
+ simplerLine = normalizeWhitespace(simplerLine); |
+ return simplerLine; |
+} |
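A short sketch (illustrative only, assuming the functions above and enabled asserts) showing that non-normative commands are removed together with their nested braces, and that the remaining spacing is then normalized:

    // Illustrative sketch; relies on simplifyLine and its helpers above.
    void checkSimplifyLine() {
      var line = r"A \commentary{c {nested} c} B \rationale{r} C";
      assert(simplifyLine(line) == "A B C");
    }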
+ |
+// ---------------------------------------------------------------------- |
+// Recognition of line blocks, insertion of block hash into \LMHash{}. |
-final dartCodeBeginRE = new RegExp(r"^\s*\\begin\{dartCode\}"); |
-final dartCodeEndRE = new RegExp (r"^\s*\\end\{dartCode\}"); |
+final latexArgumentRE = new RegExp(r"\{.*\}"); |
-sispIs(line, targetRE) { |
- return targetRE.firstMatch(line) != null; |
+cleanupLine(line) => cutRegexp(line, commentRE, startOffset: 1).trimRight(); |
+ |
+/// Returns the concatenation of all lines from [startIndex] in [lines] |
+/// until a hash block terminator is encountered or [nextIndex] is reached |
+/// (in which case lines[nextIndex] itself is not included); each line is |
+/// cleaned up using [cleanupLine], and " " is inserted between the lines. |
+gatherLines(lines, startIndex, nextIndex) => |
+ lines.getRange(startIndex, nextIndex) |
+ .takeWhile(isntHashBlockTerminator) |
+ .map(cleanupLine) |
+ .join(" "); |
+ |
+/// Computes the hash value for the line block starting at [startIndex] |
+/// in [lines], stopping just before [nextIndex]. SIDE EFFECT: |
+/// Outputs the simplified text of the block to [listSink]. |
+computeHashValue(lines, startIndex, nextIndex, listSink) { |
+ final hashEncoder = new SHA1(); |
+ final gatheredLine = gatherLines(lines, startIndex, nextIndex); |
+ final simplifiedLine = simplifyLine(gatheredLine); |
+ listSink.write(" % $simplifiedLine\n"); |
+ hashEncoder.add(encodeUtf8(simplifiedLine)); |
+ return hashEncoder.close(); |
} |
-sispIsDartBegin(line) => sispIs(line, dartCodeBeginRE); |
-sispIsDartEnd(line) => sispIs(line, dartCodeEndRE); |
+computeHashString(lines, startIndex, nextIndex, listSink) => |
+ CryptoUtils.bytesToHex(computeHashValue(lines, |
+ startIndex, |
+ nextIndex, |
+ listSink)); |
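The following sketch (illustrative only) assumes the helpers above plus the bundled pkg/utf and pkg/crypto libraries imported at the top of this file; any StringSink, e.g. a StringBuffer, can stand in for the listing sink:

    // Illustrative sketch; relies on computeHashString and gatherLines above.
    void checkComputeHashString() {
      var lines = [r"\LMHash{}", "Some text.", "More text."];
      var listSink = new StringBuffer();
      var hash = computeHashString(lines, 1, 3, listSink);
      assert(hash.length == 40); // Hex encoding of a 20-byte SHA-1 digest.
      assert(listSink.toString().contains("% Some text. More text."));
    }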
-// Transform input file into output file |
+/// Computes and adds hashes to \LMHash{} lines in [lines] (which |
+/// must be on the line numbers specified in [hashEvents]), and emits |
+/// sectioning markers and hash values to [listSink], along with |
+/// "comments" containing the simplified text (using the format |
+/// ' % <text>', where the text is one long line, for easy grepping |
+/// etc.). |
+addHashMarks(lines, hashEvents, listSink) { |
+ for (var hashEvent in hashEvents) { |
+ if (hashEvent is HashMarkerEvent) { |
+ var start = hashEvent.startLineNumber; |
+ var end = hashEvent.endLineNumber; |
+ final hashValue = computeHashString(lines, start + 1, end, listSink); |
+ lines[start] = |
+ lines[start].replaceAll(latexArgumentRE, "{" + hashValue + "}"); |
+ listSink.write(" $hashValue\n"); |
+ } else if (hashEvent is HashLabelEvent) { |
+ listSink.write("${hashEvent.labelText}\n"); |
+ } |
+ } |
+} |
+/// Transforms LaTeX input to LaTeX output plus hash value list file. |
main ([args]) { |
- if (args.length != 2) { |
- print("Usage: addlatexhash.dart <input-file> <output-file>"); |
- throw "Received ${args.length} arguments, expected two"; |
+ if (args.length != 3) { |
+ print("Usage: addlatexhash.dart <input-file> <output-file> <list-file>"); |
+ throw "Received ${args.length} arguments, expected three"; |
} |
+ // Get LaTeX source. |
var inputFile = new File(args[0]); |
- var outputFile = new File(args[1]); |
assert(inputFile.existsSync()); |
- |
var lines = inputFile.readAsLinesSync(); |
- // single-line normalization |
+ |
+ // Will hold LaTeX source with normalized spacing etc., plus hash values. |
+ var outputFile = new File(args[1]); |
+ |
+ // Will hold hierarchical list of hash values. |
+ var listFile = new File(args[2]); |
+ var listSink = listFile.openWrite(); |
+ |
+ // Perform single-line normalization. |
var inDartCode = false; |
- var newLines = new List(); |
+ var normalizedLines = new List(); |
for (var line in lines) { |
if (sispIsDartBegin(line)) { |
@@ -190,15 +549,20 @@ main ([args]) { |
inDartCode = false; |
} |
if (inDartCode) { |
- newLines.add(sispNormalize(line + "\n")); |
+ normalizedLines.add(sispNormalize(line + "\n")); |
} else { |
- newLines.add(normalize(line + "\n")); |
+ normalizedLines.add(normalize(line + "\n")); |
} |
} |
- // multi-line normalization |
- newLines = multilineNormalize(newLines); |
+ // Perform multi-line normalization. |
+ normalizedLines = multilineNormalize(normalizedLines); |
+ |
+ // Insert hash values. |
+ var hashEvents = findHashEvents(normalizedLines); |
+ addHashMarks(normalizedLines, hashEvents, listSink); |
- // output result |
- outputFile.writeAsStringSync(newLines.join()); |
+ // Produce/finalize output. |
+ outputFile.writeAsStringSync(normalizedLines.join()); |
+ listSink.close(); |
} |
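For orientation, the listing written to the third argument interleaves the sectioning labels with, for each \LMHash{} block, one indented comment line holding the block's simplified text and one indented line holding its hash. Roughly (illustrative shape only, with placeholders, not actual output):

    sec:variables
     % <simplified text of the first hashed block, as one long line>
     <40 hex digits: SHA-1 of that simplified text>
    subsec:<label of the next subsection>
     % <simplified text of the next hashed block>
     <40 hex digits: SHA-1 of that simplified text>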