OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 // | |
5 // ---------------------------------------------------------------------- | |
6 // This is a very specialized tool which was created in order to support | |
7 // adding hash values used as location markers in the LaTeX source of the | |
8 // language specification. It is intended to be used as a filter from | |
9 // the directory ../docs/language, in commands like the following: | |
10 // | |
11 // dart ../../tools/addlatexhash.dart < dartLangSpec.tex >tmp.tex | |
ricow1
2014/10/14 06:09:12
outdated comment, input and output is now taken as
eernst
2014/10/14 15:53:25
Done.
| |
12 // | |
13 // This will yield a variant tmp.tex of the language specification with | |
ricow1
2014/10/14 06:09:11
tmp.tex -> whatever you call the output file above
eernst
2014/10/14 15:53:25
Done.
| |
14 // hash values filled in. For more details, please check the language | |
ricow1
2014/10/14 06:09:12
well, not yet, add a todo. Maybe also state that w
eernst
2014/10/14 15:53:25
Done.
| |
15 // specification source itself. | |
16 // | |
17 // NB: This utility assumes UN*X style line endings, \n; it will not | |
18 // work with other styles. | |
19 | |
20 import 'dart:io'; | |
21 import 'dart:convert'; | |
22 import '../pkg/crypto/lib/crypto.dart'; | |
23 | |
24 // ---------------------------------------------------------------------- | |
25 // Normalization of the text, i.e., removal or normalization | |
26 // of elements that do not affect the output from latex | |
27 | |
28 // regexps | |
ricow1
2014/10/14 06:09:12
obvious, remove comment
eernst
2014/10/14 15:53:24
Done.
| |
29 | |
30 final commentAllRe = new RegExp("^%"); | |
31 final commentRe = new RegExp("[^\\\\]%[^\\n]*"); | |
32 final whitespaceAllRe = new RegExp("^\\s+\$"); | |
33 final whitespaceLeadingRe = new RegExp("^\\s+[^\\n]"); | |
34 final whitespaceRe = new RegExp("[ \\t][ \\t]+"); | |
35 | |
36 // normalization steps | |
37 | |
38 cutMatch(line, match, {startOffset:0, endOffset:0, glue:""}) { | |
ricow1
2014/10/14 06:09:12
space after :
eernst
2014/10/14 15:53:24
Done.
| |
39 if (match == null) return line; | |
40 var start = match.start + startOffset; | |
41 var end = match.end + endOffset; | |
42 var len = line.length; | |
43 if (start < 0) start = 0; | |
44 if (end > len) end = len; | |
45 return line.substring(0, start) + glue + line.substring(end); | |
46 } | |
47 | |
48 cutRegexp(line, re, {startOffset:0, endOffset:0, glue:""}) { | |
ricow1
2014/10/14 06:09:11
space after :
eernst
2014/10/14 15:53:25
Done.
| |
49 return cutMatch(line, re.firstMatch(line), | |
50 startOffset: startOffset, | |
51 endOffset: endOffset, | |
52 glue: glue); | |
ricow1
2014/10/14 06:09:11
indentation
ricow1
2014/10/14 06:09:12
indentation
eernst
2014/10/14 15:53:25
Done.
eernst
2014/10/14 15:53:25
Done.
| |
53 } | |
54 | |
55 cutFromMatch(line, match, {offset:0, glue:""}) { | |
ricow1
2014/10/14 06:09:12
space after :
eernst
2014/10/14 15:53:24
Done.
| |
56 if (match == null) return line; | |
57 return line.substring(0, match.start + offset) + glue; | |
58 } | |
59 | |
60 cutFromRegexp(line, re, {offset:0, glue:""}) { | |
ricow1
2014/10/14 06:09:12
space after :
eernst
2014/10/14 15:53:25
Done.
| |
61 return cutFromMatch(line, re.firstMatch(line), offset:offset, glue:glue); | |
ricow1
2014/10/14 06:09:12
space after :
eernst
2014/10/14 15:53:25
Done.
| |
62 } | |
63 | |
64 isWsOnly(line) => whitespaceAllRe.firstMatch(line) != null; | |
65 isCommentOnly(line) => commentAllRe.firstMatch(line) != null; | |
66 | |
67 justEol(line) { | |
68 if (line.length == 0) return line; | |
69 return line[line.length-1] == '\n' ? "\n" : ""; | |
ricow1
2014/10/14 06:09:11
be consistent in using either ' or " for strings i
eernst
2014/10/14 15:53:25
Using '"' for strings, "'" for imports.
| |
70 } | |
71 | |
72 stripComment(line) { | |
73 // NB: it is tempting to remove everything from the '%' and out, | |
74 // including the final newline, if any, but this does not work. | |
75 // The problem is that TeX will do exactly this, but then it will | |
76 // add back a character that depends on its state (S, M, or N), | |
77 // and it is tricky to maintain a similar state that matches the | |
78 // state of TeX faithfully. Hence, we remove the content of | |
79 // comments but do not remove the comments themselves, we just | |
80 // leave the '%' at the end of the line and let TeX manage its | |
81 // states in a way that does not differ from the file from before | |
82 // stripComment | |
83 if (isCommentOnly(line)) return "%\n"; | |
84 return cutRegexp(line, commentRe, startOffset:2); | |
ricow1
2014/10/14 06:09:11
space after :
eernst
2014/10/14 15:53:25
Done.
| |
85 } | |
86 | |
87 // Reduce a wsOnly line to its eol, remove leading ws | |
88 // entirely, and reduce multiple ws chars to one | |
89 normalizeWhitespace(line) { | |
90 if (isWsOnly(line)) return justEol(line); | |
91 line = cutRegexp(line, whitespaceLeadingRe, endOffset:-1); | |
ricow1
2014/10/14 06:09:12
space after :
eernst
2014/10/14 15:53:25
Done.
| |
92 var match; | |
93 while ((match = whitespaceRe.firstMatch(line)) != null) { | |
94 line = cutMatch(line, match, glue:" "); | |
ricow1
2014/10/14 06:09:12
space after :
eernst
2014/10/14 15:53:25
Done.
| |
95 } | |
96 return line; | |
97 } | |
98 | |
99 // Reduce sequences of >1 wsOnly lines to 1, | |
100 // and sequences of >1 commentOnly lines to 1 | |
101 multilineNormalize(lines) { | |
102 var oldlines = lines; | |
ricow1
2014/10/14 06:09:11
why are we doing this name mangling:
oldlines line
eernst
2014/10/14 15:53:25
Was thinking "transforming lines several times" (w
| |
103 var afterBlankLines = false; // does 'line' succeed >0 empty lines? | |
104 var afterCommentLines = false; // .. succeed >0 commentOnly lines? | |
105 lines = new List(); | |
106 for (var line in oldlines) { | |
107 if (afterBlankLines && afterCommentLines) { | |
108 // can never happen | |
109 throw "Bug, please report to eernst@"; | |
110 } else if (afterBlankLines && !afterCommentLines) { | |
111 // at least one line before 'line' is wsOnly | |
112 if (!isWsOnly(line)) { | |
113 // blank line block ended | |
114 afterCommentLines = isCommentOnly(line); | |
115 // special case: it seems to be safe to remove commentOnly lines | |
116 // after wsOnly lines, so the TeX state must be predictably right; | |
117 // next line will then be afterCommentLines and be dropped, so | |
118 // we drop the entire comment block---which is very useful; we can | |
119 // also consider this comment line to be an empty line, such that | |
120 // subsequent empty lines can be considered to be in a block of | |
121 // empty lines; note that almost all variants of this will break.. | |
122 if (afterCommentLines) { | |
123 // _current_ 'line' is a commentOnly here | |
124 afterBlankLines = true; | |
125 afterCommentLines = false; | |
126 // and do not add 'line' | |
127 } else { | |
128 // after blanks, but current 'line' is neither blank nor comment | |
129 afterBlankLines = false; | |
130 lines.add(line); | |
131 } | |
132 } else { | |
133 // blank line block continues, do not add 'line' | |
ricow1
2014/10/14 06:09:12
do we really want an else clause that does nothing
eernst
2014/10/14 15:53:25
Worried about performance? Not sure about the cos
ricow1
2014/10/15 08:29:05
I would just have it as a comment, but this is fin
| |
134 } | |
135 } else if (!afterBlankLines && afterCommentLines) { | |
136 // at least one line before 'line' is commentOnly | |
137 if (!isCommentOnly(line)) { | |
138 // comment line block ended | |
139 afterBlankLines = isWsOnly(line); | |
140 afterCommentLines = false; | |
141 lines.add(line); | |
142 } else { | |
143 // comment line block continues, do not add 'line' | |
ricow1
2014/10/14 06:09:12
same as above
eernst
2014/10/14 15:53:25
Same issue, to be resolved together.
| |
144 } | |
145 } else /* !afterBlankLines && !afterCommentLines */ { | |
ricow1
2014/10/14 06:09:12
you have comments after the line in all other plac
eernst
2014/10/14 15:53:25
This is actually a bit different, because this com
ricow1
2014/10/15 08:29:05
Acknowledged.
| |
146 // no wsOnly or commentOnly lines precede 'line' | |
147 if (isWsOnly(line)) afterBlankLines = true; | |
148 if (isCommentOnly(line)) afterCommentLines = true; | |
149 if (!afterCommentLines) lines.add(line); | |
150 // else skipping commentOnly line after nonWs, nonComment text | |
151 } | |
152 } | |
153 return lines; | |
154 } | |
155 | |
156 // select the elements in the pipeline | |
157 | |
158 normalize(line) => normalizeWhitespace(stripComment(line)); | |
159 | |
160 sispNormalize(line) => stripComment(line); | |
161 | |
162 // ---------------------------------------------------------------------- | |
163 // Managing fragments with significant spacing | |
164 | |
165 final dartCodeBeginRe = new RegExp("^\\s*\\\\begin{dartCode}"); | |
166 final dartCodeEndRe = new RegExp ("^\\s*\\\\end{dartCode}"); | |
167 | |
168 sispIs(line, targetRe) { | |
169 return targetRe.firstMatch(line) != null; | |
170 } | |
171 | |
172 sispIsDartBegin(line) => sispIs(line, dartCodeBeginRe); | |
173 sispIsDartEnd(line) => sispIs(line, dartCodeEndRe); | |
174 | |
175 // ---------------------------------------------------------------------- | |
176 // main | |
ricow1
2014/10/14 06:09:12
obvious, remove comment
eernst
2014/10/14 15:53:24
It wasn't really meant to be unobvious, it should
ricow1
2014/10/15 08:29:05
I never do, if I want to structure something toget
| |
177 | |
178 main ([args]) { | |
179 if (args.length != 2) { | |
180 print("Usage: addlatexhash.dart <input-file> <output-file>"); | |
181 throw "Received ${args.length} arguments, expected two"; | |
182 } | |
183 | |
184 var inputFile = new File(args[0]); | |
185 var outputFile = new File(args[1]); | |
186 assert(inputFile.existsSync()); | |
187 | |
188 var lines = inputFile.readAsLinesSync(); | |
189 // single-line normalization | |
190 var inDartCode = false; | |
191 var newLines = new List(); | |
192 | |
193 for (var line in lines) { | |
194 if (sispIsDartBegin(line)) { | |
195 inDartCode = true; | |
196 } else if (sispIsDartEnd(line)) { | |
197 inDartCode = false; | |
198 } | |
199 if (inDartCode) { | |
200 newLines.add(sispNormalize(line + "\n")); | |
ricow1
2014/10/14 06:09:12
maybe remove the "\n" here and join on it when wri
eernst
2014/10/14 15:53:24
Would look better, but I would need to change many
ricow1
2014/10/15 08:29:05
Acknowledged.
| |
201 } else { | |
202 newLines.add(normalize(line + "\n")); | |
203 } | |
204 } | |
205 | |
206 // multi-line normalization | |
207 newLines = multilineNormalize(newLines); | |
208 | |
209 // output result | |
210 outputFile.writeAsStringSync(newLines.join()); | |
211 } | |
OLD | NEW |