Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(376)

Side by Side Diff: tools/addlatexhash.dart

Issue 646003002: Introduced hash valued location markers in the spec (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« docs/language/dart.sty ('K') | « docs/language/dartLangSpec.tex ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // ----------------------------------------------------------------------
ricow1 2014/10/13 06:07:03 Add copyright header to this file
eernst 2014/10/13 08:03:27 Done.
2 // File 'addlatexhash.dart'
ricow1 2014/10/13 06:07:03 We don't normally have this as a header, just leav
eernst 2014/10/13 08:03:27 Done.
3 //
4 // This is a very specialized tool which was created in order to support
5 // adding hash values used as location markers in the LaTeX source of the
6 // language specification. It is intended to be used as a filter from
7 // the directory ../docs/language, in commands like the following:
8 //
9 // dart ../../tools/addlatexhash.dart < dartLangSpec.tex >tmp.tex
ricow1 2014/10/13 06:07:03 If I were you I would simply use command line par
eernst 2014/10/13 08:03:27 OK. But there are myriads of ways to define the k
Lasse Reichstein Nielsen 2014/10/15 09:13:17 I'd favor taking the input name as unnamed argume
ricow1 2014/10/15 09:23:06 That is already the case, you are looking at the o
eernst 2014/10/15 12:01:10 Actually, using -o for the output and default to s
10 //
11 // This will yield a variant tmp.tex of the language specification with
12 // hash values filled in. For more details, please check the language
13 // specification source itself.
14
15 import 'dart:io';
16 import 'dart:convert';
17
18 // ----------------------------------------------------------------------
19 // Computation of SHA1 sums
20 //
21 // NB: To keep this script as independent of installation as possible,
22 // this section was copied from crypto-0.9.0.tar.gz on Oct 9, 2014,
23 // from https://pub.dartlang.org/packages/crypto, from the source files
24 // crypto.dart, hash_utils.dart, and sha1.dart,
25 // with the following copyright statement:
26 //
27 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
28 // for details. All rights reserved. Use of this source code is governed by a
29 // BSD-style license that can be found in the LICENSE file.
30
31 /**
32 * Interface for cryptographic hash functions.
33 *
34 * The [add] method is used to add data to the hash. The [close] method
35 * is used to extract the message digest.
36 *
37 * Once the [close] method has been called no more data can be added using the
38 * [add] method. If [add] is called after the first call to [close] a
39 * HashException is thrown.
40 *
41 * If multiple instances of a given Hash are needed, the [newInstance]
42 * method can provide a new instance.
43 */
44 // TODO(floitsch): make Hash implement Sink, EventSink or similar.
45 abstract class Hash {
46 /**
47 * Add a list of bytes to the hash computation.
48 */
49 void add(List<int> data);
50
51 /**
52 * Finish the hash computation and extract the message digest as
53 * a list of bytes.
54 */
55 List<int> close();
56
57 /**
58 * Returns a new instance of this hash function.
59 */
60 Hash newInstance();
61
62 /**
63 * Internal block size of the hash in bytes.
64 *
65 * This is exposed for use by the HMAC class which needs to know the
66 * block size for the [Hash] it is using.
67 */
68 int get blockSize;
69 }
70
71 // Constants.
72 const _MASK_8 = 0xff;
73 const _MASK_32 = 0xffffffff;
74 const _BITS_PER_BYTE = 8;
75 const _BYTES_PER_WORD = 4;
76
77 // Helper functions used by more than one hasher.
78
79 // Rotate left limiting to unsigned 32-bit values.
80 int _rotl32(int val, int shift) {
81 var mod_shift = shift & 31;
82 return ((val << mod_shift) & _MASK_32) |
83 ((val & _MASK_32) >> (32 - mod_shift));
84 }
85
86 // Base class encapsulating common behavior for cryptographic hash
87 // functions.
88 abstract class _HashBase implements Hash {
89 _HashBase(int chunkSizeInWords,
90 int digestSizeInWords,
91 bool this._bigEndianWords)
92 : _pendingData = [],
93 _currentChunk = new List(chunkSizeInWords),
94 _h = new List(digestSizeInWords),
95 _chunkSizeInWords = chunkSizeInWords,
96 _digestSizeInWords = digestSizeInWords;
97
98 // Update the hasher with more data.
99 void add(List<int> data) {
100 if (_digestCalled) {
101 throw new StateError(
102 'Hash update method called after digest was retrieved');
103 }
104 _lengthInBytes += data.length;
105 _pendingData.addAll(data);
106 _iterate();
107 }
108
109 // Finish the hash computation and return the digest as a list of bytes.
110 List<int> close() {
111 if (_digestCalled) {
112 return _resultAsBytes();
113 }
114 _digestCalled = true;
115 _finalizeData();
116 _iterate();
117 assert(_pendingData.length == 0);
118 return _resultAsBytes();
119 }
120
121 // Returns the block size of the hash in bytes.
122 int get blockSize {
123 return _chunkSizeInWords * _BYTES_PER_WORD;
124 }
125
126 // One round of the hash computation.
127 void _updateHash(List<int> m);
128
129 // Helper methods.
130 int _add32(x, y) => (x + y) & _MASK_32;
131 int _roundUp(val, n) => (val + n - 1) & -n;
132
133 // Compute the final result as a list of bytes from the hash words.
134 List<int> _resultAsBytes() {
135 var result = [];
136 for (var i = 0; i < _h.length; i++) {
137 result.addAll(_wordToBytes(_h[i]));
138 }
139 return result;
140 }
141
142 // Converts a list of bytes to a chunk of 32-bit words.
143 void _bytesToChunk(List<int> data, int dataIndex) {
144 assert((data.length - dataIndex) >= (_chunkSizeInWords * _BYTES_PER_WORD));
145
146 for (var wordIndex = 0; wordIndex < _chunkSizeInWords; wordIndex++) {
147 var w3 = _bigEndianWords ? data[dataIndex] : data[dataIndex + 3];
148 var w2 = _bigEndianWords ? data[dataIndex + 1] : data[dataIndex + 2];
149 var w1 = _bigEndianWords ? data[dataIndex + 2] : data[dataIndex + 1];
150 var w0 = _bigEndianWords ? data[dataIndex + 3] : data[dataIndex];
151 dataIndex += 4;
152 var word = (w3 & 0xff) << 24;
153 word |= (w2 & _MASK_8) << 16;
154 word |= (w1 & _MASK_8) << 8;
155 word |= (w0 & _MASK_8);
156 _currentChunk[wordIndex] = word;
157 }
158 }
159
160 // Convert a 32-bit word to four bytes.
161 List<int> _wordToBytes(int word) {
162 List<int> bytes = new List(_BYTES_PER_WORD);
163 bytes[0] = (word >> (_bigEndianWords ? 24 : 0)) & _MASK_8;
164 bytes[1] = (word >> (_bigEndianWords ? 16 : 8)) & _MASK_8;
165 bytes[2] = (word >> (_bigEndianWords ? 8 : 16)) & _MASK_8;
166 bytes[3] = (word >> (_bigEndianWords ? 0 : 24)) & _MASK_8;
167 return bytes;
168 }
169
170 // Iterate through data updating the hash computation for each
171 // chunk.
172 void _iterate() {
173 var len = _pendingData.length;
174 var chunkSizeInBytes = _chunkSizeInWords * _BYTES_PER_WORD;
175 if (len >= chunkSizeInBytes) {
176 var index = 0;
177 for (; (len - index) >= chunkSizeInBytes; index += chunkSizeInBytes) {
178 _bytesToChunk(_pendingData, index);
179 _updateHash(_currentChunk);
180 }
181 _pendingData = _pendingData.sublist(index, len);
182 }
183 }
184
185 // Finalize the data. Add a 1 bit to the end of the message. Expand with
186 // 0 bits and add the length of the message.
187 void _finalizeData() {
188 _pendingData.add(0x80);
189 var contentsLength = _lengthInBytes + 9;
190 var chunkSizeInBytes = _chunkSizeInWords * _BYTES_PER_WORD;
191 var finalizedLength = _roundUp(contentsLength, chunkSizeInBytes);
192 var zeroPadding = finalizedLength - contentsLength;
193 for (var i = 0; i < zeroPadding; i++) {
194 _pendingData.add(0);
195 }
196 var lengthInBits = _lengthInBytes * _BITS_PER_BYTE;
197 assert(lengthInBits < pow(2, 32));
198 if (_bigEndianWords) {
199 _pendingData.addAll(_wordToBytes(0));
200 _pendingData.addAll(_wordToBytes(lengthInBits & _MASK_32));
201 } else {
202 _pendingData.addAll(_wordToBytes(lengthInBits & _MASK_32));
203 _pendingData.addAll(_wordToBytes(0));
204 }
205 }
206
207 // Hasher state.
208 final int _chunkSizeInWords;
209 final int _digestSizeInWords;
210 final bool _bigEndianWords;
211 int _lengthInBytes = 0;
212 List<int> _pendingData;
213 final List<int> _currentChunk;
214 final List<int> _h;
215 bool _digestCalled = false;
216 }
217
218 /**
219 * SHA1 hash function implementation.
220 */
221 class SHA1 extends _HashBase {
222 // Construct a SHA1 hasher object.
223 SHA1() : _w = new List(80), super(16, 5, true) {
224 _h[0] = 0x67452301;
225 _h[1] = 0xEFCDAB89;
226 _h[2] = 0x98BADCFE;
227 _h[3] = 0x10325476;
228 _h[4] = 0xC3D2E1F0;
229 }
230
231 // Returns a new instance of this Hash.
232 SHA1 newInstance() {
233 return new SHA1();
234 }
235
236 // Compute one iteration of the SHA1 algorithm with a chunk of
237 // 16 32-bit pieces.
238 void _updateHash(List<int> m) {
239 assert(m.length == 16);
240
241 var a = _h[0];
242 var b = _h[1];
243 var c = _h[2];
244 var d = _h[3];
245 var e = _h[4];
246
247 for (var i = 0; i < 80; i++) {
248 if (i < 16) {
249 _w[i] = m[i];
250 } else {
251 var n = _w[i - 3] ^ _w[i - 8] ^ _w[i - 14] ^ _w[i - 16];
252 _w[i] = _rotl32(n, 1);
253 }
254 var t = _add32(_add32(_rotl32(a, 5), e), _w[i]);
255 if (i < 20) {
256 t = _add32(_add32(t, (b & c) | (~b & d)), 0x5A827999);
257 } else if (i < 40) {
258 t = _add32(_add32(t, (b ^ c ^ d)), 0x6ED9EBA1);
259 } else if (i < 60) {
260 t = _add32(_add32(t, (b & c) | (b & d) | (c & d)), 0x8F1BBCDC);
261 } else {
262 t = _add32(_add32(t, b ^ c ^ d), 0xCA62C1D6);
263 }
264
265 e = d;
266 d = c;
267 c = _rotl32(b, 30);
268 b = a;
269 a = t & _MASK_32;
270 }
271
272 _h[0] = _add32(a, _h[0]);
273 _h[1] = _add32(b, _h[1]);
274 _h[2] = _add32(c, _h[2]);
275 _h[3] = _add32(d, _h[3]);
276 _h[4] = _add32(e, _h[4]);
277 }
278
279 List<int> _w;
280 }
281
282 // ----------------------------------------------------------------------
283 // Normalization of the text, i.e., removal or normalization
284 // of elements that do not affect the output from latex
285
286 // regexps
287
288 var comment_all_re = new RegExp("^%");
ricow1 2014/10/13 06:07:04 camel case variables, these could be const? (in wh
eernst 2014/10/13 08:03:26 Now using camel case, but 'const RegExp(..)' is no
eernst 2014/10/13 08:03:30 CamelCasing done. But it can't be const (can't use
289 var comment_re = new RegExp("[^\\\\]%[^\\n]*");
290 var whitespace_all_re = new RegExp("^\\s+\$");
291 var whitespace_leading_re = new RegExp("^\\s+[^\\n]");
292 var whitespace_re = new RegExp("[ \\t][ \\t]+");
293
294 // normalization steps
295
296 cut_match(line, match, {start_offset:0, end_offset:0, glue:""}) {
ricow1 2014/10/13 06:07:03 Camel case method names, here and below
eernst 2014/10/13 08:03:30 Done.
297 if (match == null) return line;
298 var start = match.start + start_offset;
299 var end = match.end + end_offset;
300 var len = line.length;
301 if (start < 0) start=0;
ricow1 2014/10/13 06:07:03 space around =
eernst 2014/10/13 08:03:29 Done.
302 if (end > len) end=len;
ricow1 2014/10/13 06:07:04 space around =
eernst 2014/10/13 08:03:29 Done.
303 return line.substring(0,start) + glue + line.substring(end);
304 }
305
306 cut_regexp(line, re, {start_offset:0, end_offset:0, glue:""}) {
Lasse Reichstein Nielsen 2014/10/15 09:13:17 Is this function used?
Lasse Reichstein Nielsen 2014/10/15 09:13:58 Is this comment still here? Yes it is. Should it b
307 return cut_match(line, re.firstMatch(line),
308 start_offset: start_offset,
309 end_offset: end_offset,
310 glue: glue);
311 }
312
313 cut_from_match(line, match, {offset:0, glue:""}) {
314 if (match == null) return line;
315 return line.substring(0,match.start+offset) + glue;
ricow1 2014/10/13 06:07:03 space around +
eernst 2014/10/13 08:03:29 Done.
316 }
317
318 cut_from_regexp(line, re, {offset:0, glue:""}) {
319 return cut_from_match(line, re.firstMatch(line), offset:offset, glue:glue);
320 }
321
322 is_ws_only(line) => whitespace_all_re.firstMatch(line) != null;
323 is_comment_only(line) => comment_all_re.firstMatch(line) != null;
Lasse Reichstein Nielsen 2014/10/15 09:13:17 This would be the non-regexp version: => line.star
eernst 2014/10/15 12:01:10 Done. Used to have a slightly more general commen
324
325 just_eol(line) {
326 if (line.length == 0) return line;
327 return line[line.length-1] == '\n'? "\n" : "";
ricow1 2014/10/13 06:07:03 space before ?
eernst 2014/10/13 08:03:29 Done.
328 }
329
330 strip_comment(line) {
331 // NB: it is tempting to remove everything from the '%' onward,
332 // including the final newline, if any, but this does not work.
333 // The problem is that TeX will do exactly this, but then it will
334 // add back a character that depends on its state (S, M, or N),
335 // and it is tricky to maintain a similar state that matches the
336 // state of TeX faithfully. Hence, we remove the content of
337 // comments but do not remove the comments themselves, we just
338 // leave the '%' at the end of the line and let TeX manage its
339 // states in a way that does not differ from the file from before
340 // strip_comment
341 if (is_comment_only(line)) return "%\n";
342 return cut_regexp(line, comment_re, start_offset:2);
343 }
344
345 // reduce a ws_only line to its eol, remove leading ws
346 // entirely, and reduce multiple ws chars to one
347 normalize_whitespace(line) {
348 if (is_ws_only(line)) return just_eol(line);
349 line = cut_regexp(line, whitespace_leading_re, end_offset:-1);
350 var match;
351 while ((match = whitespace_re.firstMatch(line)) != null)
352 line = cut_match(line, match, glue:" ");
ricow1 2014/10/13 06:07:03 always encapsulate loop and conditional blocks in
eernst 2014/10/13 08:03:26 Done.
eernst 2014/10/13 08:03:30 Done.
353 return line;
354 }
355
356 // reduce sequences of >1 ws_only lines to 1, and
ricow1 2014/10/13 06:07:03 reduce -> Reduce
eernst 2014/10/13 08:03:25 Done.
eernst 2014/10/13 08:03:29 Done.
357 // and sequences of >1 comment_only lines to 1
ricow1 2014/10/13 06:07:03 and and -> and
eernst 2014/10/13 08:03:30 Done.
358 multiline_normalize(lines) {
359 var oldlines = lines;
360 var after_blank_lines = false; // does 'line' succeed >0 empty lines?
361 var after_comment_lines = false; // .. succeed >0 comment_only lines?
362 lines = new List();
363 for (var line in oldlines) {
364 if (after_blank_lines && after_comment_lines) {
365 // can never happen
366 throw new Error("Bug, please report");
ricow1 2014/10/13 06:07:04 I would do Bug, please report to eernst@
eernst 2014/10/13 08:03:29 Done.
367 }
ricow1 2014/10/13 06:07:04 move else if up on this line
eernst 2014/10/15 12:01:10 Done.
368 else if (after_blank_lines && !after_comment_lines) {
369 // at least one line before 'line' is ws_only
370 if (!is_ws_only(line)) {
371 // blank line block ended
372 after_comment_lines = is_comment_only(line);
373 // special case: it seems to be safe to remove comment_only lines
374 // after ws_only lines, so the TeX state must be predictably right;
375 // next line will then be after_comment_lines and be dropped, so
376 // we drop the entire comment block---which is very useful; we can
377 // also consider this comment line to be an empty line, such that
378 // subsequent empty lines can be considered to be in a block of
379 // empty lines; note that almost all variants of this will break..
380 if (after_comment_lines) {
381 // _current_ 'line' a comment_only here
382 after_blank_lines = true;
383 after_comment_lines = false;
384 // and do not add 'line'
385 }
ricow1 2014/10/13 06:07:03 move else up here
eernst 2014/10/13 08:03:29 Done.
386 else {
387 // after blanks, but current 'line' is neither blank nor comment
388 after_blank_lines = false;
389 lines.add(line);
390 }
391 }
392 else {
393 // blank line block continues, do not add 'line'
394 }
395 }
396 else if (!after_blank_lines && after_comment_lines) {
397 // at least one line before 'line' is comment_only
398 if (!is_comment_only(line)) {
399 // comment line block ended
400 after_blank_lines = is_ws_only(line);
401 after_comment_lines = false;
402 lines.add(line);
403 }
404 else {
405 // comment line block continues, do not add 'line'
406 }
407 }
408 else /* !after_blank_lines && !after_comment_lines */ {
409 // no ws_only or comment_only lines precede 'line'
410 if (is_ws_only(line))
411 after_blank_lines = true;
412 if (is_comment_only(line))
413 after_comment_lines = true;
414 if (!after_comment_lines)
415 lines.add(line);
416 else {
417 // skipping comment_only line after non_ws, non_comment text
418 }
419 }
420 }
421 return lines;
422 }
423
424 // select the elements in the pipeline
425
426 normalize(line) => normalize_whitespace(strip_comment(line));
427
428 sisp_normalize(line) => strip_comment(line);
429
430 // testing
431
ricow1 2014/10/13 06:07:03 you should add a real test that imports this file
eernst 2014/10/13 08:03:30 Need a bit more input on how to do this.
432 one_test_cut_match(line,re,expected) {
433 stdout.write("cut_match: ${line} --[${re}]--> ");
434 var result = cut_match(line,new RegExp(re).firstMatch(line));
435 stdout.write(result+"\n");
436 return expected == result;
437 }
438
439 test_cut_match() {
440 one_test_cut_match("test","e","tst") &&
441 one_test_cut_match("test","te","st") &&
442 one_test_cut_match("test","st","te") &&
443 one_test_cut_match("test","","test") &&
444 one_test_cut_match("test","test","")
445 ? print("OK") : print("ERROR");
446 }
447
448 // ----------------------------------------------------------------------
449 // Managing fragments with significant spacing
450
451 final dart_code_begin_re = new RegExp("^\\s*\\\\begin{dartCode}");
452 final dart_code_end_re = new RegExp ("^\\s*\\\\end{dartCode}");
453
454 sisp_is(line, target_re) {
455 return target_re.firstMatch(line) != null;
456 }
457
458 sisp_is_dart_begin(line) => sisp_is(line, dart_code_begin_re);
459 sisp_is_dart_end(line) => sisp_is(line, dart_code_end_re);
460
461 // testing
462
463 one_test_sisp(sisp_fun, line, expectation) {
464 var result = sisp_fun(line) == expectation;
465 stdout.write("sisp_is_dart_*: ${line}: ${expectation}\n");
466 return result;
467 }
468
469 test_sisp() {
470 one_test_sisp(sisp_is_dart_begin,"\\begin{dartCode}\n", true) &&
471 one_test_sisp(sisp_is_dart_begin," \\begin{dartCode}\n", true) &&
472 one_test_sisp(sisp_is_dart_begin,"whatever else ..", false) &&
473 one_test_sisp(sisp_is_dart_end,"\\end{dartCode}", true) &&
474 one_test_sisp(sisp_is_dart_end," \\end{dartCode}\t \n", true) &&
475 one_test_sisp(sisp_is_dart_end,"whatever else ..", false)
476 ? print("OK") : print("ERROR");
477 }
478
479 // ----------------------------------------------------------------------
480 // io
481
482 rl() => stdin.readLineSync(
ricow1 2014/10/13 06:07:03 we don't normally shorten names, i.e., rl should b
eernst 2014/10/13 08:03:30 Done.
483 retainNewlines: true,
484 encoding: const AsciiCodec());
485
486 // ----------------------------------------------------------------------
487 // main
488
489 main () {
ricow1 2014/10/13 06:07:03 As stated in the top this becomes much easier if y
490 var lines = new List(), line;
ricow1 2014/10/13 06:07:03 I would do the definition of line on a separate li
eernst 2014/10/13 08:03:30 Done.
491
492 // single-line normalization
493 var in_dart_code = false;
494 while ((line = rl()) != null) {
495 if (sisp_is_dart_begin(line))
496 in_dart_code = true;
ricow1 2014/10/13 06:07:03 block in {}
eernst 2014/10/13 08:03:28 Already changed this to single line as a result of
eernst 2014/10/13 08:03:29 Changed this to single line already when you descr
497 else if (sisp_is_dart_end(line))
498 in_dart_code = false;
ricow1 2014/10/13 06:07:03 block in {}
eernst 2014/10/13 08:03:27 Same situation as l.496.
499 if (in_dart_code) lines.add(sisp_normalize(line));
ricow1 2014/10/13 06:07:04 always use {} blocks when you have anything but si
eernst 2014/10/13 08:03:30 Ah, so you're saying that the presence of 'else' (
500 else lines.add(normalize(line));
501 }
502
503 // multi-line normalization
504 lines = multiline_normalize(lines);
505
506 // output result
507 for (var line in lines) stdout.write(line);
508 }
OLDNEW
« docs/language/dart.sty ('K') | « docs/language/dartLangSpec.tex ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698