Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(66)

Side by Side Diff: third_party/libxml/src/genChRanges.py

Issue 1193533007: Upgrade to libxml 2.9.2 and libxslt 1.1.28 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: remove suppressions, have landed in blink now Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #!/usr/bin/python -u
2 #
3 # Portions of this script have been (shamelessly) stolen from the
4 # prior work of Daniel Veillard (genUnicode.py)
5 #
6 # I, however, take full credit for any bugs, errors or difficulties :-)
7 #
8 # William Brack
9 # October 2003
10 #
11 # 18 October 2003
12 # Modified to maintain binary compatibility with previous library versions
13 # by adding a suffix 'Q' ('quick') to the macro generated for the original,
14 # function, and adding generation of a function (with the original name) which
15 # instantiates the macro.
16 #
17
18 import sys
19 import string
20 import time
21
22 #
23 # A routine to take a list of yes/no (1, 0) values and turn it
24 # into a list of ranges. This will later be used to determine whether
25 # to generate single-byte lookup tables, or inline comparisons
26 #
def makeRange(lst):
    """Collapse a list of yes/no (1, 0) flags into inclusive index ranges.

    lst is scanned left to right; every maximal run of entries equal to 1
    becomes one (first_index, last_index) tuple.

    Returns the list of tuples in ascending order; it is empty when lst
    contains no 1.
    """
    ret = []
    size = len(lst)
    pos = 0
    while pos < size:
        # list.index raises ValueError when the value is absent; searching
        # from 'pos' avoids copying the tail of the list on every pass
        try:
            start = lst.index(1, pos)       # look for start of next range
        except ValueError:
            break                           # if no more, finished
        try:
            end = lst.index(0, start)       # look for end of range
        except ValueError:                  # if no end, set to end of list
            end = size
        ret.append((start, end - 1))        # append range tuple to list
        pos = end + 1                       # lst[end] is 0, resume after it
    return ret
44
sources = "chvalid.def"                 # input filename

# minTableSize gives the minimum number of ranges which must be present
# before a 256-byte lookup table is produced.  If there are fewer than this
# number, a macro with inline comparisons is generated
minTableSize = 6

# dictionary of functions, key=name, element contains char-map and range-list
Functs = {}

# parser state: 0 while outside a function block, 1 while inside one
state = 0

# Only a failure to open the file means "missing"; anything else (for
# example a keyboard interrupt) is allowed to propagate.
try:
    defines = open(sources, "r")
except IOError:
    print("Missing chvalid.def, aborting ...")
    sys.exit(1)
62
63 #
64 # The lines in the .def file have three types:-
65 # name: Defines a new function block
66 # ur: Defines individual or ranges of unicode values
67 # end: Indicates the end of the function block
68 #
69 # These lines are processed below.
70 #
class ValidationError(Exception):
    """Raised when a line of the definition file cannot be accepted."""


def _parse_value(text):
    """Convert one token of the definition file to an integer.

    Accepted spellings are hexadecimal with a '0x' prefix, a quoted
    character (the character following the opening quote is used), and
    plain decimal.  Malformed text raises ValueError or IndexError.
    """
    if text[0:2] == '0x':
        return int(text[2:], 16)
    if text[0] == "'":
        return ord(text[1])
    return int(text)


for line in defines:
    # ignore blank lines, or lines beginning with '#'
    if line[0] == '#':
        continue
    line = line.strip()
    if line == '':
        continue
    # split line into space-separated fields, then dispatch on the type
    try:
        fields = line.split(' ')
        #
        # name line:
        #   validate any previous function block already ended
        #   validate this function not already defined
        #   initialize an entry in the function dictionary
        #       including a mask table with no values yet defined
        #
        if fields[0] == 'name':
            if state != 0:
                print("'name' %s found before previous name "
                      "completed" % (fields[1]))
                continue
            # only adopt the new name once the line has been accepted, so
            # a misplaced 'name' line cannot redirect the block still open
            name = fields[1]
            state = 1
            if name in Functs:
                print("name '%s' already present - may give"
                      " wrong results" % (name))
            else:
                # dict entry with two list elements (chdata, rangedata);
                # chdata starts as 256 cleared flags
                Functs[name] = [[0] * 256, []]
        #
        # end line:
        #   validate there was a preceding function name line
        #   set state to show no current function active
        #
        elif fields[0] == 'end':
            if state == 0:
                print("'end' found outside of function block")
                continue
            state = 0

        #
        # ur line:
        #   validate function has been defined
        #   process remaining fields on the line, which may be either
        #   individual unicode values or ranges of values
        #
        elif fields[0] == 'ur':
            if state != 1:
                raise ValidationError("'ur' found outside of 'name' block")
            for el in fields[1:]:
                # '..' absent (or leading) means not a range, so the
                # element must be an individual value
                if el.find('..') <= 0:
                    value = _parse_value(el)
                    if value < 0 or value > 0x1fffff:
                        raise ValidationError('Illegal value (%s) in ch for'
                                              ' name %s' % (el, name))
                    # for ur we have only ranges (makes things simpler),
                    # so convert val to range
                    currange = (value, value)
                # otherwise this is a range, so isolate/validate
                # the interval
                else:
                    # split the range into its first-val, last-val
                    (first, last) = el.split("..")
                    start = _parse_value(first)
                    end = _parse_value(last)
                    if start < 0 or end > 0x1fffff or start > end:
                        raise ValidationError("Invalid range '%s'" % el)
                    currange = (start, end)
                # common path - 'currange' has the range, now take care of it
                # We split on single-byte values vs. multibyte
                if currange[1] < 0x100:         # single-byte
                    for ch in range(currange[0], currange[1] + 1):
                        # validate that value not previously defined
                        if Functs[name][0][ch]:
                            msg = "Duplicate ch value '%s' for name '%s'" % (el, name)
                            raise ValidationError(msg)
                        Functs[name][0][ch] = 1
                else:                           # multi-byte
                    if currange in Functs[name][1]:
                        raise ValidationError("range already defined in"
                                              " function")
                    Functs[name][1].append(currange)

    except Exception:
        # report the offending line, then let the error end the run
        print("Failed to process line: %s" % (line))
        raise

# the definition file is not read again after this point
defines.close()
180 #
181 # At this point, the entire definition file has been processed. Now we
182 # enter the output phase, where we generate the two files chvalid.c and
183 # chvalid.h
184 #
185 # To do this, we first output the 'static' data (heading, fixed
186 # definitions, etc.), then output the 'dynamic' data (the results
187 # of the above processing), and finally output closing 'static' data
188 # (e.g. the subroutine to process the ranges)
189 #
190
191 #
192 # Generate the headings:
193 #
# Open both generated files for writing.  Only an I/O failure is reported
# as "Failed to open"; other exceptions are allowed to propagate.
try:
    header = open("include/libxml/chvalid.h", "w")
except IOError:
    print("Failed to open include/libxml/chvalid.h")
    sys.exit(1)

try:
    output = open("chvalid.c", "w")
except IOError:
    print("Failed to open chvalid.c")
    sys.exit(1)
205
# Timestamp recorded in the banner of both generated files; asctime()
# without an argument formats the current local time.
date = time.asctime()

# Fixed opening part of chvalid.h: banner comment, include guard, and the
# range structures plus the xmlCharInRange prototype.  The two %s slots
# receive the generation date and the input file name.
headerBanner = """/*
 * Summary: Unicode character range checking
 * Description: this module exports interfaces for the character
 * range validation APIs
 *
 * This file is automatically generated from the cvs source
 * definition files using the genChRanges.py Python script
 *
 * Generation date: %s
 * Sources: %s
 * Author: William Brack <wbrack@mmm.com.hk>
 */

#ifndef __XML_CHVALID_H__
#define __XML_CHVALID_H__

#include <libxml/xmlversion.h>
#include <libxml/xmlstring.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Define our typedefs and structures
 *
 */
typedef struct _xmlChSRange xmlChSRange;
typedef xmlChSRange *xmlChSRangePtr;
struct _xmlChSRange {
    unsigned short low;
    unsigned short high;
};

typedef struct _xmlChLRange xmlChLRange;
typedef xmlChLRange *xmlChLRangePtr;
struct _xmlChLRange {
    unsigned int low;
    unsigned int high;
};

typedef struct _xmlChRangeGroup xmlChRangeGroup;
typedef xmlChRangeGroup *xmlChRangeGroupPtr;
struct _xmlChRangeGroup {
    int nbShortRange;
    int nbLongRange;
    const xmlChSRange *shortRange; /* points to an array of ranges */
    const xmlChLRange *longRange;
};

/**
 * Range checking routine
 */
XMLPUBFUN int XMLCALL
xmlCharInRange(unsigned int val, const xmlChRangeGroup *group);

"""

# Fixed opening part of chvalid.c: banner comment, includes, and the
# comment introducing the lookup tables.  Same two %s slots as above.
sourceBanner = """/*
 * chvalid.c: this module implements the character range
 * validation APIs
 *
 * This file is automatically generated from the cvs source
 * definition files using the genChRanges.py Python script
 *
 * Generation date: %s
 * Sources: %s
 * William Brack <wbrack@mmm.com.hk>
 */

#define IN_LIBXML
#include "libxml.h"
#include <libxml/chvalid.h>

/*
 * The initial tables ({func_name}_tab) are used to validate whether a
 * single-byte character is within the specified group. Each table
 * contains 256 bytes, with each byte representing one of the 256
 * possible characters. If the table byte is set, the character is
 * allowed.
 *
 */
"""

header.write(headerBanner % (date, sources))
output.write(sourceBanner % (date, sources))
292
293 #
294 # Now output the generated data.
295 # We try to produce the best execution times. Tests have shown that validation
296 # with direct table lookup is, when there are a "small" number of valid items,
297 # still not as fast as a sequence of inline compares. So, if the single-byte
298 # portion of a range has a "small" number of ranges, we output a macro for inline
299 # compares, otherwise we output a 256-byte table and a macro to use it.
300 #
301
# Names of all defined functions, sorted to put some order to our output.
# sorted() is used because dict.keys() has no sort() method on Python 3.
fkeys = sorted(Functs.keys())

# text placed between the alternatives of a multi-line macro expression
_OR_JOIN = " || \\\n\t\t\t\t "


def _tab_fill(text):
    """Return the tabs used to line up a macro body after *text*.

    Four tabs are used for a short name and one fewer for every eight
    characters already in *text*, never fewer than zero.  Floor division
    keeps the count an integer on both Python 2 and Python 3.
    """
    return "\t" * max(0, 4 - len(text) // 8)


def _macro_doc(symbol):
    """Return the documentation comment written ahead of macro *symbol*."""
    return """
/**
 * %s:
 * @c: char to validate
 *
 * Automatically generated by genChRanges.py
 */
""" % symbol


def _range_test(rg):
    """Return the C expression testing (c) against the (low, high) pair rg."""
    low, high = rg
    if low == high:                     # single value - check equal
        return "((c) == 0x%x)" % low
    if high != 0xff:                    # value range
        return "((0x%x <= (c)) && ((c) <= 0x%x))" % (low, high)
    # since we are doing char, a range ending in 0xff needs no upper test
    return " (0x%x <= (c))" % low


for f in fkeys:
    chmap = Functs[f][0]                # 256 single-byte flags
    hasSingleByte = max(chmap) > 0

    # First we convert the specified single-byte values into a group of
    # ranges.  If the total number of such ranges is less than minTableSize,
    # we generate an inline macro for direct comparisons; otherwise we
    # generate a lookup table.
    if hasSingleByte:                   # only check if at least one entry
        rangeTable = makeRange(chmap)
        if len(rangeTable) >= minTableSize:     # table is worthwhile
            header.write("XMLPUBVAR const unsigned char %s_tab[256];\n" % f)
            header.write(_macro_doc("%s_ch" % f))
            header.write("#define %s_ch(c)\t(%s_tab[(c)])\n" % (f, f))

            # write the constant data to the code file, starting a new
            # output line whenever the current one grows past 72 columns
            output.write("const unsigned char %s_tab[256] = {\n" % f)
            pline = " "
            for n in range(255):
                pline += " 0x%02x," % chmap[n]
                if len(pline) > 72:
                    output.write(pline + "\n")
                    pline = " "
            output.write(pline + " 0x%02x };\n\n" % chmap[255])

        else:                           # inline check is used
            # first another little optimisation - if space is present,
            # put it at the front of the list so it is checked first
            space = (0x20, 0x20)
            if space in rangeTable:
                rangeTable.remove(space)
                rangeTable.insert(0, space)

            header.write(_macro_doc("%s_ch" % f))
            pline = "#define %s_ch(c)" % f
            pline += _tab_fill(pline) + "("
            pline += _OR_JOIN.join([_range_test(rg) for rg in rangeTable])
            header.write(pline + ")\n")

    # The 'Q' macro selects between the single-byte test and the test for
    # values of 0x100 and above.
    header.write(_macro_doc("%sQ" % f))
    pline = "#define %sQ(c)" % f
    header.write(pline + _tab_fill(pline) + "(((c) < 0x100) ? \\\n\t\t\t\t ")
    if hasSingleByte:
        header.write("%s_ch((c)) :" % f)
    else:
        header.write("0 :")

    # if no ranges defined, value invalid if >= 0x100
    numRanges = len(Functs[f][1])
    if numRanges == 0:
        header.write(" 0)\n\n")
    elif numRanges >= minTableSize:
        header.write(" \\\n\t\t\t\t xmlCharInRange((c), &%sGroup))\n\n" % f)
    else:                               # if < minTableSize, generate inline code
        pline = "\\\n\t\t\t\t("
        pline += _OR_JOIN.join([_range_test(rg) for rg in Functs[f][1]])
        header.write(pline + "))\n\n")

    if numRanges > 0:
        header.write("XMLPUBVAR const xmlChRangeGroup %sGroup;\n" % f)
428
429
430 #
431 # Next we do the unicode ranges
432 #
433
for f in fkeys:
    rangeTable = Functs[f][1]
    if len(rangeTable) == 0:            # only generate if unicode ranges present
        continue

    rangeTable.sort()                   # ascending tuple sequence
    numShort = 0
    numLong = 0
    for low, high in rangeTable:
        if high < 0x10000:              # short value
            opening = (numShort == 0)   # first occurrence starts the array
            numShort += 1
            if opening:
                pline = "static const xmlChSRange %s_srng[] = { " % f
        else:                           # long value
            opening = (numLong == 0)    # first occurrence starts the array
            numLong += 1
            if opening:
                if numShort > 0:        # if there were shorts, finish them off
                    output.write(pline + "};\n")
                pline = "static const xmlChLRange %s_lrng[] = { " % f
        if not opening:
            pline += ", "
        # start a fresh output line once the current one passes 60 columns
        if len(pline) > 60:
            output.write(pline + "\n")
            pline = " "
        pline += "{0x%x, 0x%x}" % (low, high)
    output.write(pline + "};\n")        # finish off last group

    # the group record holds both counts and a pointer to each array,
    # with a null pointer standing in for an array that was not emitted
    pline = "const xmlChRangeGroup %sGroup =\n\t{%d, %d, " % (f, numShort, numLong)
    if numShort > 0:
        pline += "%s_srng" % f
    else:
        pline += "(xmlChSRangePtr)0"
    if numLong > 0:
        pline += ", %s_lrng" % f
    else:
        pline += ", (xmlChLRangePtr)0"

    output.write(pline + "};\n\n")
476
# C source of the range lookup routine, copied verbatim into chvalid.c.
# It binary-searches the short-range array for values below 0x10000 and
# the long-range array otherwise.
charInRangeSource = """
/**
 * xmlCharInRange:
 * @val: character to be validated
 * @rptr: pointer to range to be used to validate
 *
 * Does a binary search of the range table to determine if char
 * is valid
 *
 * Returns: true if character valid, false otherwise
 */
int
xmlCharInRange (unsigned int val, const xmlChRangeGroup *rptr) {
    int low, high, mid;
    const xmlChSRange *sptr;
    const xmlChLRange *lptr;

    if (rptr == NULL) return(0);
    if (val < 0x10000) { /* is val in 'short' or 'long' array? */
        if (rptr->nbShortRange == 0)
            return 0;
        low = 0;
        high = rptr->nbShortRange - 1;
        sptr = rptr->shortRange;
        while (low <= high) {
            mid = (low + high) / 2;
            if ((unsigned short) val < sptr[mid].low) {
                high = mid - 1;
            } else {
                if ((unsigned short) val > sptr[mid].high) {
                    low = mid + 1;
                } else {
                    return 1;
                }
            }
        }
    } else {
        if (rptr->nbLongRange == 0) {
            return 0;
        }
        low = 0;
        high = rptr->nbLongRange - 1;
        lptr = rptr->longRange;
        while (low <= high) {
            mid = (low + high) / 2;
            if (val < lptr[mid].low) {
                high = mid - 1;
            } else {
                if (val > lptr[mid].high) {
                    low = mid + 1;
                } else {
                    return 1;
                }
            }
        }
    }
    return 0;
}

"""

output.write(charInRangeSource)
538
539 #
540 # finally, generate the ABI compatibility functions
541 #
for f in fkeys:
    # the deprecation note names the _ch macro only when the function
    # has single-byte values, since otherwise that macro is not generated
    if max(Functs[f][0]) > 0:
        advice = " * Use %s_ch or %sQ instead" % (f, f)
    else:
        advice = " * Use %sQ instead" % f

    pieces = [
        """
/**
 * %s:
 * @ch: character to validate
 *
 * This function is DEPRECATED.
""" % f,
        advice,
        """
 *
 * Returns true if argument valid, false otherwise
 */
""",
        # the function body simply instantiates the 'Q' macro
        "int\n%s(unsigned int ch) {\n return(%sQ(ch));\n}\n\n" % (f, f),
    ]
    output.write("".join(pieces))
    header.write("XMLPUBFUN int XMLCALL\n\t\t%s(unsigned int ch);\n" % f)
561 #
562 # Run complete - write trailers and close the output files
563 #
564
# Closing text of chvalid.h: ends the extern "C" section and the
# include guard.
headerTrailer = """
#ifdef __cplusplus
}
#endif
#endif /* __XML_CHVALID_H__ */
"""

# Closing text of chvalid.c.
sourceTrailer = """#define bottom_chvalid
#include "elfgcchack.h"
"""

header.write(headerTrailer)
header.close()

output.write(sourceTrailer)
output.close()
578
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698