third_party/libxml/src/genChRanges.py - Issue 1193533007: Upgrade to libxml 2.9.2 and libxslt 1.1.28

Side by Side Diff: third_party/libxml/src/genChRanges.py

Issue 1193533007: Upgrade to libxml 2.9.2 and libxslt 1.1.28 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: remove suppressions, have landed in blink now Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 #!/usr/bin/python -u

	2 #

	3 # Portions of this script have been (shamelessly) stolen from the

	4 # prior work of Daniel Veillard (genUnicode.py)

	5 #

	6 # I, however, take full credit for any bugs, errors or difficulties :-)

	7 #

	8 # William Brack

	9 # October 2003

	10 #

	11 # 18 October 2003

	12 # Modified to maintain binary compatibility with previous library versions

	13 # by adding a suffix 'Q' ('quick') to the macro generated for the original,

	14 # function, and adding generation of a function (with the original name) which

	15 # instantiates the macro.

	16 #

	17

	18 import sys

	19 import string

	20 import time

	21

	22 #

	23 # A routine to take a list of yes/no (1, 0) values and turn it

	24 # into a list of ranges. This will later be used to determine whether

	25 # to generate single-byte lookup tables, or inline comparisons

	26 #

	27 def makeRange(lst):

	28 ret = []

	29 pos = 0

	30 while pos < len(lst):

	31 try: # index generates exception if not present

	32 s = lst[pos:].index(1) # look for start of next range

	33 except:

	34 break # if no more, finished

	35 pos += s # pointer to start of possible range

	36 try:

	37 e = lst[pos:].index(0) # look for end of range

	38 e += pos

	39 except: # if no end, set to end of list

	40 e = len(lst)

	41 ret.append((pos, e-1)) # append range tuple to list

	42 pos = e + 1 # ready to check for next range

	43 return ret

	44

	45 sources = "chvalid.def" # input filename

	46

	47 # minTableSize gives the minimum number of ranges which must be present

	48 # before a 256-byte lookup table is produced. If there are less than this

	49 # number, a macro with inline comparisons is generated

	50 minTableSize = 6

	51

	52 # dictionary of functions, key=name, element contains char-map and range-list

	53 Functs = {}

	54

	55 state = 0

	56

	57 try:

	58 defines = open("chvalid.def", "r")

	59 except:

	60 print "Missing chvalid.def, aborting ..."

	61 sys.exit(1)

	62

	63 #

	64 # The lines in the .def file have three types:-

	65 # name: Defines a new function block

	66 # ur: Defines individual or ranges of unicode values

	67 # end: Indicates the end of the function block

	68 #

	69 # These lines are processed below.

	70 #

	71 for line in defines.readlines():

	72 # ignore blank lines, or lines beginning with '#'

	73 if line[0] == '#':

	74 continue

	75 line = string.strip(line)

	76 if line == '':

	77 continue

	78 # split line into space-separated fields, then split on type

	79 try:

	80 fields = string.split(line, ' ')

	81 #

	82 # name line:

	83 # validate any previous function block already ended

	84 # validate this function not already defined

	85 # initialize an entry in the function dicitonary

	86 # including a mask table with no values yet defined

	87 #

	88 if fields[0] == 'name':

	89 name = fields[1]

	90 if state != 0:

	91 print "'name' %s found before previous name" \

	92 "completed" % (fields[1])

	93 continue

	94 state = 1

	95 if Functs.has_key(name):

	96 print "name '%s' already present - may give" \

	97 " wrong results" % (name)

	98 else:

	99 # dict entry with two list elements (chdata, rangedata)

	100 Functs[name] = [ [], [] ]

	101 for v in range(256):

	102 Functs[name][0].append(0)

	103 #

	104 # end line:

	105 # validate there was a preceding function name line

	106 # set state to show no current function active

	107 #

	108 elif fields[0] == 'end':

	109 if state == 0:

	110 print "'end' found outside of function block"

	111 continue

	112 state = 0

	113

	114 #

	115 # ur line:

	116 # validate function has been defined

	117 # process remaining fields on the line, which may be either

	118 # individual unicode values or ranges of values

	119 #

	120 elif fields[0] == 'ur':

	121 if state != 1:

	122 raise ValidationError, "'ur' found outside of 'name' block"

	123 for el in fields[1:]:

	124 pos = string.find(el, '..')

	125 # pos <=0 means not a range, so must be individual value

	126 if pos <= 0:

	127 # cheap handling of hex or decimal values

	128 if el[0:2] == '0x':

	129 value = int(el[2:],16)

	130 elif el[0] == "'":

	131 value = ord(el[1])

	132 else:

	133 value = int(el)

	134 if ((value < 0) \| (value > 0x1fffff)):

	135 raise ValidationError, 'Illegal value (%s) in ch for'\

	136 ' name %s' % (el,name)

	137 # for ur we have only ranges (makes things simpler),

	138 # so convert val to range

	139 currange = (value, value)

	140 # pos > 0 means this is a range, so isolate/validate

	141 # the interval

	142 else:

	143 # split the range into it's first-val, last-val

	144 (first, last) = string.split(el, "..")

	145 # convert values from text into binary

	146 if first[0:2] == '0x':

	147 start = int(first[2:],16)

	148 elif first[0] == "'":

	149 start = ord(first[1])

	150 else:

	151 start = int(first)

	152 if last[0:2] == '0x':

	153 end = int(last[2:],16)

	154 elif last[0] == "'":

	155 end = ord(last[1])

	156 else:

	157 end = int(last)

	158 if (start < 0) \| (end > 0x1fffff) \| (start > end):

	159 raise ValidationError, "Invalid range '%s'" % el

	160 currange = (start, end)

	161 # common path - 'currange' has the range, now take care of it

	162 # We split on single-byte values vs. multibyte

	163 if currange[1] < 0x100: # single-byte

	164 for ch in range(currange[0],currange[1]+1):

	165 # validate that value not previously defined

	166 if Functs[name][0][ch]:

	167 msg = "Duplicate ch value '%s' for name '%s'" % (el, name)

	168 raise ValidationError, msg

	169 Functs[name][0][ch] = 1

	170 else: # multi-byte

	171 if currange in Functs[name][1]:

	172 raise ValidationError, "range already defined in" \

	173 " function"

	174 else:

	175 Functs[name][1].append(currange)

	176

	177 except:

	178 print "Failed to process line: %s" % (line)

	179 raise

	180 #

	181 # At this point, the entire definition file has been processed. Now we

	182 # enter the output phase, where we generate the two files chvalid.c and'

	183 # chvalid.h

	184 #

	185 # To do this, we first output the 'static' data (heading, fixed

	186 # definitions, etc.), then output the 'dynamic' data (the results

	187 # of the above processing), and finally output closing 'static' data

	188 # (e.g. the subroutine to process the ranges)

	189 #

	190

	191 #

	192 # Generate the headings:

	193 #

	194 try:

	195 header = open("include/libxml/chvalid.h", "w")

	196 except:

	197 print "Failed to open include/libxml/chvalid.h"

	198 sys.exit(1)

	199

	200 try:

	201 output = open("chvalid.c", "w")

	202 except:

	203 print "Failed to open chvalid.c"

	204 sys.exit(1)

	205

	206 date = time.asctime(time.localtime(time.time()))

	207

	208 header.write(

	209 """/*

	210 * Summary: Unicode character range checking

	211 * Description: this module exports interfaces for the character

	212 * range validation APIs

	213 *

	214 * This file is automatically generated from the cvs source

	215 * definition files using the genChRanges.py Python script

	216 *

	217 * Generation date: %s

	218 * Sources: %s

	219 * Author: William Brack <wbrack@mmm.com.hk>

	220 */

	221

	222 #ifndef __XML_CHVALID_H__

	223 #define __XML_CHVALID_H__

	224

	225 #include <libxml/xmlversion.h>

	226 #include <libxml/xmlstring.h>

	227

	228 #ifdef __cplusplus

	229 extern "C" {

	230 #endif

	231

	232 /*

	233 * Define our typedefs and structures

	234 *

	235 */

	236 typedef struct _xmlChSRange xmlChSRange;

	237 typedef xmlChSRange *xmlChSRangePtr;

	238 struct _xmlChSRange {

	239 unsigned short low;

	240 unsigned short high;

	241 };

	242

	243 typedef struct _xmlChLRange xmlChLRange;

	244 typedef xmlChLRange *xmlChLRangePtr;

	245 struct _xmlChLRange {

	246 unsigned int low;

	247 unsigned int high;

	248 };

	249

	250 typedef struct _xmlChRangeGroup xmlChRangeGroup;

	251 typedef xmlChRangeGroup *xmlChRangeGroupPtr;

	252 struct _xmlChRangeGroup {

	253 int nbShortRange;

	254 int nbLongRange;

	255 const xmlChSRange shortRange; / points to an array of ranges */

	256 const xmlChLRange *longRange;

	257 };

	258

	259 /**

	260 * Range checking routine

	261 */

	262 XMLPUBFUN int XMLCALL

	263 xmlCharInRange(unsigned int val, const xmlChRangeGroup *group);

	264

	265 """ % (date, sources));

	266 output.write(

	267 """/*

	268 * chvalid.c: this module implements the character range

	269 * validation APIs

	270 *

	271 * This file is automatically generated from the cvs source

	272 * definition files using the genChRanges.py Python script

	273 *

	274 * Generation date: %s

	275 * Sources: %s

	276 * William Brack <wbrack@mmm.com.hk>

	277 */

	278

	279 #define IN_LIBXML

	280 #include "libxml.h"

	281 #include <libxml/chvalid.h>

	282

	283 /*

	284 * The initial tables ({func_name}_tab) are used to validate whether a

	285 * single-byte character is within the specified group. Each table

	286 * contains 256 bytes, with each byte representing one of the 256

	287 * possible characters. If the table byte is set, the character is

	288 * allowed.

	289 *

	290 */

	291 """ % (date, sources));

	292

	293 #

	294 # Now output the generated data.

	295 # We try to produce the best execution times. Tests have shown that validation

	296 # with direct table lookup is, when there are a "small" number of valid items,

	297 # still not as fast as a sequence of inline compares. So, if the single-byte

	298 # portion of a range has a "small" number of ranges, we output a macro for inlin e

	299 # compares, otherwise we output a 256-byte table and a macro to use it.

	300 #

	301

	302 fkeys = Functs.keys() # Dictionary of all defined functions

	303 fkeys.sort() # Put some order to our output

	304

	305 for f in fkeys:

	306

	307 # First we convert the specified single-byte values into a group of ranges.

	308 # If the total number of such ranges is less than minTableSize, we generate

	309 # an inline macro for direct comparisons; if greater, we generate a lookup

	310 # table.

	311 if max(Functs[f][0]) > 0: # only check if at least one entry

	312 rangeTable = makeRange(Functs[f][0])

	313 numRanges = len(rangeTable)

	314 if numRanges >= minTableSize: # table is worthwhile

	315 header.write("XMLPUBVAR const unsigned char %s_tab[256];\n" % f)

	316 header.write("""

	317 /**

	318 * %s_ch:

	319 * @c: char to validate

	320 *

	321 * Automatically generated by genChRanges.py

	322 */

	323 """ % f)

	324 header.write("#define %s_ch(c)\t(%s_tab[(c)])\n" % (f, f))

	325

	326 # write the constant data to the code file

	327 output.write("const unsigned char %s_tab[256] = {\n" % f)

	328 pline = " "

	329 for n in range(255):

	330 pline += " 0x%02x," % Functs[f][0][n]

	331 if len(pline) > 72:

	332 output.write(pline + "\n")

	333 pline = " "

	334 output.write(pline + " 0x%02x };\n\n" % Functs[f][0][255])

	335

	336 else: # inline check is used

	337 # first another little optimisation - if space is present,

	338 # put it at the front of the list so it is checked first

	339 try:

	340 ix = rangeTable.remove((0x20, 0x20))

	341 rangeTable.insert(0, (0x20, 0x20))

	342 except:

	343 pass

	344 firstFlag = 1

	345

	346 header.write("""

	347 /**

	348 * %s_ch:

	349 * @c: char to validate

	350 *

	351 * Automatically generated by genChRanges.py

	352 */

	353 """ % f)

	354 # okay, I'm tired of the messy lineup - let's automate it!

	355 pline = "#define %s_ch(c)" % f

	356 # 'ntab' is number of tabs needed to position to col. 33 from name e nd

	357 ntab = 4 - (len(pline)) / 8

	358 if ntab < 0:

	359 ntab = 0

	360 just = ""

	361 for i in range(ntab):

	362 just += "\t"

	363 pline = pline + just + "("

	364 for rg in rangeTable:

	365 if not firstFlag:

	366 pline += " \|\| \\\n\t\t\t\t "

	367 else:

	368 firstFlag = 0

	369 if rg[0] == rg[1]: # single value - check equal

	370 pline += "((c) == 0x%x)" % rg[0]

	371 else: # value range

	372 # since we are doing char, also change range ending in 0xff

	373 if rg[1] != 0xff:

	374 pline += "((0x%x <= (c)) &&" % rg[0]

	375 pline += " ((c) <= 0x%x))" % rg[1]

	376 else:

	377 pline += " (0x%x <= (c))" % rg[0]

	378 pline += ")\n"

	379 header.write(pline)

	380

	381 header.write("""

	382 /**

	383 * %sQ:

	384 * @c: char to validate

	385 *

	386 * Automatically generated by genChRanges.py

	387 */

	388 """ % f)

	389 pline = "#define %sQ(c)" % f

	390 ntab = 4 - (len(pline)) / 8

	391 if ntab < 0:

	392 ntab = 0

	393 just = ""

	394 for i in range(ntab):

	395 just += "\t"

	396 header.write(pline + just + "(((c) < 0x100) ? \\\n\t\t\t\t ")

	397 if max(Functs[f][0]) > 0:

	398 header.write("%s_ch((c)) :" % f)

	399 else:

	400 header.write("0 :")

	401

	402 # if no ranges defined, value invalid if >= 0x100

	403 numRanges = len(Functs[f][1])

	404 if numRanges == 0:

	405 header.write(" 0)\n\n")

	406 else:

	407 if numRanges >= minTableSize:

	408 header.write(" \\\n\t\t\t\t xmlCharInRange((c), &%sGroup))\n\n" % f )

	409 else: # if < minTableSize, generate inline code

	410 firstFlag = 1

	411 for rg in Functs[f][1]:

	412 if not firstFlag:

	413 pline += " \|\| \\\n\t\t\t\t "

	414 else:

	415 firstFlag = 0

	416 pline = "\\\n\t\t\t\t("

	417 if rg[0] == rg[1]: # single value - check equal

	418 pline += "((c) == 0x%x)" % rg[0]

	419 else: # value range

	420 pline += "((0x%x <= (c)) &&" % rg[0]

	421 pline += " ((c) <= 0x%x))" % rg[1]

	422 pline += "))\n\n"

	423 header.write(pline)

	424

	425

	426 if len(Functs[f][1]) > 0:

	427 header.write("XMLPUBVAR const xmlChRangeGroup %sGroup;\n" % f)

	428

	429

	430 #

	431 # Next we do the unicode ranges

	432 #

	433

	434 for f in fkeys:

	435 if len(Functs[f][1]) > 0: # only generate if unicode ranges present

	436 rangeTable = Functs[f][1]

	437 rangeTable.sort() # ascending tuple sequence

	438 numShort = 0

	439 numLong = 0

	440 for rg in rangeTable:

	441 if rg[1] < 0x10000: # if short value

	442 if numShort == 0: # first occurence

	443 pline = "static const xmlChSRange %s_srng[] = { " % f

	444 else:

	445 pline += ", "

	446 numShort += 1

	447 if len(pline) > 60:

	448 output.write(pline + "\n")

	449 pline = " "

	450 pline += "{0x%x, 0x%x}" % (rg[0], rg[1])

	451 else: # if long value

	452 if numLong == 0: # first occurence

	453 if numShort > 0: # if there were shorts, finish them off

	454 output.write(pline + "};\n")

	455 pline = "static const xmlChLRange %s_lrng[] = { " % f

	456 else:

	457 pline += ", "

	458 numLong += 1

	459 if len(pline) > 60:

	460 output.write(pline + "\n")

	461 pline = " "

	462 pline += "{0x%x, 0x%x}" % (rg[0], rg[1])

	463 output.write(pline + "};\n") # finish off last group

	464

	465 pline = "const xmlChRangeGroup %sGroup =\n\t{%d, %d, " % (f, numShort, n umLong)

	466 if numShort > 0:

	467 pline += "%s_srng" % f

	468 else:

	469 pline += "(xmlChSRangePtr)0"

	470 if numLong > 0:

	471 pline += ", %s_lrng" % f

	472 else:

	473 pline += ", (xmlChLRangePtr)0"

	474

	475 output.write(pline + "};\n\n")

	476

	477 output.write(

	478 """

	479 /**

	480 * xmlCharInRange:

	481 * @val: character to be validated

	482 * @rptr: pointer to range to be used to validate

	483 *

	484 * Does a binary search of the range table to determine if char

	485 * is valid

	486 *

	487 * Returns: true if character valid, false otherwise

	488 */

	489 int

	490 xmlCharInRange (unsigned int val, const xmlChRangeGroup *rptr) {

	491 int low, high, mid;

	492 const xmlChSRange *sptr;

	493 const xmlChLRange *lptr;

	494

	495 if (rptr == NULL) return(0);

	496 if (val < 0x10000) { /* is val in 'short' or 'long' array? */

	497 if (rptr->nbShortRange == 0)

	498 return 0;

	499 low = 0;

	500 high = rptr->nbShortRange - 1;

	501 sptr = rptr->shortRange;

	502 while (low <= high) {

	503 mid = (low + high) / 2;

	504 if ((unsigned short) val < sptr[mid].low) {

	505 high = mid - 1;

	506 } else {

	507 if ((unsigned short) val > sptr[mid].high) {

	508 low = mid + 1;

	509 } else {

	510 return 1;

	511 }

	512 }

	513 }

	514 } else {

	515 if (rptr->nbLongRange == 0) {

	516 return 0;

	517 }

	518 low = 0;

	519 high = rptr->nbLongRange - 1;

	520 lptr = rptr->longRange;

	521 while (low <= high) {

	522 mid = (low + high) / 2;

	523 if (val < lptr[mid].low) {

	524 high = mid - 1;

	525 } else {

	526 if (val > lptr[mid].high) {

	527 low = mid + 1;

	528 } else {

	529 return 1;

	530 }

	531 }

	532 }

	533 }

	534 return 0;

	535 }

	536

	537 """);

	538

	539 #

	540 # finally, generate the ABI compatibility functions

	541 #

	542 for f in fkeys:

	543 output.write("""

	544 /**

	545 * %s:

	546 * @ch: character to validate

	547 *

	548 * This function is DEPRECATED.

	549 """ % f);

	550 if max(Functs[f][0]) > 0:

	551 output.write(" * Use %s_ch or %sQ instead" % (f, f))

	552 else:

	553 output.write(" * Use %sQ instead" % f)

	554 output.write("""

	555 *

	556 * Returns true if argument valid, false otherwise

	557 */

	558 """)

	559 output.write("int\n%s(unsigned int ch) {\n return(%sQ(ch));\n}\n\n" % (f, f))

	560 header.write("XMLPUBFUN int XMLCALL\n\t\t%s(unsigned int ch);\n" % f);

	561 #

	562 # Run complete - write trailers and close the output files

	563 #

	564

	565 header.write("""

	566 #ifdef __cplusplus

	567 }

	568 #endif

	569 #endif /* __XML_CHVALID_H__ */

	570 """)

	571

	572 header.close()

	573

	574 output.write("""#define bottom_chvalid

	575 #include "elfgcchack.h"

	576 """)

	577 output.close()

	578

OLD	NEW

« third_party/libxml/README.chromium ('K') | « third_party/libxml/src/example/gjobs.xml ('k') | third_party/libxml/src/global.data » ('j') | third_party/libxslt/README.chromium » ('J')