third_party/libxml/src/genChRanges.py - Issue 1193533007: Upgrade to libxml 2.9.2 and libxslt 1.1.28

Unified Diff: third_party/libxml/src/genChRanges.py

Issue 1193533007: Upgrade to libxml 2.9.2 and libxslt 1.1.28 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Add expected blink test failures Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: third_party/libxml/src/genChRanges.py

diff --git a/third_party/libxml/src/genChRanges.py b/third_party/libxml/src/genChRanges.py

new file mode 100755

index 0000000000000000000000000000000000000000..c1383193fb6b6aee80d26c63cedb18d040dc58ab

--- /dev/null

+++ b/third_party/libxml/src/genChRanges.py

@@ -0,0 +1,578 @@

+#!/usr/bin/python -u

+# Portions of this script have been (shamelessly) stolen from the

+# prior work of Daniel Veillard (genUnicode.py)

+# I, however, take full credit for any bugs, errors or difficulties :-)

+# William Brack

+# October 2003

+# 18 October 2003

+# Modified to maintain binary compatibility with previous library versions

+# by adding a suffix 'Q' ('quick') to the macro generated for the original,

+# function, and adding generation of a function (with the original name) which

+# instantiates the macro.

+import sys

+import string

+import time

+# A routine to take a list of yes/no (1, 0) values and turn it

+# into a list of ranges. This will later be used to determine whether

+# to generate single-byte lookup tables, or inline comparisons

+def makeRange(lst):

+ ret = []

+ pos = 0

+ while pos < len(lst):

+ try: # index generates exception if not present

+ s = lst[pos:].index(1) # look for start of next range

+ except:

+ break # if no more, finished

+ pos += s # pointer to start of possible range

+ try:

+ e = lst[pos:].index(0) # look for end of range

+ e += pos

+ except: # if no end, set to end of list

+ e = len(lst)

+ ret.append((pos, e-1)) # append range tuple to list

+ pos = e + 1 # ready to check for next range

+ return ret

+sources = "chvalid.def" # input filename

+# minTableSize gives the minimum number of ranges which must be present

+# before a 256-byte lookup table is produced. If there are less than this

+# number, a macro with inline comparisons is generated

+minTableSize = 6

+# dictionary of functions, key=name, element contains char-map and range-list

+Functs = {}

+state = 0

+try:

+ defines = open("chvalid.def", "r")

+except:

+ print "Missing chvalid.def, aborting ..."

+ sys.exit(1)

+# The lines in the .def file have three types:-

+# name: Defines a new function block

+# ur: Defines individual or ranges of unicode values

+# end: Indicates the end of the function block

+# These lines are processed below.

+for line in defines.readlines():

+ # ignore blank lines, or lines beginning with '#'

+ if line[0] == '#':

+ continue

+ line = string.strip(line)

+ if line == '':

+ continue

+ # split line into space-separated fields, then split on type

+ try:

+ fields = string.split(line, ' ')

+ #

+ # name line:

+ # validate any previous function block already ended

+ # validate this function not already defined

+ # initialize an entry in the function dicitonary

+ # including a mask table with no values yet defined

+ #

+ if fields[0] == 'name':

+ name = fields[1]

+ if state != 0:

+ print "'name' %s found before previous name" \

+ "completed" % (fields[1])

+ continue

+ state = 1

+ if Functs.has_key(name):

+ print "name '%s' already present - may give" \

+ " wrong results" % (name)

+ else:

+ # dict entry with two list elements (chdata, rangedata)

+ Functs[name] = [ [], [] ]

+ for v in range(256):

+ Functs[name][0].append(0)

+ #

+ # end line:

+ # validate there was a preceding function name line

+ # set state to show no current function active

+ #

+ elif fields[0] == 'end':

+ if state == 0:

+ print "'end' found outside of function block"

+ continue

+ state = 0

+ #

+ # ur line:

+ # validate function has been defined

+ # process remaining fields on the line, which may be either

+ # individual unicode values or ranges of values

+ #

+ elif fields[0] == 'ur':

+ if state != 1:

+ raise ValidationError, "'ur' found outside of 'name' block"

+ for el in fields[1:]:

+ pos = string.find(el, '..')

+ # pos <=0 means not a range, so must be individual value

+ if pos <= 0:

+ # cheap handling of hex or decimal values

+ if el[0:2] == '0x':

+ value = int(el[2:],16)

+ elif el[0] == "'":

+ value = ord(el[1])

+ else:

+ value = int(el)

+ if ((value < 0) | (value > 0x1fffff)):

+ raise ValidationError, 'Illegal value (%s) in ch for'\

+ ' name %s' % (el,name)

+ # for ur we have only ranges (makes things simpler),

+ # so convert val to range

+ currange = (value, value)

+ # pos > 0 means this is a range, so isolate/validate

+ # the interval

+ else:

+ # split the range into it's first-val, last-val

+ (first, last) = string.split(el, "..")

+ # convert values from text into binary

+ if first[0:2] == '0x':

+ start = int(first[2:],16)

+ elif first[0] == "'":

+ start = ord(first[1])

+ else:

+ start = int(first)

+ if last[0:2] == '0x':

+ end = int(last[2:],16)

+ elif last[0] == "'":

+ end = ord(last[1])

+ else:

+ end = int(last)

+ if (start < 0) | (end > 0x1fffff) | (start > end):

+ raise ValidationError, "Invalid range '%s'" % el

+ currange = (start, end)

+ # common path - 'currange' has the range, now take care of it

+ # We split on single-byte values vs. multibyte

+ if currange[1] < 0x100: # single-byte

+ for ch in range(currange[0],currange[1]+1):

+ # validate that value not previously defined

+ if Functs[name][0][ch]:

+ msg = "Duplicate ch value '%s' for name '%s'" % (el, name)

+ raise ValidationError, msg

+ Functs[name][0][ch] = 1

+ else: # multi-byte

+ if currange in Functs[name][1]:

+ raise ValidationError, "range already defined in" \

+ " function"

+ else:

+ Functs[name][1].append(currange)

+ except:

+ print "Failed to process line: %s" % (line)

+ raise

+# At this point, the entire definition file has been processed. Now we

+# enter the output phase, where we generate the two files chvalid.c and'

+# chvalid.h

+# To do this, we first output the 'static' data (heading, fixed

+# definitions, etc.), then output the 'dynamic' data (the results

+# of the above processing), and finally output closing 'static' data

+# (e.g. the subroutine to process the ranges)

+# Generate the headings:

+try:

+ header = open("include/libxml/chvalid.h", "w")

+except:

+ print "Failed to open include/libxml/chvalid.h"

+ sys.exit(1)

+try:

+ output = open("chvalid.c", "w")

+except:

+ print "Failed to open chvalid.c"

+ sys.exit(1)

+date = time.asctime(time.localtime(time.time()))

+header.write(

+"""/*

+ * Summary: Unicode character range checking

+ * Description: this module exports interfaces for the character

+ * range validation APIs

+ *

+ * This file is automatically generated from the cvs source

+ * definition files using the genChRanges.py Python script

+ *

+ * Generation date: %s

+ * Sources: %s

+ * Author: William Brack <wbrack@mmm.com.hk>

+ */

+#ifndef __XML_CHVALID_H__

+#define __XML_CHVALID_H__

+#include <libxml/xmlversion.h>

+#include <libxml/xmlstring.h>

+#ifdef __cplusplus

+extern "C" {

+#endif

+/*

+ * Define our typedefs and structures

+ *

+ */

+typedef struct _xmlChSRange xmlChSRange;

+typedef xmlChSRange *xmlChSRangePtr;

+struct _xmlChSRange {

+ unsigned short low;

+ unsigned short high;

+};

+typedef struct _xmlChLRange xmlChLRange;

+typedef xmlChLRange *xmlChLRangePtr;

+struct _xmlChLRange {

+ unsigned int low;

+ unsigned int high;

+};

+typedef struct _xmlChRangeGroup xmlChRangeGroup;

+typedef xmlChRangeGroup *xmlChRangeGroupPtr;

+struct _xmlChRangeGroup {

+ int nbShortRange;

+ int nbLongRange;

+ const xmlChSRange *shortRange; /* points to an array of ranges */

+ const xmlChLRange *longRange;

+};

+/**

+ * Range checking routine

+ */

+XMLPUBFUN int XMLCALL

+ xmlCharInRange(unsigned int val, const xmlChRangeGroup *group);

+""" % (date, sources));

+output.write(

+"""/*

+ * chvalid.c: this module implements the character range

+ * validation APIs

+ *

+ * This file is automatically generated from the cvs source

+ * definition files using the genChRanges.py Python script

+ *

+ * Generation date: %s

+ * Sources: %s

+ * William Brack <wbrack@mmm.com.hk>

+ */

+#define IN_LIBXML

+#include "libxml.h"

+#include <libxml/chvalid.h>

+/*

+ * The initial tables ({func_name}_tab) are used to validate whether a

+ * single-byte character is within the specified group. Each table

+ * contains 256 bytes, with each byte representing one of the 256

+ * possible characters. If the table byte is set, the character is

+ * allowed.

+ *

+ */

+""" % (date, sources));

+# Now output the generated data.

+# We try to produce the best execution times. Tests have shown that validation

+# with direct table lookup is, when there are a "small" number of valid items,

+# still not as fast as a sequence of inline compares. So, if the single-byte

+# portion of a range has a "small" number of ranges, we output a macro for inline

+# compares, otherwise we output a 256-byte table and a macro to use it.

+fkeys = Functs.keys() # Dictionary of all defined functions

+fkeys.sort() # Put some order to our output

+for f in fkeys:

+# First we convert the specified single-byte values into a group of ranges.

+# If the total number of such ranges is less than minTableSize, we generate

+# an inline macro for direct comparisons; if greater, we generate a lookup

+# table.

+ if max(Functs[f][0]) > 0: # only check if at least one entry

+ rangeTable = makeRange(Functs[f][0])

+ numRanges = len(rangeTable)

+ if numRanges >= minTableSize: # table is worthwhile

+ header.write("XMLPUBVAR const unsigned char %s_tab[256];\n" % f)

+ header.write("""

+/**

+ * %s_ch:

+ * @c: char to validate

+ *

+ * Automatically generated by genChRanges.py

+ */

+""" % f)

+ header.write("#define %s_ch(c)\t(%s_tab[(c)])\n" % (f, f))

+ # write the constant data to the code file

+ output.write("const unsigned char %s_tab[256] = {\n" % f)

+ pline = " "

+ for n in range(255):

+ pline += " 0x%02x," % Functs[f][0][n]

+ if len(pline) > 72:

+ output.write(pline + "\n")

+ pline = " "

+ output.write(pline + " 0x%02x };\n\n" % Functs[f][0][255])

+ else: # inline check is used

+ # first another little optimisation - if space is present,

+ # put it at the front of the list so it is checked first

+ try:

+ ix = rangeTable.remove((0x20, 0x20))

+ rangeTable.insert(0, (0x20, 0x20))

+ except:

+ pass

+ firstFlag = 1

+ header.write("""

+/**

+ * %s_ch:

+ * @c: char to validate

+ *

+ * Automatically generated by genChRanges.py

+ */

+""" % f)

+ # okay, I'm tired of the messy lineup - let's automate it!

+ pline = "#define %s_ch(c)" % f

+ # 'ntab' is number of tabs needed to position to col. 33 from name end

+ ntab = 4 - (len(pline)) / 8

+ if ntab < 0:

+ ntab = 0

+ just = ""

+ for i in range(ntab):

+ just += "\t"

+ pline = pline + just + "("

+ for rg in rangeTable:

+ if not firstFlag:

+ pline += " || \\\n\t\t\t\t "

+ else:

+ firstFlag = 0

+ if rg[0] == rg[1]: # single value - check equal

+ pline += "((c) == 0x%x)" % rg[0]

+ else: # value range

+ # since we are doing char, also change range ending in 0xff

+ if rg[1] != 0xff:

+ pline += "((0x%x <= (c)) &&" % rg[0]

+ pline += " ((c) <= 0x%x))" % rg[1]

+ else:

+ pline += " (0x%x <= (c))" % rg[0]

+ pline += ")\n"

+ header.write(pline)

+ header.write("""

+/**

+ * %sQ:

+ * @c: char to validate

+ *

+ * Automatically generated by genChRanges.py

+ */

+""" % f)

+ pline = "#define %sQ(c)" % f

+ ntab = 4 - (len(pline)) / 8

+ if ntab < 0:

+ ntab = 0

+ just = ""

+ for i in range(ntab):

+ just += "\t"

+ header.write(pline + just + "(((c) < 0x100) ? \\\n\t\t\t\t ")

+ if max(Functs[f][0]) > 0:

+ header.write("%s_ch((c)) :" % f)

+ else:

+ header.write("0 :")

+ # if no ranges defined, value invalid if >= 0x100

+ numRanges = len(Functs[f][1])

+ if numRanges == 0:

+ header.write(" 0)\n\n")

+ else:

+ if numRanges >= minTableSize:

+ header.write(" \\\n\t\t\t\t xmlCharInRange((c), &%sGroup))\n\n" % f)

+ else: # if < minTableSize, generate inline code

+ firstFlag = 1

+ for rg in Functs[f][1]:

+ if not firstFlag:

+ pline += " || \\\n\t\t\t\t "

+ else:

+ firstFlag = 0

+ pline = "\\\n\t\t\t\t("

+ if rg[0] == rg[1]: # single value - check equal

+ pline += "((c) == 0x%x)" % rg[0]

+ else: # value range

+ pline += "((0x%x <= (c)) &&" % rg[0]

+ pline += " ((c) <= 0x%x))" % rg[1]

+ pline += "))\n\n"

+ header.write(pline)

+ if len(Functs[f][1]) > 0:

+ header.write("XMLPUBVAR const xmlChRangeGroup %sGroup;\n" % f)

+# Next we do the unicode ranges

+for f in fkeys:

+ if len(Functs[f][1]) > 0: # only generate if unicode ranges present

+ rangeTable = Functs[f][1]

+ rangeTable.sort() # ascending tuple sequence

+ numShort = 0

+ numLong = 0

+ for rg in rangeTable:

+ if rg[1] < 0x10000: # if short value

+ if numShort == 0: # first occurence

+ pline = "static const xmlChSRange %s_srng[] = { " % f

+ else:

+ pline += ", "

+ numShort += 1

+ if len(pline) > 60:

+ output.write(pline + "\n")

+ pline = " "

+ pline += "{0x%x, 0x%x}" % (rg[0], rg[1])

+ else: # if long value

+ if numLong == 0: # first occurence

+ if numShort > 0: # if there were shorts, finish them off

+ output.write(pline + "};\n")

+ pline = "static const xmlChLRange %s_lrng[] = { " % f

+ else:

+ pline += ", "

+ numLong += 1

+ if len(pline) > 60:

+ output.write(pline + "\n")

+ pline = " "

+ pline += "{0x%x, 0x%x}" % (rg[0], rg[1])

+ output.write(pline + "};\n") # finish off last group

+ pline = "const xmlChRangeGroup %sGroup =\n\t{%d, %d, " % (f, numShort, numLong)

+ if numShort > 0:

+ pline += "%s_srng" % f

+ else:

+ pline += "(xmlChSRangePtr)0"

+ if numLong > 0:

+ pline += ", %s_lrng" % f

+ else:

+ pline += ", (xmlChLRangePtr)0"

+ output.write(pline + "};\n\n")

+output.write(

+"""

+/**

+ * xmlCharInRange:

+ * @val: character to be validated

+ * @rptr: pointer to range to be used to validate

+ *

+ * Does a binary search of the range table to determine if char

+ * is valid

+ *

+ * Returns: true if character valid, false otherwise

+ */

+int

+xmlCharInRange (unsigned int val, const xmlChRangeGroup *rptr) {

+ int low, high, mid;

+ const xmlChSRange *sptr;

+ const xmlChLRange *lptr;

+ if (rptr == NULL) return(0);

+ if (val < 0x10000) { /* is val in 'short' or 'long' array? */

+ if (rptr->nbShortRange == 0)

+ return 0;

+ low = 0;

+ high = rptr->nbShortRange - 1;

+ sptr = rptr->shortRange;

+ while (low <= high) {

+ mid = (low + high) / 2;

+ if ((unsigned short) val < sptr[mid].low) {

+ high = mid - 1;

+ } else {

+ if ((unsigned short) val > sptr[mid].high) {

+ low = mid + 1;

+ } else {

+ return 1;

+ }

+ } else {

+ if (rptr->nbLongRange == 0) {

+ return 0;

+ }

+ low = 0;

+ high = rptr->nbLongRange - 1;

+ lptr = rptr->longRange;

+ while (low <= high) {

+ mid = (low + high) / 2;

+ if (val < lptr[mid].low) {

+ high = mid - 1;

+ } else {

+ if (val > lptr[mid].high) {

+ low = mid + 1;

+ } else {

+ return 1;

+ }

+ return 0;

+""");

+# finally, generate the ABI compatibility functions

+for f in fkeys:

+ output.write("""

+/**

+ * %s:

+ * @ch: character to validate

+ *

+ * This function is DEPRECATED.

+""" % f);

+ if max(Functs[f][0]) > 0:

+ output.write(" * Use %s_ch or %sQ instead" % (f, f))

+ else:

+ output.write(" * Use %sQ instead" % f)

+ output.write("""

+ *

+ * Returns true if argument valid, false otherwise

+ */

+""")

+ output.write("int\n%s(unsigned int ch) {\n return(%sQ(ch));\n}\n\n" % (f,f))

+ header.write("XMLPUBFUN int XMLCALL\n\t\t%s(unsigned int ch);\n" % f);

+# Run complete - write trailers and close the output files

+header.write("""

+#ifdef __cplusplus

+#endif

+#endif /* __XML_CHVALID_H__ */

+""")

+header.close()

+output.write("""#define bottom_chvalid

+#include "elfgcchack.h"

+""")

+output.close()

« content/test/test_expectations.txt ('K') | « third_party/libxml/src/example/gjobs.xml ('k') | third_party/libxml/src/global.data » ('j') | no next file with comments »