icu46/source/tools/toolutil/xmlparser.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Unified Diff: icu46/source/tools/toolutil/xmlparser.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: icu46/source/tools/toolutil/xmlparser.cpp

===================================================================

--- icu46/source/tools/toolutil/xmlparser.cpp (revision 0)

+++ icu46/source/tools/toolutil/xmlparser.cpp (revision 0)

@@ -0,0 +1,825 @@

+/*

+*******************************************************************************

+* file name: xmlparser.cpp

+* encoding: US-ASCII

+* tab size: 8 (not used)

+* indentation:4

+* created on: 2004jul21

+* created by: Andy Heninger

+*/

+#include <stdio.h>

+#include "unicode/uchar.h"

+#include "unicode/ucnv.h"

+#include "unicode/regex.h"

+#include "filestrm.h"

+#include "xmlparser.h"

+#if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_CONVERSION

+// character constants: code points of the ASCII characters this file refers to by name

+enum {

+ x_QUOT=0x22, // '"'

+ x_AMP=0x26, // '&'

+ x_APOS=0x27, // '\''

+ x_LT=0x3c, // '<'

+ x_GT=0x3e, // '>'

+ x_l=0x6c // 'l', used by parseFile() to find the end of "<?xml"

+};

+#define XML_SPACES "[ \\u0009\\u000d\\u000a]" // regex set: space, tab, CR, LF

+// XML #4: regex set for the characters that may start an XML name

+#define XML_NAMESTARTCHAR "[[A-Z]:_[a-z][\\u00c0-\\u00d6][\\u00d8-\\u00f6]" \

+ "[\\u00f8-\\u02ff][\\u0370-\\u037d][\\u037F-\\u1FFF][\\u200C-\\u200D]" \

+ "[\\u2070-\\u218F][\\u2C00-\\u2FEF][\\u3001-\\uD7FF][\\uF900-\\uFDCF]" \

+ "[\\uFDF0-\\uFFFD][\\U00010000-\\U000EFFFF]]"

+// XML #5: regex set for any name character (the name-start set plus '-', '.', digits and a few more ranges)

+#define XML_NAMECHAR "[" XML_NAMESTARTCHAR "\\-.[0-9]\\u00b7[\\u0300-\\u036f][\\u203f-\\u2040]]"

+// XML #6: regex for a complete name, one name-start character followed by zero or more name characters

+#define XML_NAME XML_NAMESTARTCHAR "(?:" XML_NAMECHAR ")*"

+U_NAMESPACE_BEGIN

+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UXMLParser)

+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UXMLElement)

+//
+//   UXMLParser constructor.   Mostly just initializes the ICU regexes that are
+//                             used for parsing.
+//
+//   status receives any error reported while the regex matchers, the name
+//   table and the element stack are being constructed.
+//
+UXMLParser::UXMLParser(UErrorCode &status) :
+    //  XML Declaration.  XML Production #23.
+    //      example:  "<?xml version=1.0 encoding="utf-16" ?>
+    //      This is a sloppy implementation - just look for the leading <?xml and the closing ?>
+    //      allow for a possible leading BOM.
+    mXMLDecl(UnicodeString("(?s)\\uFEFF?<\\?xml.+?\\?>", -1, US_INV), 0, status),
+
+    //  XML Comment   production #15
+    //      example:  "<!-- whatever -->
+    //      note, does not detect an illegal "--" within comments
+    //      The pattern must consume at least the comment delimiters: a pattern that
+    //      can match the empty string would make parseMisc() loop without advancing.
+    mXMLComment(UnicodeString("(?s)<!--.+?-->", -1, US_INV), 0, status),
+
+    //  XML Spaces
+    //      production [3]
+    mXMLSP(UnicodeString(XML_SPACES "+", -1, US_INV), 0, status),
+
+    //  XML Doctype decl  production #28
+    //      example   "<!DOCTYPE foo SYSTEM "somewhere" >
+    //      or        "<!DOCTYPE foo [internal dtd]>
+    //    TODO:  we don't actually parse the DOCTYPE or internal subsets.
+    //           Some internal dtd subsets could confuse this simple-minded
+    //           attempt at skipping over them, specifically, occurrences
+    //           of closing square brackets.  These could appear in comments,
+    //           or in parameter entity declarations, for example.
+    mXMLDoctype(UnicodeString(
+            "(?s)<!DOCTYPE.*?(>|\\[.*?\\].*?>)", -1, US_INV
+        ), 0, status),
+
+    //  XML PI     production #16
+    //     example   "<?target stuff?>
+    mXMLPI(UnicodeString("(?s)<\\?.+?\\?>", -1, US_INV), 0, status),
+
+    //  XML Element Start   Productions #40, #41
+    //      example   <foo att1='abc'  att2="d e f" >
+    //      capture #1:  the tag name
+    //
+    mXMLElemStart (UnicodeString("(?s)<(" XML_NAME ")"                                 // match  "<tag_name"
+        "(?:"
+            XML_SPACES "+" XML_NAME XML_SPACES "*=" XML_SPACES "*"     // match  "ATTR_NAME = "
+            "(?:(?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))"        // match  '"attribute value"'
+        ")*"                                                             //   * for zero or more attributes.
+        XML_SPACES "*?>", -1, US_INV), 0, status),                               // match " >"
+
+    //  XML Element End     production #42
+    //     example   </foo>
+    mXMLElemEnd (UnicodeString("</(" XML_NAME ")" XML_SPACES "*>", -1, US_INV), 0, status),
+
+    // XML Element Empty    production #44
+    //     example   <foo att1="abc"   att2="d e f" />
+    mXMLElemEmpty (UnicodeString("(?s)<(" XML_NAME ")"                                 // match  "<tag_name"
+        "(?:"
+            XML_SPACES "+" XML_NAME XML_SPACES "*=" XML_SPACES "*"     // match  "ATTR_NAME = "
+            "(?:(?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))"        // match  '"attribute value"'
+        ")*"                                                             //   * for zero or more attributes.
+        XML_SPACES "*?/>", -1, US_INV), 0, status),                              // match " />"
+
+    // XMLCharData.  Everything but '<'.  Note that & will be dealt with later.
+    mXMLCharData(UnicodeString("(?s)[^<]*", -1, US_INV), 0, status),
+
+    // Attribute name = "value".  XML Productions 10, 40/41
+    //  Capture group 1 is name,
+    //                2 is the attribute value, including the quotes.
+    //
+    //   Note that attributes are scanned twice.  The first time is with
+    //        the regex for an entire element start.  There, the attributes
+    //        are checked syntactically, but not separated out one by one.
+    //        Here, we match a single attribute, and make its name and
+    //        attribute value available to the parser code.
+    mAttrValue(UnicodeString(XML_SPACES "+("  XML_NAME ")"  XML_SPACES "*=" XML_SPACES "*"
+        "((?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))", -1, US_INV), 0, status),
+
+    // Matches a single XML white space character; used to turn each one
+    // into a plain space during attribute value normalization.
+    mAttrNormalizer(UnicodeString(XML_SPACES, -1, US_INV), 0, status),
+
+    // Match any of the new-line sequences in content.
+    //   All are changed to \u000a.
+    mNewLineNormalizer(UnicodeString("\\u000d\\u000a|\\u000d\\u0085|\\u000a|\\u000d|\\u0085|\\u2028", -1, US_INV), 0, status),
+
+    // & char references
+    //   We will figure out what we've got based on which capture group has content.
+    //   The last one is a catchall for unrecognized entity references..
+    //             1     2     3      4      5       6                    7          8
+    mAmps(UnicodeString("&(?:(amp;)|(lt;)|(gt;)|(apos;)|(quot;)|#x([0-9A-Fa-f]{1,8});|#([0-9]{1,8});|(.))"),
+                0, status),
+
+    fNames(status),
+    fElementStack(status),
+    fOneLF((UChar)0x0a)        // Plain new-line string, used in new line normalization.
+    {
+    }

+// Factory function.  Returns NULL without doing anything when errorCode
+// already indicates a failure on entry; otherwise returns a newly allocated
+// parser whose constructor reports its own problems through errorCode.
+UXMLParser *
+UXMLParser::createParser(UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) {
+        return NULL;
+    }
+    return new UXMLParser(errorCode);
+}

+UXMLParser::~UXMLParser() {} // empty body: no explicit cleanup is performed here

+//
+//  parseFile
+//      Read the named file, convert its bytes to Unicode and hand the text to parse().
+//
+//      filename   path passed to T_FileStream_open().
+//      errorCode  in/out ICU error code.  Nothing is done if it indicates a failure
+//                 on entry.  Set to U_FILE_ACCESS_ERROR if the file cannot be opened,
+//                 or to the converter/parser error otherwise.
+//      returns    the root element produced by parse(), or NULL on any failure.
+//
+UXMLElement *
+UXMLParser::parseFile(const char *filename, UErrorCode &errorCode) {
+    char bytes[4096], charsetBuffer[100];
+    FileStream *f;
+    const char *charset, *pb;
+    UnicodeString src;
+    UConverter *cnv=NULL;   // stays NULL until a converter was opened; ucnv_close(NULL) is harmless
+    UChar *buffer, *pu;
+    int32_t fileLength, bytesLength, length, capacity;
+    UBool flush;
+
+    if(U_FAILURE(errorCode)) {
+        return NULL;
+    }
+
+    f=T_FileStream_open(filename, "rb");
+    if(f==NULL) {
+        errorCode=U_FILE_ACCESS_ERROR;
+        return NULL;
+    }
+
+    bytesLength=T_FileStream_read(f, bytes, (int32_t)sizeof(bytes));
+    if(bytesLength<(int32_t)sizeof(bytes)) {
+        // we have already read the entire file
+        fileLength=bytesLength;
+    } else {
+        // get the file length
+        fileLength=T_FileStream_size(f);
+    }
+
+    /*
+     * get the charset:
+     * 1. Unicode signature
+     * 2. treat as ISO-8859-1 and read XML encoding="charset"
+     * 3. default to UTF-8
+     */
+    charset=ucnv_detectUnicodeSignature(bytes, bytesLength, NULL, &errorCode);
+    if(U_SUCCESS(errorCode) && charset!=NULL) {
+        // open converter according to Unicode signature
+        cnv=ucnv_open(charset, &errorCode);
+    } else {
+        // read as Latin-1 and parse the XML declaration and encoding
+        cnv=ucnv_open("ISO-8859-1", &errorCode);
+        if(U_FAILURE(errorCode)) {
+            // unexpected error opening Latin-1 converter
+            goto exit;
+        }
+
+        buffer=src.getBuffer(bytesLength);
+        if(buffer==NULL) {
+            // unexpected failure to reserve some string capacity
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            goto exit;
+        }
+        pb=bytes;
+        pu=buffer;
+        ucnv_toUnicode(
+            cnv,
+            &pu, buffer+src.getCapacity(),
+            &pb, bytes+bytesLength,
+            NULL, TRUE, &errorCode);
+        src.releaseBuffer(U_SUCCESS(errorCode) ? (int32_t)(pu-buffer) : 0);
+        ucnv_close(cnv);
+        cnv=NULL;
+        if(U_FAILURE(errorCode)) {
+            // unexpected error in conversion from Latin-1
+            src.remove();
+            goto exit;
+        }
+
+        // parse XML declaration
+        if(mXMLDecl.reset(src).lookingAt(0, errorCode)) {
+            int32_t declEnd=mXMLDecl.end(errorCode);
+            // go beyond <?xml
+            int32_t pos=src.indexOf((UChar)x_l)+1;
+
+            mAttrValue.reset(src);
+            while(pos<declEnd && mAttrValue.lookingAt(pos, errorCode)) {  // loop runs once per pseudo-attribute of the declaration.
+                UnicodeString attName  = mAttrValue.group(1, errorCode);
+                UnicodeString attValue = mAttrValue.group(2, errorCode);
+
+                // Trim the quotes from the att value.  These are left over from the original regex
+                //   that parsed the attribute, which couldn't conveniently strip them.
+                attValue.remove(0,1);                    // one char from the beginning
+                attValue.truncate(attValue.length()-1);  // and one from the end.
+
+                if(attName==UNICODE_STRING("encoding", 8)) {
+                    length=attValue.extract(0, 0x7fffffff, charsetBuffer, (int32_t)sizeof(charsetBuffer));
+                    if(length>=(int32_t)sizeof(charsetBuffer)) {
+                        // The name did not fit together with its terminating NUL, so
+                        // charsetBuffer cannot be used as a C string.
+                        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+                        goto exit;
+                    }
+                    charset=charsetBuffer;
+                    break;
+                }
+                pos = mAttrValue.end(2, errorCode);
+            }
+
+            if(charset==NULL) {
+                // default to UTF-8
+                charset="UTF-8";
+            }
+            cnv=ucnv_open(charset, &errorCode);
+        }
+    }
+
+    if(U_FAILURE(errorCode)) {
+        // unable to open the converter
+        goto exit;
+    }
+
+    // convert the file contents
+    capacity=fileLength;        // estimated capacity
+    src.getBuffer(capacity);
+    src.releaseBuffer(0);       // zero length
+    flush=FALSE;
+    for(;;) {
+        // convert contents of bytes[bytesLength]
+        pb=bytes;
+        for(;;) {
+            length=src.length();
+            buffer=src.getBuffer(capacity);
+            if(buffer==NULL) {
+                // unexpected failure to reserve some string capacity
+                errorCode=U_MEMORY_ALLOCATION_ERROR;
+                goto exit;
+            }
+
+            pu=buffer+length;
+            // Pass the flush flag so that the final call really flushes the converter.
+            ucnv_toUnicode(
+                cnv, &pu, buffer+src.getCapacity(),
+                &pb, bytes+bytesLength,
+                NULL, flush, &errorCode);
+            // On overflow the text converted so far is valid and pb has already
+            // advanced past the bytes that produced it, so it must be kept.
+            src.releaseBuffer(
+                (U_SUCCESS(errorCode) || errorCode==U_BUFFER_OVERFLOW_ERROR) ?
+                    (int32_t)(pu-buffer) : 0);
+            if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
+                errorCode=U_ZERO_ERROR;
+                capacity=(3*src.getCapacity())/2; // increase capacity by 50%
+            } else {
+                break;
+            }
+        }
+
+        if(U_FAILURE(errorCode)) {
+            break; // conversion error
+        }
+
+        if(flush) {
+            break; // completely converted the file
+        }
+
+        // read next block
+        bytesLength=T_FileStream_read(f, bytes, (int32_t)sizeof(bytes));
+        if(bytesLength==0) {
+            // reached end of file, convert once more to flush the converter
+            flush=TRUE;
+        }
+    };
+
+exit:
+    ucnv_close(cnv);
+    T_FileStream_close(f);
+
+    if(U_SUCCESS(errorCode)) {
+        return parse(src, errorCode);
+    } else {
+        return NULL;
+    }
+}

+//
+//  parse
+//      Parse an XML document held in a string.
+//
+//      src      the document text.  The matchers are reset to refer to it.
+//      status   in/out ICU error code.  Nothing is done if it indicates a failure
+//               on entry; error() sets it when the document is rejected.
+//      returns  the root element, or NULL on failure.  On failure the partially
+//               built tree is deleted.
+//
+UXMLElement *
+UXMLParser::parse(const UnicodeString &src, UErrorCode &status) {
+    if(U_FAILURE(status)) {
+        return NULL;
+    }
+
+    UXMLElement   *root = NULL;
+    fPos = 0; // TODO use just a local pos variable and pass it into functions
+              // where necessary?
+
+    // Start from an empty element stack.  The stack only holds pointers into the
+    // tree being built, so anything left over from an earlier parse is stale.
+    fElementStack.removeAllElements();
+
+    // set all matchers to work on the input string
+    mXMLDecl.reset(src);
+    mXMLComment.reset(src);
+    mXMLSP.reset(src);
+    mXMLDoctype.reset(src);
+    mXMLPI.reset(src);
+    mXMLElemStart.reset(src);
+    mXMLElemEnd.reset(src);
+    mXMLElemEmpty.reset(src);
+    mXMLCharData.reset(src);
+    mAttrValue.reset(src);
+    mAttrNormalizer.reset(src);
+    mNewLineNormalizer.reset(src);
+    mAmps.reset(src);
+
+    // Consume the XML Declaration, if present.
+    if (mXMLDecl.lookingAt(fPos, status)) {
+        fPos = mXMLDecl.end(status);
+    }
+
+    // Consume "misc" [XML production 27] appearing before DocType
+    parseMisc(status);
+
+    // Consume a DocType declaration, if present.
+    if (mXMLDoctype.lookingAt(fPos, status)) {
+        fPos = mXMLDoctype.end(status);
+    }
+
+    // Consume additional "misc" [XML production 27] appearing after the DocType
+    parseMisc(status);
+
+    // Get the root element
+    if (mXMLElemEmpty.lookingAt(fPos, status)) {
+        // Root is an empty element (no nested elements or content)
+        root = createElement(mXMLElemEmpty, status);
+        if (root == NULL) {
+            if (U_SUCCESS(status)) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+            }
+            goto errorExit;
+        }
+        fPos = mXMLElemEmpty.end(status);
+    } else {
+        if (mXMLElemStart.lookingAt(fPos, status) == FALSE) {
+            error("Root Element expected", status);
+            goto errorExit;
+        }
+        root = createElement(mXMLElemStart, status);
+        if (root == NULL) {
+            if (U_SUCCESS(status)) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+            }
+            goto errorExit;
+        }
+        UXMLElement  *el = root;
+
+        //
+        // This is the loop that consumes the root element of the document,
+        //      including all nested content.   Nested elements are handled by
+        //      explicit pushes/pops of the element stack; there is no recursion
+        //      in the control flow of this code.
+        //      "el" always refers to the current element, the one to which content
+        //      is being added.  It is above the top of the element stack.
+        for (;;) {
+            // Nested Element Start
+            if (mXMLElemStart.lookingAt(fPos, status)) {
+                UXMLElement *t = createElement(mXMLElemStart, status);
+                if (t == NULL) {
+                    if (U_SUCCESS(status)) {
+                        status = U_MEMORY_ALLOCATION_ERROR;
+                    }
+                    goto errorExit;
+                }
+                el->fChildren.addElement(t, status);
+                t->fParent = el;
+                fElementStack.push(el, status);
+                el = t;
+                continue;
+            }
+
+            // Text Content.  String is concatenated onto the current node's content,
+            //                but only if it contains something other than spaces.
+            UnicodeString s = scanContent(status);
+            if (s.length() > 0) {
+                mXMLSP.reset(s);
+                if (mXMLSP.matches(status) == FALSE) {
+                    // This chunk of text contains something other than just
+                    //  white space. Make a child node for it.
+                    replaceCharRefs(s, status);
+                    el->fChildren.addElement(s.clone(), status);
+                }
+                mXMLSP.reset(src);    // The matchers need to stay set to the main input string.
+                continue;
+            }
+
+            // Comments.  Discard.
+            if (mXMLComment.lookingAt(fPos, status)) {
+                fPos = mXMLComment.end(status);
+                continue;
+            }
+
+            // PIs.  Discard.
+            if (mXMLPI.lookingAt(fPos, status)) {
+                fPos = mXMLPI.end(status);
+                continue;
+            }
+
+            // Element End
+            if (mXMLElemEnd.lookingAt(fPos, status)) {
+                fPos = mXMLElemEnd.end(0, status);
+                const UnicodeString name = mXMLElemEnd.group(1, status);
+                if (name != *el->fName) {
+                    error("Element start / end tag mismatch", status);
+                    goto errorExit;
+                }
+                if (fElementStack.empty()) {
+                    // Close of the root element.  We're done with the doc.
+                    el = NULL;
+                    break;
+                }
+                el = (UXMLElement *)fElementStack.pop();
+                continue;
+            }
+
+            // Empty Element.  Stored as a child of the current element, but not stacked.
+            if (mXMLElemEmpty.lookingAt(fPos, status)) {
+                UXMLElement *t = createElement(mXMLElemEmpty, status);
+                if (t == NULL) {
+                    if (U_SUCCESS(status)) {
+                        status = U_MEMORY_ALLOCATION_ERROR;
+                    }
+                    goto errorExit;
+                }
+                el->fChildren.addElement(t, status);
+                t->fParent = el;    // same parent link as for elements opened with a start tag
+                continue;
+            }
+
+            // Hit something within the document that doesn't match anything.
+            //   It's an error.
+            error("Unrecognized markup", status);
+            break;
+        }
+
+        if (el != NULL || !fElementStack.empty()) {
+            // We bailed out early, for some reason.
+            error("Root element not closed.", status);
+            goto errorExit;
+        }
+    }
+
+    // Root Element parse is complete.
+    // Consume the annoying xml "Misc" that can appear at the end of the doc.
+    parseMisc(status);
+
+    // We should have reached the end of the input
+    if (fPos != src.length()) {
+        error("Extra content at the end of the document", status);
+        goto errorExit;
+    }
+
+    // Success!
+    return root;
+
+errorExit:
+    // The stack entries point into the tree that is about to be deleted.
+    fElementStack.removeAllElements();
+    delete root;
+    return NULL;
+}

+//
+//  createElement
+//      We've just matched an element start tag.  Create and fill in a UXMLElement object
+//      for it.
+//
+//      mEl      the matcher that has just matched the start tag or empty-element tag;
+//               capture group 1 is the element name.
+//      status   in/out ICU error code; set to U_MEMORY_ALLOCATION_ERROR if the
+//               element cannot be allocated.
+//      returns  the new element, or NULL if it could not be allocated.
+//               fPos is moved to the end of the matched tag.
+//
+UXMLElement *
+UXMLParser::createElement(RegexMatcher  &mEl, UErrorCode &status) {
+    // First capture group is the element's name.
+    UXMLElement *el = new UXMLElement(this, intern(mEl.group(1, status), status), status);
+    if (el == NULL) {
+        if (U_SUCCESS(status)) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+        return NULL;
+    }
+
+    // Replacement text for attribute white space normalization.
+    const UnicodeString oneSpace((UChar)0x0020);
+
+    // Scan for attributes.
+    int32_t   pos = mEl.end(1, status);  // The position after the end of the tag name
+
+    while (mAttrValue.lookingAt(pos, status)) {  // loop runs once per attribute on this element.
+        UnicodeString attName  = mAttrValue.group(1, status);
+        UnicodeString attValue = mAttrValue.group(2, status);
+
+        // Trim the quotes from the att value.  These are left over from the original regex
+        //   that parsed the attribute, which couldn't conveniently strip them.
+        attValue.remove(0,1);                    // one char from the beginning
+        attValue.truncate(attValue.length()-1);  // and one from the end.
+
+        // XML Attribute value normalization.
+        // This is one of the really screwy parts of the XML spec.
+        // See http://www.w3.org/TR/2004/REC-xml11-20040204/#AVNormalize
+        // Note that non-validating parsers must treat all entities as type CDATA
+        //   which simplifies things some.
+
+        // Att normalization step 1:  normalize any newlines in the attribute value
+        mNewLineNormalizer.reset(attValue);
+        attValue = mNewLineNormalizer.replaceAll(fOneLF, status);
+
+        // Next change all xml white space chars to plain \u0020 spaces.
+        mAttrNormalizer.reset(attValue);
+        attValue = mAttrNormalizer.replaceAll(oneSpace, status);
+
+        // Replace character entities.
+        replaceCharRefs(attValue, status);
+
+        // Save the attribute name and value in our document structure.
+        // The name is interned; the value is a separately allocated clone.
+        el->fAttNames.addElement((void *)intern(attName, status), status);
+        el->fAttValues.addElement(attValue.clone(), status);
+        pos = mAttrValue.end(2, status);
+    }
+    fPos = mEl.end(0, status);
+    return el;
+}

+//
+//   parseMisc
+//      Skip over XML "Misc" [production #27]: any run of processing
+//      instructions, white space and comments starting at fPos.
+//      Misc is also allowed after the document element, so the end of
+//      the input is checked before every match attempt.
+//
+void
+UXMLParser::parseMisc(UErrorCode &status)  {
+    while (fPos < mXMLPI.input().length()) {
+        if (mXMLPI.lookingAt(fPos, status)) {
+            fPos = mXMLPI.end(status);
+        } else if (mXMLSP.lookingAt(fPos, status)) {
+            fPos = mXMLSP.end(status);
+        } else if (mXMLComment.lookingAt(fPos, status)) {
+            fPos = mXMLComment.end(status);
+        } else {
+            // Nothing that counts as Misc starts here.
+            break;
+        }
+    }
+}

+//
+//  scanContent
+//      Return the character data that starts at fPos, with its new-line
+//      sequences normalized, and advance fPos past it.  The returned string
+//      is empty if the character data matcher does not match at fPos.
+//
+UnicodeString
+UXMLParser::scanContent(UErrorCode &status) {
+    UnicodeString  content;
+    if (!mXMLCharData.lookingAt(fPos, status)) {
+        return content;
+    }
+
+    content = mXMLCharData.group((int32_t)0, status);
+
+    // New-lines are normalized here, before any char ref substitution.
+    mNewLineNormalizer.reset(content);
+    content = mNewLineNormalizer.replaceAll(fOneLF, status);
+
+    // TODO:  handle CDATA
+    fPos = mXMLCharData.end(0, status);
+    return content;
+}

+//
+//   replaceCharRefs
+//
+//      replace the char entities &lt;  &amp; &#123; &#x12ab; etc. in a string
+//       with the corresponding actual character.
+//
+//      s        in/out: the text to process; replaced by the substituted text.
+//               Left unchanged if status indicates a failure.
+//      status   in/out ICU error code.
+//
+//      References that are not recognized, and numeric references above U+10FFFF,
+//      are copied through unchanged.
+//
+void
+UXMLParser::replaceCharRefs(UnicodeString &s, UErrorCode &status) {
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    UnicodeString result;
+    UnicodeString replacement;
+    int32_t       i;
+    int32_t       copiedUpTo = 0;   // index in s of the first char not yet copied to result
+
+    mAmps.reset(s);
+    // See the initialization for the regex matcher mAmps.
+    //    Which entity we've matched is determined by which capture group has content,
+    //    which is flagged by start() of that group not being -1.
+    while (mAmps.find()) {
+        if (mAmps.start(1, status) != -1) {
+            replacement.setTo((UChar)x_AMP);
+        } else if (mAmps.start(2, status) != -1) {
+            replacement.setTo((UChar)x_LT);
+        } else if (mAmps.start(3, status) != -1) {
+            replacement.setTo((UChar)x_GT);
+        } else if (mAmps.start(4, status) != -1) {
+            replacement.setTo((UChar)x_APOS);
+        } else if (mAmps.start(5, status) != -1) {
+            replacement.setTo((UChar)x_QUOT);
+        } else if (mAmps.start(6, status) != -1) {
+            UnicodeString hexString = mAmps.group(6, status);
+            // Unsigned accumulator: up to 8 hex digits fit without overflow.
+            uint32_t val = 0;
+            for (i=0; i<hexString.length(); i++) {
+                val = (val << 4) + (uint32_t)u_digit(hexString.charAt(i), 16);
+            }
+            if (val <= 0x10ffff) {
+                replacement.setTo((UChar32)val);
+            } else {
+                // Not a code point.  Leave the reference alone.
+                replacement = mAmps.group((int32_t)0, status);
+            }
+        } else if (mAmps.start(7, status) != -1) {
+            UnicodeString decimalString = mAmps.group(7, status);
+            // Up to 8 decimal digits fit without overflow.
+            uint32_t val = 0;
+            for (i=0; i<decimalString.length(); i++) {
+                val = val*10 + (uint32_t)u_digit(decimalString.charAt(i), 10);
+            }
+            if (val <= 0x10ffff) {
+                replacement.setTo((UChar32)val);
+            } else {
+                // Not a code point.  Leave the reference alone.
+                replacement = mAmps.group((int32_t)0, status);
+            }
+        } else {
+            // An unrecognized &entity;  Leave it alone.
+            //  TODO:  check that it really looks like an entity, and is not some
+            //         random & in the text.
+            replacement = mAmps.group((int32_t)0, status);
+        }
+
+        // Assemble the output by hand rather than with appendReplacement():
+        // the replacement is literal text and may itself be a '\' or '$',
+        // which appendReplacement() would interpret as pattern syntax.
+        int32_t matchStart = mAmps.start(status);
+        int32_t matchEnd   = mAmps.end(status);
+        if (U_FAILURE(status)) {
+            return;
+        }
+        result.append(s, copiedUpTo, matchStart - copiedUpTo);
+        result.append(replacement);
+        copiedUpTo = matchEnd;
+    }
+    // Copy the text that follows the last reference.
+    result.append(s, copiedUpTo, s.length() - copiedUpTo);
+    s = result;
+}

+//
+//  error
+//      Report a parse error on stderr together with the 1-based number of the
+//      line that contains the current parse position fPos, and set status to
+//      U_PARSE_ERROR unless it already holds a failure.
+//
+//      message  text to print.
+//      status   in/out ICU error code.
+//
+void
+UXMLParser::error(const char *message, UErrorCode &status) {
+    // TODO:  something better here...
+    const UnicodeString &src=mXMLDecl.input();
+
+    // The line number is one more than the number of new-lines before fPos.
+    int32_t line = 1;
+    for (int32_t ci = src.indexOf((UChar)0x0a, 0);
+         ci >= 0 && ci < fPos;
+         ci = src.indexOf((UChar)0x0a, ci+1)) {
+        line++;
+    }
+    fprintf(stderr, "Error: %s at line %d\n", message, (int)line);
+    if (U_SUCCESS(status)) {
+        status = U_PARSE_ERROR;
+    }
+}

+// intern strings like in Java
+//
+//   s          the name to look up or add.
+//   errorCode  in/out ICU error code; set to U_MEMORY_ALLOCATION_ERROR if the
+//              name is still missing from the table after trying to add it.
+//   returns    the pointer to the table's key for this name.  Equal names
+//              yield the same pointer, so callers can compare names by pointer.
+//              NULL if the name could not be added.
+const UnicodeString *
+UXMLParser::intern(const UnicodeString &s, UErrorCode &errorCode) {
+    const UHashElement *he=fNames.find(s);
+    if(he==NULL) {
+        // add this new name
+        // Only the key matters; the stored value is never read in this file.
+        // NOTE(review): a non-zero value is used because newer ICU hash tables are
+        // believed to treat storing 0 as a removal - confirm against the uhash in use.
+        fNames.puti(s, 1, errorCode);
+        he=fNames.find(s);
+        if(he==NULL) {
+            // the name could not be added
+            if(U_SUCCESS(errorCode)) {
+                errorCode=U_MEMORY_ALLOCATION_ERROR;
+            }
+            return NULL;
+        }
+    }
+    // return the hashed key pointer
+    return (const UnicodeString *)he->key.pointer;
+}

+// Look up a name without adding it.
+// Returns the name table's key pointer for s, or NULL if s is not in the table.
+const UnicodeString *
+UXMLParser::findName(const UnicodeString &s) const {
+    const UHashElement *entry=fNames.find(s);
+    if(entry==NULL) {
+        // unknown name
+        return NULL;
+    }
+    // a known name, return its hashed key pointer
+    return (const UnicodeString *)entry->key.pointer;
+}

+// UXMLElement ------------------------------------------------------------- ***
+
+// Construct an element with the given parser and name pointers, empty
+// attribute and child vectors, and no parent.
+UXMLElement::UXMLElement(const UXMLParser *parser, const UnicodeString *name, UErrorCode &errorCode) :
+    fParser(parser),
+    fName(name),
+    fAttNames(errorCode),
+    fAttValues(errorCode),
+    fChildren(errorCode),
+    fParent(NULL)
+{
+}

+// Delete the attribute values and the child nodes held by this element,
+// each vector from its last entry to its first.
+UXMLElement::~UXMLElement() {
+    // attribute names are owned by the UXMLParser, don't delete them here
+    int idx=fAttValues.size();
+    while(--idx>=0) {
+        delete (UObject *)fAttValues.elementAt(idx);
+    }
+
+    idx=fChildren.size();
+    while(--idx>=0) {
+        delete (UObject *)fChildren.elementAt(idx);
+    }
+}

+// Returns a reference to the string that fName points to.
+const UnicodeString &
+UXMLElement::getTagName() const {
+    const UnicodeString &tagName=*fName;
+    return tagName;
+}

+// Returns the text collected by appendText() into an initially empty string.
+// recurse is passed through to appendText().
+UnicodeString
+UXMLElement::getText(UBool recurse) const {
+    UnicodeString collected;
+    appendText(collected, recurse);
+    return collected;
+}

+// Append the text of this element's string children to text, in child order.
+// When recurse is true, each non-string child is treated as an element and
+// its text is appended in the same way.
+void
+UXMLElement::appendText(UnicodeString &text, UBool recurse) const {
+    const int32_t childCount=fChildren.size();
+    for(int32_t idx=0; idx<childCount; ++idx) {
+        const UObject *child=(const UObject *)fChildren.elementAt(idx);
+        const UnicodeString *str=dynamic_cast<const UnicodeString *>(child);
+        if(str==NULL) {
+            if(recurse) /* must be a UXMLElement */ {
+                ((const UXMLElement *)child)->appendText(text, recurse);
+            }
+            continue;
+        }
+        text.append(*str);
+    }
+}

+// Returns the number of entries in the attribute name vector.
+int32_t
+UXMLElement::countAttributes() const {
+    const int32_t attributeCount=fAttNames.size();
+    return attributeCount;
+}

+// Copy the name and value of the i-th attribute into name and value.
+// Returns &value, or NULL (leaving both strings untouched) if i is out of range.
+const UnicodeString *
+UXMLElement::getAttribute(int32_t i, UnicodeString &name, UnicodeString &value) const {
+    if(i<0 || i>=fAttNames.size()) {
+        return NULL;
+    }
+    const UnicodeString *storedName=(const UnicodeString *)fAttNames.elementAt(i);
+    name.setTo(*storedName);
+    const UnicodeString *storedValue=(const UnicodeString *)fAttValues.elementAt(i);
+    value.setTo(*storedValue);
+    return &value; // or return (UnicodeString *)fAttValues.elementAt(i);
+}

+// Returns a pointer to the stored value of the first attribute with the given
+// name, or NULL if this element has no such attribute.
+const UnicodeString *
+UXMLElement::getAttribute(const UnicodeString &name) const {
+    // Attribute names are matched by their interned pointer,
+    // not by comparing string contents.
+    const UnicodeString *interned=fParser->findName(name);
+    if(interned==NULL) {
+        return NULL; // no such attribute seen by the parser at all
+    }
+
+    const int32_t attributeCount=fAttNames.size();
+    int32_t idx=0;
+    while(idx<attributeCount) {
+        if(interned==(const UnicodeString *)fAttNames.elementAt(idx)) {
+            return (const UnicodeString *)fAttValues.elementAt(idx);
+        }
+        ++idx;
+    }
+    return NULL;
+}

+// Returns the number of entries in the child vector.
+int32_t
+UXMLElement::countChildren() const {
+    const int32_t childCount=fChildren.size();
+    return childCount;
+}

+// Returns the i-th child and sets type to UXML_NODE_TYPE_ELEMENT if the child
+// is a UXMLElement, otherwise to UXML_NODE_TYPE_STRING.
+// Returns NULL, leaving type untouched, if i is out of range.
+const UObject *
+UXMLElement::getChild(int32_t i, UXMLNodeType &type) const {
+    if(i<0 || i>=fChildren.size()) {
+        return NULL;
+    }
+    const UObject *child=(const UObject *)fChildren.elementAt(i);
+    const UBool isElement=dynamic_cast<const UXMLElement *>(child)!=NULL;
+    type= isElement ? UXML_NODE_TYPE_ELEMENT : UXML_NODE_TYPE_STRING;
+    return child;
+}

+// Returns the first element child at index i or later, or NULL if there is
+// none or if i is negative.  i is advanced past every child that was examined,
+// so the returned element is at index i-1.
+const UXMLElement *
+UXMLElement::nextChildElement(int32_t &i) const {
+    if(i<0) {
+        return NULL;
+    }
+
+    for(const int32_t childCount=fChildren.size(); i<childCount;) {
+        const UObject *child=(const UObject *)fChildren.elementAt(i++);
+        const UXMLElement *found=dynamic_cast<const UXMLElement *>(child);
+        if(found!=NULL) {
+            return found;
+        }
+    }
+    return NULL;
+}

+// Returns the first child element with the given tag name, or NULL if there
+// is no such child.
+const UXMLElement *
+UXMLElement::getChildElement(const UnicodeString &name) const {
+    // Element names are matched by their interned pointer,
+    // not by comparing string contents.
+    const UnicodeString *interned=fParser->findName(name);
+    if(interned==NULL) {
+        return NULL; // no such element seen by the parser at all
+    }
+
+    const int32_t childCount=fChildren.size();
+    for(int32_t idx=0; idx<childCount; ++idx) {
+        const UObject *child=(const UObject *)fChildren.elementAt(idx);
+        const UXMLElement *candidate=dynamic_cast<const UXMLElement *>(child);
+        if(candidate!=NULL && interned==candidate->fName) {
+            return candidate;
+        }
+    }
+    return NULL;
+}

+U_NAMESPACE_END

+#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_CONVERSION */

Property changes on: icu46/source/tools/toolutil/xmlparser.cpp

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « icu46/source/tools/toolutil/xmlparser.h ('k') | icu46/source/tools/tzcode/Makefile.in » ('j') | no next file with comments »