| Index: icu46/source/tools/toolutil/xmlparser.h
|
| ===================================================================
|
| --- icu46/source/tools/toolutil/xmlparser.h (revision 0)
|
| +++ icu46/source/tools/toolutil/xmlparser.h (revision 0)
|
| @@ -0,0 +1,245 @@
|
| +/*
|
| +*******************************************************************************
|
| +*
|
| +* Copyright (C) 2004-2005, International Business Machines
|
| +* Corporation and others. All Rights Reserved.
|
| +*
|
| +*******************************************************************************
|
| +* file name: xmlparser.h
|
| +* encoding: US-ASCII
|
| +* tab size: 8 (not used)
|
| +* indentation:4
|
| +*
|
| +* created on: 2004jul21
|
| +* created by: Andy Heninger
|
| +*
|
| +* Tiny XML parser using ICU and intended for use in ICU tests and in build tools.
|
| +* Not suitable for production use. Not supported.
|
| +* Not conformant. Not efficient.
|
| +* But very small.
|
| +*/
|
| +
|
| +#ifndef __XMLPARSER_H__
|
| +#define __XMLPARSER_H__
|
| +
|
| +#include "unicode/uobject.h"
|
| +#include "unicode/unistr.h"
|
| +#include "unicode/regex.h"
|
| +#include "uvector.h"
|
| +#include "hash.h"
|
| +
|
| +#if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_CONVERSION
|
| +
|
| +enum UXMLNodeType { // Type tag for the child nodes of a UXMLElement; see UXMLElement::getChild().
|
| + /** Text node (text contents); the node object is a UnicodeString. */
|
| + UXML_NODE_TYPE_STRING,
|
| + /** Element node; the node object is a UXMLElement. */
|
| + UXML_NODE_TYPE_ELEMENT,
|
| + UXML_NODE_TYPE_COUNT // Number of node type constants above; not itself a node type.
|
| +};
|
| +
|
| +U_NAMESPACE_BEGIN
|
| +
|
| +class UXMLParser;
|
| +
|
| +/**
|
| + * This class represents an element node in a parsed XML tree.
|
| + */
|
| +class U_TOOLUTIL_API UXMLElement : public UObject {
|
| +public:
|
| + /**
|
| + * Destructor.
|
| + */
|
| + virtual ~UXMLElement();
|
| +
|
| + /**
|
| + * Get the tag name of this element.
|
| + */
|
| + const UnicodeString &getTagName() const;
|
| + /**
|
| + * Get the text contents of the element.
|
| + * Appends the contents of all text child nodes.
|
| + * @param recurse If TRUE, also recursively appends the contents of all
|
| + * text child nodes of element children.
|
| + * @return The text contents.
|
| + */
|
| + UnicodeString getText(UBool recurse) const;
|
| + /**
|
| + * Get the number of attributes.
|
| + */
|
| + int32_t countAttributes() const;
|
| + /**
|
| + * Get the i-th attribute.
|
| + * @param i Index of the attribute.
|
| + * @param name Output parameter, receives the attribute name.
|
| + * @param value Output parameter, receives the attribute value.
|
| + * @return A pointer to the attribute value (may be &value or a pointer to an
|
| + * internal string object), or NULL if i is out of bounds.
|
| + */
|
| + const UnicodeString *getAttribute(int32_t i, UnicodeString &name, UnicodeString &value) const;
|
| + /**
|
| + * Get the value of the attribute with the given name.
|
| + * @param name Attribute name to be looked up.
|
| + * @return A pointer to the attribute value, or NULL if this element
|
| + * does not have this attribute.
|
| + */
|
| + const UnicodeString *getAttribute(const UnicodeString &name) const;
|
| + /**
|
| + * Get the number of child nodes.
|
| + */
|
| + int32_t countChildren() const;
|
| + /**
|
| + * Get the i-th child node.
|
| + * @param i Index of the child node.
|
| + * @param type Output parameter, receives the child node type (see UXMLNodeType).
|
| + * @return A pointer to the child node object, or NULL if i is out of bounds.
|
| + */
|
| + const UObject *getChild(int32_t i, UXMLNodeType &type) const;
|
| + /**
|
| + * Get the next child element node, skipping non-element child nodes.
|
| + * @param i Enumeration index (in/out); initialize to 0 before getting the first child element.
|
| + * @return A pointer to the next child element, or NULL if there is none.
|
| + */
|
| + const UXMLElement *nextChildElement(int32_t &i) const;
|
| + /**
|
| + * Get the immediate child element with the given name.
|
| + * If there are multiple child elements with this name, then return
|
| + * the first one.
|
| + * @param name Element name to be looked up.
|
| + * @return A pointer to the element node, or NULL if this element
|
| + * does not have this immediate child element.
|
| + */
|
| + const UXMLElement *getChildElement(const UnicodeString &name) const;
|
| +
|
| + /**
|
| + * ICU "poor man's RTTI", returns a UClassID for the actual class.
|
| + */
|
| + virtual UClassID getDynamicClassID() const;
|
| +
|
| + /**
|
| + * ICU "poor man's RTTI", returns a UClassID for this class.
|
| + */
|
| + static UClassID U_EXPORT2 getStaticClassID();
|
| +
|
| +private:
|
| + // Declared private to prevent default construction, copy construction and assignment.
|
| + UXMLElement();
|
| + UXMLElement(const UXMLElement &other);
|
| + UXMLElement &operator=(const UXMLElement &other);
|
| +
|
| + void appendText(UnicodeString &text, UBool recurse) const;
|
| +
|
| + friend class UXMLParser;
|
| +
|
| + UXMLElement(const UXMLParser *parser, const UnicodeString *name, UErrorCode &errorCode);
|
| +
|
| + const UXMLParser *fParser;
|
| + const UnicodeString *fName; // The tag name of this element (owned by the UXMLParser)
|
| + UnicodeString fContent; // The text content of this node. All element content is
|
| + // concatenated even when there are intervening nested elements
|
| + // (which doesn't happen with most xml files we care about).
|
| + // Sections of content containing only white space are dropped,
|
| + // which gets rid of the bogus white space content from
|
| + // elements which are primarily containers for nested elements.
|
| + UVector fAttNames; // A vector containing the names of this element's attributes.
|
| + // The names are UnicodeString objects, owned by the UXMLParser.
|
| + UVector fAttValues; // A vector containing the attribute values for
|
| + // this element's attributes. The order is the same
|
| + // as that of the attribute name vector.
|
| +
|
| + UVector fChildren; // The child nodes of this element (a UVector); see UXMLNodeType for the node kinds.
|
| +
|
| + UXMLElement *fParent; // A pointer to the parent element of this element.
|
| +};
|
| +
|
| +/**
|
| + * A simple XML parser; it is neither efficient nor conformant and only useful for
|
| + * restricted types of XML documents.
|
| + *
|
| + * The parse methods parse whole documents and return the parse trees via their
|
| + * root elements.
|
| + */
|
| +class U_TOOLUTIL_API UXMLParser : public UObject {
|
| +public:
|
| + /**
|
| + * Create an XML parser.
|
| + */
|
| + static UXMLParser *createParser(UErrorCode &errorCode);
|
| + /**
|
| + * Destructor.
|
| + */
|
| + virtual ~UXMLParser();
|
| +
|
| + /**
|
| + * Parse an XML document, create the entire document tree, and
|
| + * return a pointer to the root element of the parsed tree.
|
| + * The caller owns the returned element and must delete it.
|
| + */
|
| + UXMLElement *parse(const UnicodeString &src, UErrorCode &errorCode);
|
| + /**
|
| + * Parse an XML file, create the entire document tree, and
|
| + * return a pointer to the root element of the parsed tree.
|
| + * The caller owns the returned element and must delete it.
|
| + */
|
| + UXMLElement *parseFile(const char *filename, UErrorCode &errorCode);
|
| +
|
| + /**
|
| + * ICU "poor man's RTTI", returns a UClassID for the actual class.
|
| + */
|
| + virtual UClassID getDynamicClassID() const;
|
| +
|
| + /**
|
| + * ICU "poor man's RTTI", returns a UClassID for this class.
|
| + */
|
| + static UClassID U_EXPORT2 getStaticClassID();
|
| +
|
| +private:
|
| + // Declared private to prevent default construction, copy construction and assignment.
|
| + UXMLParser();
|
| + UXMLParser(const UXMLParser &other);
|
| + UXMLParser &operator=(const UXMLParser &other);
|
| +
|
| + // The private constructor; presumably the one used by createParser() -- TODO confirm.
|
| + UXMLParser(UErrorCode &status);
|
| +
|
| + void parseMisc(UErrorCode &status);
|
| + UXMLElement *createElement(RegexMatcher &mEl, UErrorCode &status);
|
| + void error(const char *message, UErrorCode &status);
|
| + UnicodeString scanContent(UErrorCode &status);
|
| + void replaceCharRefs(UnicodeString &s, UErrorCode &status);
|
| +
|
| + const UnicodeString *intern(const UnicodeString &s, UErrorCode &errorCode);
|
| +public:
|
| + // Public only so that UXMLElement can call it; not intended as general API.
|
| + const UnicodeString *findName(const UnicodeString &s) const;
|
| +private:
|
| +
|
| + // There is one ICU regex matcher for each of the major XML syntax items
|
| + // that are recognized.
|
| + RegexMatcher mXMLDecl;
|
| + RegexMatcher mXMLComment;
|
| + RegexMatcher mXMLSP;
|
| + RegexMatcher mXMLDoctype;
|
| + RegexMatcher mXMLPI;
|
| + RegexMatcher mXMLElemStart;
|
| + RegexMatcher mXMLElemEnd;
|
| + RegexMatcher mXMLElemEmpty;
|
| + RegexMatcher mXMLCharData;
|
| + RegexMatcher mAttrValue;
|
| + RegexMatcher mAttrNormalizer;
|
| + RegexMatcher mNewLineNormalizer;
|
| + RegexMatcher mAmps;
|
| +
|
| + Hashtable fNames; // interned element/attribute name strings
|
| + UStack fElementStack; // Stack holds the parent elements when nested
|
| + // elements are being parsed. All items on this
|
| + // stack are of type UXMLElement.
|
| + int32_t fPos; // String index of the current scan position in
|
| + // the xml source. NOTE(review): the old comment said "in fSrc", but no fSrc member is declared here.
|
| + UnicodeString fOneLF; // presumably a string holding a single LF for newline normalization -- TODO confirm
|
| +};
|
| +
|
| +U_NAMESPACE_END
|
| +#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_CONVERSION */
|
| +
|
| +#endif
|
|
|
| Property changes on: icu46/source/tools/toolutil/xmlparser.h
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|