Index: icu46/source/tools/toolutil/xmlparser.h |
=================================================================== |
--- icu46/source/tools/toolutil/xmlparser.h (revision 0) |
+++ icu46/source/tools/toolutil/xmlparser.h (revision 0) |
@@ -0,0 +1,245 @@ |
+/* |
+******************************************************************************* |
+* |
+* Copyright (C) 2004-2005, International Business Machines |
+* Corporation and others. All Rights Reserved. |
+* |
+******************************************************************************* |
+* file name: xmlparser.h |
+* encoding: US-ASCII |
+* tab size: 8 (not used) |
+* indentation:4 |
+* |
+* created on: 2004jul21 |
+* created by: Andy Heninger |
+* |
+* Tiny XML parser using ICU and intended for use in ICU tests and in build tools. |
+* Not suitable for production use. Not supported. |
+* Not conformant. Not efficient. |
+* But very small. |
+*/ |
+ |
+#ifndef __XMLPARSER_H__ |
+#define __XMLPARSER_H__ |
+ |
+#include "unicode/uobject.h" |
+#include "unicode/unistr.h" |
+#include "unicode/regex.h" |
+#include "uvector.h" |
+#include "hash.h" |
+ |
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_CONVERSION |
+ |
+enum UXMLNodeType { |
+ /** Node type string (text contents), stored as a UnicodeString. */ |
+ UXML_NODE_TYPE_STRING, |
+ /** Node type element, stored as a UXMLElement. */ |
+ UXML_NODE_TYPE_ELEMENT, |
+ UXML_NODE_TYPE_COUNT |
+}; |
+ |
+U_NAMESPACE_BEGIN |
+ |
+class UXMLParser; |
+ |
+/** |
+ * This class represents an element node in a parsed XML tree. |
+ */ |
+class U_TOOLUTIL_API UXMLElement : public UObject { |
+public: |
+ /** |
+ * Destructor. |
+ */ |
+ virtual ~UXMLElement(); |
+ |
+ /** |
+ * Get the tag name of this element. |
+ */ |
+ const UnicodeString &getTagName() const; |
+ /** |
+ * Get the text contents of the element. |
+ * Append the contents of all text child nodes. |
+ * @param recurse If TRUE, also recursively appends the contents of all |
+ * text child nodes of element children. |
+ * @return The text contents. |
+ */ |
+ UnicodeString getText(UBool recurse) const; |
+ /** |
+ * Get the number of attributes. |
+ */ |
+ int32_t countAttributes() const; |
+ /** |
+ * Get the i-th attribute. |
+ * @param i Index of the attribute. |
+ * @param name Output parameter, receives the attribute name. |
+ * @param value Output parameter, receives the attribute value. |
+ * @return A pointer to the attribute value (may be &value or a pointer to an |
+ * internal string object), or NULL if i is out of bounds. |
+ */ |
+ const UnicodeString *getAttribute(int32_t i, UnicodeString &name, UnicodeString &value) const; |
+ /** |
+ * Get the value of the attribute with the given name. |
+ * @param name Attribute name to be looked up. |
+ * @return A pointer to the attribute value, or NULL if this element |
+ * does not have this attribute. |
+ */ |
+ const UnicodeString *getAttribute(const UnicodeString &name) const; |
+ /** |
+ * Get the number of child nodes. |
+ */ |
+ int32_t countChildren() const; |
+ /** |
+ * Get the i-th child node. |
+ * @param i Index of the child node. |
+ * @param type The child node type. |
+ * @return A pointer to the child node object, or NULL if i is out of bounds. |
+ */ |
+ const UObject *getChild(int32_t i, UXMLNodeType &type) const; |
+ /** |
+ * Get the next child element node, skipping non-element child nodes. |
+ * @param i Enumeration index; initialize to 0 before getting the first child element. |
+ * @return A pointer to the next child element, or NULL if there is none. |
+ */ |
+ const UXMLElement *nextChildElement(int32_t &i) const; |
+ /** |
+ * Get the immediate child element with the given name. |
+ * If there are multiple child elements with this name, then return |
+ * the first one. |
+ * @param name Element name to be looked up. |
+ * @return A pointer to the element node, or NULL if this element |
+ * does not have this immediate child element. |
+ */ |
+ const UXMLElement *getChildElement(const UnicodeString &name) const; |
+ |
+ /** |
+ * ICU "poor man's RTTI", returns a UClassID for the actual class. |
+ */ |
+ virtual UClassID getDynamicClassID() const; |
+ |
+ /** |
+ * ICU "poor man's RTTI", returns a UClassID for this class. |
+ */ |
+ static UClassID U_EXPORT2 getStaticClassID(); |
+ |
+private: |
+ // prevent default construction etc. |
+ UXMLElement(); |
+ UXMLElement(const UXMLElement &other); |
+ UXMLElement &operator=(const UXMLElement &other); |
+ |
+ void appendText(UnicodeString &text, UBool recurse) const; |
+ |
+ friend class UXMLParser; |
+ |
+ UXMLElement(const UXMLParser *parser, const UnicodeString *name, UErrorCode &errorCode); |
+ |
+ const UXMLParser *fParser; |
+ const UnicodeString *fName; // The tag name of this element (owned by the UXMLParser) |
+ UnicodeString fContent; // The text content of this node. All element content is |
+ // concatenated even when there are intervening nested elements |
+ // (which doesn't happen with most xml files we care about) |
+ // Sections of content containing only white space are dropped, |
+ // which gets rid the bogus white space content from |
+ // elements which are primarily containers for nested elements. |
+ UVector fAttNames; // A vector containing the names of this element's attributes |
+ // The names are UnicodeString objects, owned by the UXMLParser. |
+ UVector fAttValues; // A vector containing the attribute values for |
+ // this element's attributes. The order is the same |
+ // as that of the attribute name vector. |
+ |
+ UVector fChildren; // The child nodes of this element (a Vector) |
+ |
+ UXMLElement *fParent; // A pointer to the parent element of this element. |
+}; |
+ |
+/** |
+ * A simple XML parser; it is neither efficient nor conformant and only useful for |
+ * restricted types of XML documents. |
+ * |
+ * The parse methods parse whole documents and return the parse trees via their |
+ * root elements. |
+ */ |
+class U_TOOLUTIL_API UXMLParser : public UObject { |
+public: |
+ /** |
+ * Create an XML parser. |
+ */ |
+ static UXMLParser *createParser(UErrorCode &errorCode); |
+ /** |
+ * Destructor. |
+ */ |
+ virtual ~UXMLParser(); |
+ |
+ /** |
+ * Parse an XML document, create the entire document tree, and |
+ * return a pointer to the root element of the parsed tree. |
+ * The caller must delete the element. |
+ */ |
+ UXMLElement *parse(const UnicodeString &src, UErrorCode &errorCode); |
+ /** |
+ * Parse an XML file, create the entire document tree, and |
+ * return a pointer to the root element of the parsed tree. |
+ * The caller must delete the element. |
+ */ |
+ UXMLElement *parseFile(const char *filename, UErrorCode &errorCode); |
+ |
+ /** |
+ * ICU "poor man's RTTI", returns a UClassID for the actual class. |
+ */ |
+ virtual UClassID getDynamicClassID() const; |
+ |
+ /** |
+ * ICU "poor man's RTTI", returns a UClassID for this class. |
+ */ |
+ static UClassID U_EXPORT2 getStaticClassID(); |
+ |
+private: |
+ // prevent default construction etc. |
+ UXMLParser(); |
+ UXMLParser(const UXMLParser &other); |
+ UXMLParser &operator=(const UXMLParser &other); |
+ |
+ // constructor |
+ UXMLParser(UErrorCode &status); |
+ |
+ void parseMisc(UErrorCode &status); |
+ UXMLElement *createElement(RegexMatcher &mEl, UErrorCode &status); |
+ void error(const char *message, UErrorCode &status); |
+ UnicodeString scanContent(UErrorCode &status); |
+ void replaceCharRefs(UnicodeString &s, UErrorCode &status); |
+ |
+ const UnicodeString *intern(const UnicodeString &s, UErrorCode &errorCode); |
+public: |
+ // public for UXMLElement only |
+ const UnicodeString *findName(const UnicodeString &s) const; |
+private: |
+ |
+ // There is one ICU regex matcher for each of the major XML syntax items |
+ // that are recognized. |
+ RegexMatcher mXMLDecl; |
+ RegexMatcher mXMLComment; |
+ RegexMatcher mXMLSP; |
+ RegexMatcher mXMLDoctype; |
+ RegexMatcher mXMLPI; |
+ RegexMatcher mXMLElemStart; |
+ RegexMatcher mXMLElemEnd; |
+ RegexMatcher mXMLElemEmpty; |
+ RegexMatcher mXMLCharData; |
+ RegexMatcher mAttrValue; |
+ RegexMatcher mAttrNormalizer; |
+ RegexMatcher mNewLineNormalizer; |
+ RegexMatcher mAmps; |
+ |
+ Hashtable fNames; // interned element/attribute name strings |
+ UStack fElementStack; // Stack holds the parent elements when nested |
+ // elements are being parsed. All items on this |
+ // stack are of type UXMLElement. |
+ int32_t fPos; // String index of the current scan position in |
+ // xml source (in fSrc). |
+ UnicodeString fOneLF; |
+}; |
+ |
+U_NAMESPACE_END |
+#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ |
+ |
+#endif |
Property changes on: icu46/source/tools/toolutil/xmlparser.h |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |