Index: icu46/source/common/unisetspan.h |
=================================================================== |
--- icu46/source/common/unisetspan.h (revision 0) |
+++ icu46/source/common/unisetspan.h (revision 0) |
@@ -0,0 +1,155 @@ |
+/* |
+****************************************************************************** |
+* |
+* Copyright (C) 2007, International Business Machines |
+* Corporation and others. All Rights Reserved. |
+* |
+****************************************************************************** |
+* file name: unisetspan.h |
+* encoding: US-ASCII |
+* tab size: 8 (not used) |
+* indentation:4 |
+* |
+* created on: 2007mar01 |
+* created by: Markus W. Scherer |
+*/ |
+ |
+#ifndef __UNISETSPAN_H__ |
+#define __UNISETSPAN_H__ |
+ |
+#include "unicode/utypes.h" |
+#include "unicode/uniset.h" |
+ |
+U_NAMESPACE_BEGIN |
+ |
+/* |
+ * Implement span() etc. for a set with strings. |
+ * Avoid recursion because of its exponential complexity. |
+ * Instead, try multiple paths at once and track them with an IndexList. |
+ */ |
+class UnicodeSetStringSpan : public UMemory { |
+public: |
+ /* |
+ * Which span() variant will be used? |
+ * The object is either built for one variant and used once, |
+ * or built for all and may be used many times. |
+ */ |
+ enum { |
+ FWD = 0x20, |
+ BACK = 0x10, |
+ UTF16 = 8, |
+ UTF8 = 4, |
+ CONTAINED = 2, |
+ NOT_CONTAINED = 1, |
+ |
+ ALL = 0x3f, |
+ |
+ FWD_UTF16_CONTAINED = FWD | UTF16 | CONTAINED, |
+ FWD_UTF16_NOT_CONTAINED = FWD | UTF16 | NOT_CONTAINED, |
+ FWD_UTF8_CONTAINED = FWD | UTF8 | CONTAINED, |
+ FWD_UTF8_NOT_CONTAINED = FWD | UTF8 | NOT_CONTAINED, |
+ BACK_UTF16_CONTAINED = BACK | UTF16 | CONTAINED, |
+ BACK_UTF16_NOT_CONTAINED= BACK | UTF16 | NOT_CONTAINED, |
+ BACK_UTF8_CONTAINED = BACK | UTF8 | CONTAINED, |
+ BACK_UTF8_NOT_CONTAINED = BACK | UTF8 | NOT_CONTAINED |
+ }; |
+ |
+ UnicodeSetStringSpan(const UnicodeSet &set, const UVector &setStrings, uint32_t which); |
+ |
+ // Copy constructor. Assumes which==ALL for a frozen set. |
+ UnicodeSetStringSpan(const UnicodeSetStringSpan &otherStringSpan, const UVector &newParentSetStrings); |
+ |
+ ~UnicodeSetStringSpan(); |
+ |
+ /* |
+ * Do the strings need to be checked in span() etc.? |
+ * @return TRUE if strings need to be checked (call span() here), |
+ * FALSE if not (use a BMPSet for best performance). |
+ */ |
+ inline UBool needsStringSpanUTF16(); |
+ inline UBool needsStringSpanUTF8(); |
+ |
+ // For fast UnicodeSet::contains(c). |
+ inline UBool contains(UChar32 c) const; |
+ |
+ int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const; |
+ |
+ int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const; |
+ |
+ int32_t spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const; |
+ |
+ int32_t spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const; |
+ |
+private: |
+ // Special spanLength byte values. |
+ enum { |
+ // The spanLength is >=0xfe. |
+ LONG_SPAN=0xfe, |
+ // All code points in the string are contained in the parent set. |
+ ALL_CP_CONTAINED=0xff |
+ }; |
+ |
+ // Add a starting or ending string character to the spanNotSet |
+ // so that a character span ends before any string. |
+ void addToSpanNotSet(UChar32 c); |
+ |
+ int32_t spanNot(const UChar *s, int32_t length) const; |
+ int32_t spanNotBack(const UChar *s, int32_t length) const; |
+ int32_t spanNotUTF8(const uint8_t *s, int32_t length) const; |
+ int32_t spanNotBackUTF8(const uint8_t *s, int32_t length) const; |
+ |
+ // Set for span(). Same as parent but without strings. |
+ UnicodeSet spanSet; |
+ |
+ // Set for span(not contained). |
+ // Same as spanSet, plus characters that start or end strings. |
+ UnicodeSet *pSpanNotSet; |
+ |
+ // The strings of the parent set. |
+ const UVector &strings; |
+ |
+ // Pointer to the UTF-8 string lengths. |
+ // Also pointer to further allocated storage for meta data and |
+ // UTF-8 string contents as necessary. |
+ int32_t *utf8Lengths; |
+ |
+ // Pointer to the part of the (utf8Lengths) memory block that stores |
+ // the lengths of span(), spanBack() etc. for each string. |
+ uint8_t *spanLengths; |
+ |
+ // Pointer to the part of the (utf8Lengths) memory block that stores |
+ // the UTF-8 versions of the parent set's strings. |
+ uint8_t *utf8; |
+ |
+ // Number of bytes for all UTF-8 versions of strings together. |
+ int32_t utf8Length; |
+ |
+ // Maximum lengths of relevant strings. |
+ int32_t maxLength16; |
+ int32_t maxLength8; |
+ |
+ // Set up for all variants of span()? |
+ UBool all; |
+ |
+ // Memory for small numbers and lengths of strings. |
+ // For example, for 8 strings: |
+ // 8 UTF-8 lengths, 8*4 bytes span lengths, 8*2 3-byte UTF-8 characters |
+ // = 112 bytes = int32_t[28]. |
+ int32_t staticLengths[32]; |
+}; |
+ |
+UBool UnicodeSetStringSpan::needsStringSpanUTF16() { |
+ return (UBool)(maxLength16!=0); |
+} |
+ |
+UBool UnicodeSetStringSpan::needsStringSpanUTF8() { |
+ return (UBool)(maxLength8!=0); |
+} |
+ |
+UBool UnicodeSetStringSpan::contains(UChar32 c) const { |
+ return spanSet.contains(c); |
+} |
+ |
+U_NAMESPACE_END |
+ |
+#endif |
Property changes on: icu46/source/common/unisetspan.h |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |