| Index: icu46/source/i18n/usrchimp.h
|
| ===================================================================
|
| --- icu46/source/i18n/usrchimp.h (revision 0)
|
| +++ icu46/source/i18n/usrchimp.h (revision 0)
|
| @@ -0,0 +1,140 @@
|
| +/*
|
| +**********************************************************************
|
| +* Copyright (C) 2001-2010 IBM and others. All rights reserved.
|
| +**********************************************************************
|
| +* Date Name Description
|
| +* 08/13/2001 synwee Creation.
|
| +**********************************************************************
|
| +*/
|
| +#ifndef USRCHIMP_H
|
| +#define USRCHIMP_H
|
| +
|
| +#include "unicode/utypes.h"
|
| +
|
| +#if !UCONFIG_NO_COLLATION
|
| +
|
| +#include "unicode/normalizer2.h"
|
| +#include "unicode/ucol.h"
|
| +#include "unicode/ucoleitr.h"
|
| +#include "unicode/ubrk.h"
|
| +
|
| +#define INITIAL_ARRAY_SIZE_ 256
|
| +#define MAX_TABLE_SIZE_ 257
|
| +
|
| +struct USearch {
|
| + // required since collation element iterator does not have a getText API
|
| + const UChar *text;
|
| + int32_t textLength; // exact length
|
| + UBool isOverlap;
|
| + UBool isCanonicalMatch;
|
| + int16_t elementComparisonType;
|
| + UBreakIterator *internalBreakIter; //internal character breakiterator
|
| + UBreakIterator *breakIter;
|
| + // value USEARCH_DONE is the default value
|
| + // if we are not at the start of the text or the end of the text,
|
| + // depending on the iteration direction and matchedIndex is USEARCH_DONE
|
| + // it means that we can't find any more matches in that particular direction
|
| + int32_t matchedIndex;
|
| + int32_t matchedLength;
|
| + UBool isForwardSearching;
|
| + UBool reset;
|
| +};
|
| +
|
| +struct UPattern {
|
| + const UChar *text;
|
| + int32_t textLength; // exact length
|
| + // length required for backwards ce comparison
|
| + int32_t CELength;
|
| + int32_t *CE;
|
| + int32_t CEBuffer[INITIAL_ARRAY_SIZE_];
|
| + int32_t PCELength;
|
| + int64_t *PCE;
|
| + int64_t PCEBuffer[INITIAL_ARRAY_SIZE_];
|
| + UBool hasPrefixAccents;
|
| + UBool hasSuffixAccents;
|
| + int16_t defaultShiftSize;
|
| + int16_t shift[MAX_TABLE_SIZE_];
|
| + int16_t backShift[MAX_TABLE_SIZE_];
|
| +};
|
| +
|
| +struct UStringSearch {
|
| + struct USearch *search;
|
| + struct UPattern pattern;
|
| + const UCollator *collator;
|
| + const U_NAMESPACE_QUALIFIER Normalizer2 *nfd;
|
| + // positions within the collation element iterator is used to determine
|
| + // if we are at the start of the text.
|
| + UCollationElements *textIter;
|
| + // utility collation element, used throughout program for temporary
|
| + // iteration.
|
| + UCollationElements *utilIter;
|
| + UBool ownCollator;
|
| + UCollationStrength strength;
|
| + uint32_t ceMask;
|
| + uint32_t variableTop;
|
| + UBool toShift;
|
| + UChar canonicalPrefixAccents[INITIAL_ARRAY_SIZE_];
|
| + UChar canonicalSuffixAccents[INITIAL_ARRAY_SIZE_];
|
| +};
|
| +
|
| +/**
|
| +* Exact matches without checking for the ends for extra accents.
|
| +* The match after the position within the collation element iterator is to be
|
| +* found.
|
| +* After a match is found the offset in the collation element iterator will be
|
| +* shifted to the start of the match.
|
| +* Implementation note:
|
| +* For tertiary we can't use the collator->tertiaryMask, that is a
|
| +* preprocessed mask that takes into account case options. since we are only
|
| +* concerned with exact matches, we don't need that.
|
| +* Alternate handling - since only the 16 most significant digits is only used,
|
| +* we can safely do a compare without masking if the ce is a variable, we mask
|
| +* and get only the primary values no shifting to quartenary is required since
|
| +* all primary values less than variabletop will need to be masked off anyway.
|
| +* If the end character is composite and the pattern ce does not match the text
|
| +* ce, we skip it until we find a match in the end composite character or when
|
| +* it has passed the character. This is so that we can match pattern "a" with
|
| +* the text "\u00e6"
|
| +* @param strsrch string search data
|
| +* @param status error status if any
|
| +* @return TRUE if an exact match is found, FALSE otherwise
|
| +*/
|
| +U_CFUNC
|
| +UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status);
|
| +
|
| +/**
|
| +* Canonical matches.
|
| +* According to the definition, matches found here will include the whole span
|
| +* of beginning and ending accents if it overlaps that region.
|
| +* @param strsrch string search data
|
| +* @param status error status if any
|
| +* @return TRUE if a canonical match is found, FALSE otherwise
|
| +*/
|
| +U_CFUNC
|
| +UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status);
|
| +
|
| +/**
|
| +* Gets the previous match.
|
| +* Comments follows from handleNextExact
|
| +* @param strsrch string search data
|
| +* @param status error status if any
|
| +* @return True if a exact math is found, FALSE otherwise.
|
| +*/
|
| +U_CFUNC
|
| +UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status);
|
| +
|
| +/**
|
| +* Canonical matches.
|
| +* According to the definition, matches found here will include the whole span
|
| +* of beginning and ending accents if it overlaps that region.
|
| +* @param strsrch string search data
|
| +* @param status error status if any
|
| +* @return TRUE if a canonical match is found, FALSE otherwise
|
| +*/
|
| +U_CFUNC
|
| +UBool usearch_handlePreviousCanonical(UStringSearch *strsrch,
|
| + UErrorCode *status);
|
| +
|
| +#endif /* #if !UCONFIG_NO_COLLATION */
|
| +
|
| +#endif
|
|
|
| Property changes on: icu46/source/i18n/usrchimp.h
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|