| OLD | NEW |
| 1 /* | 1 /* |
| 2 ******************************************************************************* | 2 ******************************************************************************* |
| 3 * Copyright (C) 2013, International Business Machines | 3 * Copyright (C) 2014, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
| 5 ******************************************************************************* | 5 ******************************************************************************* |
| 6 * dictionarydata.h | 6 * dictionarydata.h |
| 7 * | 7 * |
| 8 * created on: 2012may31 | 8 * created on: 2012may31 |
| 9 * created by: Markus W. Scherer & Maxime Serrano | 9 * created by: Markus W. Scherer & Maxime Serrano |
| 10 */ | 10 */ |
| 11 | 11 |
| 12 #ifndef __DICTIONARYDATA_H__ | 12 #ifndef __DICTIONARYDATA_H__ |
| 13 #define __DICTIONARYDATA_H__ | 13 #define __DICTIONARYDATA_H__ |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 59 | 59 |
| 60 /** | 60 /** |
| 61 * Wrapper class around generic dictionaries, implementing matches(). | 61 * Wrapper class around generic dictionaries, implementing matches(). |
| 62 * getType() should return a TRIE_TYPE_??? constant from DictionaryData. | 62 * getType() should return a TRIE_TYPE_??? constant from DictionaryData. |
| 63 * | 63 * |
| 64 * All implementations of this interface must be thread-safe if they are to be u
sed inside of the | 64 * All implementations of this interface must be thread-safe if they are to be u
sed inside of the |
| 65 * dictionary-based break iteration code. | 65 * dictionary-based break iteration code. |
| 66 */ | 66 */ |
| 67 class U_COMMON_API DictionaryMatcher : public UMemory { | 67 class U_COMMON_API DictionaryMatcher : public UMemory { |
| 68 public: | 68 public: |
| 69 DictionaryMatcher() {}; |
| 69 virtual ~DictionaryMatcher(); | 70 virtual ~DictionaryMatcher(); |
| 70 // this should emulate CompactTrieDictionary::matches() | 71 // this should emulate CompactTrieDictionary::matches() |
| 71 virtual int32_t matches(UText *text, int32_t maxLength, int32_t *lengths, in
t32_t &count, | 72 /* @param text The text in which to look for matching words. Matching
begins |
| 72 int32_t limit, int32_t *values = NULL) const = 0; | 73 * at the current position of the UText. |
| 74 * @param maxLength The max length of match to consider. Units are the nati
ve indexing |
| 75 * units of the UText. |
| 76 * @param limit Capacity of output arrays, which is also the maximum nu
mber of |
| 77 * matching words to be found. |
| 78 * @param lengths Output array, filled with the lengths of the matches, i
n order, |
| 79 * from shortest to longest. Lengths are in native indexin
g units |
| 80 * of the UText. May be NULL. |
| 81 * @param cpLengths Output array, filled with the lengths of the matches, i
n order, |
| 82 * from shortest to longest. Lengths are the number of Uni
code code points. |
| 83 * May be NULL. |
| 84 * @param values Output array, filled with the values associated with th
e words found. |
| 85 * May be NULL. |
| 86 * @param prefix Output parameter, the code point length of the prefix m
atch, even if that |
| 87 * prefix didn't lead to a complete word. Will always be >
= the cpLength |
| 88 * of the longest complete word matched. May be NULL. |
| 89 * @return Number of matching words found. |
| 90 */ |
| 91 virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit, |
| 92 int32_t *lengths, int32_t *cpLengths, int32_t *value
s, |
| 93 int32_t *prefix) const = 0; |
| 94 |
| 73 /** @return DictionaryData::TRIE_TYPE_XYZ */ | 95 /** @return DictionaryData::TRIE_TYPE_XYZ */ |
| 74 virtual int32_t getType() const = 0; | 96 virtual int32_t getType() const = 0; |
| 75 }; | 97 }; |
| 76 | 98 |
| 77 // Implementation of the DictionaryMatcher interface for a UCharsTrie dictionary | 99 // Implementation of the DictionaryMatcher interface for a UCharsTrie dictionary |
| 78 class U_COMMON_API UCharsDictionaryMatcher : public DictionaryMatcher { | 100 class U_COMMON_API UCharsDictionaryMatcher : public DictionaryMatcher { |
| 79 public: | 101 public: |
| 80 // constructs a new UCharsDictionaryMatcher. | 102 // constructs a new UCharsDictionaryMatcher. |
| 81 // The UDataMemory * will be closed on this object's destruction. | 103 // The UDataMemory * will be closed on this object's destruction. |
| 82 UCharsDictionaryMatcher(const UChar *c, UDataMemory *f) : characters(c), fil
e(f) { } | 104 UCharsDictionaryMatcher(const UChar *c, UDataMemory *f) : characters(c), fil
e(f) { } |
| 83 virtual ~UCharsDictionaryMatcher(); | 105 virtual ~UCharsDictionaryMatcher(); |
| 84 virtual int32_t matches(UText *text, int32_t maxLength, int32_t *lengths, in
t32_t &count, | 106 virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit, |
| 85 int32_t limit, int32_t *values = NULL) const; | 107 int32_t *lengths, int32_t *cpLengths, int32_t *value
s, |
| 108 int32_t *prefix) const; |
| 86 virtual int32_t getType() const; | 109 virtual int32_t getType() const; |
| 87 private: | 110 private: |
| 88 const UChar *characters; | 111 const UChar *characters; |
| 89 UDataMemory *file; | 112 UDataMemory *file; |
| 90 }; | 113 }; |
| 91 | 114 |
| 92 // Implementation of the DictionaryMatcher interface for a BytesTrie dictionary | 115 // Implementation of the DictionaryMatcher interface for a BytesTrie dictionary |
| 93 class U_COMMON_API BytesDictionaryMatcher : public DictionaryMatcher { | 116 class U_COMMON_API BytesDictionaryMatcher : public DictionaryMatcher { |
| 94 public: | 117 public: |
| 95 // constructs a new BytesDictionaryMatcher | 118 // constructs a new BytesDictionaryMatcher |
| 96 // the transform constant should be the constant read from the file, not a m
asked version! | 119 // the transform constant should be the constant read from the file, not a m
asked version! |
| 97 // the UDataMemory * fed in here will be closed on this object's destruction | 120 // the UDataMemory * fed in here will be closed on this object's destruction |
| 98 BytesDictionaryMatcher(const char *c, int32_t t, UDataMemory *f) | 121 BytesDictionaryMatcher(const char *c, int32_t t, UDataMemory *f) |
| 99 : characters(c), transformConstant(t), file(f) { } | 122 : characters(c), transformConstant(t), file(f) { } |
| 100 virtual ~BytesDictionaryMatcher(); | 123 virtual ~BytesDictionaryMatcher(); |
| 101 virtual int32_t matches(UText *text, int32_t maxLength, int32_t *lengths, in
t32_t &count, | 124 virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit, |
| 102 int32_t limit, int32_t *values = NULL) const; | 125 int32_t *lengths, int32_t *cpLengths, int32_t *value
s, |
| 126 int32_t *prefix) const; |
| 103 virtual int32_t getType() const; | 127 virtual int32_t getType() const; |
| 104 private: | 128 private: |
| 105 UChar32 transform(UChar32 c) const; | 129 UChar32 transform(UChar32 c) const; |
| 106 | 130 |
| 107 const char *characters; | 131 const char *characters; |
| 108 int32_t transformConstant; | 132 int32_t transformConstant; |
| 109 UDataMemory *file; | 133 UDataMemory *file; |
| 110 }; | 134 }; |
| 111 | 135 |
| 112 U_NAMESPACE_END | 136 U_NAMESPACE_END |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 156 * and do not occur in the dictionary. | 180 * and do not occur in the dictionary. |
| 157 * | 181 * |
| 158 * stringTrie; -- a serialized BytesTrie or UCharsTrie | 182 * stringTrie; -- a serialized BytesTrie or UCharsTrie |
| 159 * | 183 * |
| 160 * The dictionary maps strings to specific values (TRIE_HAS_VALUES bit set
in trieType), | 184 * The dictionary maps strings to specific values (TRIE_HAS_VALUES bit set
in trieType), |
| 161 * or it maps all strings to 0 (TRIE_HAS_VALUES bit not set). | 185 * or it maps all strings to 0 (TRIE_HAS_VALUES bit not set). |
| 162 */ | 186 */ |
| 163 | 187 |
| 164 #endif /* !UCONFIG_NO_BREAK_ITERATION */ | 188 #endif /* !UCONFIG_NO_BREAK_ITERATION */ |
| 165 #endif /* __DICTIONARYDATA_H__ */ | 189 #endif /* __DICTIONARYDATA_H__ */ |
| OLD | NEW |