Index: icu46/source/i18n/brktrans.cpp |
=================================================================== |
--- icu46/source/i18n/brktrans.cpp (revision 0) |
+++ icu46/source/i18n/brktrans.cpp (revision 0) |
@@ -0,0 +1,188 @@ |
+/* |
+********************************************************************** |
+* Copyright (C) 2008-2010, International Business Machines |
+* Corporation and others. All Rights Reserved. |
+********************************************************************** |
+* Date Name Description |
+* 05/11/2008 Andy Heninger Port from Java |
+********************************************************************** |
+*/ |
+ |
+#include "unicode/utypes.h" |
+ |
+#if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION |
+ |
+#include "unicode/unifilt.h" |
+#include "unicode/uchar.h" |
+#include "unicode/uniset.h" |
+#include "unicode/brkiter.h" |
+#include "brktrans.h" |
+#include "unicode/uchar.h" |
+#include "cmemory.h" |
+#include "uprops.h" |
+#include "uinvchar.h" |
+#include "util.h" |
+#include "uvectr32.h" |
+ |
+U_NAMESPACE_BEGIN |
+ |
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator) |
+ |
+static const UChar SPACE = 32; // ' ' (U+0020); initial value of fInsertion |
+ |
+ |
+/** |
+ * Builds the "Any-BreakInternal" transliterator. The insertion string starts |
+ * out as a single space; the break iterator is left unset for now. |
+ */ |
+BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter) : |
+    Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter), |
+    fInsertion(SPACE) { |
+        UErrorCode ec = U_ZERO_ERROR; |
+        boundaries = new UVector32(ec); |
+        bi = NULL; |
+} |
+ |
+ |
+/** |
+ * Destructor. Frees the break iterator and the boundary stack. |
+ */ |
+BreakTransliterator::~BreakTransliterator() { |
+    delete bi; |
+    delete boundaries; |
+    bi = NULL; |
+    boundaries = NULL; |
+} |
+ |
+/** |
+ * Copy constructor. Clones the source's break iterator when it has one and |
+ * copies its insertion string. The boundary stack is not copied; the new |
+ * object gets a freshly allocated, empty one. |
+ */ |
+BreakTransliterator::BreakTransliterator(const BreakTransliterator& o) : |
+    Transliterator(o), |
+    fInsertion(o.fInsertion) { |
+        bi = (o.bi != NULL) ? o.bi->clone() : NULL; |
+        // Start with an empty stack; the source's contents are not carried over. |
+        UErrorCode ec = U_ZERO_ERROR; |
+        boundaries = new UVector32(ec); |
+} |
+ |
+ |
+// Transliterator API. |
+// Returns a newly allocated copy built with the copy constructor. |
+Transliterator* BreakTransliterator::clone(void) const { |
+    BreakTransliterator *copy = new BreakTransliterator(*this); |
+    return copy; |
+} |
+ |
+/** |
+ * Implements {@link Transliterator#handleTransliterate}. |
+ * Inserts fInsertion at each word boundary below offsets.limit that has a |
+ * letter or mark on both sides, then shifts the offsets by the inserted length. |
+ * No text is changed when the break iterator or boundary stack is unavailable. |
+ */ |
+void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets, |
+                                                    UBool isIncremental ) const { |
+    UErrorCode status = U_ZERO_ERROR; |
+    // The iterator is created on first use, which needs a non-const view of this. |
+    BreakTransliterator *nonConstThis = const_cast<BreakTransliterator *>(this); |
+    nonConstThis->getBreakIterator(); // Lazy-create it if necessary |
+    if (bi == NULL || boundaries == NULL) { |
+        // Creation or allocation failed: bail out instead of dereferencing NULL. |
+        // A non-incremental call still reports the range as consumed, as below. |
+        if (!isIncremental) { |
+            offsets.start = offsets.limit; |
+        } |
+        return; |
+    } |
+    boundaries->removeAllElements(); |
+    UnicodeString sText = replaceableAsString(text); |
+    bi->setText(sText); |
+    bi->preceding(offsets.start); |
+    // To make things much easier, we will stack the boundaries, and then insert at the end. |
+    // generally, we won't need too many, since we will be filtered. |
+    int32_t boundary; |
+    for(boundary = bi->next(); boundary != UBRK_DONE && boundary < offsets.limit; boundary = bi->next()) { |
+        if (boundary == 0) continue; |
+        // HACK: Check to see that the preceding item was a letter or mark ... |
+        UChar32 cp = sText.char32At(boundary-1); |
+        int type = u_charType(cp); |
+        if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue; |
+ |
+        // ... and that the following item is one as well. |
+        cp = sText.char32At(boundary); |
+        type = u_charType(cp); |
+        if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue; |
+        boundaries->addElement(boundary, status); |
+    } |
+ |
+    int delta = 0; |
+    int lastBoundary = 0; |
+    if (boundaries->size() != 0) { // if we found something, adjust |
+        delta = boundaries->size() * fInsertion.length(); |
+        lastBoundary = boundaries->lastElementi(); |
+        // we do this from the end backwards, so that we don't have to keep updating. |
+        while (boundaries->size() > 0) { |
+            boundary = boundaries->popi(); |
+            text.handleReplaceBetween(boundary, boundary, fInsertion); |
+        } |
+    } |
+    // Now fix up the return values |
+    offsets.contextLimit += delta; |
+    offsets.limit += delta; |
+    offsets.start = isIncremental ? lastBoundary + delta : offsets.limit; |
+    // TODO: do something with U_FAILURE(status); |
+    // (need to look at transliterators overall, not just here.) |
+} |
+ |
+// |
+//  getInsertion()   Read access to the string inserted at each boundary. |
+// |
+const UnicodeString &BreakTransliterator::getInsertion() const { |
+    return this->fInsertion; |
+} |
+ |
+// |
+//  setInsertion()   Replaces the string inserted at each boundary. |
+// |
+void BreakTransliterator::setInsertion(const UnicodeString &insertion) { |
+    fInsertion = insertion; |
+} |
+ |
+// |
+//  getBreakIterator   Returns the word break iterator, creating it on the |
+//                     first call. Copied from Java, probably |
+//                     better to just create it in the constructor. |
+// |
+BreakIterator *BreakTransliterator::getBreakIterator() { |
+    if (bi != NULL) { |
+        return bi; |
+    } |
+    // Note: Thai breaking behavior is universal, it is not tied to the Thai locale. |
+    UErrorCode ec = U_ZERO_ERROR; |
+    bi = BreakIterator::createWordInstance(Locale::getEnglish(), ec); |
+    return bi; |
+} |
+ |
+// |
+//  replaceableAsString   Hack that gives the break iterator a UnicodeString |
+//                        view of the transliterator's Replaceable text. |
+//                        In practice the Replaceable is expected to be a |
+//                        UnicodeString already, so the whole-text extraction |
+//                        fallback should rarely be taken. |
+// |
+UnicodeString BreakTransliterator::replaceableAsString(Replaceable &r) { |
+    UnicodeString *asString = dynamic_cast<UnicodeString *>(&r); |
+    if (asString == NULL) { |
+        // Some other Replaceable: pull its whole contents out into a string. |
+        UnicodeString extracted; |
+        r.extractBetween(0, r.length(), extracted); |
+        return extracted; |
+    } |
+    return *asString; |
+} |
+ |
+U_NAMESPACE_END |
+ |
+#endif /* #if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION */ |
Property changes on: icu46/source/i18n/brktrans.cpp |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |