| Index: icu46/source/i18n/brktrans.cpp
|
| ===================================================================
|
| --- icu46/source/i18n/brktrans.cpp (revision 0)
|
| +++ icu46/source/i18n/brktrans.cpp (revision 0)
|
| @@ -0,0 +1,188 @@
|
| +/*
|
| +**********************************************************************
|
| +* Copyright (C) 2008-2010, International Business Machines
|
| +* Corporation and others. All Rights Reserved.
|
| +**********************************************************************
|
| +* Date Name Description
|
| +* 05/11/2008 Andy Heninger Port from Java
|
| +**********************************************************************
|
| +*/
|
| +
|
| +#include "unicode/utypes.h"
|
| +
|
| +#if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION
|
| +
|
| +#include "unicode/unifilt.h"
|
| +#include "unicode/uchar.h"
|
| +#include "unicode/uniset.h"
|
| +#include "unicode/brkiter.h"
|
| +#include "brktrans.h"
|
| +#include "unicode/uchar.h"
|
| +#include "cmemory.h"
|
| +#include "uprops.h"
|
| +#include "uinvchar.h"
|
| +#include "util.h"
|
| +#include "uvectr32.h"
|
| +
|
| +U_NAMESPACE_BEGIN
|
| +
|
| +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator)
|
| +
|
| +static const UChar SPACE = 32; // U+0020 SPACE: the initial value of fInsertion
|
| +
|
| +
|
| +/**
|
| + * Creates the "Any-BreakInternal" transliterator. The insertion string
|
| + * starts out as a single SPACE; the break iterator is created on first use.
|
| + */
|
| +BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter)
|
| +    : Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter),
|
| +      fInsertion(SPACE) {
|
| +    UErrorCode ec = U_ZERO_ERROR;
|
| +    boundaries = new UVector32(ec);
|
| +    bi = NULL;
|
| +}
|
| +
|
| +
|
| +/**
|
| + * Destructor. Frees the owned break iterator and boundary vector.
|
| + */
|
| +BreakTransliterator::~BreakTransliterator() {
|
| +    delete boundaries;
|
| +    boundaries = NULL;
|
| +    delete bi;
|
| +    bi = NULL;
|
| +}
|
| +
|
| +/**
|
| + * Copy constructor. Clones o's break iterator (if any) and copies its insertion string.
|
| + */
|
| +BreakTransliterator::BreakTransliterator(const BreakTransliterator& o)
|
| +    : Transliterator(o) {
|
| +    // The boundary vector is scratch space, so the copy starts with a
|
| +    // fresh, empty one instead of duplicating o's contents.
|
| +    UErrorCode ec = U_ZERO_ERROR;
|
| +    boundaries = new UVector32(ec);
|
| +    fInsertion = o.fInsertion;
|
| +    // o may not have created its break iterator yet.
|
| +    bi = (o.bi == NULL) ? NULL : o.bi->clone();
|
| +}
|
| +
|
| +
|
| +/**
|
| + * Transliterator API: returns a newly allocated copy of this object.
|
| + */
|
| +Transliterator* BreakTransliterator::clone(void) const {
|
| + return new BreakTransliterator(*this);
|
| +}
|
| +
|
| +/**
|
| + * Implements {@link Transliterator#handleTransliterate}.
|
| + * Inserts fInsertion at each word boundary below offsets.limit that has a
|
| + * letter or mark on both sides, then grows offsets by the inserted length.
|
| + * If bi or boundaries is NULL, text is left unmodified.
|
| + */
|
| +void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
|
| +                                              UBool isIncremental ) const {
|
| +    BreakTransliterator *nonConstThis = const_cast<BreakTransliterator *>(this);
|
| +    nonConstThis->getBreakIterator(); // Lazy-create it if necessary
|
| +    if (bi == NULL || boundaries == NULL) {
|
| +        // Nothing to work with; a non-incremental call must still end with start == limit.
|
| +        if (!isIncremental) offsets.start = offsets.limit;
|
| +        return;
|
| +    }
|
| +
|
| +    UErrorCode status = U_ZERO_ERROR;
|
| +    boundaries->removeAllElements();
|
| +    UnicodeString sText = replaceableAsString(text);
|
| +    bi->setText(sText);
|
| +    bi->preceding(offsets.start);
|
| +
|
| +    // To make things much easier, we will stack the boundaries, and then insert at the end.
|
| +    // Generally, we won't need too many, since we will be filtered.
|
| +    int32_t boundary;
|
| +    for (boundary = bi->next(); boundary != UBRK_DONE && boundary < offsets.limit; boundary = bi->next()) {
|
| +        if (boundary == 0) continue;
|
| +        // HACK: only keep boundaries whose preceding and following characters are letters or marks.
|
| +        UChar32 cp = sText.char32At(boundary-1);
|
| +        int type = u_charType(cp);
|
| +        if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;
|
| +        cp = sText.char32At(boundary);
|
| +        type = u_charType(cp);
|
| +        if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;
|
| +        boundaries->addElement(boundary, status);
|
| +    }
|
| +
|
| +    int delta = 0;
|
| +    int lastBoundary = 0;
|
| +
|
| +    if (boundaries->size() != 0) { // if we found something, adjust
|
| +        delta = boundaries->size() * fInsertion.length();
|
| +        lastBoundary = boundaries->lastElementi();
|
| +
|
| +        // we do this from the end backwards, so that we don't have to keep updating.
|
| +        while (boundaries->size() > 0) {
|
| +            boundary = boundaries->popi();
|
| +            text.handleReplaceBetween(boundary, boundary, fInsertion);
|
| +        }
|
| +    }
|
| +
|
| +    // Now fix up the return values
|
| +    offsets.contextLimit += delta;
|
| +    offsets.limit += delta;
|
| +    offsets.start = isIncremental ? lastBoundary + delta : offsets.limit;
|
| +
|
| +    // TODO: do something with U_FAILURE(status);
|
| +    // (need to look at transliterators overall, not just here.)
|
| +}
|
| +
|
| +//
|
| +// getInsertion()   Returns the string that is inserted at each boundary.
|
| +//
|
| +const UnicodeString &BreakTransliterator::getInsertion() const {
|
| + return fInsertion;
|
| +}
|
| +
|
| +//
|
| +// setInsertion()   Replaces the string that is inserted at each boundary.
|
| +//
|
| +void BreakTransliterator::setInsertion(const UnicodeString &insertion) {
|
| + this->fInsertion = insertion;
|
| +}
|
| +
|
| +//
|
| +// getBreakIterator   Returns bi, creating an English word break iterator
|
| +//                    first if none exists yet. Lazy creation was copied
|
| +//                    from Java; creating it in the constructor is probably better.
|
| +//
|
| +BreakIterator *BreakTransliterator::getBreakIterator() {
|
| +    if (bi != NULL) {
|
| +        return bi;
|
| +    }
|
| +    // Note: Thai breaking behavior is universal, it is not tied to the Thai locale.
|
| +    UErrorCode ec = U_ZERO_ERROR;
|
| +    bi = BreakIterator::createWordInstance(Locale::getEnglish(), ec);
|
| +    return bi;
|
| +}
|
| +
|
| +//
|
| +// replaceableAsString   Hack to let break iterators work
|
| +//                       on the replaceable text from transliterators.
|
| +//                       Returns a UnicodeString holding r's full contents.
|
| +//                       In practice r is nearly always a UnicodeString,
|
| +//                       which is copied directly without extractBetween().
|
| +//
|
| +UnicodeString BreakTransliterator::replaceableAsString(Replaceable &r) {
|
| +    const UnicodeString *asString = dynamic_cast<UnicodeString *>(&r);
|
| +    if (asString != NULL) {
|
| +        return *asString;
|
| +    }
|
| +    // Any other Replaceable: pull out the whole text.
|
| +    UnicodeString extracted;
|
| +    r.extractBetween(0, r.length(), extracted);
|
| +    return extracted;
|
| +}
|
| +
|
| +U_NAMESPACE_END
|
| +
|
| +#endif /* #if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION */
|
|
|
| Property changes on: icu46/source/i18n/brktrans.cpp
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|