icu46/source/i18n/brktrans.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Unified Diff: icu46/source/i18n/brktrans.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: icu46/source/i18n/brktrans.cpp

===================================================================

--- icu46/source/i18n/brktrans.cpp (revision 0)

+++ icu46/source/i18n/brktrans.cpp (revision 0)

@@ -0,0 +1,188 @@

+/*

+**********************************************************************

+* Date Name Description

+* 05/11/2008 Andy Heninger Port from Java

+**********************************************************************

+*/

+#include "unicode/utypes.h"

+#if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION

+#include "unicode/unifilt.h"

+#include "unicode/uchar.h"

+#include "unicode/uniset.h"

+#include "unicode/brkiter.h"

+#include "brktrans.h"

+#include "unicode/uchar.h"

+#include "cmemory.h"

+#include "uprops.h"

+#include "uinvchar.h"

+#include "util.h"

+#include "uvectr32.h"

+U_NAMESPACE_BEGIN

+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator)

+static const UChar SPACE = 32; // ' ' (U+0020); the constructor uses it as the initial value of fInsertion

+/**

+ * Constructs a transliterator with the ID "Any-BreakInternal" whose

+ * insertion string is initially a single space (U+0020).

+ * The filter is handed on to the Transliterator base class, and the

+ * break iterator is left NULL until getBreakIterator() creates it.

+ */

+BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter) :

+ Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter),

+ fInsertion(SPACE) {

+ bi = NULL;

+ UErrorCode status = U_ZERO_ERROR;

+ boundaries = new UVector32(status); // NOTE(review): neither status nor a NULL result is checked here

+ }

+/**

+ * Destructor. Deletes the break iterator and the boundaries vector

+ * held by this object and resets both pointers to NULL.

+ */

+BreakTransliterator::~BreakTransliterator() {

+ delete bi;

+ bi = NULL;

+ delete boundaries;

+ boundaries = NULL;

+/**

+ * Copy constructor. Clones the source's break iterator when it has one,

+ * copies its insertion string, and allocates a new boundaries vector

+ * rather than copying the source's.

+ */

+BreakTransliterator::BreakTransliterator(const BreakTransliterator& o) :

+ Transliterator(o) {

+ bi = NULL;

+ if (o.bi != NULL) {

+ bi = o.bi->clone();

+ }

+ fInsertion = o.fInsertion;

+ UErrorCode status = U_ZERO_ERROR;

+ boundaries = new UVector32(status); // NOTE(review): neither status nor a NULL result is checked here

+ }

+/**

+ * Transliterator API. Returns a new BreakTransliterator built from this

+ * object with the copy constructor.

+ */

+Transliterator* BreakTransliterator::clone(void) const {

+ return new BreakTransliterator(*this);

+/**

+ * Implements {@link Transliterator#handleTransliterate}.

+ *

+ * Steps forward from the break position preceding offsets.start and

+ * records every break before offsets.limit whose neighboring code points

+ * on both sides are letters or marks. fInsertion is then inserted at each

+ * recorded position, last to first, and offsets.contextLimit,

+ * offsets.limit and offsets.start are adjusted for the inserted text.

+ *

+ * NOTE(review): this const method mutates bi and boundaries, so it is

+ * presumably not safe to call concurrently on one instance -- confirm.

+ */

+void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,

+ UBool isIncremental ) const {

+ UErrorCode status = U_ZERO_ERROR;

+ boundaries->removeAllElements();

+ BreakTransliterator *nonConstThis = (BreakTransliterator *)this; // casts away const so bi can be created from this const method

+ nonConstThis->getBreakIterator(); // Lazy-create it if necessary

+ UnicodeString sText = replaceableAsString(text);

+ bi->setText(sText); // NOTE(review): bi is used without a NULL check -- confirm creation cannot fail

+ bi->preceding(offsets.start);

+ // To make things much easier, we will stack the boundaries, and then insert at the end.

+ // Generally, we won't need too many, since we will be filtered.

+ int32_t boundary;

+ for(boundary = bi->next(); boundary != UBRK_DONE && boundary < offsets.limit; boundary = bi->next()) {

+ if (boundary == 0) continue;

+ // HACK: Check to see that the preceding item was a letter or mark

+ UChar32 cp = sText.char32At(boundary-1);

+ int type = u_charType(cp);

+ //System.out.println(Integer.toString(cp,16) + " (before): " + type);

+ if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;

+ cp = sText.char32At(boundary); // same letter-or-mark test for the code point just after the boundary

+ type = u_charType(cp);

+ //System.out.println(Integer.toString(cp,16) + " (after): " + type);

+ if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;

+ boundaries->addElement(boundary, status);

+ // printf("Boundary at %d\n", boundary);

+ }

+ int delta = 0;

+ int lastBoundary = 0;

+ if (boundaries->size() != 0) { // if we found something, adjust

+ delta = boundaries->size() * fInsertion.length(); // total length added by all insertions

+ lastBoundary = boundaries->lastElementi();

+ // We do this from the end backwards, so that earlier positions stay valid and we don't have to keep updating.

+ while (boundaries->size() > 0) {

+ boundary = boundaries->popi();

+ text.handleReplaceBetween(boundary, boundary, fInsertion); // empty range, so this is a pure insertion

+ }

+ // Now fix up the return values

+ offsets.contextLimit += delta;

+ offsets.limit += delta;

+ offsets.start = isIncremental ? lastBoundary + delta : offsets.limit; // incremental: last recorded boundary shifted by the inserted text; otherwise the new limit

+ // TODO: do something with U_FAILURE(status); it is currently never inspected.

+ // (need to look at transliterators overall, not just here.)

+//

+// getInsertion() Returns a reference to fInsertion, the string that handleTransliterate inserts at each boundary.

+//

+const UnicodeString &BreakTransliterator::getInsertion() const {

+ return fInsertion;

+//

+// setInsertion() Replaces fInsertion, the string that handleTransliterate inserts at each boundary, with a copy of the argument.

+//

+void BreakTransliterator::setInsertion(const UnicodeString &insertion) {

+ this->fInsertion = insertion;

+//

+// getBreakIterator: Lazily create the break iterator if it does

+// not already exist. Copied from Java; it is probably

+// better to just create it in the constructor.

+//

+BreakIterator *BreakTransliterator::getBreakIterator() {

+ UErrorCode status = U_ZERO_ERROR; // NOTE(review): never inspected after the call below

+ if (bi == NULL) {

+ // Note: Thai breaking behavior is universal, it is not

+ // tied to the Thai locale.

+ bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);

+ }

+ return bi; // presumably still NULL if creation failed -- confirm against createWordInstance

+//

+// replaceableAsString: Hack to let break iterators work

+// on the replaceable text from transliterators.

+// In practice, the only real Replaceable type that we

+// will be seeing is UnicodeString, so this function

+// will normally be efficient. Returns the full contents of r by value.

+//

+UnicodeString BreakTransliterator::replaceableAsString(Replaceable &r) {

+ UnicodeString s;

+ UnicodeString *rs = dynamic_cast<UnicodeString *>(&r); // NULL when r is not actually a UnicodeString

+ if (rs != NULL) {

+ s = *rs;

+ } else {

+ r.extractBetween(0, r.length(), s); // generic path: extract the range 0..r.length() into s

+ }

+ return s;

+U_NAMESPACE_END

+#endif /* #if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION */

Property changes on: icu46/source/i18n/brktrans.cpp

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « icu46/source/i18n/brktrans.h ('k') | icu46/source/i18n/buddhcal.h » ('j') | no next file with comments »