icu46/source/i18n/brktrans.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/i18n/brktrans.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 **********************************************************************

	3 * Copyright (C) 2008-2010, International Business Machines

	4 * Corporation and others. All Rights Reserved.

	5 **********************************************************************

	6 * Date Name Description

	7 * 05/11/2008 Andy Heninger Port from Java

	8 **********************************************************************

	9 */

	10

	11 #include "unicode/utypes.h"

	12

	13 #if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION

	14

	15 #include "unicode/unifilt.h"

	16 #include "unicode/uchar.h"

	17 #include "unicode/uniset.h"

	18 #include "unicode/brkiter.h"

	19 #include "brktrans.h"

	20 #include "unicode/uchar.h"

	21 #include "cmemory.h"

	22 #include "uprops.h"

	23 #include "uinvchar.h"

	24 #include "util.h"

	25 #include "uvectr32.h"

	26

	27 U_NAMESPACE_BEGIN

	28

	29 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator)

	30

	31 static const UChar SPACE = 32; // ' '

	32

	33

	34 /**

	35 * Constructs a transliterator with the default delimiters '{' and

	36 * '}'.

	37 */

	38 BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter) :

	39 Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter),

	40 fInsertion(SPACE) {

	41 bi = NULL;

	42 UErrorCode status = U_ZERO_ERROR;

	43 boundaries = new UVector32(status);

	44 }

	45

	46

	47 /**

	48 * Destructor.

	49 */

	50 BreakTransliterator::~BreakTransliterator() {

	51 delete bi;

	52 bi = NULL;

	53 delete boundaries;

	54 boundaries = NULL;

	55 }

	56

	57 /**

	58 * Copy constructor.

	59 */

	60 BreakTransliterator::BreakTransliterator(const BreakTransliterator& o) :

	61 Transliterator(o) {

	62 bi = NULL;

	63 if (o.bi != NULL) {

	64 bi = o.bi->clone();

	65 }

	66 fInsertion = o.fInsertion;

	67 UErrorCode status = U_ZERO_ERROR;

	68 boundaries = new UVector32(status);

	69 }

	70

	71

	72 /**

	73 * Transliterator API.

	74 */

	75 Transliterator* BreakTransliterator::clone(void) const {

	76 return new BreakTransliterator(*this);

	77 }

	78

	79 /**

	80 * Implements {@link Transliterator#handleTransliterate}.

	81 */

	82 void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,

	83 UBool isIncremental ) const {

	84

	85 UErrorCode status = U_ZERO_ERROR;

	86 boundaries->removeAllElements();

	87 BreakTransliterator nonConstThis = (BreakTransliterator )this;

	88 nonConstThis->getBreakIterator(); // Lazy-create it if necessary

	89 UnicodeString sText = replaceableAsString(text);

	90 bi->setText(sText);

	91 bi->preceding(offsets.start);

	92

	93 // To make things much easier, we will stack the boundaries, and then in sert at the end.

	94 // generally, we won't need too many, since we will be filtered.

	95

	96 int32_t boundary;

	97 for(boundary = bi->next(); boundary != UBRK_DONE && boundary < offsets.l imit; boundary = bi->next()) {

	98 if (boundary == 0) continue;

	99 // HACK: Check to see that preceeding item was a letter

	100

	101 UChar32 cp = sText.char32At(boundary-1);

	102 int type = u_charType(cp);

	103 //System.out.println(Integer.toString(cp,16) + " (before): " + type) ;

	104 if ((U_MASK(type) & (U_GC_L_MASK \| U_GC_M_MASK)) == 0) continue;

	105

	106 cp = sText.char32At(boundary);

	107 type = u_charType(cp);

	108 //System.out.println(Integer.toString(cp,16) + " (after): " + type);

	109 if ((U_MASK(type) & (U_GC_L_MASK \| U_GC_M_MASK)) == 0) continue;

	110

	111 boundaries->addElement(boundary, status);

	112 // printf("Boundary at %d\n", boundary);

	113 }

	114

	115 int delta = 0;

	116 int lastBoundary = 0;

	117

	118 if (boundaries->size() != 0) { // if we found something, adjust

	119 delta = boundaries->size() * fInsertion.length();

	120 lastBoundary = boundaries->lastElementi();

	121

	122 // we do this from the end backwards, so that we don't have to keep updating.

	123

	124 while (boundaries->size() > 0) {

	125 boundary = boundaries->popi();

	126 text.handleReplaceBetween(boundary, boundary, fInsertion);

	127 }

	128 }

	129

	130 // Now fix up the return values

	131 offsets.contextLimit += delta;

	132 offsets.limit += delta;

	133 offsets.start = isIncremental ? lastBoundary + delta : offsets.limit;

	134

	135 // TODO: do something with U_FAILURE(status);

	136 // (need to look at transliterators overall, not just here.)

	137 }

	138

	139 //

	140 // getInsertion()

	141 //

	142 const UnicodeString &BreakTransliterator::getInsertion() const {

	143 return fInsertion;

	144 }

	145

	146 //

	147 // setInsertion()

	148 //

	149 void BreakTransliterator::setInsertion(const UnicodeString &insertion) {

	150 this->fInsertion = insertion;

	151 }

	152

	153 //

	154 // getBreakIterator Lazily create the break iterator if it does

	155 // not already exist. Copied from Java, probably

	156 // better to just create it in the constructor.

	157 //

	158 BreakIterator *BreakTransliterator::getBreakIterator() {

	159 UErrorCode status = U_ZERO_ERROR;

	160 if (bi == NULL) {

	161 // Note: Thai breaking behavior is universal, it is not

	162 // tied to the Thai locale.

	163 bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);

	164 }

	165 return bi;

	166 }

	167

	168 //

	169 // replaceableAsString Hack to let break iterators work

	170 // on the replaceable text from transliterators.

	171 // In practice, the only real Replaceable type that we

	172 // will be seeing is UnicodeString, so this function

	173 // will normally be efficient.

	174 //

	175 UnicodeString BreakTransliterator::replaceableAsString(Replaceable &r) {

	176 UnicodeString s;

	177 UnicodeString rs = dynamic_cast<UnicodeString >(&r);

	178 if (rs != NULL) {

	179 s = *rs;

	180 } else {

	181 r.extractBetween(0, r.length(), s);

	182 }

	183 return s;

	184 }

	185

	186 U_NAMESPACE_END

	187

	188 #endif /* #if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION */

OLD	NEW

« no previous file with comments | « icu46/source/i18n/brktrans.h ('k') | icu46/source/i18n/buddhcal.h » ('j') | no next file with comments »