src/intl.cc - Issue 2859203002: [string] Move String.p.toLowerCase to CSA

Side by Side Diff: src/intl.cc

Issue 2859203002: [string] Move String.p.toLowerCase to CSA (Closed)

Patch Set: Address comments Created 3 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2013 the V8 project authors. All rights reserved.	1 // Copyright 2013 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #ifndef V8_INTL_SUPPORT	5 #ifndef V8_INTL_SUPPORT

6 #error Internationalization is expected to be enabled.	6 #error Internationalization is expected to be enabled.

7 #endif // V8_INTL_SUPPORT	7 #endif // V8_INTL_SUPPORT

8	8

9 #include "src/intl.h"	9 #include "src/intl.h"

10	10

(...skipping 123 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
134 uint16_t ch = s->Get(index);	134 uint16_t ch = s->Get(index);

135 if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) {	135 if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) {

136 return index;	136 return index;

137 }	137 }

138 }	138 }

139 return length;	139 return length;

140 }	140 }

141	141

142 } // namespace	142 } // namespace

143	143

	144 const uint8_t* ToLatin1LowerTable() { return &kToLower[0]; }

	145

144 const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat,	146 const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat,

145 std::unique_ptr<uc16[]>* dest,	147 std::unique_ptr<uc16[]>* dest,

146 int32_t length) {	148 int32_t length) {

147 DCHECK(flat.IsFlat());	149 DCHECK(flat.IsFlat());

148 if (flat.IsOneByte()) {	150 if (flat.IsOneByte()) {

149 if (!*dest) {	151 if (!*dest) {

150 dest->reset(NewArray<uc16>(length));	152 dest->reset(NewArray<uc16>(length));

151 CopyChars(dest->get(), flat.ToOneByteVector().start(), length);	153 CopyChars(dest->get(), flat.ToOneByteVector().start(), length);

152 }	154 }

153 return reinterpret_cast<const UChar*>(dest->get());	155 return reinterpret_cast<const UChar*>(dest->get());

(...skipping 40 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
194 return *result;	196 return *result;

195 }	197 }

196 if (U_SUCCESS(status)) {	198 if (U_SUCCESS(status)) {

197 DCHECK(dest_length < result->length());	199 DCHECK(dest_length < result->length());

198 return *Handle<SeqTwoByteString>::cast(	200 return *Handle<SeqTwoByteString>::cast(

199 SeqString::Truncate(result, dest_length));	201 SeqString::Truncate(result, dest_length));

200 }	202 }

201 return *s;	203 return *s;

202 }	204 }

203	205

	206 // A stripped-down version of ConvertToLower that can only handle flat one-byte

	207 // strings and does not allocate.

	208 // Called from TF builtins.

	209 MUST_USE_RESULT Object* ConvertOneByteToLower(String* src, String* dst,

	210 Isolate* isolate) {

	211 DCHECK_EQ(src->length(), dst->length());

	212 DCHECK(src->IsOneByteRepresentation());

	213 DCHECK(src->IsFlat());

	214 DCHECK(dst->IsSeqOneByteString());

	215

	216 DisallowHeapAllocation no_gc;

	217

	218 const int length = src->length();

	219

	220 const uint8_t* src_data = src->GetFlatContent().ToOneByteVector().start();

	221 uint8_t* dst_data = SeqOneByteString::cast(dst)->GetChars();

	222

	223 bool has_changed_character = false;

	224 int index_to_first_unprocessed = FastAsciiConvert<true>(

	225 reinterpret_cast<char*>(dst_data),

	226 reinterpret_cast<const char*>(src_data), length, &has_changed_character);

	227

	228 // If not ASCII, we keep the result up to index_to_first_unprocessed and

	229 // process the rest.
	Camillo Bruni 2017/05/05 11:44:26 nit: this comment makes probably more sense before the for-loop. jgruber 2017/05/05 15:26:58 On 2017/05/05 11:44:26, Camillo Bruni wrote: > nit: this comment makes probably more sense before the for-loop. Yep, thought so as well. Done.
	230 if (index_to_first_unprocessed == length) {

	231 return has_changed_character ? dst : src;

	232 }

	233

	234 for (int index = index_to_first_unprocessed; index < length; ++index) {

	235 dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));

	236 }

	237

	238 return dst;

	239 }

	240

204 MUST_USE_RESULT Object* ConvertToLower(Handle<String> s, Isolate* isolate) {	241 MUST_USE_RESULT Object* ConvertToLower(Handle<String> s, Isolate* isolate) {

205 if (!s->HasOnlyOneByteChars()) {	242 if (!s->HasOnlyOneByteChars()) {

206 // Use a slower implementation for strings with characters beyond U+00FF.	243 // Use a slower implementation for strings with characters beyond U+00FF.

207 return LocaleConvertCase(s, isolate, false, "");	244 return LocaleConvertCase(s, isolate, false, "");

208 }	245 }

209	246

210 int length = s->length();	247 int length = s->length();

211	248

212 // We depend here on the invariant that the length of a Latin1	249 // We depend here on the invariant that the length of a Latin1

213 // string is invariant under ToLowerCase, and the result always	250 // string is invariant under ToLowerCase, and the result always

214 // fits in the Latin1 range in the root locale. It does not hold	251 // fits in the Latin1 range in the root locale. It does not hold

215 // for ToUpperCase even in the root locale.	252 // for ToUpperCase even in the root locale.

216	253

217 // Scan the string for uppercase and non-ASCII characters for strings	254 // Scan the string for uppercase and non-ASCII characters for strings

218 // shorter than a machine-word without any memory allocation overhead.	255 // shorter than a machine-word without any memory allocation overhead.

219 // TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert()	256 // TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert()

220 // to two parts, one for scanning the prefix with no change and the other for	257 // to two parts, one for scanning the prefix with no change and the other for

221 // handling ASCII-only characters.	258 // handling ASCII-only characters.

222 int index_to_first_unprocessed = length;	259 int index_to_first_unprocessed = length;

223 const bool is_short = length < static_cast<int>(sizeof(uintptr_t));	260 const bool is_short = length < static_cast<int>(sizeof(uintptr_t));

224 if (is_short) {	261 if (is_short) {

225 index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length);	262 index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length);

226 // Nothing to do if the string is all ASCII with no uppercase.	263 // Nothing to do if the string is all ASCII with no uppercase.

227 if (index_to_first_unprocessed == length) return *s;	264 if (index_to_first_unprocessed == length) return *s;

228 }	265 }

229	266

230 Handle<SeqOneByteString> result =	267 Handle<SeqOneByteString> result =

231 isolate->factory()->NewRawOneByteString(length).ToHandleChecked();	268 isolate->factory()->NewRawOneByteString(length).ToHandleChecked();

232	269

	270 if (s->IsOneByteRepresentation()) {

	271 return ConvertOneByteToLower(s, result, isolate);

	272 }

	273

233 DisallowHeapAllocation no_gc;	274 DisallowHeapAllocation no_gc;

234 DCHECK(s->IsFlat());	275 DCHECK(s->IsFlat());

	276 DCHECK(s->IsTwoByteRepresentation());

235 String::FlatContent flat = s->GetFlatContent();	277 String::FlatContent flat = s->GetFlatContent();

	278 DCHECK(flat.IsTwoByte());

	279

236 uint8_t* dest = result->GetChars();	280 uint8_t* dest = result->GetChars();

237 if (flat.IsOneByte()) {	281 if (index_to_first_unprocessed == length) {

238 const uint8_t* src = flat.ToOneByteVector().start();	282 DCHECK(!is_short);

239 bool has_changed_character = false;	283 index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length);

240 index_to_first_unprocessed = FastAsciiConvert<true>(	284 }

241 reinterpret_cast<char*>(dest), reinterpret_cast<const char*>(src),	285 // Nothing to do if the string is all ASCII with no uppercase.

242 length, &has_changed_character);	286 if (index_to_first_unprocessed == length) return *s;

243 // If not ASCII, we keep the result up to index_to_first_unprocessed and	287 const uint16_t* src = flat.ToUC16Vector().start();

244 // process the rest.	288 CopyChars(dest, src, index_to_first_unprocessed);

245 if (index_to_first_unprocessed == length)	289 for (int index = index_to_first_unprocessed; index < length; ++index) {

246 return has_changed_character ? result : s;	290 dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index]));

247

248 for (int index = index_to_first_unprocessed; index < length; ++index) {

249 dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index]));

250 }

251 } else {

252 if (index_to_first_unprocessed == length) {

253 DCHECK(!is_short);

254 index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length);

255 }

256 // Nothing to do if the string is all ASCII with no uppercase.

257 if (index_to_first_unprocessed == length) return *s;

258 const uint16_t* src = flat.ToUC16Vector().start();

259 CopyChars(dest, src, index_to_first_unprocessed);

260 for (int index = index_to_first_unprocessed; index < length; ++index) {

261 dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index]));

262 }

263 }	291 }

264	292

265 return *result;	293 return *result;

266 }	294 }

267	295

268 MUST_USE_RESULT Object* ConvertToUpper(Handle<String> s, Isolate* isolate) {	296 MUST_USE_RESULT Object* ConvertToUpper(Handle<String> s, Isolate* isolate) {

269 int32_t length = s->length();	297 int32_t length = s->length();

270 if (s->HasOnlyOneByteChars() && length > 0) {	298 if (s->HasOnlyOneByteChars() && length > 0) {

271 Handle<SeqOneByteString> result =	299 Handle<SeqOneByteString> result =

272 isolate->factory()->NewRawOneByteString(length).ToHandleChecked();	300 isolate->factory()->NewRawOneByteString(length).ToHandleChecked();

(...skipping 106 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
379	407

380 void ICUTimezoneCache::Clear() {	408 void ICUTimezoneCache::Clear() {

381 delete timezone_;	409 delete timezone_;

382 timezone_ = nullptr;	410 timezone_ = nullptr;

383 timezone_name_[0] = '\0';	411 timezone_name_[0] = '\0';

384 dst_timezone_name_[0] = '\0';	412 dst_timezone_name_[0] = '\0';

385 }	413 }

386	414

387 } // namespace internal	415 } // namespace internal

388 } // namespace v8	416 } // namespace v8

OLD	NEW

« src/builtins/builtins-intl-gen.cc ('K') | « src/intl.h ('k') | src/v8.gyp » ('j') | no next file with comments »