Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(180)

Unified Diff: pkg/utf/lib/utf16.dart

Issue 68563004: Move unicode tests to utf package. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Simplify test. Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: pkg/utf/lib/utf16.dart
diff --git a/pkg/utf/lib/utf16.dart b/pkg/utf/lib/utf16.dart
index 7de9e616581296979d4f7e5f8e094a841012a4bd..438c6781a74fa78ba0944e683e5c7c3fd308c1f8 100644
--- a/pkg/utf/lib/utf16.dart
+++ b/pkg/utf/lib/utf16.dart
@@ -4,6 +4,167 @@
part of utf;
+// TODO(jmesserly): would be nice to have this on String (dartbug.com/6501).
floitsch 2013/11/18 17:08:17 Whole section copied verbatim.
Lasse Reichstein Nielsen 2013/11/19 07:42:38 I don't think I want to add anything new to the ut
floitsch 2013/11/19 10:40:32 I don't agree. The utf-package contains much more
Lasse Reichstein Nielsen 2013/11/19 12:25:43 From the same package - in that case, LGTM.
+/**
+ * Provide a list of Unicode codepoints for a given string.
+ *
+ * The string's code units ([String.codeUnits]) are handed to
+ * [_utf16CodeUnitsToCodepoints] with no further arguments, so that
+ * function's default offset, length and replacement codepoint apply.
+ */
+List<int> stringToCodepoints(String str) {
+ // Note: str.codeUnits gives us 16-bit code units on all Dart implementations.
+ // So we need to convert.
+ return _utf16CodeUnitsToCodepoints(str.codeUnits);
Lasse Reichstein Nielsen 2013/11/19 07:42:38 If we keep it (and I don't think we should - if it
floitsch 2013/11/19 10:40:32 This was code that already existed in the package.
+}
+
+/**
+ * Generate a string from the provided Unicode codepoints.
+ *
+ * This is a direct wrapper: the result is exactly
+ * `new String.fromCharCodes(codepoints)`.
+ *
+ * *Deprecated* Use [String.fromCharCodes] instead.
Lasse Reichstein Nielsen 2013/11/19 07:42:38 Ditto - remove this. Definitely remove the "Deprec
floitsch 2013/11/19 10:40:32 Not in this CL.
+ */
+String codepointsToString(List<int> codepoints) {
+ return new String.fromCharCodes(codepoints);
+}
+
+/**
+ * An Iterator<int> of codepoints built on an Iterator of UTF-16 code units.
+ * The parameters can override the default Unicode replacement character. Set
+ * the [replacementCodepoint] to null to throw an ArgumentError
+ * rather than replace the bad value.
+ *
+ * The default constructor iterates over the given range (offset, length) of
+ * the code unit list; [Utf16CodeUnitDecoder.fromListRangeIterator] reuses an
+ * existing range iterator. The [iterator] getter returns this object itself.
+ *
+ * [moveNext] first resets [current] to null, then reads one code unit from
+ * [utf16CodeUnitIterator]. It returns false when that iterator is exhausted.
+ * Otherwise it returns true (or throws) after handling the unit as follows:
+ *
+ * * A negative unit is invalid.
+ * * A unit below UNICODE_UTF16_RESERVED_LO, or one above
+ * UNICODE_UTF16_RESERVED_HI and no greater than UNICODE_PLANE_ONE_MAX,
+ * becomes the codepoint unchanged.
+ * * A unit below UNICODE_UTF16_SURROGATE_UNIT_1_BASE that is followed by
+ * another unit is merged with it into one codepoint, provided the following
+ * unit lies between UNICODE_UTF16_SURROGATE_UNIT_1_BASE and
+ * UNICODE_UTF16_RESERVED_HI (inclusive). If it does not, the input is
+ * invalid; the following unit is pushed back (backup()) only when it lies
+ * between UNICODE_UTF16_SURROGATE_UNIT_0_BASE (inclusive) and
+ * UNICODE_UTF16_SURROGATE_UNIT_1_BASE (exclusive).
+ * NOTE(review): any other following unit stays consumed and is never
+ * emitted on its own - confirm this is intended.
+ * * Every remaining case is invalid.
+ *
+ * Invalid input sets [current] to [replacementCodepoint], or throws an
+ * ArgumentError naming the underlying iterator's position when
+ * [replacementCodepoint] is null.
+ */
+class Utf16CodeUnitDecoder implements Iterator<int> {
Lasse Reichstein Nielsen 2013/11/19 07:42:38 Do we have a way to use a Converter to go from inp
floitsch 2013/11/19 10:40:32 Again. this is code that already existed. Not chan
+ final _ListRangeIterator utf16CodeUnitIterator;
+ final int replacementCodepoint;
+ int _current = null;
+
+ Utf16CodeUnitDecoder(List<int> utf16CodeUnits, [int offset = 0, int length,
+ int this.replacementCodepoint =
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
+ utf16CodeUnitIterator =
+ (new _ListRange(utf16CodeUnits, offset, length)).iterator;
+
+ Utf16CodeUnitDecoder.fromListRangeIterator(
+ _ListRangeIterator this.utf16CodeUnitIterator,
+ int this.replacementCodepoint);
+
+ Iterator<int> get iterator => this;
+
+ int get current => _current;
+
+ bool moveNext() {
+ _current = null;
+ if (!utf16CodeUnitIterator.moveNext()) return false;
+
+ int value = utf16CodeUnitIterator.current;
+ if (value < 0) {
+ if (replacementCodepoint != null) {
+ _current = replacementCodepoint;
+ } else {
+ throw new ArgumentError(
+ "Invalid UTF16 at ${utf16CodeUnitIterator.position}");
+ }
+ } else if (value < UNICODE_UTF16_RESERVED_LO ||
+ (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) {
+ // transfer directly: the code unit is used as the codepoint unchanged
+ _current = value;
+ } else if (value < UNICODE_UTF16_SURROGATE_UNIT_1_BASE &&
+ utf16CodeUnitIterator.moveNext()) {
+ // merge surrogate pair: value is the first unit, the unit just read is
+ // the candidate second unit
+ int nextValue = utf16CodeUnitIterator.current;
+ if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_1_BASE &&
+ nextValue <= UNICODE_UTF16_RESERVED_HI) {
+ value = (value - UNICODE_UTF16_SURROGATE_UNIT_0_BASE) << 10;
+ value += UNICODE_UTF16_OFFSET +
+ (nextValue - UNICODE_UTF16_SURROGATE_UNIT_1_BASE);
+ _current = value;
+ } else {
+ if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_0_BASE &&
+ nextValue < UNICODE_UTF16_SURROGATE_UNIT_1_BASE) {
+ utf16CodeUnitIterator.backup();
+ }
+ if (replacementCodepoint != null) {
+ _current = replacementCodepoint;
+ } else {
+ throw new ArgumentError(
+ "Invalid UTF16 at ${utf16CodeUnitIterator.position}");
+ }
+ }
+ } else if (replacementCodepoint != null) {
+ _current = replacementCodepoint;
+ } else {
+ throw new ArgumentError(
+ "Invalid UTF16 at ${utf16CodeUnitIterator.position}");
+ }
+ return true;
+ }
+}
+
+/**
+ * Encode code points as UTF16 code units.
+ *
+ * Only the range of [codepoints] selected by [offset] and [length] is
+ * encoded. The range is walked twice: the first pass counts the code units
+ * needed so the result list can be allocated at its final size, the second
+ * pass fills it.
+ *
+ * * A value from 0 up to (not including) UNICODE_UTF16_RESERVED_LO, or above
+ * UNICODE_UTF16_RESERVED_HI and no greater than UNICODE_PLANE_ONE_MAX, is
+ * stored as a single code unit.
+ * * A value above UNICODE_PLANE_ONE_MAX and no greater than
+ * UNICODE_VALID_RANGE_MAX is stored as two code units derived from
+ * `value - UNICODE_UTF16_OFFSET`: the bits selected by
+ * UNICODE_UTF16_HI_MASK (shifted right by 10) are added to
+ * UNICODE_UTF16_SURROGATE_UNIT_0_BASE, and the bits selected by
+ * UNICODE_UTF16_LO_MASK are added to UNICODE_UTF16_SURROGATE_UNIT_1_BASE.
+ * * Any other value is counted as one code unit and replaced by
+ * [replacementCodepoint]; if [replacementCodepoint] is null an
+ * ArgumentError("Invalid encoding") is thrown instead.
+ * NOTE(review): the replacement is written as a single list element with
+ * no range check - confirm callers only pass a value that fits in one
+ * code unit.
+ */
+List<int> _codepointsToUtf16CodeUnits(
Lasse Reichstein Nielsen 2013/11/19 07:42:38 If this isn't used, remove it. If it is, consider
floitsch 2013/11/19 10:40:32 ditto.
+ List<int> codepoints,
+ [int offset = 0,
+ int length,
+ int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
+
+ _ListRange listRange = new _ListRange(codepoints, offset, length);
+ int encodedLength = 0;
+ for (int value in listRange) {
+ if ((value >= 0 && value < UNICODE_UTF16_RESERVED_LO) ||
+ (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) {
+ encodedLength++;
+ } else if (value > UNICODE_PLANE_ONE_MAX &&
+ value <= UNICODE_VALID_RANGE_MAX) {
+ encodedLength += 2;
+ } else {
+ encodedLength++;
+ }
+ }
+
+ List<int> codeUnitsBuffer = new List<int>(encodedLength);
+ int j = 0;
+ for (int value in listRange) {
+ if ((value >= 0 && value < UNICODE_UTF16_RESERVED_LO) ||
+ (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) {
+ codeUnitsBuffer[j++] = value;
+ } else if (value > UNICODE_PLANE_ONE_MAX &&
+ value <= UNICODE_VALID_RANGE_MAX) {
+ int base = value - UNICODE_UTF16_OFFSET;
+ codeUnitsBuffer[j++] = UNICODE_UTF16_SURROGATE_UNIT_0_BASE +
+ ((base & UNICODE_UTF16_HI_MASK) >> 10);
+ codeUnitsBuffer[j++] = UNICODE_UTF16_SURROGATE_UNIT_1_BASE +
+ (base & UNICODE_UTF16_LO_MASK);
+ } else if (replacementCodepoint != null) {
+ codeUnitsBuffer[j++] = replacementCodepoint;
+ } else {
+ throw new ArgumentError("Invalid encoding");
+ }
+ }
+ return codeUnitsBuffer;
+}
+
+/**
+ * Decodes the utf16 codeunits to codepoints.
+ *
+ * Only the range of [utf16CodeUnits] selected by [offset] and [length] is
+ * decoded, using a [Utf16CodeUnitDecoder] built over that range with the
+ * given [replacementCodepoint].
+ *
+ * The result list is first allocated with one slot per remaining code unit
+ * (`source.remaining`). If the decoder produced exactly that many
+ * codepoints the list is returned as is; otherwise the decoded prefix is
+ * copied into a new list of the exact length, which is returned instead.
+ */
+List<int> _utf16CodeUnitsToCodepoints(
Lasse Reichstein Nielsen 2013/11/19 07:42:38 If not used, remove. If used, consider rewriting a
floitsch 2013/11/19 10:40:32 ditto.
+ List<int> utf16CodeUnits, [int offset = 0, int length,
+ int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
+ _ListRangeIterator source =
+ (new _ListRange(utf16CodeUnits, offset, length)).iterator;
+ Utf16CodeUnitDecoder decoder = new Utf16CodeUnitDecoder
+ .fromListRangeIterator(source, replacementCodepoint);
+ List<int> codepoints = new List<int>(source.remaining);
+ int i = 0;
+ while (decoder.moveNext()) {
+ codepoints[i++] = decoder.current;
+ }
+ if (i == codepoints.length) {
+ return codepoints;
+ } else {
+ List<int> codepointTrunc = new List<int>(i);
+ codepointTrunc.setRange(0, i, codepoints);
+ return codepointTrunc;
+ }
+}
+
/**
* Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert
* as much of the input as needed. Determines the byte order from the BOM,
@@ -256,7 +417,12 @@ abstract class Utf16BytesToCodeUnitsDecoder implements _ListRangeIterator {
bool moveNext() {
_current = null;
- if (utf16EncodedBytesIterator.remaining < 2) {
+ int remaining = utf16EncodedBytesIterator.remaining;
+ if (remaining == 0) {
+ _current = null;
+ return false;
+ }
+ if (remaining == 1) {
utf16EncodedBytesIterator.moveNext();
if (replacementCodepoint != null) {
_current = replacementCodepoint;
@@ -265,10 +431,9 @@ abstract class Utf16BytesToCodeUnitsDecoder implements _ListRangeIterator {
throw new ArgumentError(
"Invalid UTF16 at ${utf16EncodedBytesIterator.position}");
}
- } else {
- _current = decode();
- return true;
}
+ _current = decode();
+ return true;
}
int get position => utf16EncodedBytesIterator.position ~/ 2;
« no previous file with comments | « pkg/utf/lib/utf.dart ('k') | pkg/utf/lib/utf32.dart » ('j') | pkg/utf/test/utf16_test.dart » ('J')

Powered by Google App Engine
This is Rietveld 408576698