Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1880)

Unified Diff: pkg/utf/lib/utf.dart

Issue 68563004: Move unicode tests to utf package. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Simplify test. Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: pkg/utf/lib/utf.dart
diff --git a/pkg/utf/lib/utf.dart b/pkg/utf/lib/utf.dart
index 002287a8ca3498e9571d5781f063101da373759e..102899065f787b42baf69bf499e6b5be36d54b8a 100644
--- a/pkg/utf/lib/utf.dart
+++ b/pkg/utf/lib/utf.dart
@@ -11,261 +11,9 @@ library utf;
import "dart:async";
import "dart:collection";
+part "constants.dart";
+part "list_range.dart";
part "utf_stream.dart";
part "utf8.dart";
part "utf16.dart";
part "utf32.dart";
-
-// TODO(jmesserly): would be nice to have this on String (dartbug.com/6501).
-/**
- * Provide a list of Unicode codepoints for a given string.
- */
-List<int> stringToCodepoints(String str) {
- // Note: str.codeUnits gives us 16-bit code units on all Dart implementations.
- // So we need to convert.
- return _utf16CodeUnitsToCodepoints(str.codeUnits);
-}
-
-/**
- * Generate a string from the provided Unicode codepoints.
- *
- * *Deprecated* Use [String.fromCharCodes] instead.
- */
-String codepointsToString(List<int> codepoints) {
- return new String.fromCharCodes(codepoints);
-}
-
-/**
- * Invalid codepoints or encodings may be substituted with the value U+fffd.
- */
-const int UNICODE_REPLACEMENT_CHARACTER_CODEPOINT = 0xfffd;
-const int UNICODE_BOM = 0xfeff;
-const int UNICODE_UTF_BOM_LO = 0xff;
-const int UNICODE_UTF_BOM_HI = 0xfe;
-
-const int UNICODE_BYTE_ZERO_MASK = 0xff;
-const int UNICODE_BYTE_ONE_MASK = 0xff00;
-const int UNICODE_VALID_RANGE_MAX = 0x10ffff;
-const int UNICODE_PLANE_ONE_MAX = 0xffff;
-const int UNICODE_UTF16_RESERVED_LO = 0xd800;
-const int UNICODE_UTF16_RESERVED_HI = 0xdfff;
-const int UNICODE_UTF16_OFFSET = 0x10000;
-const int UNICODE_UTF16_SURROGATE_UNIT_0_BASE = 0xd800;
-const int UNICODE_UTF16_SURROGATE_UNIT_1_BASE = 0xdc00;
-const int UNICODE_UTF16_HI_MASK = 0xffc00;
-const int UNICODE_UTF16_LO_MASK = 0x3ff;
-
-/**
- * Encode code points as UTF16 code units.
- */
-List<int> _codepointsToUtf16CodeUnits(
- List<int> codepoints,
- [int offset = 0,
- int length,
- int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
-
- _ListRange listRange = new _ListRange(codepoints, offset, length);
- int encodedLength = 0;
- for (int value in listRange) {
- if ((value >= 0 && value < UNICODE_UTF16_RESERVED_LO) ||
- (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) {
- encodedLength++;
- } else if (value > UNICODE_PLANE_ONE_MAX &&
- value <= UNICODE_VALID_RANGE_MAX) {
- encodedLength += 2;
- } else {
- encodedLength++;
- }
- }
-
- List<int> codeUnitsBuffer = new List<int>(encodedLength);
- int j = 0;
- for (int value in listRange) {
- if ((value >= 0 && value < UNICODE_UTF16_RESERVED_LO) ||
- (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) {
- codeUnitsBuffer[j++] = value;
- } else if (value > UNICODE_PLANE_ONE_MAX &&
- value <= UNICODE_VALID_RANGE_MAX) {
- int base = value - UNICODE_UTF16_OFFSET;
- codeUnitsBuffer[j++] = UNICODE_UTF16_SURROGATE_UNIT_0_BASE +
- ((base & UNICODE_UTF16_HI_MASK) >> 10);
- codeUnitsBuffer[j++] = UNICODE_UTF16_SURROGATE_UNIT_1_BASE +
- (base & UNICODE_UTF16_LO_MASK);
- } else if (replacementCodepoint != null) {
- codeUnitsBuffer[j++] = replacementCodepoint;
- } else {
- throw new ArgumentError("Invalid encoding");
- }
- }
- return codeUnitsBuffer;
-}
-
-/**
- * Decodes the utf16 codeunits to codepoints.
- */
-List<int> _utf16CodeUnitsToCodepoints(
- List<int> utf16CodeUnits, [int offset = 0, int length,
- int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
- _ListRangeIterator source =
- (new _ListRange(utf16CodeUnits, offset, length)).iterator;
- Utf16CodeUnitDecoder decoder = new Utf16CodeUnitDecoder
- .fromListRangeIterator(source, replacementCodepoint);
- List<int> codepoints = new List<int>(source.remaining);
- int i = 0;
- while (decoder.moveNext()) {
- codepoints[i++] = decoder.current;
- }
- if (i == codepoints.length) {
- return codepoints;
- } else {
- List<int> codepointTrunc = new List<int>(i);
- codepointTrunc.setRange(0, i, codepoints);
- return codepointTrunc;
- }
-}
-
-/**
- * An Iterator<int> of codepoints built on an Iterator of UTF-16 code units.
- * The parameters can override the default Unicode replacement character. Set
- * the replacementCodepoint to null to throw an ArgumentError
- * rather than replace the bad value.
- */
-class Utf16CodeUnitDecoder implements Iterator<int> {
- final _ListRangeIterator utf16CodeUnitIterator;
- final int replacementCodepoint;
- int _current = null;
-
- Utf16CodeUnitDecoder(List<int> utf16CodeUnits, [int offset = 0, int length,
- int this.replacementCodepoint =
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
- utf16CodeUnitIterator =
- (new _ListRange(utf16CodeUnits, offset, length)).iterator;
-
- Utf16CodeUnitDecoder.fromListRangeIterator(
- _ListRangeIterator this.utf16CodeUnitIterator,
- int this.replacementCodepoint);
-
- Iterator<int> get iterator => this;
-
- int get current => _current;
-
- bool moveNext() {
- _current = null;
- if (!utf16CodeUnitIterator.moveNext()) return false;
-
- int value = utf16CodeUnitIterator.current;
- if (value < 0) {
- if (replacementCodepoint != null) {
- _current = replacementCodepoint;
- } else {
- throw new ArgumentError(
- "Invalid UTF16 at ${utf16CodeUnitIterator.position}");
- }
- } else if (value < UNICODE_UTF16_RESERVED_LO ||
- (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) {
- // transfer directly
- _current = value;
- } else if (value < UNICODE_UTF16_SURROGATE_UNIT_1_BASE &&
- utf16CodeUnitIterator.moveNext()) {
- // merge surrogate pair
- int nextValue = utf16CodeUnitIterator.current;
- if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_1_BASE &&
- nextValue <= UNICODE_UTF16_RESERVED_HI) {
- value = (value - UNICODE_UTF16_SURROGATE_UNIT_0_BASE) << 10;
- value += UNICODE_UTF16_OFFSET +
- (nextValue - UNICODE_UTF16_SURROGATE_UNIT_1_BASE);
- _current = value;
- } else {
- if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_0_BASE &&
- nextValue < UNICODE_UTF16_SURROGATE_UNIT_1_BASE) {
- utf16CodeUnitIterator.backup();
- }
- if (replacementCodepoint != null) {
- _current = replacementCodepoint;
- } else {
- throw new ArgumentError(
- "Invalid UTF16 at ${utf16CodeUnitIterator.position}");
- }
- }
- } else if (replacementCodepoint != null) {
- _current = replacementCodepoint;
- } else {
- throw new ArgumentError(
- "Invalid UTF16 at ${utf16CodeUnitIterator.position}");
- }
- return true;
- }
-}
-
-/**
- * _ListRange is an internal type used to create a lightweight Iterable on a
- * range within a source list. DO NOT MODIFY the underlying list while
- * iterating over it. The results of doing so are undefined.
- */
-// TODO(floitsch): Consider removing the extend and switch to implements since
-// that's cheaper to allocate.
-class _ListRange extends IterableBase {
- final List _source;
- final int _offset;
- final int _length;
-
- _ListRange(source, [offset = 0, length]) :
- this._source = source,
- this._offset = offset,
- this._length = (length == null ? source.length - offset : length) {
- if (_offset < 0 || _offset > _source.length) {
- throw new RangeError.value(_offset);
- }
- if (_length != null && (_length < 0)) {
- throw new RangeError.value(_length);
- }
- if (_length + _offset > _source.length) {
- throw new RangeError.value(_length + _offset);
- }
- }
-
- _ListRangeIterator get iterator =>
- new _ListRangeIteratorImpl(_source, _offset, _offset + _length);
-
- int get length => _length;
-}
-
-/**
- * The _ListRangeIterator provides more capabilities than a standard iterator,
- * including the ability to get the current position, count remaining items,
- * and move forward/backward within the iterator.
- */
-abstract class _ListRangeIterator implements Iterator<int> {
- bool moveNext();
- int get current;
- int get position;
- void backup([by]);
- int get remaining;
- void skip([count]);
-}
-
-class _ListRangeIteratorImpl implements _ListRangeIterator {
- final List<int> _source;
- int _offset;
- final int _end;
-
- _ListRangeIteratorImpl(this._source, int offset, this._end)
- : _offset = offset - 1;
-
- int get current => _source[_offset];
-
- bool moveNext() => ++_offset < _end;
-
- int get position => _offset;
-
- void backup([int by = 1]) {
- _offset -= by;
- }
-
- int get remaining => _end - _offset - 1;
-
- void skip([int count = 1]) {
- _offset += count;
- }
-}
-
« no previous file with comments | « pkg/utf/lib/list_range.dart ('k') | pkg/utf/lib/utf16.dart » ('j') | pkg/utf/lib/utf16.dart » ('J')

Powered by Google App Engine
This is Rietveld 408576698