Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(73)

Side by Side Diff: runtime/lib/string_patch.dart

Issue 213293002: Optimize one-byte string's toUpperCase. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 patch class String { 5 patch class String {
6 /* patch */ factory String.fromCharCodes(Iterable<int> charCodes) { 6 /* patch */ factory String.fromCharCodes(Iterable<int> charCodes) {
7 return _StringBase.createFromCharCodes(charCodes); 7 return _StringBase.createFromCharCodes(charCodes);
8 } 8 }
9 9
10 /* patch */ factory String.fromCharCode(int charCode) { 10 /* patch */ factory String.fromCharCode(int charCode) {
(...skipping 817 matching lines...) Expand 10 before | Expand all | Expand 10 after
828 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 828 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
829 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 829 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
830 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xd7, 830 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xd7,
831 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xdf, 831 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xdf,
832 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 832 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
833 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 833 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
834 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 834 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
835 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 835 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
836 ]; 836 ];
837 837
838 // Upper-case conversion table for Latin-1.
839 // lower-case ranges: 0x61-0x7a ('a' - 'z'), 0xe0-0xff.
840 // The characters 0xb5 (µ) and 0xff (ÿ) have upper-case variants
841 // that are not Latin-1. These are marked as -1 in the table.
842 // The German scharfes s (\xdf) is converted into two characters (SS),
sra1 2014/03/26 21:29:17 The VM has a bug then, since it translates \xdf to
843 // and is also marked with -1.
844 // Conversion to upper case performed by subtracting 0x20.
srdjan 2014/03/26 21:18:14 What is the performance impact of changing the tab
Lasse Reichstein Nielsen 2014/03/27 10:20:15 For toLowerCase, it's a roughly 13% reduction in p
Anders Johnsen 2014/03/27 10:38:10 I like the String idea. It has the potential of be
srdjan 2014/03/27 16:42:20 I am surprised that the performance goes down, may
Lasse Reichstein Nielsen 2014/03/27 18:09:13 I assumed that would require the same space for th
845 static const _UC_TABLE = const [  // Indexed by Latin-1 code unit (0x00-0xff); value is the upper-case code unit, or -1.
846 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
847 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
848 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
849 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
850 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
851 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
852 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
853 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
854 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
855 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
856 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
857 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
858 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,  // 0x61.. ('a'..) map to 0x41.. ('A'..).
859 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
860 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
861 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
862 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
863 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
864 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
865 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
866 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
867 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
868 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, -1, 0xb6, 0xb7,  // 0xb5 (µ): upper-case variant is not Latin-1, hence -1.
869 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
870 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
871 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
872 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
873 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, -1,  // 0xdf: upper-cases to two characters (SS), hence -1.
874 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,  // 0xe0.. map to 0xc0.. (code unit minus 0x20).
875 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
876 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xf7,  // 0xf7 is left unchanged (maps to itself).
877 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, -1,  // 0xff (ÿ): upper-case variant is not Latin-1, hence -1.
878 ];
879
838 String toLowerCase() { 880 String toLowerCase() {
839 for (int i = 0; i < this.length; i++) { 881 for (int i = 0; i < this.length; i++) {
840 final c = this.codeUnitAt(i); 882 final c = this.codeUnitAt(i);
841 if (c == _LC_TABLE[c]) continue; 883 if (c == _LC_TABLE[c]) continue;
842 // Upper-case character found. 884 // Upper-case character found.
843 final result = _allocate(this.length); 885 final result = _allocate(this.length);
844 for (int j = 0; j < i; j++) { 886 for (int j = 0; j < i; j++) {
845 result._setAt(j, this.codeUnitAt(j)); 887 result._setAt(j, this.codeUnitAt(j));
846 } 888 }
847 for (int j = i; j < this.length; j++) { 889 for (int j = i; j < this.length; j++) {
848 result._setAt(j, _LC_TABLE[this.codeUnitAt(j)]); 890 result._setAt(j, _LC_TABLE[this.codeUnitAt(j)]);
849 } 891 }
850 return result; 892 return result;
851 } 893 }
852 return this; 894 return this;
853 } 895 }
854 896
897 String toUpperCase() {  // Table-driven upper-casing via _UC_TABLE; returns this when nothing changes.
898 for (int i = 0; i < this.length; i++) {
899 final c = this.codeUnitAt(i);
900 if (c == _UC_TABLE[c]) continue;  // Unchanged by upper-casing; keep scanning.
901 // Check the rest of the string (from i on) for code units marked -1 in _UC_TABLE.
902 for (int j = i; j < this.length; j++) {
903 if (_UC_TABLE[this.codeUnitAt(j)] < 0) {  // Index with j so every remaining code unit is checked, not only position i.
904 return super.toUpperCase();  // Defer to the superclass implementation for the -1 cases.
905 }
906 }
907 // Lower-case characters found, but no problematic ones: every table entry from i on is >= 0.
908 final result = _allocate(this.length);
909 for (int j = 0; j < i; j++) {
910 result._setAt(j, this.codeUnitAt(j));  // Prefix before i is already unchanged by the table; copy verbatim.
911 }
912 for (int j = i; j < this.length; j++) {
913 result._setAt(j, _UC_TABLE[this.codeUnitAt(j)]);
914 }
915 return result;
916 }
917 return this;  // No code unit differs from its table entry.
918 }
919
855 // Allocates a string of given length, expecting its content to be 920 // Allocates a string of given length, expecting its content to be
856 // set using _setAt. 921 // set using _setAt.
857 static _OneByteString _allocate(int length) native "OneByteString_allocate"; 922 static _OneByteString _allocate(int length) native "OneByteString_allocate";
858 923
859 924
860 static _OneByteString _allocateFromOneByteList(List<int> list) 925 static _OneByteString _allocateFromOneByteList(List<int> list)
861 native "OneByteString_allocateFromOneByteList"; 926 native "OneByteString_allocateFromOneByteList";
862 927
863 // This is internal helper method. Code point value must be a valid 928 // This is internal helper method. Code point value must be a valid
864 // Latin1 value (0..0xFF), index must be valid. 929 // Latin1 value (0..0xFF), index must be valid.
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
955 class _CodeUnits extends Object with ListMixin<int>, 1020 class _CodeUnits extends Object with ListMixin<int>,
956 UnmodifiableListMixin<int> { 1021 UnmodifiableListMixin<int> {
957 /** The string that this is the code units of. */ 1022 /** The string that this is the code units of. */
958 String _string; 1023 String _string;
959 1024
960 _CodeUnits(this._string); 1025 _CodeUnits(this._string);
961 1026
962 int get length => _string.length; 1027 int get length => _string.length;
963 int operator[](int i) => _string.codeUnitAt(i); 1028 int operator[](int i) => _string.codeUnitAt(i);
964 } 1029 }
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698