Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(103)

Side by Side Diff: tools/servicec/lib/src/resources/cc/unicode.h

Issue 2035023003: Remove service-compiler related code. (Closed) Base URL: git@github.com:dartino/sdk.git@master
Patch Set: Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 #ifndef UNICODE_H_
6 #define UNICODE_H_
7
8 #include "struct.h"
9
// Encoding-independent range predicates over Unicode code point values.
class Utf {
 public:
  // Largest value accepted as a code point by the predicates below.
  static const int32_t kMaxCodePoint = 0x10FFFF;

  // True when code_point lies in [0, 0xFF].
  static bool IsLatin1(int32_t code_point) {
    return InClosedRange(code_point, 0, 0xFF);
  }

  // True when code_point lies in [0, 0xFFFF].
  static bool IsBmp(int32_t code_point) {
    return InClosedRange(code_point, 0, 0xFFFF);
  }

  // True when code_point lies in [0x10000, kMaxCodePoint].
  static bool IsSupplementary(int32_t code_point) {
    return InClosedRange(code_point, 0x10000, kMaxCodePoint);
  }

  // True when code_point is negative or greater than kMaxCodePoint.
  static bool IsOutOfRange(intptr_t code_point) {
    return !InClosedRange(code_point, 0, kMaxCodePoint);
  }

 private:
  // Shared bounds check: low <= value <= high.
  static bool InClosedRange(intptr_t value, intptr_t low, intptr_t high) {
    return (low <= value) && (value <= high);
  }
};
31
32 class Utf8 {
33 public:
34 enum Type {
35 kLatin1 = 0, // Latin-1 code point [U+0000, U+00FF].
36 kBMP, // Basic Multilingual Plane code point [U+0000, U+FFFF].
37 kSupplementary, // Supplementary code point [U+010000, U+10FFFF].
38 };
39
40 // Returns the most restricted coding form in which the sequence of utf8
41 // characters in 'utf8_array' can be represented in, and the number of
42 // code units needed in that form.
43 static intptr_t CodeUnitCount(const char* utf8_array,
44 intptr_t array_len,
45 Type* type);
46
47 static intptr_t Length(int32_t ch);
48 static intptr_t Length(List<uint16_t> str);
49
50 static intptr_t Encode(int32_t ch, char* dst);
51 static intptr_t Encode(List<uint16_t> str, char* dst, intptr_t len);
52
53 static intptr_t Decode(const char* utf8_array,
54 intptr_t array_len,
55 int32_t* ch);
56 static bool DecodeToUTF16(const char* utf8_array,
57 intptr_t array_len,
58 uint16_t* dst,
59 intptr_t len);
60
61 static const int32_t kMaxOneByteChar = 0x7F;
62 static const int32_t kMaxTwoByteChar = 0x7FF;
63 static const int32_t kMaxThreeByteChar = 0xFFFF;
64 static const int32_t kMaxFourByteChar = Utf::kMaxCodePoint;
65
66 private:
67 static bool IsTrailByte(uint8_t code_unit) {
68 return (code_unit & 0xC0) == 0x80;
69 }
70
71 static bool IsNonShortestForm(uint32_t code_point, size_t num_code_units) {
72 return code_point < kOverlongMinimum[num_code_units];
73 }
74
75 static bool IsLatin1SequenceStart(uint8_t code_unit) {
76 // Check if utf8 sequence is the start of a codepoint <= U+00FF
77 return (code_unit <= 0xC3);
78 }
79
80 static bool IsSupplementarySequenceStart(uint8_t code_unit) {
81 // Check if utf8 sequence is the start of a codepoint >= U+10000.
82 return (code_unit >= 0xF0);
83 }
84
85 static const int8_t kTrailBytes[];
86 static const uint32_t kMagicBits[];
87 static const uint32_t kOverlongMinimum[];
88 };
89
// UTF-16 helpers: surrogate classification, surrogate pair decoding and
// forward iteration over an array of 16-bit code units.
class Utf16 {
 public:
  // Number of UTF-16 code units for ch: 2 when ch exceeds kMaxCodeUnit,
  // otherwise 1.
  static intptr_t Length(int32_t ch) {
    if (ch > Utf16::kMaxCodeUnit) {
      return 2;
    }
    return 1;
  }

  // True when ch is any surrogate, lead or trail: [0xD800, 0xDFFF].
  static bool IsSurrogate(int32_t ch) {
    return (ch >= 0xD800) && (ch <= 0xDFFF);
  }

  // True when ch is a lead surrogate: [0xD800, 0xDBFF].
  static bool IsLeadSurrogate(int32_t ch) {
    return (ch >= 0xD800) && (ch <= 0xDBFF);
  }

  // True when ch is a trail surrogate: [0xDC00, 0xDFFF].
  static bool IsTrailSurrogate(int32_t ch) {
    return (ch >= 0xDC00) && (ch <= 0xDFFF);
  }

  // Returns the character starting at index *i and moves *i to the next
  // character boundary. A lead surrogate directly followed by a trail
  // surrogate is combined into one code point and consumes two code units;
  // any other code unit is returned unchanged and consumes one.
  static int32_t Next(const uint16_t* characters, intptr_t* i, intptr_t len) {
    const intptr_t pos = *i;
    const int32_t first = characters[pos];
    // Only look ahead when a following code unit exists.
    const bool has_following_unit = pos < (len - 1);
    if (Utf16::IsLeadSurrogate(first) && has_following_unit) {
      const int32_t second = characters[pos + 1];
      if (Utf16::IsTrailSurrogate(second)) {
        *i = pos + 2;
        return Utf16::Decode(first, second);
      }
    }
    *i = pos + 1;
    return first;
  }

  // Combines a surrogate pair into a supplementary code point: the low ten
  // bits of lead become bits 10-19 and the low ten bits of trail become
  // bits 0-9, on top of the 0x10000 base.
  static int32_t Decode(int32_t lead, int32_t trail) {
    const int32_t upper_bits = (lead & 0x3FF) << 10;
    const int32_t lower_bits = trail & 0x3FF;
    return 0x10000 + upper_bits + lower_bits;
  }

  // Encodes a single code point into dst. Defined out of line.
  static void Encode(int32_t codepoint, uint16_t* dst);

  // Largest value that fits in a single 16-bit code unit.
  static const int32_t kMaxCodeUnit = 0xFFFF;

 private:
  // Surrogate arithmetic constants; not used by the inline code above.
  static const int32_t kLeadSurrogateOffset = (0xD800 - (0x10000 >> 10));

  static const int32_t kSurrogateOffset = (0x10000 - (0xD800 << 10) - 0xDC00);
};
142
143 #endif // UNICODE_H_
OLDNEW
« no previous file with comments | « tools/servicec/lib/src/resources/cc/struct.cc ('k') | tools/servicec/lib/src/resources/cc/unicode.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698