tools/servicec/lib/src/resources/cc/unicode.cc - Issue 2035023003: Remove service-compiler related code.

Side by Side Diff: tools/servicec/lib/src/resources/cc/unicode.cc

Issue 2035023003: Remove service-compiler related code. (Closed) Base URL: git@github.com:dartino/sdk.git@master

Patch Set: Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.

4

5 #include "unicode.h"

6

7 const int8_t Utf8::kTrailBytes[256] = {

8 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

9 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

10 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

11 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

12 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

13 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

14 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

15 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

16 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

17 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

18 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

19 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

20 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

21 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

22 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,

23 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0

24 };

25

26

27 const uint32_t Utf8::kMagicBits[7] = {

28 0, // Padding.

29 0x00000000,

30 0x00003080,

31 0x000E2080,

32 0x03C82080,

33 0xFA082080,

34 0x82082080

35 };

36

37

38 // Minimum values of code points used to check shortest form.

39 const uint32_t Utf8::kOverlongMinimum[7] = {

40 0, // Padding.

41 0x0,

42 0x80,

43 0x800,

44 0x10000,

45 0xFFFFFFFF,

46 0xFFFFFFFF

47 };

48

49 class CodePointIterator {

50 public:

51 explicit CodePointIterator(List<uint16_t> str)

52 : str_(str),

53 ch_(0),

54 index_(-1),

55 end_(str.length()) {

56 }

57

58 int32_t Current() const {

59 return ch_;

60 }

61

62 bool Next() {

63 intptr_t length = Utf16::Length(ch_);

64 if (index_ < (end_ - length)) {

65 index_ += length;

66 ch_ = str_[index_];

67 if (Utf16::IsLeadSurrogate(ch_) && (index_ < (end_ - 1))) {

68 int32_t ch2 = str_[index_ + 1];

69 if (Utf16::IsTrailSurrogate(ch2)) {

70 ch_ = Utf16::Decode(ch_, ch2);

71 }

72 }

73 return true;

74 }

75 index_ = end_;

76 return false;

77 }

78

79 private:

80 List<uint16_t> str_;

81 int32_t ch_;

82 intptr_t index_;

83 intptr_t end_;

84 };

85

86 // Returns the most restricted coding form in which the sequence of utf8

87 // characters in 'utf8_array' can be represented in, and the number of

88 // code units needed in that form.

89 intptr_t Utf8::CodeUnitCount(const char* utf8_array,

90 intptr_t array_len,

91 Type* type) {

92 intptr_t len = 0;

93 Type char_type = kLatin1;

94 for (intptr_t i = 0; i < array_len; i++) {

95 uint8_t code_unit = utf8_array[i];

96 if (!IsTrailByte(code_unit)) {

97 ++len;

98 if (!IsLatin1SequenceStart(code_unit)) { // > U+00FF

99 if (IsSupplementarySequenceStart(code_unit)) { // >= U+10000

100 char_type = kSupplementary;

101 ++len;

102 } else if (char_type == kLatin1) {

103 char_type = kBMP;

104 }

105 }

106 }

107 }

108 *type = char_type;

109 return len;

110 }

111

112 intptr_t Utf8::Length(int32_t ch) {

113 if (ch <= kMaxOneByteChar) {

114 return 1;

115 } else if (ch <= kMaxTwoByteChar) {

116 return 2;

117 } else if (ch <= kMaxThreeByteChar) {

118 return 3;

119 }

120 return 4;

121 }

122

123 intptr_t Utf8::Length(List<uint16_t> str) {

124 intptr_t length = 0;

125 CodePointIterator it(str);

126 while (it.Next()) {

127 int32_t ch = it.Current();

128 length += Utf8::Length(ch);

129 }

130 return length;

131 }

132

133 intptr_t Utf8::Encode(int32_t ch, char* dst) {

134 static const int kMask = ~(1 << 6);

135 if (ch <= kMaxOneByteChar) {

136 dst[0] = ch;

137 return 1;

138 }

139 if (ch <= kMaxTwoByteChar) {

140 dst[0] = 0xC0 \| (ch >> 6);

141 dst[1] = 0x80 \| (ch & kMask);

142 return 2;

143 }

144 if (ch <= kMaxThreeByteChar) {

145 dst[0] = 0xE0 \| (ch >> 12);

146 dst[1] = 0x80 \| ((ch >> 6) & kMask);

147 dst[2] = 0x80 \| (ch & kMask);

148 return 3;

149 }

150 dst[0] = 0xF0 \| (ch >> 18);

151 dst[1] = 0x80 \| ((ch >> 12) & kMask);

152 dst[2] = 0x80 \| ((ch >> 6) & kMask);

153 dst[3] = 0x80 \| (ch & kMask);

154 return 4;

155 }

156

157 intptr_t Utf8::Encode(List<uint16_t> src, char* dst, intptr_t len) {

158 intptr_t pos = 0;

159 CodePointIterator it(src);

160 while (it.Next()) {

161 int32_t ch = it.Current();

162 intptr_t num_bytes = Utf8::Length(ch);

163 if (pos + num_bytes > len) {

164 break;

165 }

166 Utf8::Encode(ch, &dst[pos]);

167 pos += num_bytes;

168 }

169 return pos;

170 }

171

172 intptr_t Utf8::Decode(const char* utf8_array,

173 intptr_t array_len,

174 int32_t* dst) {

175 uint32_t ch = utf8_array[0] & 0xFF;

176 intptr_t i = 1;

177 if (ch >= 0x80) {

178 intptr_t num_trail_bytes = kTrailBytes[ch];

179 bool is_malformed = false;

180 for (; i < num_trail_bytes; ++i) {

181 if (i < array_len) {

182 uint8_t code_unit = utf8_array[i];

183 is_malformed \|= !IsTrailByte(code_unit);

184 ch = (ch << 6) + code_unit;

185 } else {

186 *dst = -1;

187 return 0;

188 }

189 }

190 ch -= kMagicBits[num_trail_bytes];

191 if (!((is_malformed == false) &&

192 (i == num_trail_bytes) &&

193 !Utf::IsOutOfRange(ch) &&

194 !IsNonShortestForm(ch, i) &&

195 !Utf16::IsSurrogate(ch))) {

196 *dst = -1;

197 return 0;

198 }

199 }

200 *dst = ch;

201 return i;

202 }

203

204 bool Utf8::DecodeToUTF16(const char* utf8_array,

205 intptr_t array_len,

206 uint16_t* dst,

207 intptr_t len) {

208 intptr_t i = 0;

209 intptr_t j = 0;

210 intptr_t num_bytes;

211 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {

212 int32_t ch;

213 bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]);

214 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);

215 if (ch == -1) {

216 return false; // Invalid input.

217 }

218 if (is_supplementary) {

219 Utf16::Encode(ch, &dst[j]);

220 j = j + 1;

221 } else {

222 dst[j] = ch;

223 }

224 }

225 if ((i < array_len) && (j == len)) {

226 return false; // Output overflow.

227 }

228 return true; // Success.

229 }

230

231 void Utf16::Encode(int32_t codepoint, uint16_t* dst) {

232 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10));

233 dst[1] = (0xDC00 + (codepoint & 0x3FF));

234 }

OLD	NEW

« no previous file with comments | « tools/servicec/lib/src/resources/cc/unicode.h ('k') | tools/servicec/lib/src/resources/dart/struct.dart » ('j') | no next file with comments »