tools/servicec/lib/src/resources/cc/unicode.h - Issue 2035023003: Remove service-compiler related code.

Side by Side Diff: tools/servicec/lib/src/resources/cc/unicode.h

Issue 2035023003: Remove service-compiler related code. (Closed) Base URL: git@github.com:dartino/sdk.git@master

Patch Set: Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.

4

5 #ifndef UNICODE_H_

6 #define UNICODE_H_

7

8 #include "struct.h"

9

// Classification helpers for Unicode code points, independent of any
// particular encoding form.
class Utf {
 public:
  // Largest valid Unicode code point (U+10FFFF).
  static const int32_t kMaxCodePoint = 0x10FFFF;

  // Returns true if code_point lies in [U+0000, U+00FF].
  static bool IsLatin1(int32_t code_point) {
    return (code_point >= 0) && (code_point <= 0xFF);
  }

  // Returns true if code_point lies in [U+0000, U+FFFF].
  static bool IsBmp(int32_t code_point) {
    return (code_point >= 0) && (code_point <= 0xFFFF);
  }

  // Returns true if code_point lies in [U+10000, U+10FFFF].
  static bool IsSupplementary(int32_t code_point) {
    return (code_point > 0xFFFF) && (code_point <= kMaxCodePoint);
  }

  // Returns true if the value is not a valid code point at all: either
  // negative or greater than kMaxCodePoint.
  static bool IsOutOfRange(intptr_t code_point) {
    return (code_point < 0) || (code_point > kMaxCodePoint);
  }
};

31

32 class Utf8 {

33 public:

34 enum Type {

35 kLatin1 = 0, // Latin-1 code point [U+0000, U+00FF].

36 kBMP, // Basic Multilingual Plane code point [U+0000, U+FFFF].

37 kSupplementary, // Supplementary code point [U+010000, U+10FFFF].

38 };

39

40 // Returns the most restricted coding form in which the sequence of utf8

41 // characters in 'utf8_array' can be represented in, and the number of

42 // code units needed in that form.

43 static intptr_t CodeUnitCount(const char* utf8_array,

44 intptr_t array_len,

45 Type* type);

46

47 static intptr_t Length(int32_t ch);

48 static intptr_t Length(List<uint16_t> str);

49

50 static intptr_t Encode(int32_t ch, char* dst);

51 static intptr_t Encode(List<uint16_t> str, char* dst, intptr_t len);

52

53 static intptr_t Decode(const char* utf8_array,

54 intptr_t array_len,

55 int32_t* ch);

56 static bool DecodeToUTF16(const char* utf8_array,

57 intptr_t array_len,

58 uint16_t* dst,

59 intptr_t len);

60

61 static const int32_t kMaxOneByteChar = 0x7F;

62 static const int32_t kMaxTwoByteChar = 0x7FF;

63 static const int32_t kMaxThreeByteChar = 0xFFFF;

64 static const int32_t kMaxFourByteChar = Utf::kMaxCodePoint;

65

66 private:

67 static bool IsTrailByte(uint8_t code_unit) {

68 return (code_unit & 0xC0) == 0x80;

69 }

70

71 static bool IsNonShortestForm(uint32_t code_point, size_t num_code_units) {

72 return code_point < kOverlongMinimum[num_code_units];

73 }

74

75 static bool IsLatin1SequenceStart(uint8_t code_unit) {

76 // Check if utf8 sequence is the start of a codepoint <= U+00FF

77 return (code_unit <= 0xC3);

78 }

79

80 static bool IsSupplementarySequenceStart(uint8_t code_unit) {

81 // Check if utf8 sequence is the start of a codepoint >= U+10000.

82 return (code_unit >= 0xF0);

83 }

84

85 static const int8_t kTrailBytes[];

86 static const uint32_t kMagicBits[];

87 static const uint32_t kOverlongMinimum[];

88 };

89

// UTF-16 helpers: surrogate classification, surrogate-pair decoding and
// iteration over an array of code units.
class Utf16 {
 public:
  // Largest value representable in one UTF-16 code unit.
  static const int32_t kMaxCodeUnit = 0xFFFF;

  // Number of UTF-16 code units needed for the code point: 1 if it does not
  // exceed kMaxCodeUnit, otherwise 2.
  static intptr_t Length(int32_t ch) {
    if (ch > kMaxCodeUnit) {
      return 2;
    }
    return 1;
  }

  // True for any surrogate, lead or trail: [0xD800, 0xDFFF].
  static bool IsSurrogate(int32_t ch) {
    const uint32_t upper_bits = static_cast<uint32_t>(ch) & 0xFFFFF800u;
    return upper_bits == 0xD800u;
  }

  // True for a lead surrogate: [0xD800, 0xDBFF].
  static bool IsLeadSurrogate(int32_t ch) {
    const uint32_t upper_bits = static_cast<uint32_t>(ch) & 0xFFFFFC00u;
    return upper_bits == 0xD800u;
  }

  // True for a trail surrogate: [0xDC00, 0xDFFF].
  static bool IsTrailSurrogate(int32_t ch) {
    const uint32_t upper_bits = static_cast<uint32_t>(ch) & 0xFFFFFC00u;
    return upper_bits == 0xDC00u;
  }

  // Reads the character starting at index *i and moves *i past it. A lead
  // surrogate immediately followed (within len) by a trail surrogate is
  // combined into one supplementary code point and consumes two code units;
  // anything else is returned as-is and consumes one.
  static int32_t Next(const uint16_t* characters, intptr_t* i, intptr_t len) {
    const intptr_t start = *i;
    const int32_t first = characters[start];
    *i = start + 1;
    // Not a lead surrogate, or no following code unit to pair it with.
    if (!IsLeadSurrogate(first) || start >= len - 1) {
      return first;
    }
    const int32_t second = characters[start + 1];
    if (!IsTrailSurrogate(second)) {
      return first;
    }
    *i = start + 2;
    return Decode(first, second);
  }

  // Combines a lead/trail surrogate pair into a supplementary code point.
  static int32_t Decode(int32_t lead, int32_t trail) {
    const int32_t high_part = (lead & 0x3FF) << 10;
    const int32_t low_part = trail & 0x3FF;
    return 0x10000 + high_part + low_part;
  }

  // Encodes a single code point into dst. Declared here, defined elsewhere.
  static void Encode(int32_t codepoint, uint16_t* dst);

 private:
  static const int32_t kLeadSurrogateOffset = (0xD800 - (0x10000 >> 10));

  static const int32_t kSurrogateOffset = (0x10000 - (0xD800 << 10) - 0xDC00);
};

142

143 #endif // UNICODE_H_

OLD	NEW

« no previous file with comments | « tools/servicec/lib/src/resources/cc/struct.cc ('k') | tools/servicec/lib/src/resources/cc/unicode.cc » ('j') | no next file with comments »