third_party/libphonenumber/cpp/src/utf/unilib.h - Issue 6920006: Revert 84000 - Autofill phone number enhancements and integration of Phone Number Util Library: p...

Side by Side Diff: third_party/libphonenumber/cpp/src/utf/unilib.h

Issue 6920006: Revert 84000 - Autofill phone number enhancements and integration of Phone Number Util Library: p... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Created 9 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 /**

2 * Copyright 2010 Google Inc.

3 *

4 * Licensed under the Apache License, Version 2.0 (the "License");

5 * you may not use this file except in compliance with the License.

6 * You may obtain a copy of the License at

7 *

8 * http://www.apache.org/licenses/LICENSE-2.0

9 *

10 * Unless required by applicable law or agreed to in writing, software

11 * distributed under the License is distributed on an "AS IS" BASIS,

12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

13 * See the License for the specific language governing permissions and

14 * limitations under the License.

15 */

16

17 // Routines to do manipulation of Unicode characters or text

18 //

19 // The StructurallyValid routines accept buffers of arbitrary bytes.

20 // For CoerceToStructurallyValid(), the input buffer and output buffers may

21 // point to exactly the same memory.

22 //

23 // In all other cases, the UTF-8 string must be structurally valid and

24 // have all codepoints in the range U+0000 to U+D7FF or U+E000 to U+10FFFF.

25 // Debug builds take a fatal error for invalid UTF-8 input.

26 // The input and output buffers may not overlap at all.

27 //

28 // The char32 routines are here only for convenience; they convert to UTF-8

29 // internally and use the UTF-8 routines.

30

31 #ifndef UTIL_UTF8_UNILIB_H__

32 #define UTIL_UTF8_UNILIB_H__

33

34 #include <string>

35 #include "base/basictypes.h"

36

37 namespace UniLib {

38

39 // Returns true unless a surrogate code point

40 inline bool IsValidCodepoint(char32 c) {

41 // In the range [0, 0xD800) or [0xE000, 0x10FFFF]

42 return (static_cast<uint32>(c) < 0xD800)

43 \|\| (c >= 0xE000 && c <= 0x10FFFF);

44 }

45

// Length in bytes of a UTF-8 character, indexed by the value of its first
// byte.  Sixteen entries per row; every entry is at least 1.
static const unsigned char kUTF8LenTbl[256] = {
  // 0x00..0x7F -> 1
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,

  // 0x80..0xBF (the 10xx xxxx trail-byte pattern) -> 1
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,

  // 0xC0..0xDF -> 2
  2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
  2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,

  // 0xE0..0xEF -> 3
  3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,

  // 0xF0..0xFF -> 4
  4,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4
};

58

59 // Return length of a single UTF-8 source character

60 inline int OneCharLen(const char* src) {

61 return kUTF8LenTbl[reinterpret_cast<const uint8>(src)];

62 }

63

64 // Return length of a single UTF-8 source character

65 inline int OneCharLen(const uint8* src) {

66 return kUTF8LenTbl[*src];

67 }

68

// Reports whether x is a trailing UTF-8 byte, i.e. has the bit pattern
// 10xx xxxx (0x80..0xBF).
inline bool IsTrailByte(char x) {
  // Same result as (x & 0xC0) == 0x80.  Viewed as a signed 8-bit value the
  // range [0x80, 0xBF] is [-0x80, -0x41], so one comparison is enough.
  const signed char as_signed = static_cast<signed char>(x);
  return as_signed <= -0x41;
}

75

// Measures the longest prefix of src that is entirely interchange valid
// UTF-8 and returns that prefix's length in bytes.  Only declared here; the
// definition is not part of this header.
int SpanInterchangeValid(const char* src, int byte_length);

// std::string convenience overload: forwards the string's bytes and size to
// the pointer/length version above.
inline int SpanInterchangeValid(const std::string& src) {
  const char* const bytes = src.data();
  const int byte_length = src.size();
  return SpanInterchangeValid(bytes, byte_length);
}

82

83 // Returns true if the source is all interchange valid UTF-8

84 // "Interchange valid" is a stronger than structurally valid --

85 // no C0 or C1 control codes (other than CR LF HT FF) and no non-characters.

86 inline bool IsInterchangeValid(const char* src, int byte_length) {

87 return (byte_length == SpanInterchangeValid(src, byte_length));

88 }

89 inline bool IsInterchangeValid(const std::string& src) {

90 return IsInterchangeValid(src.data(), src.size());

91 }

92

93 } // namespace UniLib

94

95 #endif // UTIL_UTF8_UNILIB_H__

OLD	NEW

« no previous file with comments | « third_party/libphonenumber/cpp/src/utf/unicodetext.cc ('k') | third_party/libphonenumber/cpp/src/utf/unilib.cc » ('j') | no next file with comments »