Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(266)

Side by Side Diff: third_party/libphonenumber/cpp/src/utf/unilib.h

Issue 6930013: Re-committing http://codereview.chromium.org/6803005/ after fixing multi-dll build: (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: Created 9 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 /**
2 * Copyright 2010 Google Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 // Routines to do manipulation of Unicode characters or text
18 //
19 // The StructurallyValid routines accept buffers of arbitrary bytes.
20 // For CoerceToStructurallyValid(), the input buffer and output buffers may
21 // point to exactly the same memory.
22 //
23 // In all other cases, the UTF-8 string must be structurally valid and
24 // have all codepoints in the range U+0000 to U+D7FF or U+E000 to U+10FFFF.
25 // Debug builds take a fatal error for invalid UTF-8 input.
26 // The input and output buffers may not overlap at all.
27 //
28 // The char32 routines are here only for convenience; they convert to UTF-8
29 // internally and use the UTF-8 routines.
30
31 #ifndef UTIL_UTF8_UNILIB_H__
32 #define UTIL_UTF8_UNILIB_H__
33
34 #include <string>
35 #include "base/basictypes.h"
36
37 namespace UniLib {
38
39 // Returns true unless a surrogate code point
40 inline bool IsValidCodepoint(char32 c) {
41 // In the range [0, 0xD800) or [0xE000, 0x10FFFF]
42 return (static_cast<uint32>(c) < 0xD800)
43 || (c >= 0xE000 && c <= 0x10FFFF);
44 }
45
// Lookup table mapping the first byte of a UTF-8 sequence to the length in
// bytes of that sequence. Laid out 16 entries per row, so each row covers
// one value of the byte's high nibble.
// Note that bytes 0x80..0xBF (which IsTrailByte() classifies as trail bytes)
// map to 1, and every byte from 0xF0 through 0xFF maps to 4.
static const unsigned char kUTF8LenTbl[256] = {
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x00..0x0F
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x10..0x1F
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x20..0x2F
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x30..0x3F
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x40..0x4F
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x50..0x5F
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x60..0x6F
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x70..0x7F

  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x80..0x8F
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x90..0x9F
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0xA0..0xAF
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0xB0..0xBF
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // 0xC0..0xCF
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // 0xD0..0xDF
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,  // 0xE0..0xEF
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4   // 0xF0..0xFF
};
58
59 // Return length of a single UTF-8 source character
60 inline int OneCharLen(const char* src) {
61 return kUTF8LenTbl[*reinterpret_cast<const uint8*>(src)];
62 }
63
64 // Return length of a single UTF-8 source character
65 inline int OneCharLen(const uint8* src) {
66 return kUTF8LenTbl[*src];
67 }
68
// Tells whether |x| is a UTF-8 continuation (trail) byte, i.e. has the bit
// pattern 10xx xxxx and therefore lies in [0x80, 0xBF].
inline bool IsTrailByte(char x) {
  // Equivalent to (x & 0xC0) == 0x80, but done with a single comparison:
  // viewed as a signed byte, 0x80..0xBF is exactly -128..-65, which is every
  // value strictly below -0x40 (the signed view of 0xC0).
  const signed char signed_byte = static_cast<signed char>(x);
  return signed_byte < -0x40;
}
75
// Returns the length in bytes of the prefix of src that is all
// interchange valid UTF-8.
//   src         - start of the byte buffer to examine.
//   byte_length - number of bytes available at src.
// Defined out of line (not in this header).
int SpanInterchangeValid(const char* src, int byte_length);

// Convenience overload that examines the whole contents of |src|.
inline int SpanInterchangeValid(const std::string& src) {
  // The pointer overload takes an int length, so the size_t -> int narrowing
  // is spelled out rather than left implicit (implicit narrowing warns on
  // 64-bit builds). NOTE(review): strings longer than INT_MAX bytes are not
  // representable through this interface.
  return SpanInterchangeValid(src.data(), static_cast<int>(src.size()));
}
82
83 // Returns true if the source is all interchange valid UTF-8
84 // "Interchange valid" is a stronger than structurally valid --
85 // no C0 or C1 control codes (other than CR LF HT FF) and no non-characters.
86 inline bool IsInterchangeValid(const char* src, int byte_length) {
87 return (byte_length == SpanInterchangeValid(src, byte_length));
88 }
89 inline bool IsInterchangeValid(const std::string& src) {
90 return IsInterchangeValid(src.data(), src.size());
91 }
92
93 } // namespace UniLib
94
95 #endif // UTIL_UTF8_UNILIB_H__
OLDNEW
« no previous file with comments | « third_party/libphonenumber/cpp/src/utf/unicodetext.cc ('k') | third_party/libphonenumber/cpp/src/utf/unilib.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698