| Index: third_party/libphonenumber/cpp/src/utf/unilib.h
 | 
| ===================================================================
 | 
| --- third_party/libphonenumber/cpp/src/utf/unilib.h	(revision 0)
 | 
| +++ third_party/libphonenumber/cpp/src/utf/unilib.h	(revision 0)
 | 
| @@ -0,0 +1,95 @@
 | 
| +/**
 | 
| + * Copyright 2010 Google Inc.
 | 
| + *
 | 
| + * Licensed under the Apache License, Version 2.0 (the "License");
 | 
| + * you may not use this file except in compliance with the License.
 | 
| + * You may obtain a copy of the License at
 | 
| + *
 | 
| + *      http://www.apache.org/licenses/LICENSE-2.0
 | 
| + *
 | 
| + * Unless required by applicable law or agreed to in writing, software
 | 
| + * distributed under the License is distributed on an "AS IS" BASIS,
 | 
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
| + * See the License for the specific language governing permissions and
 | 
| + * limitations under the License.
 | 
| + */
 | 
| +
 | 
| +// Routines to do manipulation of Unicode characters or text
 | 
| +//
 | 
| +// The StructurallyValid routines accept buffers of arbitrary bytes.
 | 
| +// For CoerceToStructurallyValid(), the input buffer and output buffers may
 | 
| +// point to exactly the same memory.
 | 
| +//
 | 
| +// In all other cases, the UTF-8 string must be structurally valid and
 | 
| +// have all codepoints in the range U+0000 to U+D7FF or U+E000 to U+10FFFF.
 | 
| +// Debug builds take a fatal error for invalid UTF-8 input.
 | 
| +// The input and output buffers may not overlap at all.
 | 
| +//
 | 
| +// The char32 routines are here only for convenience; they convert to UTF-8
 | 
| +// internally and use the UTF-8 routines.
 | 
| +
 | 
| +#ifndef UTIL_UTF8_UNILIB_H__
 | 
| +#define UTIL_UTF8_UNILIB_H__
 | 
| +
 | 
| +#include <string>
 | 
| +#include "base/basictypes.h"
 | 
| +
 | 
| +namespace UniLib {
 | 
| +
 | 
| +// Returns true unless c is a surrogate or lies outside [0, 0x10FFFF]
 | 
| +inline bool IsValidCodepoint(char32 c) {
 | 
| +  // In the range [0, 0xD800) or [0xE000, 0x10FFFF]
 | 
| +  return (static_cast<uint32>(c) < 0xD800)
 | 
| +    || (c >= 0xE000 && c <= 0x10FFFF);
 | 
| +}
 | 
| +
 | 
| +// UTF-8 length by first byte: 00-BF -> 1, C0-DF -> 2, E0-EF -> 3, F0-FF -> 4
 | 
| +static const unsigned char kUTF8LenTbl[256] = {
 | 
| +  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
 | 
| +  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
 | 
| +  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
 | 
| +  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
 | 
| +
 | 
| +  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
 | 
| +  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
 | 
| +  2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
 | 
| +  3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4
 | 
| +};
 | 
| +
 | 
| +// Return the UTF-8 length implied by the first byte at src (one byte is read)
 | 
| +inline int OneCharLen(const char* src) {
 | 
| +  return kUTF8LenTbl[*reinterpret_cast<const uint8*>(src)];
 | 
| +}
 | 
| +
 | 
| +// Return the UTF-8 length implied by the first byte at src (one byte is read)
 | 
| +inline int OneCharLen(const uint8* src) {
 | 
| +  return kUTF8LenTbl[*src];
 | 
| +}
 | 
| +
 | 
| +// Return true if this byte is a trailing UTF-8 byte (10xx xxxx)
 | 
| +inline bool IsTrailByte(char x) {
 | 
| +  // Same result as: (x & 0xC0) == 0x80
 | 
| +  // Trail bytes 0x80..0xBF are -128..-65 as signed char, i.e. below -0x40:
 | 
| +  return static_cast<signed char>(x) < -0x40;
 | 
| +}
 | 
| +
 | 
| +// Returns the length in bytes of the prefix of src that is all
 | 
| +// interchange valid UTF-8; the string overload forwards data() and size().
 | 
| +int SpanInterchangeValid(const char* src, int byte_length);
 | 
| +inline int SpanInterchangeValid(const std::string& src) {
 | 
| +  return SpanInterchangeValid(src.data(), src.size());
 | 
| +}
 | 
| +
 | 
| +// Returns true if the source is all interchange valid UTF-8.
 | 
| +// "Interchange valid" is stronger than structurally valid --
 | 
| +// no C0 or C1 control codes (other than CR LF HT FF) and no non-characters.
 | 
| +inline bool IsInterchangeValid(const char* src, int byte_length) {
 | 
| +  return (byte_length == SpanInterchangeValid(src, byte_length));
 | 
| +}
 | 
| +inline bool IsInterchangeValid(const std::string& src) {
 | 
| +  return IsInterchangeValid(src.data(), src.size());
 | 
| +}
 | 
| +
 | 
| +}  // namespace UniLib
 | 
| +
 | 
| +#endif  // UTIL_UTF8_UNILIB_H__
 | 
| 
 | 
| Property changes on: third_party\libphonenumber\cpp\src\utf\unilib.h
 | 
| ___________________________________________________________________
 | 
| Added: svn:eol-style
 | 
|    + LF
 | 
| 
 | 
| 
 |