| Index: third_party/cld/bar/toolbar/cld/i18n/encodings/public/encodings.h
 | 
| ===================================================================
 | 
| --- third_party/cld/bar/toolbar/cld/i18n/encodings/public/encodings.h	(revision 0)
 | 
| +++ third_party/cld/bar/toolbar/cld/i18n/encodings/public/encodings.h	(revision 0)
 | 
| @@ -0,0 +1,304 @@
 | 
| +// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
 | 
| +// Use of this source code is governed by a BSD-style license that can be
 | 
| +// found in the LICENSE file.
 | 
| +
 | 
| +#ifndef I18N_ENCODINGS_PUBLIC_ENCODINGS_H_
 | 
| +#define I18N_ENCODINGS_PUBLIC_ENCODINGS_H_
 | 
| +
 | 
| +// This interface defines the Encoding enum and various functions that
 | 
| +// depend only on Encoding values.
 | 
| +
 | 
| +// A hash-function for Encoding, hash<Encoding>, is defined in
 | 
| +// i18n/encodings/public/encodings-hash.h
 | 
| +
 | 
| +// On some Windows projects, UNICODE may be defined, which would prevent the
 | 
| +// Encoding enum below from compiling. Note that this is a quick fix that does
 | 
| +// not break any existing projects. The UNICODE enum may someday be changed
 | 
| +// to something more specific and non-colliding, but this involves careful
 | 
| +// testing of changes in many other projects.
 | 
| +#undef UNICODE
 | 
| +
 | 
| +// NOTE: The Encoding enum must always start at 0. This assumption has
 | 
| +// been made and used.
 | 
| +
 | 
| +#ifndef SWIG
 | 
| +
 | 
| +#include "third_party/cld/bar/toolbar/cld/i18n/encodings/proto/encodings.pb.h"
 | 
| +
 | 
| +// We must have this for compatibility.
 | 
| +// COMMENTED OUT TO REDUCE DEPENDENCIES ON GOOGLE3 CODE
 | 
| +//using namespace i18n::encodings;
 | 
| +
 | 
| +#else
 | 
| +
 | 
| +// Special proto SWIG workaround header file.
 | 
| +#include "i18n/encodings/internal/encodings_proto_wrapper.h"
 | 
| +
 | 
| +#endif
 | 
| +
 | 
| +const int kNumEncodings = NUM_ENCODINGS;
 | 
| +
 | 
| +// some of the popular encoding aliases
 | 
| +// TODO(jrm) Make these static const Encoding values instead of macros.
 | 
| +#define LATIN1           ISO_8859_1
 | 
| +#define LATIN2           ISO_8859_2
 | 
| +#define LATIN3           ISO_8859_3
 | 
| +#define LATIN4           ISO_8859_4
 | 
| +#define CYRILLIC         ISO_8859_5
 | 
| +#define ARABIC_ENCODING  ISO_8859_6     // avoiding the same name as language
 | 
| +#define GREEK_ENCODING   ISO_8859_7     // avoiding the same name as language
 | 
| +#define HEBREW_ENCODING  ISO_8859_8     // avoiding the same name as language
 | 
| +#define LATIN5           ISO_8859_9
 | 
| +#define LATIN6           ISO_8859_10
 | 
| +#define KOREAN_HANGUL    KOREAN_EUC_KR
 | 
| +
 | 
| +// The default Encoding (LATIN1).
 | 
| +Encoding default_encoding();
 | 
| +
 | 
| +
 | 
| +
 | 
| +// *************************************************************
 | 
| +// Encoding predicates
 | 
| +//   IsValidEncoding
 | 
| +//   IsEncEncCompatible
 | 
| +//   IsSupersetOfAscii7Bit
 | 
| +//   Is8BitEncoding
 | 
| +//   IsCJKEncoding
 | 
| +//   IsHebrewEncoding
 | 
| +//   IsRightToLeftEncoding
 | 
| +//   IsLogicalRightToLeftEncoding
 | 
| +//   IsVisualRightToLeftEncoding
 | 
| +//   IsIso2022Encoding
 | 
| +//   IsIso2022JpOrVariant
 | 
| +//   IsShiftJisOrVariant
 | 
| +//   IsJapaneseCellPhoneCarrierSpecificEncoding
 | 
| +// *************************************************************
 | 
| +
 | 
| +// IsValidEncoding
 | 
| +// ===================================
 | 
| +//
 | 
| +// Function to check if the input encoding enum value is within range.
 | 
| +//
 | 
| +
 | 
| +bool IsValidEncoding(Encoding enc);
 | 
| +
 | 
| +//
 | 
| +// IsEncEncCompatible
 | 
| +// ------------------
 | 
| +//
 | 
| +// This function determines whether or not converting from the
 | 
| +// first encoding to the second requires any changes to the underlying
 | 
| +// text (e.g.  ASCII_7BIT is a subset of UTF8).
 | 
| +//
 | 
| +// TODO(someone more familiar with i18n): the current implementation
 | 
| +// is likely incomplete.  It would be good to consider the full matrix
 | 
| +// of all pairs of encodings and to fish out all compatible pairs.
 | 
| +//
 | 
| +bool IsEncEncCompatible(const Encoding from, const Encoding to);
 | 
| +
 | 
| +// To be a superset of 7-bit Ascii means that bytes 0...127 in the given
 | 
| +// encoding represent the same characters as they do in ISO_8859_1.
 | 
| +
 | 
| +// WARNING: This function does not currently return true for all encodings that
 | 
| +// are supersets of Ascii 7-bit.
 | 
| +bool IsSupersetOfAscii7Bit(Encoding e);
 | 
| +
 | 
| +// To be an 8-bit encoding means that there are at most 256 symbols.
 | 
| +// Each byte determines a new character; there are no multi-byte sequences.
 | 
| +
 | 
| +// WARNING: This function does not currently return true for all encodings that
 | 
| +// are 8-bit encodings.
 | 
| +bool Is8BitEncoding(Encoding e);
 | 
| +
 | 
| +// IsCJKEncoding
 | 
| +// -------------
 | 
| +//
 | 
| +// This function returns true if the encoding is either Chinese
 | 
| +// (simplified or traditional), Japanese, or Korean. Note: UTF8 is not
 | 
| +// considered a CJK encoding.
 | 
| +bool IsCJKEncoding(Encoding e);
 | 
| +
 | 
| +// IsHebrewEncoding
 | 
| +// -------------
 | 
| +//
 | 
| +// This function returns true if the encoding is a Hebrew specific
 | 
| +// encoding (not UTF8, etc).
 | 
| +bool IsHebrewEncoding(Encoding e);
 | 
| +
 | 
| +// IsRightToLeftEncoding
 | 
| +// ---------------------
 | 
| +//
 | 
| +// Returns true if the encoding is a right-to-left encoding.
 | 
| +// See http://wiki/Main/RtlLanguages.
 | 
| +//
 | 
| +// Note that the name of this function is somewhat misleading. There is nothing
 | 
| +// "right to left" about these encodings. They merely contain code points for
 | 
| +// characters in RTL languages such as Hebrew and Arabic. But this is also
 | 
| +// true for UTF-8.
 | 
| +//
 | 
| +// TODO(benjy): Get rid of this function. The only special-case we
 | 
| +// should need to worry about is visual encodings. Anything we
 | 
| +// need to do for all 'RTL' encodings we need to do for UTF-8 as well.
 | 
| +bool IsRightToLeftEncoding(Encoding enc);
 | 
| +
 | 
| +// IsLogicalRightToLeftEncoding
 | 
| +// ----------------------------
 | 
| +//
 | 
| +// Returns true if the encoding is a logical right-to-left encoding.
 | 
| +// Logical right-to-left encodings are those that the browser renders
 | 
| +// right-to-left and applies the BiDi algorithm to. Therefore the characters
 | 
| +// appear in reading order in the file, and indexing, snippet generation etc.
 | 
| +// should all just work with no special processing.
 | 
| +// See http://wiki/Main/RtlLanguages.
 | 
| +//
 | 
| +// TODO(benjy): Get rid of this function. The only special-case we
 | 
| +// should need to worry about is visual encodings.
 | 
| +bool IsLogicalRightToLeftEncoding(Encoding enc);
 | 
| +
 | 
| +// IsVisualRightToLeftEncoding
 | 
| +// ---------------------------
 | 
| +//
 | 
| +// Returns true if the encoding is a visual right-to-left encoding.
 | 
| +// Visual right-to-left encodings are those that the browser renders
 | 
| +// left-to-right and does not apply the BiDi algorithm to. Therefore each
 | 
| +// line appears in reverse order in the file, lines are manually wrapped
 | 
| +// by abusing <br> or <p> tags, etc. Visual RTL encoding is a relic of
 | 
| +// the prehistoric days when browsers couldn't render right-to-left, but
 | 
| +// unfortunately some visual pages persist to this day. These documents require
 | 
| +// special processing so that we don't index or snippet them with each line
 | 
| +// reversed.
 | 
| +// See http://wiki/Main/RtlLanguages.
 | 
| +bool IsVisualRightToLeftEncoding(Encoding enc);
 | 
| +
 | 
| +// IsIso2022Encoding
 | 
| +// -----------------
 | 
| +//
 | 
| +// Returns true if the encoding is a kind of ISO 2022 such as
 | 
| +// ISO-2022-JP.
 | 
| +bool IsIso2022Encoding(Encoding enc);
 | 
| +
 | 
| +// IsIso2022JpOrVariant
 | 
| +// --------------------
 | 
| +//
 | 
| +// Returns true if the encoding is ISO-2022-JP or a variant such as
 | 
| +// KDDI's ISO-2022-JP.
 | 
| +bool IsIso2022JpOrVariant(Encoding enc);
 | 
| +
 | 
| +// IsShiftJisOrVariant
 | 
| +// --------------------
 | 
| +//
 | 
| +// Returns true if the encoding is Shift_JIS or a variant such as
 | 
| +// KDDI's Shift_JIS.
 | 
| +bool IsShiftJisOrVariant(Encoding enc);
 | 
| +
 | 
| +// IsJapaneseCellPhoneCarrierSpecificEncoding
 | 
| +// -----------------------------------------
 | 
| +//
 | 
| +// Returns true if enc is a Japanese cell phone carrier specific encoding
 | 
| +// such as KDDI_SHIFT_JIS.
 | 
| +bool IsJapaneseCellPhoneCarrierSpecificEncoding(Encoding enc);
 | 
| +
 | 
| +
 | 
| +
 | 
| +// *************************************************************
 | 
| +// ENCODING NAMES
 | 
| +//
 | 
| +// This interface defines a standard name for each valid encoding, and
 | 
| +// a standard name for invalid encodings. (Some names use all upper
 | 
| +// case, but others use mixed case.)
 | 
| +//
 | 
| +//   EncodingName() [Encoding to name]
 | 
| +//   MimeEncodingName() [Encoding to name]
 | 
| +//   EncodingFromName() [name to Encoding]
 | 
| +//   EncodingNameAliasToEncoding() [name to Encoding]
 | 
| +//   default_encoding_name()
 | 
| +//   invalid_encoding_name()
 | 
| +// *************************************************************
 | 
| +
 | 
| +// EncodingName
 | 
| +// ------------
 | 
| +//
 | 
| +// Given the encoding, returns its standard name.
 | 
| +// Return invalid_encoding_name() if the encoding is invalid.
 | 
| +//
 | 
| +const char* EncodingName(Encoding enc);
 | 
| +
 | 
| +//
 | 
| +// MimeEncodingName
 | 
| +// ----------------
 | 
| +//
 | 
| +// Return the "preferred MIME name" of an encoding.
 | 
| +//
 | 
| +// This name is suitable for using in HTTP headers, HTML tags,
 | 
| +// and as the "charset" parameter of a MIME Content-Type.
 | 
| +const char* MimeEncodingName(Encoding enc);
 | 
| +
 | 
| +
 | 
| +// The maximum length of an encoding name
 | 
| +const int kMaxEncodingNameSize = 50;
 | 
| +
 | 
| +// The standard name of the default encoding.
 | 
| +const char* default_encoding_name();
 | 
| +
 | 
| +// The name used for an invalid encoding.
 | 
| +const char* invalid_encoding_name();
 | 
| +
 | 
| +// EncodingFromName
 | 
| +// ----------------
 | 
| +//
 | 
| +// If enc_name matches the standard name of an Encoding, using a
 | 
| +// case-insensitive comparison, set *encoding to that Encoding and
 | 
| +// return true.  Otherwise set *encoding to UNKNOWN_ENCODING and
 | 
| +// return false.
 | 
| +//
 | 
| +// REQUIRES: encoding must not be NULL.
 | 
| +//
 | 
| +bool EncodingFromName(const char* enc_name, Encoding *encoding);
 | 
| +
 | 
| +//
 | 
| +// EncodingNameAliasToEncoding
 | 
| +// ---------------------------
 | 
| +//
 | 
| +// If enc_name matches the standard name or an alias of an Encoding,
 | 
| +// using a case-insensitive comparison, return that
 | 
| +// Encoding. Otherwise, return UNKNOWN_ENCODING.
 | 
| +//
 | 
| +// Aliases include most mime-encoding names (e.g., "ISO-8859-7" for
 | 
| +// GREEK_ENCODING), alternate names (e.g., "cyrillic" for ISO_8859_5) and
 | 
| +// common variations with hyphens and underscores (e.g., "koi8-u" and
 | 
| +// "koi8u" for RUSSIAN_KOI8_R).
 | 
| +
 | 
| +Encoding EncodingNameAliasToEncoding(const char *enc_name);
 | 
| +
 | 
| +
 | 
| +// *************************************************************
 | 
| +// Miscellany
 | 
| +// *************************************************************
 | 
| +
 | 
| +// PreferredWebOutputEncoding
 | 
| +// --------------------------
 | 
| +//
 | 
| +// Some multi-byte encodings use byte values that coincide with the
 | 
| +// ASCII codes for HTML syntax characters <>"&' and browsers like MSIE
 | 
| +// can misinterpret these, as indicated in an external XSS report from
 | 
| +// 2007-02-15. Here, we map these dangerous encodings to safer ones. We
 | 
| +// also use UTF8 instead of encodings that we don't support in our
 | 
| +// output, and we generally try to be conservative in what we send out.
 | 
| +// Where the client asks for single- or double-byte encodings that are
 | 
| +// not as common, we substitute a more common single- or double-byte
 | 
| +// encoding, if there is one, thereby preserving the client's intent
 | 
| +// to use less space than UTF-8. This also means that characters
 | 
| +// outside the destination set will be converted to HTML NCRs (&#NNN;)
 | 
| +// if requested.
 | 
| +Encoding PreferredWebOutputEncoding(Encoding enc);
 | 
| +
 | 
| +
 | 
| +// InitEncodings
 | 
| +// -------------
 | 
| +//
 | 
| +// Ensures the encodings module has been initialized.  Normally this happens
 | 
| +// during InitGoogle, but this allows access for scripts that don't
 | 
| +// support InitGoogle.
 | 
| +void InitEncodings();
 | 
| +
 | 
| +#endif  // I18N_ENCODINGS_PUBLIC_ENCODINGS_H_
 | 
| 
 | 
| Property changes on: third_party\cld\bar\toolbar\cld\i18n\encodings\public\encodings.h
 | 
| ___________________________________________________________________
 | 
| Added: svn:eol-style
 | 
|    + LF
 | 
| 
 | 
| 
 |