| Index: third_party/cld/bar/toolbar/cld/i18n/languages/public/languages.h
 | 
| ===================================================================
 | 
| --- third_party/cld/bar/toolbar/cld/i18n/languages/public/languages.h	(revision 0)
 | 
| +++ third_party/cld/bar/toolbar/cld/i18n/languages/public/languages.h	(revision 0)
 | 
| @@ -0,0 +1,373 @@
 | 
| +// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
 | 
| +// Use of this source code is governed by a BSD-style license that can be
 | 
| +// found in the LICENSE file.
 | 
| +
 | 
| +#ifndef I18N_LANGUAGES_PUBLIC_LANGUAGES_H_
 | 
| +#define I18N_LANGUAGES_PUBLIC_LANGUAGES_H_
 | 
| +
 | 
| +// This interface defines the Language enum and functions that depend
 | 
| +// only on Language values.
 | 
| +
 | 
| +#ifndef SWIG
 | 
| +// The Language enum is defined in languages.proto, which also
 | 
| +// describes how to add languages.
 | 
| +#include "third_party/cld/bar/toolbar/cld/i18n/languages/proto/languages.pb.h"
 | 
| +
 | 
| +// We need this for compatibility:
 | 
| +// - The Language enum in the default namespace.
 | 
| +
 | 
| +#else
 | 
| +// And we must have a swig-compatible enum.
 | 
| +// This one is a simple cleaned up version of languages.proto, making the enum
 | 
| +// compatible with C++.
 | 
| +#include "i18n/languages/internal/languages_proto_wrapper.h"
 | 
| +
 | 
| +#endif
 | 
| +
 | 
| +const int kNumLanguages = NUM_LANGUAGES;
 | 
| +
 | 
| +// Return the default language (ENGLISH).
 | 
| +Language default_language();
 | 
| +
 | 
| +
 | 
| +// *******************************************
 | 
| +// Language predicates
 | 
| +//   IsValidLanguage()
 | 
| +//   IS_LANGUAGE_UNKNOWN()
 | 
| +//   IsCJKLanguage()
 | 
| +//   IsChineseLanguage()
 | 
| +//   IsNorwegianLanguage()
 | 
| +//   IsPortugueseLanguage()
 | 
| +//   IsRightToLeftLanguage()
 | 
| +//   IsMaybeRightToLeftLanguage()
 | 
| +//   IsSameLanguage()
 | 
| +//   IsScriptRequiringLongerSnippets()
 | 
| +// *******************************************
 | 
| +
 | 
| +// IsValidLanguage
 | 
| +// ===============
 | 
| +//
 | 
| +// Range check for the Language enum: returns true when lang lies in
 | 
| +// [0, kNumLanguages). When this returns true, it is safe to call
 | 
| +// static_cast<Language>(lang).
 | 
| +//
 | 
| +inline bool IsValidLanguage(int lang) {
 | 
| +  return !((lang < 0) || (lang >= kNumLanguages));
 | 
| +}
 | 
| +
 | 
| +// Reports whether lang is one of the two "unknown" values. (The name is
 | 
| +// in all caps because this function used to be a macro.)
 | 
| +//
 | 
| +inline bool IS_LANGUAGE_UNKNOWN(Language lang) {
 | 
| +  return (UNKNOWN_LANGUAGE == lang) || (TG_UNKNOWN_LANGUAGE == lang);
 | 
| +}
 | 
| +
 | 
| +// IsCJKLanguage
 | 
| +// -------------
 | 
| +//
 | 
| +// This function returns true if the language is either Chinese
 | 
| +// (simplified or traditional), Japanese, or Korean.
 | 
| +bool IsCJKLanguage(Language lang);
 | 
| +
 | 
| +// IsChineseLanguage
 | 
| +// -----------------
 | 
| +//
 | 
| +// This function returns true if the language is either Chinese
 | 
| +// (simplified or traditional)
 | 
| +bool IsChineseLanguage(Language lang);
 | 
| +
 | 
| +// IsNorwegianLanguage
 | 
| +// --------------------
 | 
| +//
 | 
| +// This function returns true if the language is any of the Norwegian
 | 
| +// languages (regular or Nynorsk).
 | 
| +bool IsNorwegianLanguage(Language lang);
 | 
| +
 | 
| +// IsPortugueseLanguage
 | 
| +// --------------------
 | 
| +//
 | 
| +// This function returns true if the language is any of the Portuguese
 | 
| +// languages (regular, Portugal or Brazil)
 | 
| +bool IsPortugueseLanguage(Language lang);
 | 
| +
 | 
| +// IsSameLanguage
 | 
| +// --------------
 | 
| +//
 | 
| +// WARNING: This function provides only a simple test on the values of
 | 
| +// the two Language arguments. It returns false if either language is
 | 
| +// invalid. It returns true if the language arguments are equal, or
 | 
| +// if they are both Chinese languages, both Norwegian languages, or
 | 
| +// both Portuguese languages, as defined by IsChineseLanguage,
 | 
| +// IsNorwegianLanguage, and IsPortugueseLanguage. Otherwise it returns
 | 
| +// false.
 | 
| +bool IsSameLanguage(Language lang1, Language lang2);
 | 
| +
 | 
| +
 | 
| +// IsRightToLeftLanguage
 | 
| +// ---------------------
 | 
| +//
 | 
| +// This function returns true if the language is only written right-to-left
 | 
| +// (E.g., Hebrew, Arabic, Persian etc.)
 | 
| +//
 | 
| +// IMPORTANT NOTE: Technically we're talking about scripts, not languages.
 | 
| +// There are languages that can be written in more than one script.
 | 
| +// Examples:
 | 
| +//   - Kurdish and Azeri ('AZERBAIJANI') can be written left-to-right in
 | 
| +//     Latin or Cyrillic script, and right-to-left in Arabic script.
 | 
| +//   - Sindhi and Punjabi are written in different scripts, depending on
 | 
| +//     region and dialect.
 | 
| +//   - Turkmen used an Arabic script historically, but not any more.
 | 
| +//   - Pashto and Uyghur can use Arabic script, but use a Roman script
 | 
| +//     on the Internet.
 | 
| +//   - Kashmiri and Urdu are written either with Arabic or Devanagari script.
 | 
| +//
 | 
| +// This function only returns true for languages that are always, unequivocally
 | 
| +// written in right-to-left script.
 | 
| +//
 | 
| +// TODO : If we want to do anything special with multi-script languages
 | 
| +// we should create new 'languages' for each language+script, as we do for
 | 
| +// traditional vs. simplified Chinese. However most such languages are rare in
 | 
| +// use and even rarer on the web, so this is unlikely to be something we'll
 | 
| +// be concerned with for a while.
 | 
| +bool IsRightToLeftLanguage(Language lang);
 | 
| +
 | 
| +// IsMaybeRightToLeftLanguage
 | 
| +// --------------------------
 | 
| +//
 | 
| +// This function returns true if the language may appear on the web in a
 | 
| +// right-to-left script (E.g., Hebrew, Arabic, Persian, Urdu, Kurdish, etc.)
 | 
| +//
 | 
| +// NOTE: See important notes under IsRightToLeftLanguage(...).
 | 
| +//
 | 
| +// This function returns true for languages that *may* appear on the web in a
 | 
| +// right-to-left script, even if they may also appear in a left-to-right
 | 
| +// script.
 | 
| +//
 | 
| +// This function should typically be used in cases where doing some work on
 | 
| +// left-to-right text would be OK (usually a no-op), and this function is used
 | 
| +// just to cut down on unnecessary work on regular, LTR text.
 | 
| +bool IsMaybeRightToLeftLanguage(Language lang);
 | 
| +
 | 
| +// IsScriptRequiringLongerSnippets
 | 
| +// --------------------
 | 
| +//
 | 
| +// This function returns true if the script characteristics require longer
 | 
| +// snippet length (Devanagari, Bengali, Gurmukhi,
 | 
| +// Gujarati, Oriya, Tamil, Telugu, Kannada, Malayalam).
 | 
| +// COMMENTED OUT TO REDUCE DEPENDENCIES ON GOOGLE3 CODE
 | 
| +// bool IsScriptRequiringLongerSnippets(UnicodeScript script);
 | 
| +
 | 
| +
 | 
| +// *******************************************
 | 
| +// LANGUAGE NAMES
 | 
| +//
 | 
| +// This interface defines a standard name for each valid Language,
 | 
| +// and a standard name for invalid languages. Some language names use all
 | 
| +// uppercase letters, but others use mixed case.
 | 
| +//   LanguageName() [Language to name]
 | 
| +//   LanguageEnumName() [language to enum name]
 | 
| +//   LanguageFromName() [name to Language]
 | 
| +//   default_language_name()
 | 
| +//   invalid_language_name()
 | 
| +// *******************************************
 | 
| +
 | 
| +// Given a Language, returns its standard name.
 | 
| +// Return invalid_language_name() if the language is invalid.
 | 
| +const char* LanguageName(Language lang);
 | 
| +
 | 
| +// Given a Language, return the name of the enum constant for that
 | 
| +// language. In all but a few cases, this is the same as its standard
 | 
| +// name. For example, LanguageName(CHINESE) returns "Chinese", but
 | 
| +// LanguageEnumName(CHINESE) returns "CHINESE". This is intended for
 | 
| +// code that is generating C++ code, where the enum constant is more
 | 
| +// useful than its integer value.  Return "NUM_LANGUAGES" if
 | 
| +// the language is invalid.
 | 
| +const char* LanguageEnumName(Language lang);
 | 
| +
 | 
| +// The maximum length of a standard language name.
 | 
| +const int kMaxLanguageNameSize = 50;
 | 
| +
 | 
| +// The standard name for the default language.
 | 
| +const char* default_language_name();
 | 
| +
 | 
| +// The standard name for all invalid languages.
 | 
| +const char* invalid_language_name();
 | 
| +
 | 
| +// If lang_name matches the standard name of a Language, using a
 | 
| +// case-insensitive comparison, set *language to that Language and
 | 
| +// return true.
 | 
| +// Otherwise, set *language to UNKNOWN_LANGUAGE and return false.
 | 
| +//
 | 
| +// For backwards compatibility, "HATIAN_CREOLE" is allowed as a name
 | 
| +// for HAITIAN_CREOLE, and "QUECHAU" is allowed as a name for QUECHUA.
 | 
| +// For compatibility with LanguageEnumName, "UNKNOWN_LANGUAGE" is allowed
 | 
| +// as a name for UNKNOWN_LANGUAGE (the return value is true in this case,
 | 
| +// as it is for "Unknown"), and "CHINESE_T" is allowed as a name for
 | 
| +// CHINESE_T (i.e., a synonym for "ChineseT").
 | 
| +//
 | 
| +// REQUIRES: language must not be NULL.
 | 
| +//
 | 
| +bool LanguageFromName(const char* lang_name, Language *language);
 | 
| +
 | 
| +
 | 
| +
 | 
| +// *******************************************
 | 
| +// LANGUAGE CODES
 | 
| +//
 | 
| +// This interface defines a standard code for each valid language, and
 | 
| +// a standard code for invalid languages. These are derived from ISO codes,
 | 
| +// with some Google additions.
 | 
| +//   LanguageCode()
 | 
| +//   default_language_code()
 | 
| +//   invalid_language_code()
 | 
| +//   LanguageCodeWithDialects()
 | 
| +//   LanguageCodeISO639_1()
 | 
| +//   LanguageCodeISO639_2()
 | 
| +// *******************************************
 | 
| +
 | 
| +// Given a Language, return its standard code. There are Google-specific codes:
 | 
| +//     For CHINESE_T, return "zh-TW".
 | 
| +//     For TG_UNKNOWN_LANGUAGE, return "ut".
 | 
| +//     For UNKNOWN_LANGUAGE, return "un".
 | 
| +//     For PORTUGUESE_P, return "pt-PT".
 | 
| +//     For PORTUGUESE_B, return "pt-BR".
 | 
| +//     For LIMBU, return "sit-NP".
 | 
| +//     For CHEROKEE, return "chr".
 | 
| +//     For SYRIAC, return "syr".
 | 
| +// Otherwise return the ISO 639-1 two-letter language code for lang.
 | 
| +// If lang is invalid, return invalid_language_code().
 | 
| +//
 | 
| +// NOTE: See the note below about the codes for Chinese languages.
 | 
| +//
 | 
| +const char* LanguageCode(Language lang);
 | 
| +
 | 
| +// The maximum length of a language code.
 | 
| +const int kMaxLanguageCodeSize = 50;
 | 
| +
 | 
| +// The standard code for the default language.
 | 
| +const char* default_language_code();
 | 
| +
 | 
| +// The standard code for all invalid languages.
 | 
| +const char* invalid_language_code();
 | 
| +
 | 
| +
 | 
| +// --------------------------------------------
 | 
| +// NOTE: CHINESE LANGUAGE CODES
 | 
| +//
 | 
| +// There are three functions that return codes for Chinese languages.
 | 
| +// LanguageCode(lang) and LanguageCodeWithDialects(lang) are defined here.
 | 
| +// LanguageCode(lang, encoding) is defined in i18n/encodings.lang_enc.h.
 | 
| +// The following list shows the different results.
 | 
| +//
 | 
| +// LanguageCode(CHINESE) returns "zh"
 | 
| +// LanguageCode(CHINESE_T) returns "zh-TW".
 | 
| +//
 | 
| +// LanguageCodeWithDialects(CHINESE) returns "zh-CN".
 | 
| +// LanguageCodeWithDialects(CHINESE_T) returns "zh-TW".
 | 
| +//
 | 
| +// LanguageCode(CHINESE_T, <any encoding>) returns "zh-TW".
 | 
| +// LanguageCode(CHINESE, CHINESE_BIG5) returns "zh-TW".
 | 
| +// LanguageCode(CHINESE, <any other encoding>) returns "zh-CN".
 | 
| +//
 | 
| +// --------------------------------------------
 | 
| +
 | 
| +// LanguageCodeWithDialects
 | 
| +// ------------------------
 | 
| +//
 | 
| +// If lang is CHINESE, return "zh-CN". Otherwise return LanguageCode(lang).
 | 
| +const char* LanguageCodeWithDialects(Language lang);
 | 
| +
 | 
| +// LanguageCodeISO639_1
 | 
| +// --------------------
 | 
| +//
 | 
| +// Return the ISO 639-1 two-letter language code for lang.
 | 
| +// Return invalid_language_code() if lang is invalid or does not have
 | 
| +// an ISO 639-1 two-letter language code.
 | 
| +const char* LanguageCodeISO639_1(Language lang);
 | 
| +
 | 
| +// LanguageCodeISO639_2
 | 
| +// --------------------
 | 
| +//
 | 
| +// Return the ISO 639-2 three-letter language code for lang.
 | 
| +// Return invalid_language_code() if lang is invalid or does not have
 | 
| +// an ISO 639-2 three-letter language code.
 | 
| +const char* LanguageCodeISO639_2(Language lang);
 | 
| +
 | 
| +// LanguageFromCode
 | 
| +// ----------------
 | 
| +//
 | 
| +// If lang_code matches the code for a Language, using a case-insensitive
 | 
| +// comparison, set *language to that Language and return true.
 | 
| +// Otherwise, set *language to UNKNOWN_LANGUAGE and return false.
 | 
| +//
 | 
| +// lang_code can be an ISO 639-1 (two-letter) code, an ISO 639-2
 | 
| +// (three-letter) code, or a Google-specific code (see LanguageCode).
 | 
| +//
 | 
| +// Certain language-code aliases are also allowed:
 | 
| +//   For "zh-cn" and "zh_cn", set *language to CHINESE.
 | 
| +//   For "zh-tw" and "zh_tw", set *language to CHINESE_T.
 | 
| +//   For "he", set *language to HEBREW.
 | 
| +//   For "in", set *language to INDONESIAN.
 | 
| +//   For "ji", set *language to YIDDISH.
 | 
| +//   For "fil", set *language to TAGALOG.
 | 
| +//
 | 
| +// REQUIRES: 'language' must not be NULL.
 | 
| +bool LanguageFromCode(const char* lang_code, Language *language);
 | 
| +
 | 
| +
 | 
| +// LanguageFromCodeOrName
 | 
| +// ----------------------
 | 
| +//
 | 
| +// If lang_code_or_name is a language code or a language name,
 | 
| +// set *language to the corresponding Language and return true.
 | 
| +// Otherwise set *language to UNKNOWN_LANGUAGE and return false.
 | 
| +//
 | 
| +bool LanguageFromCodeOrName(const char* lang_code_or_name,
 | 
| +                            Language* language);
 | 
| +
 | 
| +// LanguageNameFromCode
 | 
| +// --------------------
 | 
| +//
 | 
| +// If language_code is the code for a Language (see LanguageFromCode),
 | 
| +// return the standard name of that language (see LanguageName).
 | 
| +// Otherwise return invalid_language_name().
 | 
| +//
 | 
| +const char* LanguageNameFromCode(const char* language_code);
 | 
| +
 | 
| +
 | 
| +// Miscellany
 | 
| +
 | 
| +// LanguageCodeToUnderscoreForm
 | 
| +// ----------------------------
 | 
| +//
 | 
| +// Given a language code, convert the dash "-" to underscore "_".
 | 
| +// This is because some modules of google3 use the underscore form of
 | 
| +// ISO 639 codes.
 | 
| +//
 | 
| +// Specifically, if result_length <= strlen(lang_code), set result[0]
 | 
| +// to '\0' and return false. Otherwise, copy lang_code to result,
 | 
| +// converting every dash to an underscore, converting every character
 | 
| +// before the first dash or underscore to lower case, and converting
 | 
| +// every character after the first dash or underscore to upper
 | 
| +// case. If there is no dash or underscore, convert the entire string
 | 
| +// to lower case.
 | 
| +//
 | 
| +// REQUIRES: 'lang_code' must not be NULL. 'result' must not be NULL.
 | 
| +
 | 
| +bool LanguageCodeToUnderscoreForm(const char* lang_code,
 | 
| +                                  char* result,
 | 
| +                                  int result_length);
 | 
| +
 | 
| +//
 | 
| +// AlwaysPutInExpectedRestrict
 | 
| +// ---------------------------
 | 
| +//
 | 
| +// For Web pages in certain top-level domains, Web Search always
 | 
| +// applies a "country restrict". If 'tld' matches one of those, using
 | 
| +// a case-SENSITIVE comparison, set *expected_language to the Language
 | 
| +// most commonly found in that top-level domain and return true.
 | 
| +// Otherwise, set *expected_language to UNKNOWN_LANGUAGE and return false.
 | 
| +bool AlwaysPutInExpectedRestrict(const char *tld, Language *expected_language);
 | 
| +
 | 
| +
 | 
| +#endif  // I18N_LANGUAGES_PUBLIC_LANGUAGES_H_
 | 
| 
 | 
| Property changes on: third_party\cld\bar\toolbar\cld\i18n\languages\public\languages.h
 | 
| ___________________________________________________________________
 | 
| Added: svn:eol-style
 | 
|    + LF
 | 
| 
 | 
| 
 |