| Index: third_party/cld/bar/toolbar/cld/i18n/languages/internal/languages.cc
 | 
| ===================================================================
 | 
| --- third_party/cld/bar/toolbar/cld/i18n/languages/internal/languages.cc	(revision 0)
 | 
| +++ third_party/cld/bar/toolbar/cld/i18n/languages/internal/languages.cc	(revision 0)
 | 
| @@ -0,0 +1,331 @@
 | 
| +// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
 | 
| +// Use of this source code is governed by a BSD-style license that can be
 | 
| +// found in the LICENSE file.
 | 
| +
 | 
| +#include "base/string_util.h"
 | 
| +#include "third_party/cld/bar/toolbar/cld/i18n/languages/public/languages.h"
 | 
| +#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_basictypes.h"
 | 
| +
 | 
| +
 | 
| +// Returns the default language, which is ENGLISH.
 | 
| +Language default_language() {
 | 
| +  return ENGLISH;
 | 
| +}
 | 
| +
 | 
| +
 | 
| +// Language names and codes
 | 
| +
 | 
| +// One row of the language table: a language's name together with the
 | 
| +// codes that identify it.  A code the language does not have is NULL.
 | 
| +struct LanguageInfo {
 | 
| +  const char* language_name_;
 | 
| +  const char* language_code_639_1_;  // ISO-639-1 code
 | 
| +  const char* language_code_639_2_;  // ISO-639-2 code
 | 
| +  const char* language_code_other_;  // nonstandard code
 | 
| +};
 | 
| +
 | 
| +static const LanguageInfo kLanguageInfoTable[] = {  // Indexed by Language value.
 | 
| +  { "ENGLISH",             "en", "eng", NULL},
 | 
| +  { "DANISH",              "da", "dan", NULL},
 | 
| +  { "DUTCH",               "nl", "dut", NULL},
 | 
| +  { "FINNISH",             "fi", "fin", NULL},
 | 
| +  { "FRENCH",              "fr", "fre", NULL},
 | 
| +  { "GERMAN",              "de", "ger", NULL},
 | 
| +  // NB: "iw" is used for Hebrew, not the newer "he" (accepted as a synonym).
 | 
| +  { "HEBREW",              "iw", "heb", NULL},
 | 
| +  { "ITALIAN",             "it", "ita", NULL},
 | 
| +  { "Japanese",            "ja", "jpn", NULL},
 | 
| +  { "Korean",              "ko", "kor", NULL},
 | 
| +  { "NORWEGIAN",           "no", "nor", NULL},
 | 
| +  { "POLISH",              "pl", "pol", NULL},
 | 
| +  { "PORTUGUESE",          "pt", "por", NULL},
 | 
| +  { "RUSSIAN",             "ru", "rus", NULL},
 | 
| +  { "SPANISH",             "es", "spa", NULL},
 | 
| +  { "SWEDISH",             "sv", "swe", NULL},
 | 
| +  { "Chinese",             "zh", "chi", "zh-CN"},
 | 
| +  { "CZECH",               "cs", "cze", NULL},
 | 
| +  { "GREEK",               "el", "gre", NULL},
 | 
| +  { "ICELANDIC",           "is", "ice", NULL},
 | 
| +  { "LATVIAN",             "lv", "lav", NULL},
 | 
| +  { "LITHUANIAN",          "lt", "lit", NULL},
 | 
| +  { "ROMANIAN",            "ro", "rum", NULL},
 | 
| +  { "HUNGARIAN",           "hu", "hun", NULL},
 | 
| +  { "ESTONIAN",            "et", "est", NULL},
 | 
| +  // TODO: Although Teragram has two output names "TG_UNKNOWN_LANGUAGE"
 | 
| +  // and "Unknown", they are essentially the same. Need to unify them.
 | 
| +  // "un" and "ut" are invented by us, not from ISO-639.
 | 
| +  // Having no ISO-639 code, both rows carry only a nonstandard code.
 | 
| +  { "TG_UNKNOWN_LANGUAGE", NULL, NULL, "ut"},
 | 
| +  { "Unknown",             NULL, NULL, "un"},
 | 
| +  { "BULGARIAN",           "bg", "bul", NULL},
 | 
| +  { "CROATIAN",            "hr", "scr", NULL},
 | 
| +  { "SERBIAN",             "sr", "scc", NULL},
 | 
| +  { "IRISH",               "ga", "gle", NULL},
 | 
| +  { "GALICIAN",            "gl", "glg", NULL},
 | 
| +  { "TAGALOG",             "tl", "tgl", NULL},
 | 
| +  { "TURKISH",             "tr", "tur", NULL},
 | 
| +  { "UKRAINIAN",           "uk", "ukr", NULL},
 | 
| +  { "HINDI",               "hi", "hin", NULL},
 | 
| +  { "MACEDONIAN",          "mk", "mac", NULL},
 | 
| +  { "BENGALI",             "bn", "ben", NULL},
 | 
| +  { "INDONESIAN",          "id", "ind", NULL},
 | 
| +  { "LATIN",               "la", "lat", NULL},
 | 
| +  { "MALAY",               "ms", "may", NULL},
 | 
| +  { "MALAYALAM",           "ml", "mal", NULL},
 | 
| +  { "WELSH",               "cy", "wel", NULL},
 | 
| +  { "NEPALI",              "ne", "nep", NULL},
 | 
| +  { "TELUGU",              "te", "tel", NULL},
 | 
| +  { "ALBANIAN",            "sq", "alb", NULL},
 | 
| +  { "TAMIL",               "ta", "tam", NULL},
 | 
| +  { "BELARUSIAN",          "be", "bel", NULL},
 | 
| +  { "JAVANESE",            "jw", "jav", NULL},
 | 
| +  { "OCCITAN",             "oc", "oci", NULL},
 | 
| +  { "URDU",                "ur", "urd", NULL},
 | 
| +  { "BIHARI",              "bh", "bih", NULL},
 | 
| +  { "GUJARATI",            "gu", "guj", NULL},
 | 
| +  { "THAI",                "th", "tha", NULL},
 | 
| +  { "ARABIC",              "ar", "ara", NULL},
 | 
| +  { "CATALAN",             "ca", "cat", NULL},
 | 
| +  { "ESPERANTO",           "eo", "epo", NULL},
 | 
| +  { "BASQUE",              "eu", "baq", NULL},
 | 
| +  { "INTERLINGUA",         "ia", "ina", NULL},
 | 
| +  { "KANNADA",             "kn", "kan", NULL},
 | 
| +  { "PUNJABI",             "pa", "pan", NULL},
 | 
| +  { "SCOTS_GAELIC",        "gd", "gla", NULL},
 | 
| +  { "SWAHILI",             "sw", "swa", NULL},
 | 
| +  { "SLOVENIAN",           "sl", "slv", NULL},
 | 
| +  { "MARATHI",             "mr", "mar", NULL},
 | 
| +  { "MALTESE",             "mt", "mlt", NULL},
 | 
| +  { "VIETNAMESE",          "vi", "vie", NULL},
 | 
| +  { "FRISIAN",             "fy", "fry", NULL},
 | 
| +  { "SLOVAK",              "sk", "slo", NULL},
 | 
| +  { "ChineseT",
 | 
| +    NULL,  NULL,  // We intentionally set these 2 fields to NULL to avoid
 | 
| +                  // confusion between CHINESE_T and CHINESE.
 | 
| +    "zh-TW"},
 | 
| +  { "FAROESE",             "fo", "fao", NULL},
 | 
| +  { "SUNDANESE",           "su", "sun", NULL},
 | 
| +  { "UZBEK",               "uz", "uzb", NULL},
 | 
| +  { "AMHARIC",             "am", "amh", NULL},
 | 
| +  { "AZERBAIJANI",         "az", "aze", NULL},
 | 
| +  { "GEORGIAN",            "ka", "geo", NULL},
 | 
| +  { "TIGRINYA",            "ti", "tir", NULL},
 | 
| +  { "PERSIAN",             "fa", "per", NULL},
 | 
| +  { "BOSNIAN",             "bs", "bos", NULL},
 | 
| +  { "SINHALESE",           "si", "sin", NULL},
 | 
| +  { "NORWEGIAN_N",         "nn", "nno", NULL},
 | 
| +  { "PORTUGUESE_P",        NULL, NULL, "pt-PT"},
 | 
| +  { "PORTUGUESE_B",        NULL, NULL, "pt-BR"},
 | 
| +  { "XHOSA",               "xh", "xho", NULL},
 | 
| +  { "ZULU",                "zu", "zul", NULL},
 | 
| +  { "GUARANI",             "gn", "grn", NULL},
 | 
| +  { "SESOTHO",             "st", "sot", NULL},
 | 
| +  { "TURKMEN",             "tk", "tuk", NULL},
 | 
| +  { "KYRGYZ",              "ky", "kir", NULL},
 | 
| +  { "BRETON",              "br", "bre", NULL},
 | 
| +  { "TWI",                 "tw", "twi", NULL},
 | 
| +  { "YIDDISH",             "yi", "yid", NULL},
 | 
| +  { "SERBO_CROATIAN",      "sh", NULL, NULL},
 | 
| +  { "SOMALI",              "so", "som", NULL},
 | 
| +  { "UIGHUR",              "ug", "uig", NULL},
 | 
| +  { "KURDISH",             "ku", "kur", NULL},
 | 
| +  { "MONGOLIAN",           "mn", "mon", NULL},
 | 
| +  { "ARMENIAN",            "hy", "arm", NULL},
 | 
| +  { "LAOTHIAN",            "lo", "lao", NULL},
 | 
| +  { "SINDHI",              "sd", "snd", NULL},
 | 
| +  { "RHAETO_ROMANCE",      "rm", "roh", NULL},
 | 
| +  { "AFRIKAANS",           "af", "afr", NULL},
 | 
| +  { "LUXEMBOURGISH",       "lb", "ltz", NULL},
 | 
| +  { "BURMESE",             "my", "bur", NULL},
 | 
| +  // KHMER is known as Cambodian for Google user interfaces.
 | 
| +  { "KHMER",               "km", "khm", NULL},
 | 
| +  { "TIBETAN",             "bo", "tib", NULL},
 | 
| +  { "DHIVEHI",             "dv", "div", NULL},
 | 
| +  { "CHEROKEE",            NULL, "chr", NULL},
 | 
| +  { "SYRIAC",              NULL, "syr", NULL},
 | 
| +  { "LIMBU",               NULL, NULL, "sit-NP"},
 | 
| +  { "ORIYA",               "or", "ori", NULL},
 | 
| +  { "ASSAMESE",            "as", "asm", NULL},
 | 
| +  { "CORSICAN",            "co", "cos", NULL},
 | 
| +  { "INTERLINGUE",         "ie", "ine", NULL},  // NOTE(review): ISO-639-2 lists "ile" for Interlingue -- confirm.
 | 
| +  { "KAZAKH",              "kk", "kaz", NULL},
 | 
| +  { "LINGALA",             "ln", "lin", NULL},
 | 
| +  { "MOLDAVIAN",           "mo", "mol", NULL},
 | 
| +  { "PASHTO",              "ps", "pus", NULL},
 | 
| +  { "QUECHUA",             "qu", "que", NULL},
 | 
| +  { "SHONA",               "sn", "sna", NULL},
 | 
| +  { "TAJIK",               "tg", "tgk", NULL},
 | 
| +  { "TATAR",               "tt", "tat", NULL},
 | 
| +  { "TONGA",               "to", "tog", NULL},  // NOTE(review): ISO-639-2 pairs "to" with "ton" -- confirm.
 | 
| +  { "YORUBA",              "yo", "yor", NULL},
 | 
| +  { "CREOLES_AND_PIDGINS_ENGLISH_BASED", NULL, "cpe", NULL},
 | 
| +  { "CREOLES_AND_PIDGINS_FRENCH_BASED",  NULL, "cpf", NULL},
 | 
| +  { "CREOLES_AND_PIDGINS_PORTUGUESE_BASED", NULL, "cpp", NULL},
 | 
| +  { "CREOLES_AND_PIDGINS_OTHER", NULL, "crp", NULL},
 | 
| +  { "MAORI",               "mi", "mao", NULL},
 | 
| +  { "WOLOF",               "wo", "wol", NULL},
 | 
| +  { "ABKHAZIAN",           "ab", "abk", NULL},
 | 
| +  { "AFAR",                "aa", "aar", NULL},
 | 
| +  { "AYMARA",              "ay", "aym", NULL},
 | 
| +  { "BASHKIR",             "ba", "bak", NULL},
 | 
| +  { "BISLAMA",             "bi", "bis", NULL},
 | 
| +  { "DZONGKHA",            "dz", "dzo", NULL},
 | 
| +  { "FIJIAN",              "fj", "fij", NULL},
 | 
| +  { "GREENLANDIC",         "kl", "kal", NULL},
 | 
| +  { "HAUSA",               "ha", "hau", NULL},
 | 
| +  { "HAITIAN_CREOLE",       "ht", NULL, NULL},
 | 
| +  { "INUPIAK",             "ik", "ipk", NULL},
 | 
| +  { "INUKTITUT",           "iu", "iku", NULL},
 | 
| +  { "KASHMIRI",            "ks", "kas", NULL},
 | 
| +  { "KINYARWANDA",         "rw", "kin", NULL},
 | 
| +  { "MALAGASY",            "mg", "mlg", NULL},
 | 
| +  { "NAURU",               "na", "nau", NULL},
 | 
| +  { "OROMO",               "om", "orm", NULL},
 | 
| +  { "RUNDI",               "rn", "run", NULL},
 | 
| +  { "SAMOAN",              "sm", "smo", NULL},
 | 
| +  { "SANGO",               "sg", "sag", NULL},
 | 
| +  { "SANSKRIT",            "sa", "san", NULL},
 | 
| +  { "SISWANT",             "ss", "ssw", NULL},
 | 
| +  { "TSONGA",              "ts", "tso", NULL},
 | 
| +  { "TSWANA",              "tn", "tsn", NULL},
 | 
| +  { "VOLAPUK",             "vo", "vol", NULL},
 | 
| +  { "ZHUANG",              "za", "zha", NULL},
 | 
| +  { "KHASI",               NULL, "kha", NULL},
 | 
| +  { "SCOTS",               NULL, "sco", NULL},
 | 
| +  { "GANDA",               "lg", "lug", NULL},
 | 
| +  { "MANX",                "gv", "glv", NULL},
 | 
| +  { "MONTENEGRIN",         NULL, NULL, "sr-ME"},
 | 
| +  { "XX",                  NULL, NULL, "XX"},
 | 
| +};
 | 
| +
 | 
| +COMPILE_ASSERT(arraysize(kLanguageInfoTable) == NUM_LANGUAGES + 1,
 | 
| +               kLanguageInfoTable_has_incorrect_length);  // Table must have exactly NUM_LANGUAGES + 1 rows.
 | 
| +
 | 
| +
 | 
| +// LANGUAGE NAMES
 | 
| +
 | 
| +// Returns the table name of ENGLISH, the default language.
 | 
| +const char* default_language_name() {
 | 
| +  const LanguageInfo& english = kLanguageInfoTable[ENGLISH];
 | 
| +  return english.language_name_;
 | 
| +}
 | 
| +
 | 
| +// Name handed out in place of a real one when a language is not valid.
 | 
| +static const char* const kInvalidLanguageName = "invalid_language";
 | 
| +
 | 
| +// Returns the placeholder name used for invalid languages.
 | 
| +const char* invalid_language_name() {
 | 
| +  const char* const placeholder = kInvalidLanguageName;
 | 
| +  return placeholder;
 | 
| +}
 | 
| +
 | 
| +// Returns the table name of |lang|, or the invalid-language placeholder
 | 
| +// name when IsValidLanguage() rejects |lang|.
 | 
| +const char* LanguageName(Language lang) {
 | 
| +  if (!IsValidLanguage(lang))
 | 
| +    return kInvalidLanguageName;
 | 
| +  return kLanguageInfoTable[lang].language_name_;
 | 
| +}
 | 
| +
 | 
| +
 | 
| +
 | 
| +// LANGUAGE CODES
 | 
| +
 | 
| +
 | 
| +// Code handed out in place of a real one.  The leading space is
 | 
| +// deliberate: it keeps the placeholder from matching any two letter
 | 
| +// language code.
 | 
| +static const char* const kInvalidLanguageCode = " invalid_language_code";
 | 
| +
 | 
| +// Returns the placeholder code used when no real language code applies.
 | 
| +const char* invalid_language_code() {
 | 
| +  const char* const placeholder = kInvalidLanguageCode;
 | 
| +  return placeholder;
 | 
| +}
 | 
| +
 | 
| +// Returns the preferred code for |lang|: its ISO-639-1 code if it has
 | 
| +// one, otherwise its ISO-639-2 code, otherwise its nonstandard code.
 | 
| +// An invalid |lang|, or a table row with no code at all, yields the
 | 
| +// invalid-language placeholder code.
 | 
| +const char* LanguageCode(Language lang) {
 | 
| +  if (!IsValidLanguage(lang))
 | 
| +    return kInvalidLanguageCode;
 | 
| +
 | 
| +  const LanguageInfo& row = kLanguageInfoTable[lang];
 | 
| +  if (row.language_code_639_1_)
 | 
| +    return row.language_code_639_1_;
 | 
| +  if (row.language_code_639_2_)
 | 
| +    return row.language_code_639_2_;
 | 
| +  if (row.language_code_other_)
 | 
| +    return row.language_code_other_;
 | 
| +  return kInvalidLanguageCode;
 | 
| +}
 | 
| +
 | 
| +// Returns the ISO-639-1 code of ENGLISH, the default language.
 | 
| +const char* default_language_code() {
 | 
| +  const LanguageInfo& english = kLanguageInfoTable[ENGLISH];
 | 
| +  return english.language_code_639_1_;
 | 
| +}
 | 
| +
 | 
| +// Returns the ISO-639-1 code of |lang|.  Yields the invalid-language
 | 
| +// placeholder code when |lang| is invalid or has no ISO-639-1 code.
 | 
| +const char* LanguageCodeISO639_1(Language lang) {
 | 
| +  const char* iso_code = NULL;
 | 
| +  if (IsValidLanguage(lang))
 | 
| +    iso_code = kLanguageInfoTable[lang].language_code_639_1_;
 | 
| +  return iso_code ? iso_code : kInvalidLanguageCode;
 | 
| +}
 | 
| +
 | 
| +// Returns the ISO-639-2 code of |lang|.  Yields the invalid-language
 | 
| +// placeholder code when |lang| is invalid or has no ISO-639-2 code.
 | 
| +const char* LanguageCodeISO639_2(Language lang) {
 | 
| +  const char* iso_code = NULL;
 | 
| +  if (IsValidLanguage(lang))
 | 
| +    iso_code = kLanguageInfoTable[lang].language_code_639_2_;
 | 
| +  return iso_code ? iso_code : kInvalidLanguageCode;
 | 
| +}
 | 
| +
 | 
| +// Same as LanguageCode(), except that CHINESE is reported with its
 | 
| +// dialect-qualified code "zh-CN".
 | 
| +const char* LanguageCodeWithDialects(Language lang) {
 | 
| +  return (lang == CHINESE) ? "zh-CN" : LanguageCode(lang);
 | 
| +}
 | 
| +
 | 
| +
 | 
| +
 | 
| +// An extra spelling of a language code that LanguageFromCode() accepts
 | 
| +// in addition to the codes stored in kLanguageInfoTable.
 | 
| +struct LanguageCodeAlias {
 | 
| +  const char* code_;
 | 
| +  Language language_;
 | 
| +};
 | 
| +
 | 
| +static const LanguageCodeAlias kLanguageCodeAliases[] = {
 | 
| +  // For convenience, the non-standard five-letter language codes
 | 
| +  // "zh-cn" and "zh-tw", which are used by front-ends such as GWS to
 | 
| +  // distinguish Simplified from Traditional Chinese, are accepted in
 | 
| +  // both their hyphen and underscore spellings.  So is "sr-me".
 | 
| +  { "zh-cn", CHINESE },
 | 
| +  { "zh_cn", CHINESE },
 | 
| +  { "zh-tw", CHINESE_T },
 | 
| +  { "zh_tw", CHINESE_T },
 | 
| +  { "sr-me", MONTENEGRIN },
 | 
| +  { "sr_me", MONTENEGRIN },
 | 
| +  // Language-code synonyms per http://wiki/Main/IIISynonyms.
 | 
| +  { "he", HEBREW },      // Use "iw".
 | 
| +  { "in", INDONESIAN },  // Use "id".
 | 
| +  { "ji", YIDDISH },     // Use "yi".
 | 
| +  // These distinct languages cannot be differentiated by our current
 | 
| +  // language-detection algorithms.
 | 
| +  { "fil", TAGALOG },
 | 
| +};
 | 
| +
 | 
| +// Returns true if |candidate| is non-NULL and base::strcasecmp() reports
 | 
| +// it equal to |lang_code|.  |lang_code| must not be NULL.
 | 
| +static bool LanguageCodeMatches(const char* lang_code,
 | 
| +                                const char* candidate) {
 | 
| +  return candidate != NULL && !base::strcasecmp(lang_code, candidate);
 | 
| +}
 | 
| +
 | 
| +// Finds the language that |lang_code| names.  The code is compared, via
 | 
| +// base::strcasecmp(), first against the ISO-639-1, ISO-639-2 and
 | 
| +// nonstandard codes of the first kNumLanguages table rows, then against
 | 
| +// the aliases in kLanguageCodeAliases.
 | 
| +//
 | 
| +// On a match, stores the language in |*language| and returns true.
 | 
| +// Otherwise (including a NULL |lang_code|) stores UNKNOWN_LANGUAGE and
 | 
| +// returns false.  |language| may be NULL when the caller only wants to
 | 
| +// know whether the code is recognized.
 | 
| +bool LanguageFromCode(const char* lang_code, Language *language) {
 | 
| +  // Redirect a NULL out-parameter to a throwaway local so that the
 | 
| +  // stores below never dereference NULL.
 | 
| +  Language discarded;
 | 
| +  if (language == NULL)
 | 
| +    language = &discarded;
 | 
| +
 | 
| +  *language = UNKNOWN_LANGUAGE;
 | 
| +  if (lang_code == NULL)
 | 
| +    return false;
 | 
| +
 | 
| +  for (int i = 0; i < kNumLanguages; i++) {
 | 
| +    const LanguageInfo& info = kLanguageInfoTable[i];
 | 
| +    if (LanguageCodeMatches(lang_code, info.language_code_639_1_) ||
 | 
| +        LanguageCodeMatches(lang_code, info.language_code_639_2_) ||
 | 
| +        LanguageCodeMatches(lang_code, info.language_code_other_)) {
 | 
| +      *language = static_cast<Language>(i);
 | 
| +      return true;
 | 
| +    }
 | 
| +  }
 | 
| +
 | 
| +  for (size_t i = 0; i < arraysize(kLanguageCodeAliases); i++) {
 | 
| +    const LanguageCodeAlias& alias = kLanguageCodeAliases[i];
 | 
| +    if (!base::strcasecmp(lang_code, alias.code_)) {
 | 
| +      *language = alias.language_;
 | 
| +      return true;
 | 
| +    }
 | 
| +  }
 | 
| +
 | 
| +  return false;
 | 
| +}
 | 
| 
 | 
| Property changes on: third_party\cld\bar\toolbar\cld\i18n\languages\internal\languages.cc
 | 
| ___________________________________________________________________
 | 
| Added: svn:eol-style
 | 
|    + LF
 | 
| 
 | 
| 
 |