Index: third_party/cld/bar/toolbar/cld/i18n/encodings/proto/encodings.pb.h |
=================================================================== |
--- third_party/cld/bar/toolbar/cld/i18n/encodings/proto/encodings.pb.h (revision 0) |
+++ third_party/cld/bar/toolbar/cld/i18n/encodings/proto/encodings.pb.h (revision 0) |
@@ -0,0 +1,169 @@ |
+// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#ifndef BAR_TOOLBAR_CLD_I18N_ENCODINGS_PROTO_ENCODINGS_PB_H_ |
+#define BAR_TOOLBAR_CLD_I18N_ENCODINGS_PROTO_ENCODINGS_PB_H_ |
+ |
+enum Encoding { |
+ ISO_8859_1 = 0, // Teragram ASCII |
+ ISO_8859_2 = 1, // Teragram Latin2 |
+ ISO_8859_3 = 2, // in BasisTech but not in Teragram |
+ ISO_8859_4 = 3, // Teragram Latin4 |
+ ISO_8859_5 = 4, // Teragram ISO-8859-5 |
+ ISO_8859_6 = 5, // Teragram Arabic |
+ ISO_8859_7 = 6, // Teragram Greek |
+ ISO_8859_8 = 7, // Teragram Hebrew |
+ ISO_8859_9 = 8, // in BasisTech but not in Teragram |
+ ISO_8859_10 = 9, // in BasisTech but not in Teragram |
+ JAPANESE_EUC_JP = 10, // Teragram EUC_JP |
+ JAPANESE_SHIFT_JIS = 11, // Teragram SJS |
+ JAPANESE_JIS = 12, // Teragram JIS |
+ CHINESE_BIG5 = 13, // Teragram BIG5 |
+ CHINESE_GB = 14, // Teragram GB |
+ CHINESE_EUC_CN = 15, // Misnamed. Should be EUC_TW. Was Basis Tech |
+ // CNS11643EUC, before that Teragram EUC-CN(!) |
+ // See //i18n/basistech/basistech_encodings.h |
+ KOREAN_EUC_KR = 16, // Teragram KSC |
+ UNICODE = 17, // Teragram Unicode |
+ CHINESE_EUC_DEC = 18, // Misnamed. Should be EUC_TW. Was Basis Tech |
+ // CNS11643EUC, before that Teragram EUC. |
+ CHINESE_CNS = 19, // Misnamed. Should be EUC_TW. Was Basis Tech |
+ // CNS11643EUC, before that Teragram CNS. |
+ CHINESE_BIG5_CP950 = 20, // Teragram BIG5_CP950 |
+ JAPANESE_CP932 = 21, // Teragram CP932 |
+ UTF8 = 22, |
+ UNKNOWN_ENCODING = 23, |
+ ASCII_7BIT = 24, // ISO_8859_1 with all characters <= 127. |
+ // Should be present only in the crawler |
+ // and in the repository, |
+ // *never* as a result of Document::encoding(). |
+ RUSSIAN_KOI8_R = 25, // Teragram KOI8R |
+ RUSSIAN_CP1251 = 26, // Teragram CP1251 |
+ |
+ //---------------------------------------------------------- |
+ // These are _not_ output from Teragram. Instead, they are as |
+ // detected in the headers of usenet articles. |
+ MSFT_CP1252 = 27, // CP1252 aka MSFT euro ascii |
+ RUSSIAN_KOI8_RU = 28, // CP21866 aka KOI8-U, used for Ukrainian. |
+ // Misnamed, this is _not_ KOI8-RU but KOI8-U. |
+ // KOI8-U is used much more often than KOI8-RU. |
+ MSFT_CP1250 = 29, // CP1250 aka MSFT eastern european |
+ ISO_8859_15 = 30, // aka ISO_8859_0 aka ISO_8859_1 euroized |
+ //---------------------------------------------------------- |
+ |
+ //---------------------------------------------------------- |
+ // These are in BasisTech but not in Teragram. They are |
+ // needed for new interface languages. Now detected by |
+ // research langid |
+ MSFT_CP1254 = 31, // used for Turkish |
+ MSFT_CP1257 = 32, // used in Baltic countries |
+ //---------------------------------------------------------- |
+ |
+ //---------------------------------------------------------- |
+ //---------------------------------------------------------- |
+ // New encodings detected by Teragram |
+ ISO_8859_11 = 33, // aka TIS-620, used for Thai |
+ MSFT_CP874 = 34, // used for Thai |
+ MSFT_CP1256 = 35, // used for Arabic |
+ |
+ //---------------------------------------------------------- |
+ // Detected as ISO_8859_8 by Teragram, but can be found in META tags |
+ MSFT_CP1255 = 36, // Logical Hebrew Microsoft |
+ ISO_8859_8_I = 37, // ISO Hebrew Logical |
+ HEBREW_VISUAL = 38, // ISO Hebrew Visual |
+ //---------------------------------------------------------- |
+ |
+ //---------------------------------------------------------- |
+ // Detected by research langid |
+ CZECH_CP852 = 39, |
+ CZECH_CSN_369103 = 40, // aka ISO_IR_139 aka KOI8_CS |
+ MSFT_CP1253 = 41, // used for Greek |
+ RUSSIAN_CP866 = 42, |
+ //---------------------------------------------------------- |
+ |
+ //---------------------------------------------------------- |
+ // Handled by iconv in glibc |
+ ISO_8859_13 = 43, |
+ ISO_2022_KR = 44, |
+ GBK = 45, |
+ GB18030 = 46, |
+ BIG5_HKSCS = 47, |
+ ISO_2022_CN = 48, |
+ |
+ //----------------------------------------------------------- |
+ // Detected by Xin Liu's detector |
+ // Handled by transcoder |
+ // (Indic encodings) |
+ |
+ TSCII = 49, |
+ TAMIL_MONO = 50, |
+ TAMIL_BI = 51, |
+ JAGRAN = 52, |
+ |
+ |
+ MACINTOSH_ROMAN = 53, |
+ UTF7 = 54, |
+ BHASKAR = 55, // Indic encoding - Devanagari |
+ HTCHANAKYA = 56, // Indic encoding - Devanagari |
+ |
+ //----------------------------------------------------------- |
+ // These allow a single place (inputconverter and outputconverter) |
+ // to do UTF-16 <==> UTF-8 bulk conversions and UTF-32 <==> UTF-8 |
+ // bulk conversions, with interchange-valid checking on input and |
+ // fallback if needed on output. |
+ UTF16BE = 57, // big-endian UTF-16 |
+ UTF16LE = 58, // little-endian UTF-16 |
+ UTF32BE = 59, // big-endian UTF-32 |
+ UTF32LE = 60, // little-endian UTF-32 |
+ //----------------------------------------------------------- |
+ |
+ //----------------------------------------------------------- |
+ // An encoding that means "This is not text, but it may have some |
+ // simple ASCII text embedded". Intended input conversion (not yet |
+ // implemented) is to keep strings of >=4 seven-bit ASCII characters |
+ // (following each kept string with an ASCII space) and delete the rest of |
+ // the bytes. This will pick up and allow indexing of e.g. captions |
+ // in JPEGs. No output conversion needed. |
+ BINARYENC = 61, |
+ //----------------------------------------------------------- |
+ |
+ //----------------------------------------------------------- |
+ // Some Web pages allow a mixture of HZ-GB and GB-2312 by using |
+ // ~{ ... ~} for 2-byte pairs, and the browsers support this. |
+ HZ_GB_2312 = 62, |
+ //----------------------------------------------------------- |
+ |
+ //----------------------------------------------------------- |
+ // Some external vendors make the common input error of |
+ // converting MSFT_CP1252 to UTF8 *twice*. No output conversion needed. |
+ UTF8UTF8 = 63, |
+ //----------------------------------------------------------- |
+ |
+ //----------------------------------------------------------- |
+ // Handled by transcoder for Tamil language-specific font |
+ // encodings without the support for detection at present. |
+ TAM_ELANGO = 64, // Elango - Tamil |
+ TAM_LTTMBARANI = 65, // Barani - Tamil |
+ TAM_SHREE = 66, // Shree - Tamil |
+ TAM_TBOOMIS = 67, // TBoomis - Tamil |
+ TAM_TMNEWS = 68, // TMNews - Tamil |
+ TAM_WEBTAMIL = 69, // Webtamil - Tamil |
+ //----------------------------------------------------------- |
+ |
+ //----------------------------------------------------------- |
+ // Shift_JIS variants used by Japanese cell phone carriers. |
+ KDDI_SHIFT_JIS = 70, |
+ DOCOMO_SHIFT_JIS = 71, |
+ SOFTBANK_SHIFT_JIS = 72, |
+ // ISO-2022-JP variants used by KDDI and SoftBank. |
+ KDDI_ISO_2022_JP = 73, |
+ SOFTBANK_ISO_2022_JP = 74, |
+ //----------------------------------------------------------- |
+ |
+ NUM_ENCODINGS = 75, // Always keep this at the end. It is not a |
+ // valid Encoding enum, it is only used to |
+ // indicate the total number of Encodings. |
+}; |
+ |
+#endif // BAR_TOOLBAR_CLD_I18N_ENCODINGS_PROTO_ENCODINGS_PB_H_ |
Property changes on: third_party\cld\bar\toolbar\cld\i18n\encodings\proto\encodings.pb.h |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |