Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(8)

Unified Diff: third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det.cc

Issue 122007: [chromium-reviews] Add Compact Language Detection (CLD) library to Chrome. This works in Windows... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 11 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det.cc
===================================================================
--- third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det.cc (revision 0)
+++ third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det.cc (revision 0)
@@ -0,0 +1,258 @@
+// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det.h"
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_impl.h"
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_basictypes.h"
+
+// Version string, "code_version - data_scrape_date"; see DetectLanguageVersion().
+static const char* kDetectLanguageVersion = "V1.6 - 20081121";
+
+// Large-table version for all ~160 languages (all Tiers)
+
+// Scan interchange-valid UTF-8 bytes and return the single most likely language.
+Language CompactLangDet::DetectLanguage(
+ const char* buffer,
+ int buffer_length,
+ bool is_plain_text,
+ bool* is_reliable) {  // is_reliable is handed straight to the call below.
+ bool allow_extended_lang = false;
+ Language language3[3];  // Passed to the call below but not read afterwards.
+ int percent3[3];  // Ditto.
+ double normalized_score3[3];  // Ditto.
+ int text_bytes;  // Ditto (passed by address).
+ int flags = 0;
+ Language plus_one = UNKNOWN_LANGUAGE;
+ const char* tld_hint = "";  // No hints are supplied by this entry point.
+ int encoding_hint = UNKNOWN_ENCODING;
+ Language language_hint = UNKNOWN_LANGUAGE;
+
+ Language lang = CompactLangDetImpl::DetectLanguageSummaryV25(
+ buffer,
+ buffer_length,
+ is_plain_text,
+ tld_hint, // Empty here; "id" would boost Indonesian
+ encoding_hint, // Unknown here; SJS would boost Japanese
+ language_hint, // Unknown here; ITALIAN would boost it
+ allow_extended_lang,
+ flags,
+ plus_one,
+ language3,
+ percent3,
+ normalized_score3,
+ &text_bytes,
+ is_reliable);
+ // Default to English, so UNKNOWN_LANGUAGE is never returned from here.
+ if (lang == UNKNOWN_LANGUAGE) {
+ lang = ENGLISH;
+ }
+ return lang;
+}
+
+// Scan interchange-valid UTF-8 bytes and detect list of top 3 languages (no hints).
+Language CompactLangDet::DetectLanguageSummary(
+ const char* buffer,
+ int buffer_length,
+ bool is_plain_text,
+ Language* language3,
+ int* percent3,
+ int* text_bytes,
+ bool* is_reliable) {  // All four pointers are forwarded to the call below.
+ double normalized_score3[3];  // Local only; not exposed to the caller.
+ bool allow_extended_lang = false;
+ int flags = 0;
+ Language plus_one = UNKNOWN_LANGUAGE;
+ const char* tld_hint = "";  // No hints are supplied by this overload.
+ int encoding_hint = UNKNOWN_ENCODING;
+ Language language_hint = UNKNOWN_LANGUAGE;
+
+ Language lang = CompactLangDetImpl::DetectLanguageSummaryV25(
+ buffer,
+ buffer_length,
+ is_plain_text,
+ tld_hint, // Empty here; "id" would boost Indonesian
+ encoding_hint, // Unknown here; SJS would boost Japanese
+ language_hint, // Unknown here; ITALIAN would boost it
+ allow_extended_lang,
+ flags,
+ plus_one,
+ language3,
+ percent3,
+ normalized_score3,
+ text_bytes,
+ is_reliable);
+ // Default to English, so UNKNOWN_LANGUAGE is never the return value here.
+ if (lang == UNKNOWN_LANGUAGE) {
+ lang = ENGLISH;
+ }
+ return lang;
+}
+
+// Same as the hint-less DetectLanguageSummary, with caller-supplied hints.
+// Scan interchange-valid UTF-8 bytes and detect list of top 3 languages.
+Language CompactLangDet::DetectLanguageSummary(
+ const char* buffer,
+ int buffer_length,
+ bool is_plain_text,
+ const char* tld_hint, // "id" boosts Indonesian
+ int encoding_hint, // SJS boosts Japanese
+ Language language_hint, // ITALIAN boosts it
+ Language* language3,
+ int* percent3,
+ int* text_bytes,
+ bool* is_reliable) {  // All four pointers are forwarded to the call below.
+ double normalized_score3[3];  // Local only; not exposed to the caller.
+ bool allow_extended_lang = false;
+ int flags = 0;
+ Language plus_one = UNKNOWN_LANGUAGE;
+
+ Language lang = CompactLangDetImpl::DetectLanguageSummaryV25(
+ buffer,
+ buffer_length,
+ is_plain_text,
+ tld_hint, // Caller's hint; "id" boosts Indonesian
+ encoding_hint, // Caller's hint; SJS boosts Japanese
+ language_hint, // Caller's hint; ITALIAN boosts it
+ allow_extended_lang,
+ flags,
+ plus_one,
+ language3,
+ percent3,
+ normalized_score3,
+ text_bytes,
+ is_reliable);
+ // Default to English, so UNKNOWN_LANGUAGE is never the return value here.
+ if (lang == UNKNOWN_LANGUAGE) {
+ lang = ENGLISH;
+ }
+ return lang;
+}
+
+
+// Scan interchange-valid UTF-8 bytes and detect list of top 3 extended
+// languages (no hints supplied).
+// Extended languages are additional Google interface languages and Unicode
+// single-language scripts, from ext_lang_enc.h.
+Language CompactLangDet::ExtDetectLanguageSummary(
+ const char* buffer,
+ int buffer_length,
+ bool is_plain_text,
+ Language* language3,
+ int* percent3,
+ int* text_bytes,
+ bool* is_reliable) {  // All four pointers are forwarded to the call below.
+ double normalized_score3[3];  // Local only; not exposed to the caller.
+ bool allow_extended_lang = true;  // The only flag that differs from DetectLanguageSummary.
+ int flags = 0;
+ Language plus_one = UNKNOWN_LANGUAGE;
+ const char* tld_hint = "";  // No hints are supplied by this overload.
+ int encoding_hint = UNKNOWN_ENCODING;
+ Language language_hint = UNKNOWN_LANGUAGE;
+
+ Language lang = CompactLangDetImpl::DetectLanguageSummaryV25(
+ buffer,
+ buffer_length,
+ is_plain_text,
+ tld_hint, // Empty here; "id" would boost Indonesian
+ encoding_hint, // Unknown here; SJS would boost Japanese
+ language_hint, // Unknown here; ITALIAN would boost it
+ allow_extended_lang,
+ flags,
+ plus_one,
+ language3,
+ percent3,
+ normalized_score3,
+ text_bytes,
+ is_reliable);
+ // Do not default to English: the implementation's result is returned as-is.
+ return lang;
+}
+
+// Same as the hint-less ExtDetectLanguageSummary, with caller-supplied hints.
+// Scan interchange-valid UTF-8 bytes and detect list of top 3 extended
+// languages.
+// Extended languages are additional Google interface languages and Unicode
+// single-language scripts, from ext_lang_enc.h.
+Language CompactLangDet::ExtDetectLanguageSummary(
+ const char* buffer,
+ int buffer_length,
+ bool is_plain_text,
+ const char* tld_hint, // "id" boosts Indonesian
+ int encoding_hint, // SJS boosts Japanese
+ Language language_hint, // ITALIAN boosts it
+ Language* language3,
+ int* percent3,
+ int* text_bytes,
+ bool* is_reliable) {  // All four pointers are forwarded to the call below.
+ double normalized_score3[3];  // Local only; not exposed to the caller.
+ bool allow_extended_lang = true;
+ int flags = 0;
+ Language plus_one = UNKNOWN_LANGUAGE;
+
+ Language lang = CompactLangDetImpl::DetectLanguageSummaryV25(
+ buffer,
+ buffer_length,
+ is_plain_text,
+ tld_hint, // Caller's hint; "id" boosts Indonesian
+ encoding_hint, // Caller's hint; SJS boosts Japanese
+ language_hint, // Caller's hint; ITALIAN boosts it
+ allow_extended_lang,
+ flags,
+ plus_one,
+ language3,
+ percent3,
+ normalized_score3,
+ text_bytes,
+ is_reliable);
+ // Do not default to English: the implementation's result is returned as-is.
+ return lang;
+}
+
+// Same as the hinted overload, and also returns internal language scores as a
+// ratio to normal score for real text in that language. Scores close to 1.0
+// indicate normal text, while scores far away from 1.0 indicate badly-skewed
+// text or gibberish.
+// normalized_score3 is passed through to the implementation to receive them.
+Language CompactLangDet::ExtDetectLanguageSummary(
+ const char* buffer,
+ int buffer_length,
+ bool is_plain_text,
+ const char* tld_hint, // "id" boosts Indonesian
+ int encoding_hint, // SJS boosts Japanese
+ Language language_hint, // ITALIAN boosts it
+ Language* language3,
+ int* percent3,
+ double* normalized_score3,
+ int* text_bytes,
+ bool* is_reliable) {  // All five pointers are forwarded to the call below.
+ bool allow_extended_lang = true;
+ int flags = 0;
+ Language plus_one = UNKNOWN_LANGUAGE;
+
+ Language lang = CompactLangDetImpl::DetectLanguageSummaryV25(
+ buffer,
+ buffer_length,
+ is_plain_text,
+ tld_hint, // Caller's hint; "id" boosts Indonesian
+ encoding_hint, // Caller's hint; SJS boosts Japanese
+ language_hint, // Caller's hint; ITALIAN boosts it
+ allow_extended_lang,
+ flags,
+ plus_one,
+ language3,
+ percent3,
+ normalized_score3,
+ text_bytes,
+ is_reliable);
+ // Do not default to English: the implementation's result is returned as-is.
+ return lang;
+ }
+
+
+
+// Return the version text string held in kDetectLanguageVersion.
+// String is "code_version - data_scrape_date".
+const char* CompactLangDet::DetectLanguageVersion() {
+ return kDetectLanguageVersion;  // Pointer to the file-level constant.
+}
Property changes on: third_party\cld\bar\toolbar\cld\i18n\encodings\compact_lang_det\compact_lang_det.cc
___________________________________________________________________
Added: svn:eol-style
+ LF

Powered by Google App Engine
This is Rietveld 408576698