Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1093)

Unified Diff: third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/getonescriptspan.h

Issue 122007: [chromium-reviews] Add Compact Language Detection (CLD) library to Chrome. This works in Windows... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 11 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/getonescriptspan.h
===================================================================
--- third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/getonescriptspan.h (revision 0)
+++ third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/getonescriptspan.h (revision 0)
@@ -0,0 +1,131 @@
+// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef I18N_ENCODINGS_COMPACT_LANG_DET_GETONESCRIPTSPAN_H_
+#define I18N_ENCODINGS_COMPACT_LANG_DET_GETONESCRIPTSPAN_H_
+
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/letterscript_enum.h"
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_impl.h"
+
+namespace getone {  // Constants, the LangSpan record and UTF-8 helpers for span scanning
+ static const int kMaxScriptBuffer = 4096;  // Base size the two limits below derive from
+ static const int kMaxScriptLowerBuffer = (kMaxScriptBuffer * 3) / 2;  // 1.5x base = 6144
+ static const int kMaxScriptBytes = kMaxScriptBuffer- 8; // Leave some room (= 4088)
+ static const int kMaxAnswerBuffer = 256;  // Length of each LangScanner answer_buffer*_ array
+
+ typedef enum UnicodeLScript ULScript;  // Short alias; used for LangSpan::script
+ // Describes one span of text: where it is, its size, its script and its language.
+ typedef struct {
+ char* text; // Pointer to the span, somewhere
+ int text_bytes; // Number of bytes of text in the span
+ int offset; // Offset of start of span in original input buffer
+ ULScript script; // Script of all the letters in this span
+ Language lang; // Language identified for this span
+ bool truncated; // true if buffer filled up before a
+ // different script or EOF was found
+ } LangSpan;
+
+ // True for bytes 0x80..0xBF (signed value below -64): UTF-8 continuation bytes.
+ static inline bool IsContinuationByte(char c) {
+ return static_cast<signed char>(c) < -64;
+ }
+
+ // Gets the lscript number for a letter; always returns
+ // 0 (common script) for non-letters
+ int GetUTF8LetterScriptNum(const char* src);
+
+
+ // Returns the position of the next quadgram, src+2..src+5. src is passed by
+ // value, so callers must use the returned pointer. Looks at src[0..4]
+ const char* AdvanceQuad(const char* src);
+} // end namespace getone
+
+
+
+
+// Scanner constructed over a caller-supplied text buffer; its methods hand
+// back successive same-script runs of letters as getone::LangSpan records.
+class ScriptScanner {
+ public:
+ ScriptScanner(const char* buffer, int buffer_length, bool is_plain_text);
+ ~ScriptScanner();
+
+ // Copies the next run of same-script non-tag letters to a buffer [NUL terminated]
+ bool GetOneScriptSpan(getone::LangSpan* span);  // NOTE(review): bool presumably means "a span was produced" - confirm in the .cc
+
+ // Forces text in Latin and Cyrillic scripts to be lowercase
+ void LowerScriptSpan(getone::LangSpan* span);
+
+ // Copies the next run of same-script non-tag letters to a buffer [NUL terminated]
+ // and forces Latin and Cyrillic scripts to be lowercase
+ bool GetOneScriptSpanLower(getone::LangSpan* span);
+
+ private:
+ int SkipToFrontOfSpan(const char* src, int len, int* script);  // NOTE(review): undocumented; presumably returns bytes skipped and sets *script - confirm
+
+ const char* start_byte_;  // NOTE(review): presumably the start of the input buffer - confirm
+ const char* next_byte_;  // NOTE(review): presumably the current scan position - confirm
+ const char* next_byte_limit_;  // NOTE(review): presumably the end of the input - confirm
+ int byte_length_;
+ bool is_plain_text_;  // Presumably the constructor's is_plain_text argument - confirm
+ char* script_buffer_; // Holds text with expanded entities
+ char* script_buffer_lower_; // Holds lowercased text
+};
+
+// Scanner over the text of a getone::LangSpan that yields language spans and keeps smoothing state between texts.
+class LangScanner {
+ public:
+ LangScanner(const CompactLangDetImpl::LangDetObj* langdetobj,
+ getone::LangSpan* spn, int smoothwidth, int smoothcandidates,
+ int maxlangs, int minlangspan);
+ ~LangScanner();
+
+ // Returns the stored script_ value.
+ int script() {return script_;}
+
+ // Switches the scanner to the new text in spn.
+ // Keeps smoothing state if the script is the same, otherwise reinitializes smoothing
+ void NewText(getone::LangSpan* spn);
+
+ bool GetOneShortLangSpanBoot(getone::LangSpan* span); // Just for bootstrapping
+ bool GetOneLangSpanBoot(getone::LangSpan* span); // Just for bootstrapping
+
+ // The real ones: these take the detection object explicitly
+ bool GetOneShortLangSpan(const CompactLangDetImpl::LangDetObj* langdetobj,
+ getone::LangSpan* span);
+ bool GetOneLangSpan(const CompactLangDetImpl::LangDetObj* langdetobj,
+ getone::LangSpan* span);
+
+ // Increases the bias for language key by delta
+ void SetLanguageBias(const CompactLangDetImpl::LangDetObj* langdetobj,
+ Language key, int delta);
+
+ // For debugging output (declared public, so callers can access them directly)
+ int next_answer_;  // NOTE(review): presumably selects which answer buffer is used next - confirm
+ char answer_buffer_[getone::kMaxAnswerBuffer];
+ char answer_buffer2_[getone::kMaxAnswerBuffer];
+ char answer_buffer3_[getone::kMaxAnswerBuffer];
+ char answer_buffer4_[getone::kMaxAnswerBuffer];
+
+ private:
+ const char* start_byte_;
+ const char* next_byte_limit_;
+ const char* next_byte_;
+ const char* onelangspan_begin_;
+ int byte_length_;
+ int script_;  // Value returned by script()
+ Language spanlang_;
+ int smoothwidth_;  // Presumably the constructor's smoothwidth argument - confirm
+ int smoothwidth_2_;
+ int smoothcandidates_;  // Presumably the constructor's smoothcandidates argument - confirm
+ int maxlangs_;  // Presumably the constructor's maxlangs argument - confirm
+ int minlangspan_;  // Presumably the constructor's minlangspan argument - confirm
+ int rb_size_;  // NOTE(review): the rb_* fields look like a ring buffer (size/next/mask) - confirm in the .cc
+ int next_rb_;
+ int rb_mask_;
+ uint32* rb_;
+ int* offset_rb_;
+};
+
+#endif // I18N_ENCODINGS_COMPACT_LANG_DET_GETONESCRIPTSPAN_H_
Property changes on: third_party\cld\bar\toolbar\cld\i18n\encodings\compact_lang_det\getonescriptspan.h
___________________________________________________________________
Added: svn:eol-style
+ LF

Powered by Google App Engine
This is Rietveld 408576698