Index: third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/getonescriptspan.h |
=================================================================== |
--- third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/getonescriptspan.h (revision 0) |
+++ third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/getonescriptspan.h (revision 0) |
@@ -0,0 +1,131 @@ |
+// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#ifndef I18N_ENCODINGS_COMPACT_LANG_DET_GETONESCRIPTSPAN_H_ |
+#define I18N_ENCODINGS_COMPACT_LANG_DET_GETONESCRIPTSPAN_H_ |
+ |
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/letterscript_enum.h" |
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_impl.h" |
+ |
+namespace getone { |
+  static const int kMaxScriptBuffer = 4096; |
+  static const int kMaxScriptLowerBuffer = (kMaxScriptBuffer * 3) / 2; |
+  static const int kMaxScriptBytes = kMaxScriptBuffer- 8;   // Leave 8 bytes of headroom below kMaxScriptBuffer |
+  static const int kMaxAnswerBuffer = 256; |
+ |
+  typedef enum UnicodeLScript ULScript; |
+ |
+  typedef struct { |
+    char* text;             // Pointer to the start of the span's text |
+    int text_bytes;         // Number of bytes of text in the span |
+    int offset;             // Offset of the span's start within the original input buffer |
+    ULScript script;        // Script shared by all the letters in this span |
+    Language lang;          // Language identified for this span |
+    bool truncated;         // True if the buffer filled up before a |
+                            //  different script or EOF was found |
+  } LangSpan; |
+ |
+ |
+  static inline bool IsContinuationByte(char c) { |
+    return static_cast<signed char>(c) < -64; |
+  } |
+ |
+  // Returns the lscript number for the letter at src; non-letters |
+  // always yield 0 (common script) |
+  int GetUTF8LetterScriptNum(const char* src); |
+ |
+ |
+  // Returns src advanced to the start of the next quadgram, i.e. src +2..+5 |
+  // Looks at src[0..4], so those five bytes must be readable |
+  const char* AdvanceQuad(const char* src); |
+}       // namespace getone |
+ |
+ |
+ |
+ |
+ |
+ |
+class ScriptScanner { |
+ public: |
+  ScriptScanner(const char* buffer, int buffer_length, bool is_plain_text); |
+  ~ScriptScanner(); |
+ |
+  // Copies the next run of same-script, non-tag letters into a NUL-terminated buffer |
+  bool GetOneScriptSpan(getone::LangSpan* span); |
+ |
+  // Forces the text of a Latin or Cyrillic script span to lowercase |
+  void LowerScriptSpan(getone::LangSpan* span); |
+ |
+  // Copies the next run of same-script, non-tag letters into a NUL-terminated buffer, |
+  // additionally forcing Latin and Cyrillic scripts to lowercase |
+  bool GetOneScriptSpanLower(getone::LangSpan* span); |
+ |
+ private: |
+  int SkipToFrontOfSpan(const char* src, int len, int* script); |
+ |
+  const char* start_byte_; |
+  const char* next_byte_; |
+  const char* next_byte_limit_; |
+  int byte_length_; |
+  bool is_plain_text_; |
+  char* script_buffer_;           // Buffer holding the text with entities expanded |
+  char* script_buffer_lower_;     // Buffer holding the lowercased text |
+}; |
+ |
+ |
+class LangScanner { |
+ public: |
+  LangScanner(const CompactLangDetImpl::LangDetObj* langdetobj, |
+              getone::LangSpan* spn, int smoothwidth, int smoothcandidates, |
+              int maxlangs, int minlangspan); |
+  ~LangScanner(); |
+ |
+ |
+  int script() {return script_;} |
+ |
+  // Switches the scanner to the new text described by spn. |
+  // Keeps the smoothing state if the script is unchanged, otherwise reinitializes it |
+  void NewText(getone::LangSpan* spn); |
+ |
+  bool GetOneShortLangSpanBoot(getone::LangSpan* span);  // Bootstrapping only |
+  bool GetOneLangSpanBoot(getone::LangSpan* span);       // Bootstrapping only |
+ |
+  // The real (non-bootstrap) span getters |
+  bool GetOneShortLangSpan(const CompactLangDetImpl::LangDetObj* langdetobj, |
+                           getone::LangSpan* span); |
+  bool GetOneLangSpan(const CompactLangDetImpl::LangDetObj* langdetobj, |
+                      getone::LangSpan* span); |
+ |
+  // Increases the bias for language `key` by delta |
+  void SetLanguageBias(const CompactLangDetImpl::LangDetObj* langdetobj, |
+                       Language key, int delta); |
+ |
+  // Public scratch state used for debugging output |
+  int next_answer_; |
+  char answer_buffer_[getone::kMaxAnswerBuffer]; |
+  char answer_buffer2_[getone::kMaxAnswerBuffer]; |
+  char answer_buffer3_[getone::kMaxAnswerBuffer]; |
+  char answer_buffer4_[getone::kMaxAnswerBuffer]; |
+ |
+ private: |
+  const char* start_byte_; |
+  const char* next_byte_limit_; |
+  const char* next_byte_; |
+  const char* onelangspan_begin_; |
+  int byte_length_; |
+  int script_; |
+  Language spanlang_; |
+  int smoothwidth_; |
+  int smoothwidth_2_; |
+  int smoothcandidates_; |
+  int maxlangs_; |
+  int minlangspan_; |
+  int rb_size_; |
+  int next_rb_; |
+  int rb_mask_; |
+  uint32* rb_; |
+  int* offset_rb_; |
+}; |
+ |
+#endif // I18N_ENCODINGS_COMPACT_LANG_DET_GETONESCRIPTSPAN_H_ |
Property changes on: third_party\cld\bar\toolbar\cld\i18n\encodings\compact_lang_det\getonescriptspan.h |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |