| Index: third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/getonescriptspan.h
 | 
| ===================================================================
 | 
| --- third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/getonescriptspan.h	(revision 0)
 | 
| +++ third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/getonescriptspan.h	(revision 0)
 | 
| @@ -0,0 +1,131 @@
 | 
| +// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
 | 
| +// Use of this source code is governed by a BSD-style license that can be
 | 
| +// found in the LICENSE file.
 | 
| +
 | 
| +#ifndef I18N_ENCODINGS_COMPACT_LANG_DET_GETONESCRIPTSPAN_H_
 | 
| +#define I18N_ENCODINGS_COMPACT_LANG_DET_GETONESCRIPTSPAN_H_
 | 
| +
 | 
| +#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/letterscript_enum.h"
 | 
| +#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_impl.h"
 | 
| +
 | 
| +namespace getone {
 | 
| +  static const int kMaxScriptBuffer = 4096;
 | 
| +  static const int kMaxScriptLowerBuffer = (kMaxScriptBuffer * 3) / 2;
 | 
| +  static const int kMaxScriptBytes = kMaxScriptBuffer- 8;   // Leave some room
 | 
| +  static const int kMaxAnswerBuffer = 256;
 | 
| +
 | 
| +  typedef enum UnicodeLScript ULScript;
 | 
| +
 | 
| +  typedef struct {
 | 
| +    char* text;             // Pointer to the span, somewhere
 | 
| +    int text_bytes;         // Number of bytes of text in the span
 | 
| +    int offset;             // Offset of start of span in original input buffer
 | 
| +    ULScript script;        // Script of all the letters in this span
 | 
| +    Language lang;          // Language identified for this span
 | 
| +    bool truncated;         // true if buffer filled up before a
 | 
| +                            // different script or EOF was found
 | 
| +  } LangSpan;
 | 
| +
 | 
| +
 | 
| +  static inline bool IsContinuationByte(char c) {
 | 
| +    return static_cast<signed char>(c) < -64;
 | 
| +  }
 | 
| +
 | 
| +  // Gets lscript number for letters; always returns
 | 
| +  //   0 (common script) for non-letters
 | 
| +  int GetUTF8LetterScriptNum(const char* src);
 | 
| +
 | 
| +
 | 
| +  // Update src pointer to point to next quadgram, +2..+5
 | 
| +  // Looks at src[0..4]
 | 
| +  const char* AdvanceQuad(const char* src);
 | 
| +}     // end namespace getone
 | 
| +
 | 
| +
 | 
| +
 | 
| +
 | 
| +
 | 
| +
 | 
| +class ScriptScanner {
 | 
| + public:
 | 
| +  ScriptScanner(const char* buffer, int buffer_length, bool is_plain_text);
 | 
| +  ~ScriptScanner();
 | 
| +
 | 
| +  // Copy next run of same-script non-tag letters to buffer [NUL terminated]
 | 
| +  bool GetOneScriptSpan(getone::LangSpan* span);
 | 
| +
 | 
| +  // Force Latin and Cyrillic scripts to be lowercase
 | 
| +  void LowerScriptSpan(getone::LangSpan* span);
 | 
| +
 | 
| +  // Copy next run of same-script non-tag letters to buffer [NUL terminated]
 | 
| +  // Force Latin and Cyrillic scripts to be lowercase
 | 
| +  bool GetOneScriptSpanLower(getone::LangSpan* span);
 | 
| +
 | 
| + private:
 | 
| +  int SkipToFrontOfSpan(const char* src, int len, int* script);
 | 
| +
 | 
| +  const char* start_byte_;
 | 
| +  const char* next_byte_;
 | 
| +  const char* next_byte_limit_;
 | 
| +  int byte_length_;
 | 
| +  bool is_plain_text_;
 | 
| +  char* script_buffer_;           // Holds text with expanded entities
 | 
| +  char* script_buffer_lower_;     // Holds lowercased text
 | 
| +};
 | 
| +
 | 
| +
 | 
| +class LangScanner {
 | 
| + public:
 | 
| +  LangScanner(const CompactLangDetImpl::LangDetObj* langdetobj,
 | 
| +              getone::LangSpan* spn, int smoothwidth, int smoothcandidates,
 | 
| +              int maxlangs, int minlangspan);
 | 
| +  ~LangScanner();
 | 
| +
 | 
| +
 | 
| +  int script() {return script_;}
 | 
| +
 | 
| +  // Use new text
 | 
| +  // Keep smoothing state if same script, otherwise reinit smoothing
 | 
| +  void NewText(getone::LangSpan* spn);
 | 
| +
 | 
| +  bool GetOneShortLangSpanBoot(getone::LangSpan* span);  // Just for bootstrapping
 | 
| +  bool GetOneLangSpanBoot(getone::LangSpan* span);       // Just for bootstrapping
 | 
| +
 | 
| +  // The real ones
 | 
| +  bool GetOneShortLangSpan(const CompactLangDetImpl::LangDetObj* langdetobj,
 | 
| +                           getone::LangSpan* span);
 | 
| +  bool GetOneLangSpan(const CompactLangDetImpl::LangDetObj* langdetobj,
 | 
| +                      getone::LangSpan* span);
 | 
| +
 | 
| +  // Increases language bias by delta
 | 
| +  void SetLanguageBias(const CompactLangDetImpl::LangDetObj* langdetobj,
 | 
| +                       Language key, int delta);
 | 
| +
 | 
| +  // For debugging output
 | 
| +  int next_answer_;
 | 
| +  char answer_buffer_[getone::kMaxAnswerBuffer];
 | 
| +  char answer_buffer2_[getone::kMaxAnswerBuffer];
 | 
| +  char answer_buffer3_[getone::kMaxAnswerBuffer];
 | 
| +  char answer_buffer4_[getone::kMaxAnswerBuffer];
 | 
| +
 | 
| + private:
 | 
| +  const char* start_byte_;
 | 
| +  const char* next_byte_limit_;
 | 
| +  const char* next_byte_;
 | 
| +  const char* onelangspan_begin_;
 | 
| +  int byte_length_;
 | 
| +  int script_;
 | 
| +  Language spanlang_;
 | 
| +  int smoothwidth_;
 | 
| +  int smoothwidth_2_;
 | 
| +  int smoothcandidates_;
 | 
| +  int maxlangs_;
 | 
| +  int minlangspan_;
 | 
| +  int rb_size_;
 | 
| +  int next_rb_;
 | 
| +  int rb_mask_;
 | 
| +  uint32* rb_;
 | 
| +  int* offset_rb_;
 | 
| +};
 | 
| +
 | 
| +#endif  // I18N_ENCODINGS_COMPACT_LANG_DET_GETONESCRIPTSPAN_H_
 | 
| 
 | 
| Property changes on: third_party\cld\bar\toolbar\cld\i18n\encodings\compact_lang_det\getonescriptspan.h
 | 
| ___________________________________________________________________
 | 
| Added: svn:eol-style
 | 
|    + LF
 | 
| 
 | 
| 
 |