third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_utf8statetable.h - Issue 122007: [chromium-reviews] Add Compact Language Detection (CLD) library to Chrome. This works in Windows...

Unified Diff: third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_utf8statetable.h

Issue 122007: [chromium-reviews] Add Compact Language Detection (CLD) library to Chrome. This works in Windows... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 11 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_utf.h ('k') | third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_utf8statetable.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_utf8statetable.h

===================================================================

--- third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_utf8statetable.h (revision 0)

+++ third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_utf8statetable.h (revision 0)

@@ -0,0 +1,145 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#ifndef BAR_TOOLBAR_CLD_I18N_ENCODINGS_COMPACT_LANG_DET_WIN_CLD_UTF8STATETABLE_H_

+#define BAR_TOOLBAR_CLD_I18N_ENCODINGS_COMPACT_LANG_DET_WIN_CLD_UTF8STATETABLE_H_

+#if !defined(CLD_WINDOWS)

+#include "third_party/cld/util/utf8/utf8statetable.h"

+#else

+// This code is copied from google3/util/utf8/internal/utf8statetable.cc and was

+// not modified (it generates a lot of lint warnings, but I decided not to fix

+// them to simplify its maintenance).

+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_basictypes.h"

+// These four-byte entries compactly encode how many bytes 0..255 to delete

+// in making a string replacement, how many bytes to add 0..255, and the offset

+// 0..64k-1 of the replacement string in remap_string.

+struct RemapEntry {  // One compactly encoded string replacement (four bytes)

+ uint8 delete_bytes;  // Number of bytes to delete, 0..255

+ uint8 add_bytes;  // Number of bytes to add, 0..255

+ uint16 bytes_offset;  // Offset, 0..64k-1, of the replacement string in remap_string

+};

+// Exit type codes for state tables. All but the first get stuffed into

+// signed one-byte entries. The first is only generated by executable code.

+// To distinguish from next-state entries, these must be contiguous and

+// all <= kExitNone

+typedef enum {  // Exit codes for one-byte state tables; values are contiguous

+ kExitDstSpaceFull = 239,  // 239; only generated by executable code, not stored in tables

+ kExitIllegalStructure, // 240

+ kExitOK, // 241

+ kExitReject, // 242

+ kExitReplace1,  // 243

+ kExitReplace2,  // 244

+ kExitReplace3,  // 245

+ kExitReplace21,  // 246

+ kExitReplace31,  // 247

+ kExitReplace32,  // 248

+ kExitReplaceOffset1,  // 249

+ kExitReplaceOffset2,  // 250

+ kExitReplace1S0,  // 251

+ kExitSpecial,  // 252

+ kExitDoAgain,  // 253

+ kExitRejectAlt,  // 254

+ kExitNone // 255

+} ExitReason;

+typedef enum {  // Same names as ExitReason with a _2 suffix; NOTE(review): presumably for two-byte tables -- confirm

+ kExitDstSpaceFull_2 = -32769,  // -32769; one below -32768, the minimum of a 16-bit signed value

+ kExitIllegalStructure_2, // -32768

+ kExitOK_2, // -32767

+ kExitReject_2, // -32766

+ kExitReplace1_2,  // -32765

+ kExitReplace2_2,  // -32764

+ kExitReplace3_2,  // -32763

+ kExitReplace21_2,  // -32762

+ kExitReplace31_2,  // -32761

+ kExitReplace32_2,  // -32760

+ kExitReplaceOffset1_2,  // -32759

+ kExitReplaceOffset2_2,  // -32758

+ kExitReplace1S0_2,  // -32757

+ kExitSpecial_2,  // -32756

+ kExitDoAgain_2,  // -32755

+ kExitRejectAlt_2,  // -32754

+ kExitNone_2 // -32753

+} ExitReason_2;

+// This struct represents one entire state table. The three initialized byte

+// areas are state_table, remap_base, and remap_string. state0 and state0_size

+// give the byte offset and length within state_table of the initial state --

+// table lookups are expected to start and end in this state, but for

+// truncated UTF-8 strings, may end in a different state. These allow a quick

+// test for that condition. entry_shift is 8 for tables subscripted by a full

+// byte value and 6 for space-optimized tables subscripted by only six

+// significant bits in UTF-8 continuation bytes.

+typedef struct {  // One entire state table whose state_table entries are single bytes

+ const uint32 state0;  // Byte offset of the initial state within state_table

+ const uint32 state0_size;  // Byte length of the initial state within state_table

+ const uint32 total_size;  // NOTE(review): presumably the total byte size of state_table -- confirm

+ const int max_expand;

+ const int entry_shift;  // 8 when subscripted by a full byte, 6 when subscripted by six significant bits

+ const int bytes_per_entry;  // NOTE(review): presumably 1 for this one-byte layout -- confirm

+ const uint32 losub;

+ const uint32 hiadd;

+ const uint8* state_table;  // Initialized byte area: the state entries

+ const RemapEntry* remap_base;  // Initialized byte area: RemapEntry records

+ const uint8* remap_string;  // Initialized byte area: replacement strings addressed by RemapEntry.bytes_offset

+ const uint8* fast_state;

+} UTF8StateMachineObj;

+// Near-duplicate declaration for tables with two-byte entries

+typedef struct {  // State table variant for two-byte entries; differs from UTF8StateMachineObj only in state_table's type

+ const uint32 state0;  // NOTE(review): presumably same meaning as UTF8StateMachineObj.state0 -- confirm

+ const uint32 state0_size;  // NOTE(review): presumably same meaning as UTF8StateMachineObj.state0_size -- confirm

+ const uint32 total_size;

+ const int max_expand;

+ const int entry_shift;

+ const int bytes_per_entry;  // NOTE(review): presumably 2 for this two-byte layout -- confirm

+ const uint32 losub;

+ const uint32 hiadd;

+ const signed short* state_table;  // Signed short entries here, where UTF8StateMachineObj uses uint8

+ const RemapEntry* remap_base;  // RemapEntry records

+ const uint8* remap_string;  // Replacement strings addressed by RemapEntry.bytes_offset

+ const uint8* fast_state;

+} UTF8StateMachineObj_2;

+typedef UTF8StateMachineObj UTF8PropObj;  // Table type taken by the UTF8GenericProperty* lookups

+typedef UTF8StateMachineObj UTF8ScanObj;  // Table type taken by UTF8GenericScan

+typedef UTF8StateMachineObj_2 UTF8PropObj_2;  // Alias for the two-byte-entry table layout

+// Look up property of one UTF-8 character and advance over it

+// Return 0 if input length is zero

+// Return 0 and advance one byte if input is ill-formed

+uint8 UTF8GenericProperty(const UTF8PropObj* st,  // Property table to look the character up in

+ const uint8** src,  // In/out: points at the character on entry, advanced over it on return

+ int* srclen);  // In/out: input length; NOTE(review): presumably reduced by the bytes consumed -- confirm

+// BigOneByte versions are needed for tables > 240 states, but most

+// won't need the TwoByte versions.

+// Look up property of one UTF-8 character and advance over it

+// Return 0 if input length is zero

+// Return 0 and advance one byte if input is ill-formed

+uint8 UTF8GenericPropertyBigOneByte(const UTF8PropObj* st,  // Property table to look the character up in

+ const uint8** src,  // In/out: points at the character on entry, advanced over it on return

+ int* srclen);  // In/out: input length; NOTE(review): presumably reduced by the bytes consumed -- confirm

+// Scan a UTF-8 stringpiece based on a state table.

+// Always scans complete UTF-8 characters.

+// Stores the number of bytes scanned in *bytes_consumed and returns the reason for exiting.

+int UTF8GenericScan(const UTF8ScanObj* st,  // State table that drives the scan

+ const uint8* str,  // Start of the UTF-8 bytes to scan

+ const int len,  // NOTE(review): presumably the length of str in bytes -- confirm

+ int* bytes_consumed);  // Out: number of bytes scanned; return value is the exit reason (NOTE(review): presumably an ExitReason value -- confirm)

+#endif

+#endif // BAR_TOOLBAR_CLD_I18N_ENCODINGS_COMPACT_LANG_DET_WIN_CLD_UTF8STATETABLE_H_

Property changes on: third_party\cld\bar\toolbar\cld\i18n\encodings\compact_lang_det\win\cld_utf8statetable.h

___________________________________________________________________

Added: svn:eol-style

+ LF