Index: third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_utf8statetable.h |
=================================================================== |
--- third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_utf8statetable.h (revision 0) |
+++ third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_utf8statetable.h (revision 0) |
@@ -0,0 +1,145 @@ |
+// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#ifndef BAR_TOOLBAR_CLD_I18N_ENCODINGS_COMPACT_LANG_DET_WIN_CLD_UTF8STATETABLE_H_ |
+#define BAR_TOOLBAR_CLD_I18N_ENCODINGS_COMPACT_LANG_DET_WIN_CLD_UTF8STATETABLE_H_ |
+ |
+#if !defined(CLD_WINDOWS) |
+ |
+#include "third_party/cld/util/utf8/utf8statetable.h" |
+ |
+#else |
+ |
+// This code is copied from google3/util/utf8/internal/utf8statetable.cc and was |
+// not modified (it generates a lot of lint warnings, but I decided not to fix |
+// them to simplify its maintenance). |
+ |
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_basictypes.h" |
+ |
+// These four-byte entries compactly encode how many bytes 0..255 to delete |
+// in making a string replacement, how many bytes to add 0..255, and the offset |
+// 0..64k-1 of the replacement string in remap_string. |
+struct RemapEntry { |
+ uint8 delete_bytes; |
+ uint8 add_bytes; |
+ uint16 bytes_offset; |
+}; |
+ |
+// Exit type codes for state tables. All but the first get stuffed into |
+// signed one-byte entries. The first is only generated by executable code. |
+// To distinguish from next-state entries, these must be contiguous and |
+// all <= kExitNone |
+typedef enum { |
+ kExitDstSpaceFull = 239, |
+ kExitIllegalStructure, // 240 |
+ kExitOK, // 241 |
+ kExitReject, // ... |
+ kExitReplace1, |
+ kExitReplace2, |
+ kExitReplace3, |
+ kExitReplace21, |
+ kExitReplace31, |
+ kExitReplace32, |
+ kExitReplaceOffset1, |
+ kExitReplaceOffset2, |
+ kExitReplace1S0, |
+ kExitSpecial, |
+ kExitDoAgain, |
+ kExitRejectAlt, |
+ kExitNone // 255 |
+} ExitReason; |
+ |
+typedef enum { |
+ kExitDstSpaceFull_2 = -32769, |
+ kExitIllegalStructure_2, // -32768 |
+ kExitOK_2, // -32767 |
+ kExitReject_2, // ... |
+ kExitReplace1_2, |
+ kExitReplace2_2, |
+ kExitReplace3_2, |
+ kExitReplace21_2, |
+ kExitReplace31_2, |
+ kExitReplace32_2, |
+ kExitReplaceOffset1_2, |
+ kExitReplaceOffset2_2, |
+ kExitReplace1S0_2, |
+ kExitSpecial_2, |
+ kExitDoAgain_2, |
+ kExitRejectAlt_2, |
+ kExitNone_2 // -32753 |
+} ExitReason_2; |
+ |
+// This struct represents one entire state table. The three initialized byte |
+// areas are state_table, remap_base, and remap_string. state0 and state0_size |
+// give the byte offset and length within state_table of the initial state -- |
+// table lookups are expected to start and end in this state, but for |
+// truncated UTF-8 strings, may end in a different state. These allow a quick |
+// test for that condition. entry_shift is 8 for tables subscripted by a full |
+// byte value and 6 for space-optimized tables subscripted by only six |
+// significant bits in UTF-8 continuation bytes. |
+typedef struct { |
+ const uint32 state0; |
+ const uint32 state0_size; |
+ const uint32 total_size; |
+ const int max_expand; |
+ const int entry_shift; |
+ const int bytes_per_entry; |
+ const uint32 losub; |
+ const uint32 hiadd; |
+ const uint8* state_table; |
+ const RemapEntry* remap_base; |
+ const uint8* remap_string; |
+ const uint8* fast_state; |
+} UTF8StateMachineObj; |
+ |
+// Near-duplicate declaration for tables with two-byte entries |
+typedef struct { |
+ const uint32 state0; |
+ const uint32 state0_size; |
+ const uint32 total_size; |
+ const int max_expand; |
+ const int entry_shift; |
+ const int bytes_per_entry; |
+ const uint32 losub; |
+ const uint32 hiadd; |
+ const signed short* state_table; |
+ const RemapEntry* remap_base; |
+ const uint8* remap_string; |
+ const uint8* fast_state; |
+} UTF8StateMachineObj_2; |
+ |
+ |
+typedef UTF8StateMachineObj UTF8PropObj; |
+typedef UTF8StateMachineObj UTF8ScanObj; |
+typedef UTF8StateMachineObj_2 UTF8PropObj_2; |
+ |
+ |
+// Look up property of one UTF-8 character and advance over it |
+// Return 0 if input length is zero |
+// Return 0 and advance one byte if input is ill-formed |
+uint8 UTF8GenericProperty(const UTF8PropObj* st, |
+ const uint8** src, |
+ int* srclen); |
+ |
+// BigOneByte versions are needed for tables > 240 states, but most |
+// won't need the TwoByte versions. |
+ |
+// Look up property of one UTF-8 character and advance over it |
+// Return 0 if input length is zero |
+// Return 0 and advance one byte if input is ill-formed |
+uint8 UTF8GenericPropertyBigOneByte(const UTF8PropObj* st, |
+ const uint8** src, |
+ int* srclen); |
+ |
+// Scan a UTF-8 stringpiece based on a state table. |
+// Always scan complete UTF-8 characters |
+// Set number of bytes scanned. Return reason for exiting |
+int UTF8GenericScan(const UTF8ScanObj* st, |
+ const uint8* str, |
+ const int len, |
+ int* bytes_consumed); |
+ |
+#endif |
+ |
+#endif // BAR_TOOLBAR_CLD_I18N_ENCODINGS_COMPACT_LANG_DET_WIN_CLD_UTF8STATETABLE_H_ |
Property changes on: third_party\cld\bar\toolbar\cld\i18n\encodings\compact_lang_det\win\cld_utf8statetable.h |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |