Index: icu46/source/i18n/csrsbcs.h |
=================================================================== |
--- icu46/source/i18n/csrsbcs.h (revision 0) |
+++ icu46/source/i18n/csrsbcs.h (revision 0) |
@@ -0,0 +1,427 @@ |
+/* |
+ ********************************************************************** |
+ * Copyright (C) 2005-2009, International Business Machines |
+ * Corporation and others. All Rights Reserved. |
+ ********************************************************************** |
+ */ |
+ |
+#ifndef __CSRSBCS_H |
+#define __CSRSBCS_H |
+ |
+#include "unicode/uobject.h" |
+ |
+#if !UCONFIG_NO_CONVERSION |
+ |
+#include "csrecog.h" |
+ |
+U_NAMESPACE_BEGIN |
+ |
+class NGramParser : public UMemory |
+{ |
+private: |
+ int32_t byteIndex; |
+ int32_t ngram; |
+ /* NOTE(review): presumably accumulates n-grams from an InputText and counts hits against ngramList (see parse()/lookup()) - confirm in the .cpp. */ |
+ const int32_t *ngramList; |
+ const uint8_t *charMap; |
+ /* Both tables above are const views; no destructor is declared, so this class does not appear to own them. */ |
+ int32_t ngramCount; |
+ int32_t hitCount; |
+ |
+public: |
+ NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap); |
+ |
+private: |
+ /* |
+ * Binary search for value in table, which must have exactly 64 entries. |
+ */ |
+ int32_t search(const int32_t *table, int32_t value); |
+ /* The remaining private helpers are only declared here; their bodies are not part of this header. */ |
+ void lookup(int32_t thisNgram); |
+ void addByte(int32_t b); |
+ int32_t nextByte(InputText *det); |
+ |
+public: |
+ int32_t parse(InputText *det); /* only public operation besides the constructor; meaning of the result is not visible here */ |
+ |
+}; |
+ |
+class CharsetRecog_sbcs : public CharsetRecognizer |
+{ |
+protected: |
+ UBool haveC1Bytes; |
+ /* Common base of every recognizer class in this header; abstract because getName() and match() are pure virtual. */ |
+public: |
+ CharsetRecog_sbcs(); |
+ |
+ virtual ~CharsetRecog_sbcs(); |
+ |
+ virtual const char *getName() const = 0; |
+ |
+ virtual int32_t match(InputText *det) = 0; |
+ /* Non-virtual helper taking an ngram table and a byte map. NOTE(review): presumably the shared routine that concrete match() overrides call - confirm in the .cpp. */ |
+ int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]); |
+}; |
+ |
+class CharsetRecog_8859_1 : public CharsetRecog_sbcs |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_1(); |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::getName(); match() is not declared, so this class stays abstract. */ |
+ const char *getName() const; |
+}; |
+ |
+class CharsetRecog_8859_2 : public CharsetRecog_sbcs |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_2(); |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::getName(); match() is not declared, so this class stays abstract. */ |
+ const char *getName() const; |
+}; |
+ |
+class CharsetRecog_8859_5 : public CharsetRecog_sbcs |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_5(); |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::getName(); match() is not declared, so this class stays abstract. */ |
+ const char *getName() const; |
+}; |
+ |
+class CharsetRecog_8859_6 : public CharsetRecog_sbcs |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_6(); |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::getName(); match() is not declared, so this class stays abstract. */ |
+ const char *getName() const; |
+}; |
+ |
+class CharsetRecog_8859_7 : public CharsetRecog_sbcs |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_7(); |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::getName(); match() is not declared, so this class stays abstract. */ |
+ const char *getName() const; |
+}; |
+ |
+class CharsetRecog_8859_8 : public CharsetRecog_sbcs |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_8(); |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::getName() (declared again by CharsetRecog_8859_8_I_he); match() is not declared, so this class stays abstract. */ |
+ virtual const char *getName() const; |
+}; |
+ |
+class CharsetRecog_8859_9 : public CharsetRecog_sbcs |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_9(); |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::getName(); match() is not declared, so this class stays abstract. */ |
+ const char *getName() const; |
+}; |
+ |
+class CharsetRecog_8859_1_en : public CharsetRecog_8859_1 |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_1_en(); |
+ /* NOTE(review): getLanguage() is not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h), the _en suffix suggesting English - confirm. */ |
+ const char *getLanguage() const; |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::match(); getName() is inherited from CharsetRecog_8859_1. */ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+class CharsetRecog_8859_1_da : public CharsetRecog_8859_1 |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_1_da(); |
+ /* NOTE(review): getLanguage() is not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h), the _da suffix suggesting Danish - confirm. */ |
+ const char *getLanguage() const; |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::match(); getName() is inherited from CharsetRecog_8859_1. */ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+class CharsetRecog_8859_1_de : public CharsetRecog_8859_1 |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_1_de(); |
+ /* NOTE(review): getLanguage() is not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h), the _de suffix suggesting German - confirm. */ |
+ const char *getLanguage() const; |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::match(); getName() is inherited from CharsetRecog_8859_1. */ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+class CharsetRecog_8859_1_es : public CharsetRecog_8859_1 |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_1_es(); |
+ /* NOTE(review): getLanguage() is not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h), the _es suffix suggesting Spanish - confirm. */ |
+ const char *getLanguage() const; |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::match(); getName() is inherited from CharsetRecog_8859_1. */ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+class CharsetRecog_8859_1_fr : public CharsetRecog_8859_1 |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_1_fr(); |
+ /* NOTE(review): getLanguage() is not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h), the _fr suffix suggesting French - confirm. */ |
+ const char *getLanguage() const; |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::match(); getName() is inherited from CharsetRecog_8859_1. */ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+class CharsetRecog_8859_1_it : public CharsetRecog_8859_1 |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_1_it(); |
+ /* NOTE(review): getLanguage() is not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h), the _it suffix suggesting Italian - confirm. */ |
+ const char *getLanguage() const; |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::match(); getName() is inherited from CharsetRecog_8859_1. */ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+class CharsetRecog_8859_1_nl : public CharsetRecog_8859_1 |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_1_nl(); |
+ /* NOTE(review): getLanguage() is not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h), the _nl suffix suggesting Dutch - confirm. */ |
+ const char *getLanguage() const; |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::match(); getName() is inherited from CharsetRecog_8859_1. */ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+class CharsetRecog_8859_1_no : public CharsetRecog_8859_1 |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_1_no(); |
+ /* NOTE(review): getLanguage() is not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h), the _no suffix suggesting Norwegian - confirm. */ |
+ const char *getLanguage() const; |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::match(); getName() is inherited from CharsetRecog_8859_1. */ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+class CharsetRecog_8859_1_pt : public CharsetRecog_8859_1 |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_1_pt(); |
+ /* NOTE(review): getLanguage() is not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h), the _pt suffix suggesting Portuguese - confirm. */ |
+ const char *getLanguage() const; |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::match(); getName() is inherited from CharsetRecog_8859_1. */ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+class CharsetRecog_8859_1_sv : public CharsetRecog_8859_1 |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_1_sv(); |
+ /* NOTE(review): getLanguage() is not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h), the _sv suffix suggesting Swedish - confirm. */ |
+ const char *getLanguage() const; |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::match(); getName() is inherited from CharsetRecog_8859_1. */ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+class CharsetRecog_8859_2_cs : public CharsetRecog_8859_2 |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_2_cs(); |
+ /* NOTE(review): getLanguage() is not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h), the _cs suffix suggesting Czech - confirm. */ |
+ const char *getLanguage() const; |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::match(); getName() is inherited from CharsetRecog_8859_2. */ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+class CharsetRecog_8859_2_hu : public CharsetRecog_8859_2 |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_2_hu(); |
+ /* NOTE(review): getLanguage() is not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h), the _hu suffix suggesting Hungarian - confirm. */ |
+ const char *getLanguage() const; |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::match(); getName() is inherited from CharsetRecog_8859_2. */ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+class CharsetRecog_8859_2_pl : public CharsetRecog_8859_2 |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_2_pl(); |
+ /* NOTE(review): getLanguage() is not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h), the _pl suffix suggesting Polish - confirm. */ |
+ const char *getLanguage() const; |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::match(); getName() is inherited from CharsetRecog_8859_2. */ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+class CharsetRecog_8859_2_ro : public CharsetRecog_8859_2 |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_2_ro(); |
+ /* NOTE(review): getLanguage() is not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h), the _ro suffix suggesting Romanian - confirm. */ |
+ const char *getLanguage() const; |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::match(); getName() is inherited from CharsetRecog_8859_2. */ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5 |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_5_ru(); |
+ /* NOTE(review): getLanguage() is not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h), the _ru suffix suggesting Russian - confirm. */ |
+ const char *getLanguage() const; |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::match(); getName() is inherited from CharsetRecog_8859_5. */ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6 |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_6_ar(); |
+ /* NOTE(review): getLanguage() is not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h), the _ar suffix suggesting Arabic - confirm. */ |
+ const char *getLanguage() const; |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::match(); getName() is inherited from CharsetRecog_8859_6. */ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+class CharsetRecog_8859_7_el : public CharsetRecog_8859_7 |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_7_el(); |
+ /* NOTE(review): getLanguage() is not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h), the _el suffix suggesting Greek - confirm. */ |
+ const char *getLanguage() const; |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::match(); getName() is inherited from CharsetRecog_8859_7. */ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8 |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_8_I_he(); |
+ /* Declares getName() again although CharsetRecog_8859_8 already declares it virtual. NOTE(review): presumably to report a different name for this variant - confirm in the .cpp. */ |
+ const char *getName() const; |
+ /* NOTE(review): getLanguage() is not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h), the _he suffix suggesting Hebrew - confirm. */ |
+ const char *getLanguage() const; |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::match(). */ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+class CharsetRecog_8859_8_he : public CharsetRecog_8859_8 |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_8_he (); |
+ /* NOTE(review): getLanguage() is not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h), the _he suffix suggesting Hebrew - confirm. */ |
+ const char *getLanguage() const; |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::match(); getName() is inherited from CharsetRecog_8859_8. */ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9 |
+{ |
+public: |
+ virtual ~CharsetRecog_8859_9_tr (); |
+ /* NOTE(review): getLanguage() is not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h), the _tr suffix suggesting Turkish - confirm. */ |
+ const char *getLanguage() const; |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::match(); getName() is inherited from CharsetRecog_8859_9. */ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+class CharsetRecog_windows_1256 : public CharsetRecog_sbcs |
+{ |
+public: |
+ virtual ~CharsetRecog_windows_1256(); |
+ /* Derives from CharsetRecog_sbcs directly and declares overrides of both of its pure virtuals, getName() and match(). */ |
+ const char *getName() const; |
+ /* NOTE(review): getLanguage() is not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h) - confirm. */ |
+ const char *getLanguage() const; |
+ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+class CharsetRecog_windows_1251 : public CharsetRecog_sbcs |
+{ |
+public: |
+ virtual ~CharsetRecog_windows_1251(); |
+ /* Derives from CharsetRecog_sbcs directly and declares overrides of both of its pure virtuals, getName() and match(). */ |
+ const char *getName() const; |
+ /* NOTE(review): getLanguage() is not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h) - confirm. */ |
+ const char *getLanguage() const; |
+ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+ |
+class CharsetRecog_KOI8_R : public CharsetRecog_sbcs |
+{ |
+public: |
+ virtual ~CharsetRecog_KOI8_R(); |
+ /* Derives from CharsetRecog_sbcs directly and declares overrides of both of its pure virtuals, getName() and match(). */ |
+ const char *getName() const; |
+ /* NOTE(review): getLanguage() is not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h) - confirm. */ |
+ const char *getLanguage() const; |
+ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+class CharsetRecog_IBM424_he : public CharsetRecog_sbcs |
+{ |
+public: |
+ virtual ~CharsetRecog_IBM424_he(); |
+ /* Declares only getLanguage(); getName() and match() remain pure virtual here, so this class is abstract and the _rtl/_ltr subclasses declare them. */ |
+ const char *getLanguage() const; |
+}; |
+ |
+class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he { |
+public: |
+ virtual ~CharsetRecog_IBM424_he_rtl(); |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::getName(). */ |
+ const char *getName() const; |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::match(); getLanguage() is inherited from CharsetRecog_IBM424_he. */ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he { |
+public: /* explicit, matching CharsetRecog_IBM424_he_rtl; without it the members of a 'class' default to private */ |
+ virtual ~CharsetRecog_IBM424_he_ltr(); |
+ |
+ const char *getName() const; /* declares the override of the pure virtual CharsetRecog_sbcs::getName() */ |
+ int32_t match(InputText *textIn); /* declares the override of the pure virtual CharsetRecog_sbcs::match() */ |
+}; |
+ |
+class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs |
+{ |
+public: |
+ virtual ~CharsetRecog_IBM420_ar(); |
+ /* Declares only getLanguage(); getName() and match() remain pure virtual here, so this class is abstract and the _rtl/_ltr subclasses declare them. */ |
+ const char *getLanguage() const; |
+ /* Hooks callable from subclasses. NOTE(review): presumably bracket a match to swap in and later restore the input bytes - confirm in the .cpp. */ |
+protected: |
+ void matchInit(InputText *textIn); |
+ void matchFinish(InputText *textIn); |
+ /* NOTE(review): the names suggest a saved input buffer, its length, and a flag saying whether it must be freed - inferred from names only. */ |
+private: |
+ uint8_t *prev_fInputBytes; |
+ int32_t prev_fInputBytesLength; |
+ UBool deleteBuffer; |
+ /* unshape*() take a const byte buffer plus its length and return a non-const uint8_t* with the length via the int32_t& out-parameter; ownership of the result is not visible here. */ |
+ UBool isLamAlef(uint8_t b); |
+ uint8_t *unshapeLamAlef(const uint8_t *inputBytes, int32_t inputBytesLength, int32_t &length); |
+ uint8_t *unshape(const uint8_t *inputBytes, int32_t inputBytesLength, int32_t &length); |
+}; |
+ |
+class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar { |
+public: |
+ virtual ~CharsetRecog_IBM420_ar_rtl(); |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::getName(). */ |
+ const char *getName() const; |
+ /* Declares the override of the pure virtual CharsetRecog_sbcs::match(); getLanguage() is inherited from CharsetRecog_IBM420_ar. */ |
+ int32_t match(InputText *textIn); |
+}; |
+ |
+class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar { |
+public: /* explicit, matching CharsetRecog_IBM420_ar_rtl; without it the members of a 'class' default to private */ |
+ virtual ~CharsetRecog_IBM420_ar_ltr(); |
+ |
+ const char *getName() const; /* declares the override of the pure virtual CharsetRecog_sbcs::getName() */ |
+ int32_t match(InputText *textIn); /* declares the override of the pure virtual CharsetRecog_sbcs::match() */ |
+}; |
+ |
+U_NAMESPACE_END |
+ |
+#endif |
+#endif /* __CSRSBCS_H */ |
Property changes on: icu46/source/i18n/csrsbcs.h |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |