OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ********************************************************************** |
| 3 * Copyright (C) 2005-2006, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. |
| 5 ********************************************************************** |
| 6 */ |
| 7 |
| 8 #ifndef __CSR2022_H |
| 9 #define __CSR2022_H |
| 10 |
| 11 #include "unicode/utypes.h" |
| 12 |
| 13 #if !UCONFIG_NO_CONVERSION |
| 14 |
| 15 #include "csrecog.h" |
| 16 |
| 17 U_NAMESPACE_BEGIN |
| 18 |
| 19 /** |
| 20 * class CharsetRecog_2022 part of the ICU charset detection implementation. |
| 21 * This is a superclass for the individual detectors for |
| 22 * each of the detectable members of the ISO 2022 family |
| 23 * of encodings. |
| 24 * |
| 25 * The separate detector classes (JP, KR, CN) derive from this class. |
| 26 * The pure virtual destructor makes this class abstract. |
| 27 * @internal |
| 28 */ |
| 29 class CharsetRecog_2022 : public CharsetRecognizer |
| 30 { |
| 31 |
| 32 public: |
| 33 virtual ~CharsetRecog_2022() = 0; |
| 34 |
| 35 protected: |
| 36 |
| 37 /** |
| 38 * Matching function shared among the 2022 detectors JP, CN and KR. |
| 39 * Counts up the number of legal and unrecognized escape sequences in |
| 40 * the sample of text, and computes a score based on the total number and |
| 41 * the proportion that fit the encoding. |
| 42 * |
| 43 * @param text the byte buffer containing text to analyse |
| 44 * @param textLen the size of the text in bytes. |
| 45 * @param escapeSequences the byte escape sequences to test for (rows of 5 bytes). |
| 46 * @param escapeSequences_length presumably the number of rows in escapeSequences -- TODO confirm. |
| 47 * @return match quality, in the range of 0-100. |
| 48 */ |
| 49 int32_t match_2022(const uint8_t *text, int32_t textLen, const uint8_t escap
eSequences[][5], int32_t escapeSequences_length); |
| 50 |
| 51 }; |
| 52 /** Detector for the JP member of the ISO 2022 family; derives from CharsetRecog_2022. */ |
| 53 class CharsetRecog_2022JP :public CharsetRecog_2022 |
| 54 { |
| 55 public: |
| 56 virtual ~CharsetRecog_2022JP() {} |
| 57 /* NOTE(review): presumably returns the name of the charset this detector recognizes -- confirm in the .cpp. */ |
| 58 const char *getName() const; |
| 59 /* NOTE(review): presumably scores textIn for this encoding (cf. match_2022, 0-100) -- confirm in the .cpp. */ |
| 60 int32_t match(InputText *textIn); |
| 61 }; |
| 62 /** Detector for the KR member of the ISO 2022 family; derives from CharsetRecog_2022. */ |
| 63 class CharsetRecog_2022KR :public CharsetRecog_2022 { |
| 64 public: |
| 65 virtual ~CharsetRecog_2022KR() {} |
| 66 /* NOTE(review): presumably returns the name of the charset this detector recognizes -- confirm in the .cpp. */ |
| 67 const char *getName() const; |
| 68 /* NOTE(review): presumably scores textIn for this encoding (cf. match_2022, 0-100) -- confirm in the .cpp. */ |
| 69 int32_t match(InputText *textIn); |
| 70 |
| 71 }; |
| 72 /** Detector for the CN member of the ISO 2022 family; derives from CharsetRecog_2022. */ |
| 73 class CharsetRecog_2022CN :public CharsetRecog_2022 |
| 74 { |
| 75 public: |
| 76 virtual ~CharsetRecog_2022CN() {} |
| 77 /* NOTE(review): presumably returns the name of the charset this detector recognizes -- confirm in the .cpp. */ |
| 78 const char* getName() const; |
| 79 /* NOTE(review): presumably scores textIn for this encoding (cf. match_2022, 0-100) -- confirm in the .cpp. */ |
| 80 int32_t match(InputText *textIn); |
| 81 }; |
| 82 |
| 83 U_NAMESPACE_END |
| 84 |
| 85 #endif |
| 86 #endif /* __CSR2022_H */ |
OLD | NEW |