OLD | NEW |
(Empty) | |
| 1 // |
| 2 // Copyright 2006 Google Inc. All Rights Reserved. |
| 3 // Author: dsites@google.com (Dick Sites) |
| 4 // |
| 5 |
| 6 |
| 7 #ifndef ENCODINGS_COMPACT_ENC_DET_COMPACT_ENC_DET_HINT_CODE_H__ |
| 8 #define ENCODINGS_COMPACT_ENC_DET_COMPACT_ENC_DET_HINT_CODE_H__ |
| 9 |
| 10 #include <string> // for string |
| 11 |
| 12 #include "base/basictypes.h" // for uint32 |
| 13 #include "encodings/public/encodings.h" // for Encoding |
| 14 |
| 15 using std::string; |
| 16 |
| 17 // Returns the name, as a C string, for the (extended) encoding enc |
| 18 const char* MyEncodingName(Encoding enc); |
| 19 |
| 20 // Returns the ASCII string str normalized to its first 4 alphabetic chars and last 4 digit chars. |
| 21 // Letters are forced to lowercase ASCII. |
| 22 // Used to normalize charset= values. |
| 23 string MakeChar44(const string& str); |
| 24 |
| 25 // Returns the ASCII string str normalized to its first 4 alphabetic/digit chars. |
| 26 // Letters are forced to lowercase ASCII. |
| 27 // Used to normalize TLD (top level domain) values. |
| 28 string MakeChar4(const string& str); |
| 29 |
| 30 // Returns the ASCII string str normalized to its first 8 alphabetic/digit chars. |
| 31 // Letters are forced to lowercase ASCII. |
| 32 // Used to normalize values other than charset= and TLD. |
| 33 string MakeChar8(const string& str); |
| 34 |
| 35 // Converts the string pointed to by str: A-Z become a-z, and every char that is neither a digit nor a letter becomes minus '-' |
| 36 void StringToLowercase(string* str); |
| 37 |
// NOTE(review): undocumented in this header. Presumably returns true when the len chars of str starting at wordstart_offset are all digits -- confirm against the implementation.
| 38 bool AllDigits(const string& str, int wordstart_offset, int len); |
| 39 |
| 40 // Extracts the charset value from str and returns it normalized. |
| 41 // Normalization: truncate to 16 chars and make lowercase. |
| 42 string GetCharset(const string& str); |
| 43 |
// NOTE(review): undocumented in this header. Presumably returns the length of the HTTP header found at the start of document_text (document_length chars long) -- confirm against the implementation.
| 44 int GetHttpHeaderLength(const char* document_text, uint32 document_length); |
| 45 |
| 46 // Extracts the top level domain from the URL url_str and returns it normalized. |
| 47 // Normalization: truncate to 16 chars and make lowercase. |
| 48 string GetTLD(const char* url_str); |
| 49 |
| 50 // Extracts the charset from the HTTP headers in http (http_len is presumably the length of that buffer -- TODO confirm). |
| 51 // The result is normalized: truncated to 16 chars and made lowercase. |
| 52 string GetCharsetFromHttp(const char* http, int http_len); |
| 53 |
| 54 // Extracts the charset from a <meta> tag in body (body_len is presumably the length of that buffer -- TODO confirm). |
| 55 // The result is normalized: truncated to 16 chars and made lowercase. |
| 56 string GetCharsetFromMeta(const char* body, int body_len); |
| 57 |
| 58 #endif // ENCODINGS_COMPACT_ENC_DET_COMPACT_ENC_DET_HINT_CODE_H__ |
OLD | NEW |