Index: third_party/cld/encodings/compact_enc_det/compact_enc_det_hint_code.h |
diff --git a/third_party/cld/encodings/compact_enc_det/compact_enc_det_hint_code.h b/third_party/cld/encodings/compact_enc_det/compact_enc_det_hint_code.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..4bd429168e25631573c08bde31ff776f34f38116 |
--- /dev/null |
+++ b/third_party/cld/encodings/compact_enc_det/compact_enc_det_hint_code.h |
@@ -0,0 +1,58 @@ |
+// |
+// Copyright 2006 Google Inc. All Rights Reserved. |
+// Author: dsites@google.com (Dick Sites) |
+// |
+ |
+ |
+#ifndef ENCODINGS_COMPACT_ENC_DET_COMPACT_ENC_DET_HINT_CODE_H__ |
+#define ENCODINGS_COMPACT_ENC_DET_COMPACT_ENC_DET_HINT_CODE_H__ |
+ |
+#include <string> // for string |
+ |
+#include "base/basictypes.h" // for uint32 |
+#include "encodings/public/encodings.h" // for Encoding |
+ |
+using std::string; |
+ |
+// Returns the name of the extended encoding enc as a C string. |
+const char* MyEncodingName(Encoding enc); |
+ |
+// Normalizes an ASCII string to its first 4 alphabetic characters and its |
+// last 4 digit characters; letters are forced to lowercase ASCII. |
+// Used to normalize charset= values. |
+string MakeChar44(const string& str); |
+ |
+// Normalizes an ASCII string to its first 4 alphabetic or digit characters; |
+// letters are forced to lowercase ASCII. |
+// Used to normalize TLD (top-level domain) values. |
+string MakeChar4(const string& str); |
+ |
+// Normalizes an ASCII string to its first 8 alphabetic or digit characters; |
+// letters are forced to lowercase ASCII. |
+// Used to normalize other values (presumably neither charset= nor TLD). |
+string MakeChar8(const string& str); |
+ |
+// Maps A-Z to a-z and every non-letter, non-digit character to minus '-'. |
+void StringToLowercase(string* str); |
+ |
+bool AllDigits(const string& str, int wordstart_offset, int len); |
+ |
+// Extracts the charset value from str. |
+// The result is normalized: truncated to 16 characters and lowercased. |
+string GetCharset(const string& str); |
+ |
+int GetHttpHeaderLength(const char* document_text, uint32 document_length); |
+ |
+// Extracts the top-level domain (TLD) from the URL url_str. |
+// The result is normalized: truncated to 16 characters and lowercased. |
+string GetTLD(const char* url_str); |
+ |
+// Extracts the charset from the HTTP headers passed in http. |
+// The result is normalized: truncated to 16 characters and lowercased. |
+string GetCharsetFromHttp(const char* http, int http_len); |
+ |
+// Extracts the charset from a <meta> tag in the text passed as body. |
+// The result is normalized: truncated to 16 characters and lowercased. |
+string GetCharsetFromMeta(const char* body, int body_len); |
+ |
+#endif // ENCODINGS_COMPACT_ENC_DET_COMPACT_ENC_DET_HINT_CODE_H__ |