| Index: third_party/cld/encodings/compact_enc_det/compact_enc_det_hint_code.h
|
| diff --git a/third_party/cld/encodings/compact_enc_det/compact_enc_det_hint_code.h b/third_party/cld/encodings/compact_enc_det/compact_enc_det_hint_code.h
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..4bd429168e25631573c08bde31ff776f34f38116
|
| --- /dev/null
|
| +++ b/third_party/cld/encodings/compact_enc_det/compact_enc_det_hint_code.h
|
| @@ -0,0 +1,58 @@
|
| +//
|
| +// Copyright 2006 Google Inc. All Rights Reserved.
|
| +// Author: dsites@google.com (Dick Sites)
|
| +//
|
| +
|
| +
|
| +#ifndef ENCODINGS_COMPACT_ENC_DET_COMPACT_ENC_DET_HINT_CODE_H__
|
| +#define ENCODINGS_COMPACT_ENC_DET_COMPACT_ENC_DET_HINT_CODE_H__
|
| +
|
| +#include <string> // for string
|
| +
|
| +#include "base/basictypes.h" // for uint32
|
| +#include "encodings/public/encodings.h" // for Encoding
|
| +
|
| +using std::string;  // NOTE(review): header-scope using-declaration; every includer gets `string`, and the declarations below depend on it
|
| +
|
| +// Returns the name for the extended encoding value enc.
|
| +const char* MyEncodingName(Encoding enc);
|
| +
|
| +// Normalizes an ASCII string to its first 4 alphabetic chars and last 4 digits.
|
| +// Letters are forced to lowercase ASCII; the result is returned by value.
|
| +// Used to normalize charset= values.
|
| +string MakeChar44(const string& str);
|
| +
|
| +// Normalizes an ASCII string to its first 4 alphabetic/digit chars.
|
| +// Letters are forced to lowercase ASCII; the result is returned by value.
|
| +// Used to normalize TLD values.
|
| +string MakeChar4(const string& str);
|
| +
|
| +// Normalizes an ASCII string to its first 8 alphabetic/digit chars.
|
| +// Letters are forced to lowercase ASCII; the result is returned by value.
|
| +// Used to normalize other values (presumably anything but charset=/TLD).
|
| +string MakeChar8(const string& str);
|
| +
|
| +// Rewrites *str: A-Z become a-z, and every non-letter, non-digit char becomes '-'.
|
| +void StringToLowercase(string* str);
|
| +
|
| +bool AllDigits(const string& str, int wordstart_offset, int len);  // NOTE(review): undocumented; presumably tests len chars of str from wordstart_offset -- confirm in the .cc
|
| +
|
| +// Extracts the charset value from str.
|
| +// The result is normalized: truncated to 16 chars and made lowercase.
|
| +string GetCharset(const string& str);
|
| +
|
| +int GetHttpHeaderLength(const char* document_text, uint32 document_length);  // NOTE(review): undocumented; length is uint32 but result is int -- confirm the no-header return value
|
| +
|
| +// Extracts the top-level domain (TLD) from the URL url_str.
|
| +// The result is normalized: truncated to 16 chars and made lowercase.
|
| +string GetTLD(const char* url_str);
|
| +
|
| +// Extracts the charset from the HTTP headers passed in http/http_len.
|
| +// The result is normalized: truncated to 16 chars and made lowercase.
|
| +string GetCharsetFromHttp(const char* http, int http_len);
|
| +
|
| +// Extracts the charset from a <meta> tag in the text passed in body/body_len.
|
| +// The result is normalized: truncated to 16 chars and made lowercase.
|
| +string GetCharsetFromMeta(const char* body, int body_len);
|
| +
|
| +#endif // ENCODINGS_COMPACT_ENC_DET_COMPACT_ENC_DET_HINT_CODE_H__
|
|
|