base/string.h - Issue 624713003: Keep only base/extractor.[cc|h].

Unified Diff: base/string.h

Issue 624713003: Keep only base/extractor.[cc|h]. (Closed) Base URL: https://chromium.googlesource.com/external/omaha.git@master

Patch Set: Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: base/string.h

diff --git a/base/string.h b/base/string.h

deleted file mode 100644

index 77063416c856511031c6aa56f4df8c4cc3d42d96..0000000000000000000000000000000000000000

--- a/base/string.h

+++ /dev/null

@@ -1,542 +0,0 @@

-//

-// Licensed under the Apache License, Version 2.0 (the "License");

-// you may not use this file except in compliance with the License.

-// You may obtain a copy of the License at

-//

-// http://www.apache.org/licenses/LICENSE-2.0

-//

-// Unless required by applicable law or agreed to in writing, software

-// distributed under the License is distributed on an "AS IS" BASIS,

-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

-// See the License for the specific language governing permissions and

-// limitations under the License.

-// ========================================================================

-#ifndef OMAHA_BASE_STRING_H_

-#define OMAHA_BASE_STRING_H_

-#include <windows.h>

-#include <vector>

-#include "base/basictypes.h"

-#include "omaha/base/constants.h"

-#include "omaha/base/debug.h"

-namespace omaha {

-#define STR_SIZE(str) (arraysize(str)-1) // number of characters in char array (only for single-byte string literals!!!)

-#define TSTR_SIZE(tstr) (arraysize(tstr)-1) // like STR_SIZE but works on _T("string literal") ONLY!!!

-#define kEllipsis L".."

-// The number of replacements matches we expect, before we start allocating extra memory

-// to process it. This is an optimizing constant

-#define kExpectedMaxReplaceMatches 100

-// TODO(omaha): above each of these function names, we should

-// define what we expect the implementation to do. that way,

-// implementers will know what is desired. an example would probably

-// make things easiest.

-CString AbbreviateString (const CString & title, int32 max_len);

-CString AbbreviateUri (const CString & uri, int32 max_len);

-CString NormalizeUri (const CString & uri);

-// removes "http://", "ftp://", "mailto:" or "file://" (note that the "file" protocol is

-// like: "file:///~/calendar", this method removes only the first two slashes

-CString RemoveInternetProtocolHeader (const CString& url);

-// Converts a file:// URI to a valid Windows path.

-HRESULT ConvertFileUriToLocalPath(const CString& uri, CString* path_out);

-void RemoveFromStart (CString & s, const TCHAR* remove, bool ignore_case);

-void RemoveFromEnd (CString & s, const TCHAR* remove);

-// Limit string to max length, truncating and adding ellipsis if needed

-// Attempts to not leave a partial word at the end, unless min_len is reached

-CString ElideIfNeeded (const CString & input_string, int max_len, int min_len);

-// The ability to clean up a string for relevant target audiences. Add flags accordingly

-// Sanitizes for insertion in an HTML document, uses the basic literals [<>&]

-#define kSanHtml 0x1

-// XML is the HTML replacements, and a few more

-#define kSanXml (kSanHtml | 0x2)

-// JavaScript has a separate set of encodings [which is a superset of HTML replacements]

-#define kSanJs (kSanHtml | 0x4)

-// For input fields on HTML documents

-#define kSanHtmlInput 0x8

-// TODO(omaha): be consistent on use of int/uint32/int32 for lengths

-// The input length of the string does not include the null terminator.

-// Caller deletes the returned buffer.

-WCHAR *ToWide (const char *s, int len);

-// returns pointer to data if found otherwise NULL

-const byte *BufferContains (const byte *buf, uint32 buf_len, const byte *data, uint32 data_len);

-// Given a string, 'protect' the characters that are invalid for a given mode

-// For instance, kSanHtml will replace < with the HTML literal equivalent

-// If kSanHtml is used, and bold_periods is true, then periods used for url abbreviation are bolded.

-// NOTE: If you call AbbreviateLinkForDisplay before this function, then there might be periods

-// used for abbreviation. BoldAbbreviationPeriods should be called after HighlightTerms.

-CString SanitizeString(const CString & in, DWORD mode);

-// Bolds the periods used for abbreviation. Call this after HighlightTerms.

-CString BoldAbbreviationPeriods(const CString & in);

-// Unencode a URL encoded string

-CString Unencode(const CString & input);

-CString GetTextInbetween(const CString &input, const CString &start, const CString &end);

-// Given a ? separated string, extract a particular segment, and URL-Unencode it

-CString GetParam(const CString & input, const CString & key);

-// Given an XML style string, extract the contents of a <INPUT>...</INPUT> pair

-CString GetField (const CString & input, const CString & field);

-// Finds a whole word match in the query, followed by a ":".

-// If not found, return -1.

-//

-// Note: this is case sensitive.

-int FindWholeWordMatch (const CString &query,

- const CString &word_to_match,

- const bool end_with_colon,

- const int index_begin);

-// Do whole-word replacement in "str".

-// This does not do partial matches (unlike CString::Replace),

-// e.g. CString::Replace will replace "ie" within "pie" and

-// this function will not.

-//

-// Note: this is case sensitive.

-void ReplaceWholeWord (const CString &string_to_replace,

- const CString &replacement,

- const bool trim_whitespace,

- CString *str);

-// Convert Wide to ANSI directly. Use only when it is all ANSI

-CStringA WideToAnsiDirect(const CString & in);

-// Transform a unicode string into UTF8, used primarily by the webserver

-CStringA WideToUtf8(const CString& w);

-// Converts the UTF-8 encoded buffer to an in-memory Unicode (wide character)

-// string.

-// @param utf8 A non-NULL pointer to a UTF-8 encoded buffer that has at

-// least num_bytes valid characters.

-// @param num_bytes Number of bytes to process from utf8.

-// @return The Unicode string represented by utf8 (or that part of it

-// specified by num_bytes). If the UTF-8 representation of the string started

-// with a byte-order marker (BOM), it will be ignored and not included in the

-// returned string. On failure, the function returns the empty string.

-CString Utf8ToWideChar(const char* utf8, uint32 num_bytes);

-CString Utf8BufferToWideChar(const std::vector<uint8>& buffer);

-// Dealing with Unicode BOM

-bool StartsWithBOM(const TCHAR* string);

-const TCHAR* StringAfterBOM(const TCHAR* string);

-// Convert an ANSI string into Widechar string, according to the specified

-// codepage. The input length can be -1, if the string is null terminated, and

-// the actual length will be used internally.

-BOOL AnsiToWideString(const char *from, int length, UINT codepage, CString *to);

-// Convert char to Wchar directly

-CString AnsiToWideString(const char *from, int length);

-// these functions untested

-// they should not be used unless tested

-// HRESULT AnsiToUTF8 (char * src, int src_len, char * dest, int *dest_len);

-// HRESULT UTF8ToAnsi (char * src, int src_len, char * dest, int *dest_len);

-// HRESULT UCS2ToUTF8 (LPCWSTR src, int src_len, char * dest, int *dest_len);

-// HRESULT UTF8ToUCS2 (char * src, int src_len, LPWSTR dest, int *dest_len);

-// "Absolute" is perhaps not the right term, this normalizes the Uri

-// given http://www.google.com changes to correct http://www.google.com/

-// given http://www.google.com// changes to correct http://www.google.com/

-// given http://www.google.com/home.html returns the same

-CString GetAbsoluteUri(const CString& uri);

-// Reverse (big-endian<->little-endian) the shorts that make up

-// Unicode characters in a byte array of Unicode chars

-HRESULT ReverseUnicodeByteOrder(byte* unicode_string, int size_in_bytes);

-// given http://google.com/bobby this returns http://google.com/

-// If strip_leading is specified, it will turn

-// http://www.google.com into http://google.com

-#define kStrLeadingWww _T("www.")

-// TODO(omaha): no default parameters

-CString GetUriHostName(const CString& uri, bool strip_leading = false);

-CString GetUriHostNameHostOnly(const CString& uri, bool strip_leading_www);

-const char *stristr(const char *string, const char *pattern);

-const WCHAR *stristrW(const WCHAR *string, const WCHAR *pattern);

-const WCHAR *strstrW(const WCHAR *string, const WCHAR *pattern);

-// Add len_to_add to len_so_far, assuming that if it exceeds the

-// length of the line, it will word wrap onto the next line. Returns

-// the total length of all the lines summed together.

-float GetLenWithWordWrap (const float len_so_far,

- const float len_to_add,

- const uint32 len_line);

-// ----------------------------------------------------------------------

-// QuotedPrintableUnescape()

-// Copies "src" to "dest", rewriting quoted printable escape sequences

-// =XX to their ASCII equivalents. src is not null terminated, instead

-// specify len. I recommend that slen<len_dest, but we honour len_dest

-// anyway.

-// RETURNS the length of dest.

-// ----------------------------------------------------------------------

-int QuotedPrintableUnescape(const WCHAR *src, int slen, WCHAR *dest, int len_dest);

-// Return the length to use for the output buffer given to the base64 escape

-// routines. Make sure to use the same value for do_padding in both.

-// This function may return incorrect results if given input_len values that

-// are extremely high, which should happen rarely.

-int CalculateBase64EscapedLen(int input_len, bool do_padding);

-// Use this version when calling Base64Escape without a do_padding arg.

-int CalculateBase64EscapedLen(int input_len);

-// ----------------------------------------------------------------------

-// Base64Escape()

-// WebSafeBase64Escape()

-// Encode "src" to "dest" using base64 encoding.

-// src is not null terminated, instead specify len.

-// 'dest' should have at least CalculateBase64EscapedLen() length.

-// RETURNS the length of dest.

-// The WebSafe variation use '-' instead of '+' and '_' instead of '/'

-// so that we can place the out in the URL or cookies without having

-// to escape them. It also has an extra parameter "do_padding",

-// which when set to false will prevent padding with "=".

-// ----------------------------------------------------------------------

-int Base64Escape(const char *src, int slen, char *dest, int szdest);

-int WebSafeBase64Escape(const char *src, int slen, char *dest,

- int szdest, bool do_padding);

-void WebSafeBase64Escape(const CStringA& src, CStringA* dest);

-void Base64Escape(const char *src, int szsrc,

- CStringA* dest, bool do_padding);

-void WebSafeBase64Escape(const char *src, int szsrc,

- CStringA* dest, bool do_padding);

-// ----------------------------------------------------------------------

-// Base64Unescape()

-// Copies "src" to "dest", where src is in base64 and is written to its

-// ASCII equivalents. src is not null terminated, instead specify len.

-// I recommend that slen<len_dest, but we honour len_dest anyway.

-// RETURNS the length of dest.

-// The WebSafe variation use '-' instead of '+' and '_' instead of '/'.

-// ----------------------------------------------------------------------

-int Base64Unescape(const char *src, int slen, char *dest, int len_dest);

-int WebSafeBase64Unescape(const char *src, int slen, char *dest, int szdest);

-#ifdef UNICODE

-#define IsSpace IsSpaceW

-#else

-#define IsSpace IsSpaceA

-#endif

-bool IsSpaceW(WCHAR c);

-bool IsSpaceA(char c);

-// Remove all leading and trailing whitespace from s.

-// Returns the new length of the string (not including 0-terminator)

-int TrimCString(CString &s);

-int Trim(TCHAR *s);

-// Trims all characters in the delimiter string from both ends of the

-// string s

-void TrimString(CString& s, const TCHAR* delimiters);

-// Strip the first token from the front of argument s. A token is a

-// series of consecutive non-blank characters - unless the first

-// character is a double-quote ("), in that case the token is the full

-// quoted string

-CString StripFirstQuotedToken(const CString& s);

-// A block of text to separate lines, and back

-void TextToLines(const CString& text, const TCHAR* delimiter, std::vector<CString>* lines);

-// (LinesToText puts a delimiter at the end of the last line too)

-void LinesToText(const std::vector<CString>& lines, const TCHAR* delimiter, CString* text);

-// Make a CString lower case

-void MakeLowerCString(CString & s);

-// Clean up the string: replace all whitespace with spaces, and

-// replace consecutive spaces with one.

-// Returns the new length of the string (not including 0-terminator)

-int CleanupWhitespaceCString(CString &s);

-int CleanupWhitespace(TCHAR *s);

-int HexDigitToInt (WCHAR c);

-bool IsHexDigit (WCHAR c);

-// Converts to lower, but does so much faster if the string is ANSI

-TCHAR * String_FastToLower(TCHAR * str);

-// Replacement for the CRT toupper(c)

-int String_ToUpper(int c);

-// Replacement for the CRT toupper(c)

-char String_ToUpperA(char c);

-// Converts str to lowercase in place.

-void String_ToLower(TCHAR* str);

-// Converts str to uppercase in place.

-void String_ToUpper(TCHAR* str);

-bool String_IsUpper(TCHAR c);

-// String comparison based on length

-// Replacement for the CRT strncmp(i)

-int String_StrNCmp(const TCHAR * str1, const TCHAR * str2, uint32 len, bool ignore_case);

-// Replacement for strncpy() - except ALWAYS ends string with null

-TCHAR* String_StrNCpy(TCHAR* destination, const TCHAR* source, uint32 len);

-// check if str starts with start_str

-bool String_StartsWith(const TCHAR *str, const TCHAR *start_str, bool ignore_case);

-// check if str starts with start_str, for char *

-bool String_StartsWithA(const char *str, const char *start_str, bool ignore_case);

-// check if str ends with end_str

-bool String_EndsWith(const TCHAR *str, const TCHAR *end_str, bool ignore_case);

-// If the input string str doesn't already end with the string end_str,

-// make it end with the string end_str.

-CString String_MakeEndWith(const TCHAR *str, const TCHAR* end_str, bool ignore_case);

-// converts an int to a string

-CString String_Int64ToString(int64 value, int radix);

-// converts a uint64 to a string

-CString String_Uint64ToString(uint64 value, int radix);

-// Convert numeric types to CString

-CString sizet_to_str(const size_t & i);

-CString itostr(const int i);

-CString itostr(const uint32 i);

-// converts a large number to an approximate value, like "1.2G" or "900M"

-// base_ten = true if based on powers of 10 (like disk space) otherwise based

-// on powers of two. power = 0 for *10^0, 1 for *10^3 or 2^10, 2 for *10^6

-// or 2^20, and 3 for *10^9 or 2^30, in other words: no units, K, M, or G.

-CString String_LargeIntToApproximateString(uint64 value, bool base_ten, int* power);

-// converts a string to an int

-// Does not check for overflow

-int32 String_StringToInt(const TCHAR * str);

-int64 String_StringToInt64(const TCHAR * str);

-// converts a double to a string

-// specifies the number of digits after the decimal point

-// TODO(omaha): Make this work for negative values

-CString String_DoubleToString(double value, int point_digits);

-// convert string to double

-double String_StringToDouble (const TCHAR *s);

-// Converts a character to a digit

-// if the character is not a digit return -1

-int32 String_CharToDigit(const TCHAR c);

-// returns true if ASCII digit

-bool String_IsDigit(const TCHAR c);

-// Converts the digit to a character.

-TCHAR String_DigitToChar(unsigned int n);

-// Returns true if an identifier character: letter, digit, or "_"

-bool String_IsIdentifierChar(const TCHAR c);

-// Returns true if the string has letters in it.

-// This is used by the keyword extractor to downweight numbers,

-// IDs (sequences of numbers like social security numbers), etc.

-bool String_HasAlphabetLetters (const TCHAR *str);

-// Return the index of the first occurrence of s2 in s1, or -1 if none.

-int String_FindString(const TCHAR *s1, const TCHAR *s2);

-int String_FindString(const TCHAR *s1, const TCHAR *s2, int start_pos);

-// Return the index of the first occurrence of c in s1, or -1 if none.

-int String_FindChar(const TCHAR *str, const TCHAR c);

-// start from index start_pos

-int String_FindChar(const TCHAR *str, const TCHAR c, int start_pos);

-// Return the index of the last occurrence of c in string, or -1 if none.

-int String_ReverseFindChar(const TCHAR * str, TCHAR c);

-bool String_Contains(const TCHAR *s1, const TCHAR *s2);

-// Replace old_char with new_char in str.

-void String_ReplaceChar(TCHAR *str, TCHAR old_char, TCHAR new_char);

-void String_ReplaceChar(CString & str, TCHAR old_char, TCHAR new_char);

-// Append the given character to the string if it doesn't already end with it.

-// There must be room in the string to append the character if necessary.

-void String_EndWithChar(TCHAR *str, TCHAR c);

-// A special version of the replace function which takes advantage of CString properties

-// to make it much faster when the string grows

-// NOTE: it CANNOT match more than kMaxReplaceMatches instances within the string

-// do not use this function if that is a possibility

-// The maximum number of replacements to perform. Essentially infinite

-const unsigned int kRepMax = kuint32max;

-int ReplaceCString (CString & src, const TCHAR *from, unsigned int from_len,

- const TCHAR *to, unsigned int to_len,

- unsigned int max_matches);

-// replace from with to in src

-// on memory allocation error, returns the original string

-int ReplaceString (TCHAR *src, const TCHAR *from, const TCHAR *to, TCHAR **out, int *out_len);

-// replace from with to in src

-// will replace in place if length(to) <= length(from) and return *out == src

-// WILL CREATE NEW OUTPUT BUFFER OTHERWISE and set created_new_string to true

-// on memory allocation error, returns the original string

-int ReplaceStringMaybeInPlace (TCHAR *src, const TCHAR *from, const TCHAR *to, TCHAR **out, int *out_len, bool *created_new_string);

-// you really want to use the straight TCHAR version above. you know it

-// on memory allocation error, returns the original string

-int ReplaceCString (CString & src, const TCHAR *from, const TCHAR *to);

-long __cdecl Wcstol (const wchar_t *nptr, wchar_t **endptr, int ibase);

-unsigned long __cdecl Wcstoul (const wchar_t *nptr, wchar_t **endptr, int ibase);

-// Functions on arrays of strings

-// Returns true iff s is in the array strings (case-insensitive compare)

-bool String_MemberOf(const TCHAR* const* strings, const TCHAR* s);

-// Returns index of s in the array of strings (or -1 for missing) (case-insensitive compare)

-int String_IndexOf(const TCHAR* const* strings, const TCHAR* s);

-// Serializes a time64 to a string, and then loads it out again; this string is not for human consumption

-time64 StringToTime(const CString & time);

-CString TimeToString(const time64 & time);

-// looks for string A followed by any number of spaces/tabs followed by string b

-// returns starting position of a if found, NULL if not

-// case insensitive

-const TCHAR *FindStringASpaceStringB (const TCHAR *s, const TCHAR *a, const TCHAR *b);

-bool IsAlphaA (const char c);

-bool IsDigitA (const char c);

-// TODO(omaha): deprecate since we have secure CRT now.

-// dest_buffer_len includes the NULL

-// always NULL terminates

-// dest must be a valid string with length < dest_buffer_len

-void SafeStrCat (TCHAR *dest, const TCHAR *src, int dest_buffer_len);

-const TCHAR *ExtractNextDouble (const TCHAR *s, double *f);

-TCHAR *String_PathFindExtension(const TCHAR *path);

-inline TCHAR Char_ToLower(TCHAR c) {

-// C4302: truncation from 'type 1' to 'type 2'

-#pragma warning(disable : 4302)

- return reinterpret_cast<TCHAR>(::CharLower(reinterpret_cast<TCHAR*>(c)));

-#pragma warning(default : 4302)

-// @returns the lowercase character (type is int to be consistent with the CRT)

-int String_ToLowerChar(int c);

-// Replacement for the CRT tolower(c)

-char String_ToLowerCharAnsi(char c);

-bool String_PathRemoveFileSpec(TCHAR *path);

-// Escapes and unescapes strings (shlwapi-based implementation).

-// The intended usage for these APIs is escaping strings to make up

-// URLs, for example building query strings.

-//

-// Pass false to the flag segment_only to escape the url. This will not

-// cause the conversion of the # (%23), ? (%3F), and / (%2F) characters.

-HRESULT StringEscape(const CString& str_in,

- bool segment_only,

- CString* str_out);

-HRESULT StringUnescape(const CString& str_in, CString* str_out);

-// Converts a string to an int, performs all the necessary

-// checks to ensure that the string is correct.

-// Tests for overflow and non-int strings.

-bool String_StringToDecimalIntChecked(const TCHAR* str, int* value);

-// Converts CLSID to a string.

-bool CLSIDToCString(const GUID& guid, CString* str);

-// Converts a string to a bool.

-HRESULT String_StringToBool(const TCHAR* str, bool* value);

-// Convert boolean to its string representation.

-HRESULT String_BoolToString(bool value, CString* string);

-// Similar to ATL::CStringT::Replace() except that it ignores case.

-CString String_ReplaceIgnoreCase(const CString& string,

- const CString& token,

- const CString& replacement);

-// Converts a string to a Tristate enum.

-bool String_StringToTristate(const TCHAR* str, Tristate* value);

-// Extracts the name and value from a string that contains a name/value pair.

-bool ParseNameValuePair(const CString& token, TCHAR separator,

- CString* name, CString* value);

-// Splits a command line buffer into two parts in place:

-// first argument (which could be path to executable) and remaining arguments.

-// Note that the same pointer can be used for both command_line and

-// either of the remaining parameters.

-bool SplitCommandLineInPlace(TCHAR *command_line,

- TCHAR **first_argument,

- TCHAR **remaining_arguments);

-// Returns true if the unicode string only contains ascii values.

-bool ContainsOnlyAsciiChars(const CString& str);

-// Converts a buffer of bytes to a hex string.

-CString BytesToHex(const uint8* bytes, size_t num_bytes);

-// Converts a vector of bytes to a hex string.

-CString BytesToHex(const std::vector<uint8>& bytes);

-void JoinStrings(const std::vector<CString>& components,

- const TCHAR* delim,

- CString* result);

-void JoinStringsInArray(const TCHAR* components[],

- int num_components,

- const TCHAR* delim,

- CString* result);

-// Formats the specified message ID.

-// It is similar to CStringT::FormatMessage() but it returns an empty string

-// instead of throwing when the message ID cannot be loaded.

-CString FormatResourceMessage(uint32 resource_id, ...);

-// Formats an error code as an 8-digit HRESULT-style hex number or an unsigned

-// integer depending on whether it matches the HRESULT failure format.

-CString FormatErrorCode(DWORD error_code);

-// Converts the unicode string into a utf8 encoded, urlencoded string.

-// The resulting ascii string is returned in a wide CString.

-HRESULT WideStringToUtf8UrlEncodedString(const CString& str, CString* out);

-// Converts a string that is in the utf8 representation and is urlencoded

-// into a unicode string.

-HRESULT Utf8UrlEncodedStringToWideString(const CString& str, CString* out);

-} // namespace omaha

-#endif // OMAHA_BASE_STRING_H_

« no previous file with comments | « base/store_watcher.h ('k') | base/string.cc » ('j') | no next file with comments »