| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 // This file defines utility functions for working with html. | |
| 6 | |
| 7 #ifndef CHROME_FRAME_HTML_UTILS_H_ | |
| 8 #define CHROME_FRAME_HTML_UTILS_H_ | |
| 9 | |
| 10 #include <string> | |
| 11 #include <vector> | |
| 12 | |
| 13 #include "base/basictypes.h" | |
| 14 #include "base/gtest_prod_util.h" | |
| 15 #include "net/http/http_util.h" | |
| 16 | |
| 17 // Forward declarations | |
| 18 class HtmlUtilUnittest; | |
| 19 | |
| 20 // | |
| 21 // Class designed to take a string of HTML and extract from it named | |
| 22 // attribute values from named tags. | |
| 23 // | |
| 24 // Caveat: this class currently doesn't handle multi-word UTF-16 encoded | |
| 25 // characters. "Doesn't handle" means that any data following such a | |
| 26 // character could possibly be misinterpreted. | |
| 27 // | |
| 28 class HTMLScanner { | |
| 29 public: | |
| 30 typedef std::wstring::const_iterator StrPos; | |
| 31 | |
| 32 // Structure maintaining a pair of const_iterators (start_, end_) into html_string_. | |
| 33 class StringRange { | |
| 34 friend class HTMLScanner; | |
| 35 public: | |
| 36 StringRange(); | |
| 37 StringRange(StrPos start, StrPos end); | |
| 38 | |
| 39 bool LowerCaseEqualsASCII(const char* other) const; | |
| 40 bool Equals(const wchar_t* other) const; | |
| 41 | |
| 42 // Returns a copy of the data described by this StringRange. | |
| 43 std::wstring Copy() const; | |
| 44 | |
| 45 // If this StringRange represents a tag, this method extracts the name of | |
| 46 // the tag and sticks it in tag_name. | |
| 47 // Returns true if the tag name was successfully extracted. | |
| 48 // Returns false if this string doesn't look like a valid tag. | |
| 49 bool GetTagName(std::wstring* tag_name) const; | |
| 50 | |
| 51 // From a given string range, uses a string tokenizer to extract the value | |
| 52 // of the named attribute if a simple scan finds that the attribute name is | |
| 53 // present. | |
| 54 // | |
| 55 // Returns true if the named attribute can be located and it has a value | |
| 56 // which has been placed in attribute_value. | |
| 57 // | |
| 58 // Note that the attribute value is unquoted here as well, so that | |
| 59 // GetTagAttribute(*<foo bar="baz">*, L"bar", *out_value*) will stick | |
| 60 // 'baz' in out_value and not '"baz"'. | |
| 61 // | |
| 62 // Returns false if the named attribute is not present in the tag or if it | |
| 63 // did not have a value. | |
| 64 // | |
| 65 bool GetTagAttribute(const wchar_t* attribute_name, | |
| 66 StringRange* attribute_value) const; | |
| 67 | |
| 68 // Unquotes a StringRange by removing a matching pair of either ' or " | |
| 69 // characters from the beginning and end of the string if present. | |
| 70 // Returns true if string was modified, false otherwise. | |
| 71 bool UnQuote(); | |
| 72 private: | |
| 73 StrPos start_; | |
| 74 StrPos end_; | |
| 75 }; | |
| 76 | |
| 77 typedef std::vector<StringRange> StringRangeList; | |
| 78 | |
| 79 // html_string must be a null-terminated string containing the HTML | |
| 80 // to be scanned. | |
| 81 explicit HTMLScanner(const wchar_t* html_string); | |
| 82 | |
| 83 // Returns, via tag_list, the ranges denoting HTML tags that match name. | |
| 84 // If stop_tag_name is given, then as soon as a tag with this name is | |
| 85 // encountered this method will return. | |
| 86 void GetTagsByName(const wchar_t* name, StringRangeList* tag_list, | |
| 87 const wchar_t* stop_tag_name); | |
| 88 | |
| 89 private: | |
| 90 friend class HtmlUtilUnittest; | |
| 91 FRIEND_TEST_ALL_PREFIXES(HtmlUtilUnittest, BasicTest); | |
| 92 | |
| 93 // Given html_string which represents the remaining html range, this method | |
| 94 // returns the next tag in tag and advances html_string to one character after | |
| 95 // the end of tag. This method is intended to be called repeatedly to extract | |
| 96 // all of the tags in sequence. | |
| 97 // | |
| 98 // Returns true if another tag was found and 'tag' was populated with a valid | |
| 99 // range. | |
| 100 // Returns false if we have reached the end of the html data. | |
| 101 bool NextTag(StringRange* html_string, StringRange* tag); | |
| 102 | |
| 103 // Returns true if c can be found in quotes_, false otherwise. | |
| 104 bool IsQuote(wchar_t c); | |
| 105 | |
| 106 // Returns true if pos refers to the last character in an HTML comment in a | |
| 107 // string described by html_string, false otherwise. | |
| 108 // For example with html_string describing <!-- foo> -->, pos must refer to | |
| 109 // the last > for this method to return true. | |
| 110 bool IsHTMLCommentClose(const StringRange* html_string, StrPos pos); | |
| 111 | |
| 112 // Returns true if pos refers to the last character in the terminator of the | |
| 113 // opening tag of a downlevel-hidden conditional comment in IE as per | |
| 114 // http://msdn.microsoft.com/en-us/library/ms537512(VS.85).aspx#syntax | |
| 115 // For example with html_string describing <![if booga >wooga]>, pos must | |
| 116 // refer to the last > for this method to return true. | |
| 117 bool IsIEConditionalCommentClose(const StringRange* html_string, StrPos pos); | |
| 118 | |
| 119 // We store a copy of the html data with its whitespace collapsed. | |
| 120 const std::wstring html_string_; | |
| 121 | |
| 122 // Store the string of quote characters to avoid repeated construction. | |
| 123 const std::wstring quotes_; | |
| 124 | |
| 125 DISALLOW_COPY_AND_ASSIGN(HTMLScanner); | |
| 126 }; | |
| 127 | |
| 128 namespace http_utils { | |
| 129 | |
| 130 // Adds "chromeframe/a.b.c.d" to the User-Agent string (a.b.c.d is the version). | |
| 131 // If the cf tag has already been added to the string, the original string is | |
| 132 // returned. | |
| 133 std::string AddChromeFrameToUserAgentValue(const std::string& value); | |
| 134 | |
| 135 // Removes "chromeframe/a.b.c.d" from the User-Agent string (a.b.c.d is the | |
| 136 // version). If the cf tag is not present in the string, the original string is | |
| 137 // returned. | |
| 138 std::string RemoveChromeFrameFromUserAgentValue(const std::string& value); | |
| 139 | |
| 140 // Fetches the user agent from urlmon and adds chrome frame to the | |
| 141 // comment section. | |
| 142 // NOTE: The returned string includes the "User-Agent: " header name. | |
| 143 std::string GetDefaultUserAgentHeaderWithCFTag(); | |
| 144 | |
| 145 // Returns the User-Agent header as would be used by Chrome itself. | |
| 146 const char* GetChromeUserAgent(); | |
| 147 | |
| 148 // Fetches the default user agent string from urlmon. | |
| 149 // This value does not include the "User-Agent:" header name. | |
| 150 std::string GetDefaultUserAgent(); | |
| 151 | |
| 152 // Returns the Chrome Frame user agent. E.g. "chromeframe/1.0". | |
| 153 // Note that in unit tests this will be "chromeframe/0.0" due to the version | |
| 154 // table not being present in the unit test executable. | |
| 155 const char* GetChromeFrameUserAgent(); | |
| 156 | |
| 157 // Returns true if there is a frame busting header (other than the do-nothing | |
| 158 // "X-Frame-Options: ALLOWALL") in the provided header block. Note that there | |
| 159 // may be multiple X-Frame-Options values specified; if there is one anywhere in | |
| 160 // the list with a value other than ALLOWALL, this returns true. | |
| 161 bool HasFrameBustingHeader(const std::string& http_headers); | |
| 162 | |
| 163 // Returns the header named header_name from the headers list. NOTE(review): presumably only the header's value is returned - confirm against the implementation. | |
| 164 std::string GetHttpHeaderFromHeaderList(const std::string& header_name, | |
| 165 const std::string& headers); | |
| 166 } // namespace http_utils | |
| 167 | |
| 168 #endif // CHROME_FRAME_HTML_UTILS_H_ | |
| OLD | NEW |