OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // This file defines utility functions for working with html. | |
6 | |
7 #ifndef CHROME_FRAME_HTML_UTILS_H_ | |
8 #define CHROME_FRAME_HTML_UTILS_H_ | |
9 | |
10 #include <string> | |
11 #include <vector> | |
12 | |
13 #include "base/basictypes.h" | |
14 #include "base/gtest_prod_util.h" | |
15 #include "net/http/http_util.h" | |
16 | |
17 // Forward declaration of the unit test class befriended by HTMLScanner. | |
18 class HtmlUtilUnittest; | |
19 | |
20 // | |
21 // Class designed to take a string of HTML and extract from it named | |
22 // attribute values from named tags. | |
23 // | |
24 // Caveat: this class currently doesn't handle multi-word UTF-16 encoded | |
25 // characters (i.e. surrogate pairs). As a consequence, any data following | |
26 // such a character could possibly be misinterpreted. | |
27 // | |
28 class HTMLScanner { | |
29 public: | |
30 typedef std::wstring::const_iterator StrPos; | |
31 | |
32 // Structure holding a [start, end) pair of const_iterators into html_string_. | |
33 class StringRange { | |
34 friend class HTMLScanner; | |
35 public: | |
36 StringRange(); | |
37 StringRange(StrPos start, StrPos end); | |
38 | |
39 bool LowerCaseEqualsASCII(const char* other) const; | |
40 bool Equals(const wchar_t* other) const; | |
41 | |
42 // Returns a copy of the data described by this StringRange. | |
43 std::wstring Copy() const; | |
44 | |
45 // If this StringRange represents a tag, this method extracts the name of | |
46 // the tag and sticks it in tag_name. | |
47 // Returns true if the tag name was successfully extracted. | |
48 // Returns false if this string doesn't look like a valid tag. | |
49 bool GetTagName(std::wstring* tag_name) const; | |
50 | |
51 // From a given string range, uses a string tokenizer to extract the value | |
52 // of the named attribute if a simple scan finds that the attribute name is | |
53 // present. | |
54 // | |
55 // Returns true if the named attribute can be located and it has a value | |
56 // which has been placed in attribute_value. | |
57 // | |
58 // Note that the attribute value is unquoted here as well, so that | |
59 // GetTagAttribute(*<foo bar="baz">*, L"bar", *out_value*) will stick | |
60 // 'baz' in out_value and not '"baz"'. | |
61 // | |
62 // Returns false if the named attribute is not present in the tag or if it | |
63 // did not have a value. | |
64 // | |
65 bool GetTagAttribute(const wchar_t* attribute_name, | |
66 StringRange* attribute_value) const; | |
67 | |
68 // Unquotes a StringRange by removing a matching pair of either ' or " | |
69 // characters from the beginning and end of the string if present. | |
70 // Returns true if string was modified, false otherwise. | |
71 bool UnQuote(); | |
72 private: | |
73 StrPos start_; | |
74 StrPos end_; | |
75 }; | |
76 | |
77 typedef std::vector<StringRange> StringRangeList; | |
78 | |
79 // html_string must be a null-terminated string containing the HTML | |
80 // to be scanned. | |
81 explicit HTMLScanner(const wchar_t* html_string); | |
82 | |
83 // Returns the set of ranges denoting HTML tags that match the given name. | |
84 // If stop_tag_name is given, then as soon as a tag with this name is | |
85 // encountered this method will return. | |
86 void GetTagsByName(const wchar_t* name, StringRangeList* tag_list, | |
87 const wchar_t* stop_tag_name); | |
88 | |
89 private: | |
90 friend class HtmlUtilUnittest; | |
91 FRIEND_TEST_ALL_PREFIXES(HtmlUtilUnittest, BasicTest); | |
92 | |
93 // Given html_string which represents the remaining html range, this method | |
94 // returns the next tag in tag and advances html_string to one character after | |
95 // the end of tag. This method is intended to be called repeatedly to extract | |
96 // all of the tags in sequence. | |
97 // | |
98 // Returns true if another tag was found and 'tag' was populated with a valid | |
99 // range. | |
100 // Returns false if we have reached the end of the html data. | |
101 bool NextTag(StringRange* html_string, StringRange* tag); | |
102 | |
103 // Returns true if c can be found in quotes_, false otherwise. | |
104 bool IsQuote(wchar_t c); | |
105 | |
106 // Returns true if pos refers to the last character in an HTML comment in a | |
107 // string described by html_string, false otherwise. | |
108 // For example with html_string describing <!-- foo> -->, pos must refer to | |
109 // the last > for this method to return true. | |
110 bool IsHTMLCommentClose(const StringRange* html_string, StrPos pos); | |
111 | |
112 // Returns true if pos refers to the last character in the terminator of the | |
113 // opening tag of a downlevel-hidden conditional comment in IE as per | |
114 // http://msdn.microsoft.com/en-us/library/ms537512(VS.85).aspx#syntax | |
115 // For example with html_string describing <![if booga >wooga]>, pos must | |
116 // refer to the last > for this method to return true. | |
117 bool IsIEConditionalCommentClose(const StringRange* html_string, StrPos pos); | |
118 | |
119 // We store a (CollapsedWhitespace'd) copy of the html data. | |
120 const std::wstring html_string_; | |
121 | |
122 // Store the string of quote characters to avoid repeated construction. | |
123 const std::wstring quotes_; | |
124 | |
125 DISALLOW_COPY_AND_ASSIGN(HTMLScanner); | |
126 }; | |
127 | |
128 namespace http_utils { | |
129 | |
130 // Adds "chromeframe/a.b.c.d" to the User-Agent string (a.b.c.d is the version). | |
131 // If the cf tag has already been added to the string, the original string is | |
132 // returned. | |
133 std::string AddChromeFrameToUserAgentValue(const std::string& value); | |
134 | |
135 // Removes "chromeframe/a.b.c.d" from the User-Agent string (a.b.c.d is the | |
136 // version). If the cf tag is not present in the string, the original string is | |
137 // returned. | |
138 std::string RemoveChromeFrameFromUserAgentValue(const std::string& value); | |
139 | |
140 // Fetches the user agent from urlmon and adds chrome frame to the | |
141 // comment section. | |
142 // NOTE: The returned string includes the "User-Agent: " header name. | |
143 std::string GetDefaultUserAgentHeaderWithCFTag(); | |
144 | |
145 // Returns the User-Agent header as would be used by Chrome itself. | |
146 const char* GetChromeUserAgent(); | |
147 | |
148 // Fetches the default user agent string from urlmon. | |
149 // This value does not include the "User-Agent:" header name. | |
150 std::string GetDefaultUserAgent(); | |
151 | |
152 // Returns the Chrome Frame user agent. E.g. "chromeframe/1.0". | |
153 // Note that in unit tests this will be "chromeframe/0.0" due to the version | |
154 // table not being present in the unit test executable. | |
155 const char* GetChromeFrameUserAgent(); | |
156 | |
157 // Returns true if there is a frame busting header (other than the do-nothing | |
158 // "X-Frame-Options: ALLOWALL") in the provided header block. Note that there | |
159 // may be multiple X-Frame-Options values specified; if there is one anywhere in | |
160 // the list with a value other than ALLOWALL, this returns true. | |
161 bool HasFrameBustingHeader(const std::string& http_headers); | |
162 | |
163 // Returns the header named header_name found in the headers list. | |
164 std::string GetHttpHeaderFromHeaderList(const std::string& header_name, | |
165 const std::string& headers); | |
166 } // namespace http_utils | |
167 | |
168 #endif // CHROME_FRAME_HTML_UTILS_H_ | |
OLD | NEW |