Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Side by Side Diff: chrome_frame/html_utils.h

Issue 218019: Initial import of the Chrome Frame codebase. Integration in chrome.gyp coming... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Created 11 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « chrome_frame/host_w_controls.html ('k') | chrome_frame/html_utils.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // This file defines utility functions for working with html.
6
7 #ifndef CHROME_FRAME_HTML_UTILS_H_
8 #define CHROME_FRAME_HTML_UTILS_H_
9
10 #include <string>
11 #include <vector>
12
13 #include "base/basictypes.h"
14 #include "testing/gtest/include/gtest/gtest_prod.h"
15
16 // Forward declarations
17 class HtmlUtilUnittest;
18
19 //
20 // Class designed to take a string of HTML and extract from it named
21 // attribute values from named tags.
22 //
23 // Caveat: this class currently doesn't handle multi-word UTF-16 encoded
24 // characters, which means that any data following such a character could
25 // possibly be misinterpreted.
26 //
27 class HTMLScanner {
28 public:
29 typedef std::wstring::const_iterator StrPos;
30
31 // Structure maintaining a [start, end) pair of const_iterators into html_string_.
32 class StringRange {
33 friend class HTMLScanner;
34 public:
35 StringRange();
36 StringRange(StrPos start, StrPos end);
37
38 bool LowerCaseEqualsASCII(const char* other) const;
39 bool Equals(const wchar_t* other) const;
40
41 // Returns a copy of the data described by this StringRange.
42 std::wstring Copy() const;
43
44 // If this StringRange represents a tag, this method extracts the name of
45 // the tag and sticks it in tag_name.
46 // Returns true if the tag name was successfully extracted.
47 // Returns false if this string doesn't look like a valid tag.
48 bool GetTagName(std::wstring* tag_name) const;
49
50 // From a given string range, uses a string tokenizer to extract the value
51 // of the named attribute if a simple scan finds that the attribute name is
52 // present.
53 //
54 // Returns true if the named attribute can be located and it has a value
55 // which has been placed in attribute_value.
56 //
57 // Note that the attribute value is unquoted here as well, so that calling
58 // GetTagAttribute(L"bar", out_value) on a range describing <foo bar="baz">
59 // will stick 'baz' in out_value and not '"baz"'.
60 //
61 // Returns false if the named attribute is not present in the tag or if it
62 // did not have a value.
63 //
64 bool GetTagAttribute(const wchar_t* attribute_name,
65 StringRange* attribute_value) const;
66
67 // Unquotes a StringRange by removing a matching pair of either ' or "
68 // characters from the beginning and end of the string if present.
69 // Returns true if string was modified, false otherwise.
70 bool UnQuote();
71 private:
72 StrPos start_;
73 StrPos end_;
74 };
75
76 typedef std::vector<StringRange> StringRangeList;
77
78 // html_string must be a null-terminated string containing the HTML
79 // to be scanned.
80 explicit HTMLScanner(const wchar_t* html_string);
81
82 // Returns (via tag_list) the ranges denoting HTML tags that match the given
83 // name. If stop_tag_name is given, then as soon as a tag with this name is
84 // encountered this method will return.
85 void GetTagsByName(const wchar_t* name, StringRangeList* tag_list,
86 const wchar_t* stop_tag_name);
87
88 private:
89 friend class HtmlUtilUnittest;
90 FRIEND_TEST(HtmlUtilUnittest, BasicTest);
91
92 // Given html_string which represents the remaining html range, this method
93 // returns the next tag in tag and advances html_string to one character after
94 // the end of tag. This method is intended to be called repeatedly to extract
95 // all of the tags in sequence.
96 //
97 // Returns true if another tag was found and 'tag' was populated with a valid
98 // range.
99 // Returns false if we have reached the end of the html data.
100 bool NextTag(StringRange* html_string, StringRange* tag);
101
102 // Returns true if c can be found in quotes_, false otherwise.
103 bool IsQuote(wchar_t c);
104
105 // Returns true if pos refers to the last character in an HTML comment in a
106 // string described by html_string, false otherwise.
107 // For example with html_string describing <!-- foo> -->, pos must refer to
108 // the last > for this method to return true.
109 bool IsHTMLCommentClose(StringRange* html_string, StrPos pos);
110
111 // We store a copy of the html data with its whitespace collapsed.
112 const std::wstring html_string_;
113
114 // Store the string of quote characters to avoid repeated construction.
115 const std::wstring quotes_;
116
117 DISALLOW_COPY_AND_ASSIGN(HTMLScanner);
118 };
119
120 #endif // CHROME_FRAME_HTML_UTILS_H_
OLDNEW
« no previous file with comments | « chrome_frame/host_w_controls.html ('k') | chrome_frame/html_utils.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698