chrome_frame/html_utils.h - Issue 218019: Initial import of the Chrome Frame codebase. Integration in chrome.gyp coming...

Unified Diff: chrome_frame/html_utils.h

Issue 218019: Initial import of the Chrome Frame codebase. Integration in chrome.gyp coming... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Created 11 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: chrome_frame/html_utils.h

===================================================================

--- chrome_frame/html_utils.h (revision 0)

+++ chrome_frame/html_utils.h (revision 0)

@@ -0,0 +1,120 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+// This file defines utility functions for working with html.

+#ifndef CHROME_FRAME_HTML_UTILS_H_

+#define CHROME_FRAME_HTML_UTILS_H_

+#include <string>

+#include <vector>

+#include "base/basictypes.h"

+#include "testing/gtest/include/gtest/gtest_prod.h"

+// Forward declarations

+class HtmlUtilUnittest;

+//

+// Class designed to take a string of HTML and extract from it named

+// attribute values from named tags.

+//

+// Caveat: this class currently doesn't handle multi-word UTF-16 encoded

+// characters. As a result, any data following such a character could
 
+// possibly be misinterpreted.

+//

+class HTMLScanner {

+ public:

+ typedef std::wstring::const_iterator StrPos;

+ // Structure maintaining const_iterators into html_string_.

+ class StringRange {

+ friend class HTMLScanner;

+ public:

+ StringRange();

+ StringRange(StrPos start, StrPos end);

+ bool LowerCaseEqualsASCII(const char* other) const;

+ bool Equals(const wchar_t* other) const;

+ // Returns a copy of the data described by this StringRange.

+ std::wstring Copy() const;

+ // If this StringRange represents a tag, this method extracts the name of

+ // the tag and sticks it in tag_name.

+ // Returns true if the tag name was successfully extracted.

+ // Returns false if this string doesn't look like a valid tag.

+ bool GetTagName(std::wstring* tag_name) const;

+ // From a given string range, uses a string tokenizer to extract the value

+ // of the named attribute if a simple scan finds that the attribute name is

+ // present.

+ //

+ // Returns true if the named attribute can be located and it has a value

+ // which has been placed in attribute_value.

+ //

+ // Note that the attribute value is unquoted here as well, so that

+ // GetTagAttribute(*<foo bar="baz">*, L"bar", *out_value*) will stick

+ // 'baz' in out_value and not '"baz"'.

+ //

+ // Returns false if the named attribute is not present in the tag or if it

+ // did not have a value.

+ //

+ bool GetTagAttribute(const wchar_t* attribute_name,

+ StringRange* attribute_value) const;

+ // Unquotes a StringRange by removing a matching pair of either ' or "

+ // characters from the beginning and end of the string if present.

+ // Returns true if string was modified, false otherwise.

+ bool UnQuote();

+ private:

+ StrPos start_;

+ StrPos end_;

+ };

+ typedef std::vector<StringRange> StringRangeList;

+ // html_string must be a null-terminated string containing the HTML

+ // to be scanned.

+ explicit HTMLScanner(const wchar_t* html_string);

+ // Returns the set of ranges denoting HTML tags that match the given name.

+ // If stop_tag_name is given, then as soon as a tag with this name is

+ // encountered this method will return.

+ void GetTagsByName(const wchar_t* name, StringRangeList* tag_list,

+ const wchar_t* stop_tag_name);

+ private:

+ friend class HtmlUtilUnittest;

+ FRIEND_TEST(HtmlUtilUnittest, BasicTest);

+ // Given html_string which represents the remaining html range, this method

+ // returns the next tag in tag and advances html_string to one character after

+ // the end of tag. This method is intended to be called repeatedly to extract

+ // all of the tags in sequence.

+ //

+ // Returns true if another tag was found and 'tag' was populated with a valid

+ // range.

+ // Returns false if we have reached the end of the html data.

+ bool NextTag(StringRange* html_string, StringRange* tag);

+ // Returns true if c can be found in quotes_, false otherwise.

+ bool IsQuote(wchar_t c);

+ // Returns true if pos refers to the last character in an HTML comment in a

+ // string described by html_string, false otherwise.

+ // For example with html_string describing <!-- foo> -->, pos must refer to

+ // the last > for this method to return true.

+ bool IsHTMLCommentClose(StringRange* html_string, StrPos pos);

+ // We store a (CollapsedWhitespace'd) copy of the html data.

+ const std::wstring html_string_;

+ // Store the string of quote characters to avoid repeated construction.

+ const std::wstring quotes_;

+ DISALLOW_COPY_AND_ASSIGN(HTMLScanner);

+};

+#endif // CHROME_FRAME_HTML_UTILS_H_

« no previous file with comments | « chrome_frame/host_w_controls.html ('k') | chrome_frame/html_utils.cc » ('j') | no next file with comments »