Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4401)

Unified Diff: chrome/renderer/safe_browsing/phishing_term_feature_extractor.cc

Issue 266883010: Refactor code to avoid direct dependency upon ICU: phishing_term_feature_extractor (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « base/i18n/break_iterator.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: chrome/renderer/safe_browsing/phishing_term_feature_extractor.cc
diff --git a/chrome/renderer/safe_browsing/phishing_term_feature_extractor.cc b/chrome/renderer/safe_browsing/phishing_term_feature_extractor.cc
index 89994dfd04cf4488d4f4a87689cff92bc8760bb8..ba297f66f1c43d4a8b7c2ca2261a023e3f1cf995 100644
--- a/chrome/renderer/safe_browsing/phishing_term_feature_extractor.cc
+++ b/chrome/renderer/safe_browsing/phishing_term_feature_extractor.cc
@@ -9,6 +9,7 @@
#include "base/bind.h"
#include "base/compiler_specific.h"
+#include "base/i18n/break_iterator.h"
#include "base/i18n/case_conversion.h"
#include "base/logging.h"
#include "base/message_loop/message_loop.h"
@@ -19,7 +20,6 @@
#include "chrome/renderer/safe_browsing/features.h"
#include "chrome/renderer/safe_browsing/murmurhash3_util.h"
#include "crypto/sha2.h"
-#include "third_party/icu/source/common/unicode/ubrk.h"
#include "ui/base/l10n/l10n_util.h"
namespace safe_browsing {
@@ -51,15 +51,10 @@ struct PhishingTermFeatureExtractor::ExtractionState {
std::list<size_t> previous_word_sizes;
// An iterator for word breaking.
- UBreakIterator* iterator;
+ base::i18n::BreakIterator* iterator;
mattm 2014/05/07 00:37:03 scoped_ptr
Andrew Hayden (chromium.org) 2014/05/07 15:31:58 Done.
- // Our current position in the text that was passed to the ExtractionState
- // constructor, speciailly, the most recent break position returned by our
- // iterator.
- int position;
-
- // True if position has been initialized.
- bool position_initialized;
+ // True until we have advanced once.
+ bool initial;
// The time at which we started feature extraction for the current page.
base::TimeTicks start_time;
@@ -67,26 +62,27 @@ struct PhishingTermFeatureExtractor::ExtractionState {
// The number of iterations we've done for the current extraction.
int num_iterations;
+ // Stored position from the last time we advanced the iterator.
+ size_t last_position;
+
ExtractionState(const base::string16& text, base::TimeTicks start_time_ticks)
- : position(-1),
- position_initialized(false),
+ : initial(true),
start_time(start_time_ticks),
num_iterations(0) {
- UErrorCode status = U_ZERO_ERROR;
- // TODO(bryner): We should pass in the language for the document.
- iterator = ubrk_open(UBRK_WORD, NULL,
- text.data(), text.size(),
- &status);
- if (U_FAILURE(status)) {
- DLOG(ERROR) << "ubrk_open failed: " << status;
+ iterator = new base::i18n::BreakIterator(
+ text, base::i18n::BreakIterator::BREAK_WORD);
+
+ if (!iterator->Init()) {
+ DLOG(ERROR) << "failed to open iterator";
+ delete iterator;
iterator = NULL;
}
+ last_position = iterator->pos();
}
~ExtractionState() {
- if (iterator) {
- ubrk_close(iterator);
- }
+ delete iterator;
+ iterator = NULL;
}
};
@@ -152,26 +148,25 @@ void PhishingTermFeatureExtractor::ExtractFeaturesWithTimeout() {
return;
}
- if (!state_->position_initialized) {
- state_->position = ubrk_first(state_->iterator);
- if (state_->position == UBRK_DONE) {
+ if (state_->initial) {
+ if (!state_->iterator->IsValid()) {
mattm 2014/05/07 00:37:03 Looks like this doesn't work, since BreakIterator
Andrew Hayden (chromium.org) 2014/05/07 15:31:58 I had the same thought and went off to investigate
// No words present, so we're done.
RunCallback(true);
return;
}
- state_->position_initialized = true;
+ state_->initial = false;
}
int num_words = 0;
- for (int next = ubrk_next(state_->iterator);
- next != UBRK_DONE; next = ubrk_next(state_->iterator)) {
- if (ubrk_getRuleStatus(state_->iterator) != UBRK_WORD_NONE) {
+ while (state_->iterator->Advance()) {
+ int next = state_->iterator->pos();
+ if (state_->iterator->IsEndOfWord(next)) {
// next is now positioned at the end of a word.
- HandleWord(base::StringPiece16(page_text_->data() + state_->position,
- next - state_->position));
+ HandleWord(base::StringPiece16(page_text_->data() + state_->last_position,
+ next - state_->last_position));
++num_words;
}
- state_->position = next;
+ state_->last_position = next;
mattm 2014/05/07 00:37:03 I think all of this could be simplified by using t
Andrew Hayden (chromium.org) 2014/05/07 15:31:58 Seems reasonable, done.
if (num_words >= kClockCheckGranularity) {
num_words = 0;
« no previous file with comments | « base/i18n/break_iterator.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698