Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(104)

Side by Side Diff: chrome/browser/speech/tts_controller_impl.cc

Issue 2364753002: Improve TTS GetMatchingVoices algorithm (Closed)
Patch Set: Address feedback Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/speech/tts_controller_impl.h" 5 #include "chrome/browser/speech/tts_controller_impl.h"
6 6
7 #include <stddef.h> 7 #include <stddef.h>
8 8
9 #include <string> 9 #include <string>
10 #include <vector> 10 #include <vector>
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
74 id_(next_utterance_id_++), 74 id_(next_utterance_id_++),
75 src_id_(-1), 75 src_id_(-1),
76 gender_(TTS_GENDER_NONE), 76 gender_(TTS_GENDER_NONE),
77 can_enqueue_(false), 77 can_enqueue_(false),
78 char_index_(0), 78 char_index_(0),
79 finished_(false) { 79 finished_(false) {
80 options_.reset(new base::DictionaryValue()); 80 options_.reset(new base::DictionaryValue());
81 } 81 }
82 82
83 Utterance::~Utterance() { 83 Utterance::~Utterance() {
84 DCHECK(finished_); 84 // It's an error if an Utterance is destructed without being finished,
85 // unless |browser_context_| is nullptr because it's a unit test.
86 DCHECK(finished_ || !browser_context_);
85 } 87 }
86 88
87 void Utterance::OnTtsEvent(TtsEventType event_type, 89 void Utterance::OnTtsEvent(TtsEventType event_type,
88 int char_index, 90 int char_index,
89 const std::string& error_message) { 91 const std::string& error_message) {
90 if (char_index >= 0) 92 if (char_index >= 0)
91 char_index_ = char_index; 93 char_index_ = char_index;
92 if (IsFinalTtsEventType(event_type)) 94 if (IsFinalTtsEventType(event_type))
93 finished_ = true; 95 finished_ = true;
94 96
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
156 158
157 void TtsControllerImpl::SpeakNow(Utterance* utterance) { 159 void TtsControllerImpl::SpeakNow(Utterance* utterance) {
158 // Ensure we have all built-in voices loaded. This is a no-op if already 160 // Ensure we have all built-in voices loaded. This is a no-op if already
159 // loaded. 161 // loaded.
160 bool loaded_built_in = 162 bool loaded_built_in =
161 GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->browser_context()); 163 GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->browser_context());
162 164
163 // Get all available voices and try to find a matching voice. 165 // Get all available voices and try to find a matching voice.
164 std::vector<VoiceData> voices; 166 std::vector<VoiceData> voices;
165 GetVoices(utterance->browser_context(), &voices); 167 GetVoices(utterance->browser_context(), &voices);
168
169 // Get the best matching voice. If nothing matches, just set "native"
170 // to true because that might trigger deferred loading of native voices.
166 int index = GetMatchingVoice(utterance, voices); 171 int index = GetMatchingVoice(utterance, voices);
167
168 VoiceData voice; 172 VoiceData voice;
169 if (index != -1) { 173 if (index >= 0)
170 // Select the matching voice.
171 voice = voices[index]; 174 voice = voices[index];
172 } else { 175 else
173 // However, if no match was found on a platform without native tts voices, 176 voice.native = true; // Try to let native voices handle it.
174 // attempt to get a voice based only on the current locale without respect
175 // to any supplied voice names.
176 std::vector<VoiceData> native_voices;
177
178 if (GetPlatformImpl()->PlatformImplAvailable())
179 GetPlatformImpl()->GetVoices(&native_voices);
180
181 if (native_voices.empty() && !voices.empty()) {
182 // TODO(dtseng): Notify extension caller of an error.
183 utterance->set_voice_name("");
184 // TODO(gaochun): Replace the global variable g_browser_process with
185 // GetContentClient()->browser() to eliminate the dependency of browser
186 // once TTS implementation was moved to content.
187 utterance->set_lang(g_browser_process->GetApplicationLocale());
188 index = GetMatchingVoice(utterance, voices);
189
190 // If even that fails, just take the first available voice.
191 if (index == -1)
192 index = 0;
193 voice = voices[index];
194 } else {
195 // Otherwise, simply give native voices a chance to handle this utterance.
196 voice.native = true;
197 }
198 }
199 177
200 GetPlatformImpl()->WillSpeakUtteranceWithVoice(utterance, voice); 178 GetPlatformImpl()->WillSpeakUtteranceWithVoice(utterance, voice);
201 179
202 if (!voice.native) { 180 if (!voice.native) {
203 #if !defined(OS_ANDROID) 181 #if !defined(OS_ANDROID)
204 DCHECK(!voice.extension_id.empty()); 182 DCHECK(!voice.extension_id.empty());
205 current_utterance_ = utterance; 183 current_utterance_ = utterance;
206 utterance->set_extension_id(voice.extension_id); 184 utterance->set_extension_id(voice.extension_id);
207 if (tts_engine_delegate_) 185 if (tts_engine_delegate_)
208 tts_engine_delegate_->Speak(utterance, voice); 186 tts_engine_delegate_->Speak(utterance, voice);
(...skipping 160 matching lines...) Expand 10 before | Expand all | Expand 10 after
369 } 347 }
370 348
371 TtsPlatformImpl* TtsControllerImpl::GetPlatformImpl() { 349 TtsPlatformImpl* TtsControllerImpl::GetPlatformImpl() {
372 if (!platform_impl_) 350 if (!platform_impl_)
373 platform_impl_ = TtsPlatformImpl::GetInstance(); 351 platform_impl_ = TtsPlatformImpl::GetInstance();
374 return platform_impl_; 352 return platform_impl_;
375 } 353 }
376 354
377 int TtsControllerImpl::GetMatchingVoice( 355 int TtsControllerImpl::GetMatchingVoice(
378 const Utterance* utterance, std::vector<VoiceData>& voices) { 356 const Utterance* utterance, std::vector<VoiceData>& voices) {
379 // Make two passes: the first time, do strict language matching 357 // Return the index of the voice that best matches the utterance parameters.
380 // ('fr-FR' does not match 'fr-CA'). The second time, do prefix 358 //
381 // language matching ('fr-FR' matches 'fr' and 'fr-CA') 359 // These criteria are considered mandatory - if they're specified, any voice
382 for (int pass = 0; pass < 2; ++pass) { 360 // that doesn't match is rejected.
383 for (size_t i = 0; i < voices.size(); ++i) { 361 //
384 const VoiceData& voice = voices[i]; 362 // Extension ID
363 // Voice name
364 //
365 // The other criteria are scored based on how well they match, in
366 // this order of precedence:
367 //
368 // Utterance language (exact region preferred, then general language)
369 // App/system language (exact region preferred, then general language)
370 // Required event types
371 // Gender
385 372
386 if (!utterance->extension_id().empty() && 373 // TODO(gaochun): Replace the global variable g_browser_process with
387 utterance->extension_id() != voice.extension_id) { 374 // GetContentClient()->browser() to eliminate the dependency of browser
388 continue; 375 // once TTS implementation was moved to content.
376 std::string app_lang = g_browser_process->GetApplicationLocale();
377
378 // Start with a best score of -1, that way even if none of the criteria
379 // match, something will be returned if there are any voices.
380 int best_score = -1;
381 int best_score_index = -1;
382 for (size_t i = 0; i < voices.size(); ++i) {
383 const VoiceData& voice = voices[i];
384 int score = 0;
385
386 // If the extension ID is specified, check for an exact match.
387 if (!utterance->extension_id().empty() &&
388 utterance->extension_id() != voice.extension_id)
389 continue;
390
391 // If the voice name is specified, check for an exact match.
392 if (!utterance->voice_name().empty() &&
393 voice.name != utterance->voice_name())
394 continue;
395
396 // Prefer the utterance language.
397 if (!voice.lang.empty() && !utterance->lang().empty()) {
398 // An exact language match is worth more than a partial match.
399 if (voice.lang == utterance->lang()) {
400 score += 32;
401 } else if (TrimLanguageCode(voice.lang) ==
402 TrimLanguageCode(utterance->lang())) {
403 score += 16;
389 } 404 }
405 }
390 406
391 if (!voice.name.empty() && 407 // Prefer the system language after that.
392 !utterance->voice_name().empty() && 408 if (!voice.lang.empty()) {
393 voice.name != utterance->voice_name()) { 409 if (voice.lang == app_lang)
394 continue; 410 score += 8;
395 } 411 else if (TrimLanguageCode(voice.lang) == TrimLanguageCode(app_lang))
396 if (!voice.lang.empty() && !utterance->lang().empty()) { 412 score += 4;
397 std::string voice_lang = voice.lang; 413 }
398 std::string utterance_lang = utterance->lang(); 414
399 if (pass == 1) { 415 // Next, prefer required event types.
400 voice_lang = TrimLanguageCode(voice_lang); 416 if (utterance->required_event_types().size() > 0) {
401 utterance_lang = TrimLanguageCode(utterance_lang); 417 bool has_all_required_event_types = true;
402 } 418 for (std::set<TtsEventType>::const_iterator iter =
403 if (voice_lang != utterance_lang) { 419 utterance->required_event_types().begin();
404 continue; 420 iter != utterance->required_event_types().end();
421 ++iter) {
422 if (voice.events.find(*iter) == voice.events.end()) {
423 has_all_required_event_types = false;
424 break;
405 } 425 }
406 } 426 }
407 if (voice.gender != TTS_GENDER_NONE && 427 if (has_all_required_event_types)
408 utterance->gender() != TTS_GENDER_NONE && 428 score += 2;
409 voice.gender != utterance->gender()) { 429 }
410 continue;
411 }
412 430
413 if (utterance->required_event_types().size() > 0) { 431 // Finally, prefer the requested gender.
414 bool has_all_required_event_types = true; 432 if (voice.gender != TTS_GENDER_NONE &&
415 for (std::set<TtsEventType>::const_iterator iter = 433 utterance->gender() != TTS_GENDER_NONE &&
416 utterance->required_event_types().begin(); 434 voice.gender == utterance->gender()) {
417 iter != utterance->required_event_types().end(); 435 score += 1;
418 ++iter) { 436 }
419 if (voice.events.find(*iter) == voice.events.end()) {
420 has_all_required_event_types = false;
421 break;
422 }
423 }
424 if (!has_all_required_event_types)
425 continue;
426 }
427 437
428 return static_cast<int>(i); 438 if (score > best_score) {
439 best_score = score;
440 best_score_index = i;
429 } 441 }
430 } 442 }
431 443
432 return -1; 444 return best_score_index;
433 } 445 }
434 446
435 void TtsControllerImpl::VoicesChanged() { 447 void TtsControllerImpl::VoicesChanged() {
436 // Existence of platform tts indicates explicit requests to tts. Since 448 // Existence of platform tts indicates explicit requests to tts. Since
437 // |VoicesChanged| can occur implicitly, only send if needed. 449 // |VoicesChanged| can occur implicitly, only send if needed.
438 if (!platform_impl_) 450 if (!platform_impl_)
439 return; 451 return;
440 452
441 for (std::set<VoicesChangedDelegate*>::iterator iter = 453 for (std::set<VoicesChangedDelegate*>::iterator iter =
442 voices_changed_delegates_.begin(); 454 voices_changed_delegates_.begin();
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
486 } 498 }
487 499
488 void TtsControllerImpl::SetTtsEngineDelegate( 500 void TtsControllerImpl::SetTtsEngineDelegate(
489 TtsEngineDelegate* delegate) { 501 TtsEngineDelegate* delegate) {
490 tts_engine_delegate_ = delegate; 502 tts_engine_delegate_ = delegate;
491 } 503 }
492 504
493 TtsEngineDelegate* TtsControllerImpl::GetTtsEngineDelegate() { 505 TtsEngineDelegate* TtsControllerImpl::GetTtsEngineDelegate() {
494 return tts_engine_delegate_; 506 return tts_engine_delegate_;
495 } 507 }
OLDNEW
« no previous file with comments | « chrome/browser/speech/tts_controller_impl.h ('k') | chrome/browser/speech/tts_controller_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698