chrome/browser/speech/tts_controller_impl.cc - Issue 2364753002: Improve TTS GetMatchingVoices algorithm

Side by Side Diff: chrome/browser/speech/tts_controller_impl.cc

Issue 2364753002: Improve TTS GetMatchingVoices algorithm (Closed)

Patch Set: Address feedback Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2014 The Chromium Authors. All rights reserved.	1 // Copyright 2014 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/speech/tts_controller_impl.h"	5 #include "chrome/browser/speech/tts_controller_impl.h"

6	6

7 #include <stddef.h>	7 #include <stddef.h>

8	8

9 #include <string>	9 #include <string>

10 #include <vector>	10 #include <vector>

(...skipping 63 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
74 id_(next_utterance_id_++),	74 id_(next_utterance_id_++),

75 src_id_(-1),	75 src_id_(-1),

76 gender_(TTS_GENDER_NONE),	76 gender_(TTS_GENDER_NONE),

77 can_enqueue_(false),	77 can_enqueue_(false),

78 char_index_(0),	78 char_index_(0),

79 finished_(false) {	79 finished_(false) {

80 options_.reset(new base::DictionaryValue());	80 options_.reset(new base::DictionaryValue());

81 }	81 }

82	82

83 Utterance::~Utterance() {	83 Utterance::~Utterance() {

84 DCHECK(finished_);	84 // It's an error if an Utterance is destructed without being finished,

	85 // unless |browser_context_| is nullptr because it's a unit test.

	86 DCHECK(finished_ || !browser_context_);

85 }	87 }

86	88

87 void Utterance::OnTtsEvent(TtsEventType event_type,	89 void Utterance::OnTtsEvent(TtsEventType event_type,

88 int char_index,	90 int char_index,

89 const std::string& error_message) {	91 const std::string& error_message) {

90 if (char_index >= 0)	92 if (char_index >= 0)

91 char_index_ = char_index;	93 char_index_ = char_index;

92 if (IsFinalTtsEventType(event_type))	94 if (IsFinalTtsEventType(event_type))

93 finished_ = true;	95 finished_ = true;

94	96

(...skipping 61 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
156	158

157 void TtsControllerImpl::SpeakNow(Utterance* utterance) {	159 void TtsControllerImpl::SpeakNow(Utterance* utterance) {

158 // Ensure we have all built-in voices loaded. This is a no-op if already	160 // Ensure we have all built-in voices loaded. This is a no-op if already

159 // loaded.	161 // loaded.

160 bool loaded_built_in =	162 bool loaded_built_in =

161 GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->browser_context());	163 GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->browser_context());

162	164

163 // Get all available voices and try to find a matching voice.	165 // Get all available voices and try to find a matching voice.

164 std::vector<VoiceData> voices;	166 std::vector<VoiceData> voices;

165 GetVoices(utterance->browser_context(), &voices);	167 GetVoices(utterance->browser_context(), &voices);

	168

	169 // Get the best matching voice. If nothing matches, just set "native"

	170 // to true because that might trigger deferred loading of native voices.

166 int index = GetMatchingVoice(utterance, voices);	171 int index = GetMatchingVoice(utterance, voices);

167

168 VoiceData voice;	172 VoiceData voice;

169 if (index != -1) {	173 if (index >= 0)

170 // Select the matching voice.

171 voice = voices[index];	174 voice = voices[index];

172 } else {	175 else

173 // However, if no match was found on a platform without native tts voices,	176 voice.native = true; // Try to let native voices handle this utterance.

174 // attempt to get a voice based only on the current locale without respect

175 // to any supplied voice names.

176 std::vector<VoiceData> native_voices;

177

178 if (GetPlatformImpl()->PlatformImplAvailable())

179 GetPlatformImpl()->GetVoices(&native_voices);

180

181 if (native_voices.empty() && !voices.empty()) {

182 // TODO(dtseng): Notify extension caller of an error.

183 utterance->set_voice_name("");

184 // TODO(gaochun): Replace the global variable g_browser_process with

185 // GetContentClient()->browser() to eliminate the dependency of browser

186 // once TTS implementation was moved to content.

187 utterance->set_lang(g_browser_process->GetApplicationLocale());

188 index = GetMatchingVoice(utterance, voices);

189

190 // If even that fails, just take the first available voice.

191 if (index == -1)

192 index = 0;

193 voice = voices[index];

194 } else {

195 // Otherwise, simply give native voices a chance to handle this utterance.

196 voice.native = true;

197 }

198 }

199	177

200 GetPlatformImpl()->WillSpeakUtteranceWithVoice(utterance, voice);	178 GetPlatformImpl()->WillSpeakUtteranceWithVoice(utterance, voice);

201	179

202 if (!voice.native) {	180 if (!voice.native) {

203 #if !defined(OS_ANDROID)	181 #if !defined(OS_ANDROID)

204 DCHECK(!voice.extension_id.empty());	182 DCHECK(!voice.extension_id.empty());

205 current_utterance_ = utterance;	183 current_utterance_ = utterance;

206 utterance->set_extension_id(voice.extension_id);	184 utterance->set_extension_id(voice.extension_id);

207 if (tts_engine_delegate_)	185 if (tts_engine_delegate_)

208 tts_engine_delegate_->Speak(utterance, voice);	186 tts_engine_delegate_->Speak(utterance, voice);

(...skipping 160 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
369 }	347 }

370	348

371 TtsPlatformImpl* TtsControllerImpl::GetPlatformImpl() {	349 TtsPlatformImpl* TtsControllerImpl::GetPlatformImpl() {

372 if (!platform_impl_)	350 if (!platform_impl_)

373 platform_impl_ = TtsPlatformImpl::GetInstance();	351 platform_impl_ = TtsPlatformImpl::GetInstance();

374 return platform_impl_;	352 return platform_impl_;

375 }	353 }

376	354

377 int TtsControllerImpl::GetMatchingVoice(	355 int TtsControllerImpl::GetMatchingVoice(

378 const Utterance* utterance, std::vector<VoiceData>& voices) {	356 const Utterance* utterance, std::vector<VoiceData>& voices) {

379 // Make two passes: the first time, do strict language matching	357 // Return the index of the voice that best matches the utterance parameters.

380 // ('fr-FR' does not match 'fr-CA'). The second time, do prefix	358 //

381 // language matching ('fr-FR' matches 'fr' and 'fr-CA')	359 // These criteria are considered mandatory - if they're specified, any voice

382 for (int pass = 0; pass < 2; ++pass) {	360 // that doesn't match is rejected.

383 for (size_t i = 0; i < voices.size(); ++i) {	361 //

384 const VoiceData& voice = voices[i];	362 // Extension ID

	363 // Voice name

	364 //

	365 // The other criteria are scored based on how well they match, in

	366 // this order of precedence:

	367 //

	368 // Utterance language (exact region preferred, then general language)

	369 // App/system language (exact region preferred, then general language)

	370 // Required event types

	371 // Gender

385	372

386 if (!utterance->extension_id().empty() &&	373 // TODO(gaochun): Replace the global variable g_browser_process with

387 utterance->extension_id() != voice.extension_id) {	374 // GetContentClient()->browser() to eliminate the dependency of browser

388 continue;	375 // once TTS implementation was moved to content.

	376 std::string app_lang = g_browser_process->GetApplicationLocale();

	377

	378 // Start with a best score of -1, that way even if none of the criteria

	379 // match, something will be returned if there are any voices.

	380 int best_score = -1;

	381 int best_score_index = -1;

	382 for (size_t i = 0; i < voices.size(); ++i) {

	383 const VoiceData& voice = voices[i];

	384 int score = 0;

	385

	386 // If the extension ID is specified, check for an exact match.

	387 if (!utterance->extension_id().empty() &&

	388 utterance->extension_id() != voice.extension_id)

	389 continue;

	390

	391 // If the voice name is specified, check for an exact match.

	392 if (!utterance->voice_name().empty() &&

	393 voice.name != utterance->voice_name())

	394 continue;

	395

	396 // Prefer the utterance language.

	397 if (!voice.lang.empty() && !utterance->lang().empty()) {

	398 // An exact language match is worth more than a partial match.

	399 if (voice.lang == utterance->lang()) {

	400 score += 32;

	401 } else if (TrimLanguageCode(voice.lang) ==

	402 TrimLanguageCode(utterance->lang())) {

	403 score += 16;

389 }	404 }

	405 }

390	406

391 if (!voice.name.empty() &&	407 // Prefer the system language after that.

392 !utterance->voice_name().empty() &&	408 if (!voice.lang.empty()) {

393 voice.name != utterance->voice_name()) {	409 if (voice.lang == app_lang)

394 continue;	410 score += 8;

395 }	411 else if (TrimLanguageCode(voice.lang) == TrimLanguageCode(app_lang))

396 if (!voice.lang.empty() && !utterance->lang().empty()) {	412 score += 4;

397 std::string voice_lang = voice.lang;	413 }

398 std::string utterance_lang = utterance->lang();	414

399 if (pass == 1) {	415 // Next, prefer required event types.

400 voice_lang = TrimLanguageCode(voice_lang);	416 if (utterance->required_event_types().size() > 0) {

401 utterance_lang = TrimLanguageCode(utterance_lang);	417 bool has_all_required_event_types = true;

402 }	418 for (std::set<TtsEventType>::const_iterator iter =

403 if (voice_lang != utterance_lang) {	419 utterance->required_event_types().begin();

404 continue;	420 iter != utterance->required_event_types().end();

	421 ++iter) {

	422 if (voice.events.find(*iter) == voice.events.end()) {

	423 has_all_required_event_types = false;

	424 break;

405 }	425 }

406 }	426 }

407 if (voice.gender != TTS_GENDER_NONE &&	427 if (has_all_required_event_types)

408 utterance->gender() != TTS_GENDER_NONE &&	428 score += 2;

409 voice.gender != utterance->gender()) {	429 }

410 continue;

411 }

412	430

413 if (utterance->required_event_types().size() > 0) {	431 // Finally, prefer the requested gender.

414 bool has_all_required_event_types = true;	432 if (voice.gender != TTS_GENDER_NONE &&

415 for (std::set<TtsEventType>::const_iterator iter =	433 utterance->gender() != TTS_GENDER_NONE &&

416 utterance->required_event_types().begin();	434 voice.gender == utterance->gender()) {

417 iter != utterance->required_event_types().end();	435 score += 1;

418 ++iter) {	436 }

419 if (voice.events.find(*iter) == voice.events.end()) {

420 has_all_required_event_types = false;

421 break;

422 }

423 }

424 if (!has_all_required_event_types)

425 continue;

426 }

427	437

428 return static_cast<int>(i);	438 if (score > best_score) {

	439 best_score = score;

	440 best_score_index = i;

429 }	441 }

430 }	442 }

431	443

432 return -1;	444 return best_score_index;

433 }	445 }

434	446

435 void TtsControllerImpl::VoicesChanged() {	447 void TtsControllerImpl::VoicesChanged() {

436 // Existence of platform tts indicates explicit requests to tts. Since	448 // Existence of platform tts indicates explicit requests to tts. Since

437 // |VoicesChanged| can occur implicitly, only send if needed.	449 // |VoicesChanged| can occur implicitly, only send if needed.

438 if (!platform_impl_)	450 if (!platform_impl_)

439 return;	451 return;

440	452

441 for (std::set<VoicesChangedDelegate*>::iterator iter =	453 for (std::set<VoicesChangedDelegate*>::iterator iter =

442 voices_changed_delegates_.begin();	454 voices_changed_delegates_.begin();

(...skipping 43 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
486 }	498 }

487	499

488 void TtsControllerImpl::SetTtsEngineDelegate(	500 void TtsControllerImpl::SetTtsEngineDelegate(

489 TtsEngineDelegate* delegate) {	501 TtsEngineDelegate* delegate) {

490 tts_engine_delegate_ = delegate;	502 tts_engine_delegate_ = delegate;

491 }	503 }

492	504

493 TtsEngineDelegate* TtsControllerImpl::GetTtsEngineDelegate() {	505 TtsEngineDelegate* TtsControllerImpl::GetTtsEngineDelegate() {

494 return tts_engine_delegate_;	506 return tts_engine_delegate_;

495 }	507 }

OLD	NEW

« no previous file with comments | « chrome/browser/speech/tts_controller_impl.h ('k') | chrome/browser/speech/tts_controller_unittest.cc » ('j') | no next file with comments »