OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/browser/speech/tts_controller.h" | |
6 | |
7 #include <string> | |
8 #include <vector> | |
9 | |
10 #include "base/float_util.h" | |
11 #include "base/values.h" | |
12 #include "chrome/browser/browser_process.h" | |
13 #include "chrome/browser/speech/tts_platform.h" | |
14 | |
15 namespace { | |
16 // A value to be used to indicate that there is no char index available. | |
17 const int kInvalidCharIndex = -1; | |
18 | |
// Reduces a language/region code such as 'fr-FR' to its leading two-letter
// language part ('fr'). A code is only trimmed when it is at least five
// characters long and has a hyphen in the third position; anything else is
// returned unchanged.
std::string TrimLanguageCode(std::string lang) {
  const bool has_region_suffix = lang.size() >= 5 && lang[2] == '-';
  return has_region_suffix ? lang.substr(0, 2) : lang;
}
27 | |
28 } // namespace | |
29 | |
30 bool IsFinalTtsEventType(TtsEventType event_type) { | |
31 return (event_type == TTS_EVENT_END || | |
32 event_type == TTS_EVENT_INTERRUPTED || | |
33 event_type == TTS_EVENT_CANCELLED || | |
34 event_type == TTS_EVENT_ERROR); | |
35 } | |
36 | |
37 // | |
38 // UtteranceContinuousParameters | |
39 // | |
40 | |
41 | |
42 UtteranceContinuousParameters::UtteranceContinuousParameters() | |
43 : rate(-1), | |
44 pitch(-1), | |
45 volume(-1) {} | |
46 | |
47 | |
48 // | |
49 // VoiceData | |
50 // | |
51 | |
52 | |
53 VoiceData::VoiceData() | |
54 : gender(TTS_GENDER_NONE), | |
55 remote(false), | |
56 native(false) {} | |
57 | |
58 VoiceData::~VoiceData() {} | |
59 | |
60 | |
61 // | |
62 // Utterance | |
63 // | |
64 | |
65 // static | |
66 int Utterance::next_utterance_id_ = 0; | |
67 | |
68 Utterance::Utterance(Profile* profile) | |
69 : profile_(profile), | |
70 id_(next_utterance_id_++), | |
71 src_id_(-1), | |
72 gender_(TTS_GENDER_NONE), | |
73 can_enqueue_(false), | |
74 char_index_(0), | |
75 finished_(false) { | |
76 options_.reset(new base::DictionaryValue()); | |
77 } | |
78 | |
79 Utterance::~Utterance() { | |
80 DCHECK(finished_); | |
81 } | |
82 | |
83 void Utterance::OnTtsEvent(TtsEventType event_type, | |
84 int char_index, | |
85 const std::string& error_message) { | |
86 if (char_index >= 0) | |
87 char_index_ = char_index; | |
88 if (IsFinalTtsEventType(event_type)) | |
89 finished_ = true; | |
90 | |
91 if (event_delegate_) | |
92 event_delegate_->OnTtsEvent(this, event_type, char_index, error_message); | |
93 if (finished_) | |
94 event_delegate_.reset(); | |
95 } | |
96 | |
97 void Utterance::Finish() { | |
98 finished_ = true; | |
99 } | |
100 | |
101 void Utterance::set_options(const base::Value* options) { | |
102 options_.reset(options->DeepCopy()); | |
103 } | |
104 | |
105 // | |
106 // TtsController | |
107 // | |
108 | |
109 // static | |
110 TtsController* TtsController::GetInstance() { | |
111 return Singleton<TtsController>::get(); | |
112 } | |
113 | |
114 TtsController::TtsController() | |
115 : current_utterance_(NULL), | |
116 paused_(false), | |
117 platform_impl_(NULL), | |
118 tts_engine_delegate_(NULL) { | |
119 } | |
120 | |
121 TtsController::~TtsController() { | |
122 if (current_utterance_) { | |
123 current_utterance_->Finish(); | |
124 delete current_utterance_; | |
125 } | |
126 | |
127 // Clear any queued utterances too. | |
128 ClearUtteranceQueue(false); // Don't sent events. | |
129 } | |
130 | |
131 void TtsController::SpeakOrEnqueue(Utterance* utterance) { | |
132 // If we're paused and we get an utterance that can't be queued, | |
133 // flush the queue but stay in the paused state. | |
134 if (paused_ && !utterance->can_enqueue()) { | |
135 Stop(); | |
136 paused_ = true; | |
137 return; | |
138 } | |
139 | |
140 if (paused_ || (IsSpeaking() && utterance->can_enqueue())) { | |
141 utterance_queue_.push(utterance); | |
142 } else { | |
143 Stop(); | |
144 SpeakNow(utterance); | |
145 } | |
146 } | |
147 | |
148 void TtsController::SpeakNow(Utterance* utterance) { | |
149 // Ensure we have all built-in voices loaded. This is a no-op if already | |
150 // loaded. | |
151 bool loaded_built_in = | |
152 GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->profile()); | |
153 | |
154 // Get all available voices and try to find a matching voice. | |
155 std::vector<VoiceData> voices; | |
156 GetVoices(utterance->profile(), &voices); | |
157 int index = GetMatchingVoice(utterance, voices); | |
158 | |
159 VoiceData voice; | |
160 if (index != -1) { | |
161 // Select the matching voice. | |
162 voice = voices[index]; | |
163 } else { | |
164 // However, if no match was found on a platform without native tts voices, | |
165 // attempt to get a voice based only on the current locale without respect | |
166 // to any supplied voice names. | |
167 std::vector<VoiceData> native_voices; | |
168 | |
169 if (GetPlatformImpl()->PlatformImplAvailable()) | |
170 GetPlatformImpl()->GetVoices(&native_voices); | |
171 | |
172 if (native_voices.empty() && !voices.empty()) { | |
173 // TODO(dtseng): Notify extension caller of an error. | |
174 utterance->set_voice_name(""); | |
175 // TODO(gaochun): Replace the global variable g_browser_process with | |
176 // GetContentClient()->browser() to eliminate the dependency of browser | |
177 // once TTS implementation was moved to content. | |
178 utterance->set_lang(g_browser_process->GetApplicationLocale()); | |
179 index = GetMatchingVoice(utterance, voices); | |
180 | |
181 // If even that fails, just take the first available voice. | |
182 if (index == -1) | |
183 index = 0; | |
184 voice = voices[index]; | |
185 } else { | |
186 // Otherwise, simply give native voices a chance to handle this utterance. | |
187 voice.native = true; | |
188 } | |
189 } | |
190 | |
191 GetPlatformImpl()->WillSpeakUtteranceWithVoice(utterance, voice); | |
192 | |
193 if (!voice.native) { | |
194 #if !defined(OS_ANDROID) | |
195 DCHECK(!voice.extension_id.empty()); | |
196 current_utterance_ = utterance; | |
197 utterance->set_extension_id(voice.extension_id); | |
198 if (tts_engine_delegate_) | |
199 tts_engine_delegate_->Speak(utterance, voice); | |
200 bool sends_end_event = | |
201 voice.events.find(TTS_EVENT_END) != voice.events.end(); | |
202 if (!sends_end_event) { | |
203 utterance->Finish(); | |
204 delete utterance; | |
205 current_utterance_ = NULL; | |
206 SpeakNextUtterance(); | |
207 } | |
208 #endif | |
209 } else { | |
210 // It's possible for certain platforms to send start events immediately | |
211 // during |speak|. | |
212 current_utterance_ = utterance; | |
213 GetPlatformImpl()->clear_error(); | |
214 bool success = GetPlatformImpl()->Speak( | |
215 utterance->id(), | |
216 utterance->text(), | |
217 utterance->lang(), | |
218 voice, | |
219 utterance->continuous_parameters()); | |
220 if (!success) | |
221 current_utterance_ = NULL; | |
222 | |
223 // If the native voice wasn't able to process this speech, see if | |
224 // the browser has built-in TTS that isn't loaded yet. | |
225 if (!success && loaded_built_in) { | |
226 utterance_queue_.push(utterance); | |
227 return; | |
228 } | |
229 | |
230 if (!success) { | |
231 utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex, | |
232 GetPlatformImpl()->error()); | |
233 delete utterance; | |
234 return; | |
235 } | |
236 } | |
237 } | |
238 | |
239 void TtsController::Stop() { | |
240 paused_ = false; | |
241 if (current_utterance_ && !current_utterance_->extension_id().empty()) { | |
242 #if !defined(OS_ANDROID) | |
243 if (tts_engine_delegate_) | |
244 tts_engine_delegate_->Stop(current_utterance_); | |
245 #endif | |
246 } else { | |
247 GetPlatformImpl()->clear_error(); | |
248 GetPlatformImpl()->StopSpeaking(); | |
249 } | |
250 | |
251 if (current_utterance_) | |
252 current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex, | |
253 std::string()); | |
254 FinishCurrentUtterance(); | |
255 ClearUtteranceQueue(true); // Send events. | |
256 } | |
257 | |
258 void TtsController::Pause() { | |
259 paused_ = true; | |
260 if (current_utterance_ && !current_utterance_->extension_id().empty()) { | |
261 #if !defined(OS_ANDROID) | |
262 if (tts_engine_delegate_) | |
263 tts_engine_delegate_->Pause(current_utterance_); | |
264 #endif | |
265 } else if (current_utterance_) { | |
266 GetPlatformImpl()->clear_error(); | |
267 GetPlatformImpl()->Pause(); | |
268 } | |
269 } | |
270 | |
271 void TtsController::Resume() { | |
272 paused_ = false; | |
273 if (current_utterance_ && !current_utterance_->extension_id().empty()) { | |
274 #if !defined(OS_ANDROID) | |
275 if (tts_engine_delegate_) | |
276 tts_engine_delegate_->Resume(current_utterance_); | |
277 #endif | |
278 } else if (current_utterance_) { | |
279 GetPlatformImpl()->clear_error(); | |
280 GetPlatformImpl()->Resume(); | |
281 } else { | |
282 SpeakNextUtterance(); | |
283 } | |
284 } | |
285 | |
286 void TtsController::OnTtsEvent(int utterance_id, | |
287 TtsEventType event_type, | |
288 int char_index, | |
289 const std::string& error_message) { | |
290 // We may sometimes receive completion callbacks "late", after we've | |
291 // already finished the utterance (for example because another utterance | |
292 // interrupted or we got a call to Stop). This is normal and we can | |
293 // safely just ignore these events. | |
294 if (!current_utterance_ || utterance_id != current_utterance_->id()) { | |
295 return; | |
296 } | |
297 current_utterance_->OnTtsEvent(event_type, char_index, error_message); | |
298 if (current_utterance_->finished()) { | |
299 FinishCurrentUtterance(); | |
300 SpeakNextUtterance(); | |
301 } | |
302 } | |
303 | |
304 void TtsController::GetVoices(Profile* profile, | |
305 std::vector<VoiceData>* out_voices) { | |
306 #if !defined(OS_ANDROID) | |
307 if (profile && tts_engine_delegate_) | |
308 tts_engine_delegate_->GetVoices(profile, out_voices); | |
309 #endif | |
310 | |
311 TtsPlatformImpl* platform_impl = GetPlatformImpl(); | |
312 if (platform_impl) { | |
313 // Ensure we have all built-in voices loaded. This is a no-op if already | |
314 // loaded. | |
315 platform_impl->LoadBuiltInTtsExtension(profile); | |
316 if (platform_impl->PlatformImplAvailable()) | |
317 platform_impl->GetVoices(out_voices); | |
318 } | |
319 } | |
320 | |
321 bool TtsController::IsSpeaking() { | |
322 return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking(); | |
323 } | |
324 | |
325 void TtsController::FinishCurrentUtterance() { | |
326 if (current_utterance_) { | |
327 if (!current_utterance_->finished()) | |
328 current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex, | |
329 std::string()); | |
330 delete current_utterance_; | |
331 current_utterance_ = NULL; | |
332 } | |
333 } | |
334 | |
335 void TtsController::SpeakNextUtterance() { | |
336 if (paused_) | |
337 return; | |
338 | |
339 // Start speaking the next utterance in the queue. Keep trying in case | |
340 // one fails but there are still more in the queue to try. | |
341 while (!utterance_queue_.empty() && !current_utterance_) { | |
342 Utterance* utterance = utterance_queue_.front(); | |
343 utterance_queue_.pop(); | |
344 SpeakNow(utterance); | |
345 } | |
346 } | |
347 | |
348 void TtsController::RetrySpeakingQueuedUtterances() { | |
349 if (current_utterance_ == NULL && !utterance_queue_.empty()) | |
350 SpeakNextUtterance(); | |
351 } | |
352 | |
353 void TtsController::ClearUtteranceQueue(bool send_events) { | |
354 while (!utterance_queue_.empty()) { | |
355 Utterance* utterance = utterance_queue_.front(); | |
356 utterance_queue_.pop(); | |
357 if (send_events) | |
358 utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex, | |
359 std::string()); | |
360 else | |
361 utterance->Finish(); | |
362 delete utterance; | |
363 } | |
364 } | |
365 | |
366 void TtsController::SetPlatformImpl( | |
367 TtsPlatformImpl* platform_impl) { | |
368 platform_impl_ = platform_impl; | |
369 } | |
370 | |
371 int TtsController::QueueSize() { | |
372 return static_cast<int>(utterance_queue_.size()); | |
373 } | |
374 | |
375 TtsPlatformImpl* TtsController::GetPlatformImpl() { | |
376 if (!platform_impl_) | |
377 platform_impl_ = TtsPlatformImpl::GetInstance(); | |
378 return platform_impl_; | |
379 } | |
380 | |
381 int TtsController::GetMatchingVoice( | |
382 const Utterance* utterance, std::vector<VoiceData>& voices) { | |
383 // Make two passes: the first time, do strict language matching | |
384 // ('fr-FR' does not match 'fr-CA'). The second time, do prefix | |
385 // language matching ('fr-FR' matches 'fr' and 'fr-CA') | |
386 for (int pass = 0; pass < 2; ++pass) { | |
387 for (size_t i = 0; i < voices.size(); ++i) { | |
388 const VoiceData& voice = voices[i]; | |
389 | |
390 if (!utterance->extension_id().empty() && | |
391 utterance->extension_id() != voice.extension_id) { | |
392 continue; | |
393 } | |
394 | |
395 if (!voice.name.empty() && | |
396 !utterance->voice_name().empty() && | |
397 voice.name != utterance->voice_name()) { | |
398 continue; | |
399 } | |
400 if (!voice.lang.empty() && !utterance->lang().empty()) { | |
401 std::string voice_lang = voice.lang; | |
402 std::string utterance_lang = utterance->lang(); | |
403 if (pass == 1) { | |
404 voice_lang = TrimLanguageCode(voice_lang); | |
405 utterance_lang = TrimLanguageCode(utterance_lang); | |
406 } | |
407 if (voice_lang != utterance_lang) { | |
408 continue; | |
409 } | |
410 } | |
411 if (voice.gender != TTS_GENDER_NONE && | |
412 utterance->gender() != TTS_GENDER_NONE && | |
413 voice.gender != utterance->gender()) { | |
414 continue; | |
415 } | |
416 | |
417 if (utterance->required_event_types().size() > 0) { | |
418 bool has_all_required_event_types = true; | |
419 for (std::set<TtsEventType>::const_iterator iter = | |
420 utterance->required_event_types().begin(); | |
421 iter != utterance->required_event_types().end(); | |
422 ++iter) { | |
423 if (voice.events.find(*iter) == voice.events.end()) { | |
424 has_all_required_event_types = false; | |
425 break; | |
426 } | |
427 } | |
428 if (!has_all_required_event_types) | |
429 continue; | |
430 } | |
431 | |
432 return static_cast<int>(i); | |
433 } | |
434 } | |
435 | |
436 return -1; | |
437 } | |
438 | |
439 void TtsController::VoicesChanged() { | |
440 for (std::set<VoicesChangedDelegate*>::iterator iter = | |
441 voices_changed_delegates_.begin(); | |
442 iter != voices_changed_delegates_.end(); ++iter) { | |
443 (*iter)->OnVoicesChanged(); | |
444 } | |
445 } | |
446 | |
447 void TtsController::AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) { | |
448 voices_changed_delegates_.insert(delegate); | |
449 } | |
450 | |
451 void TtsController::RemoveVoicesChangedDelegate( | |
452 VoicesChangedDelegate* delegate) { | |
453 voices_changed_delegates_.erase(delegate); | |
454 } | |
455 | |
456 void TtsController::SetTtsEngineDelegate( | |
457 TtsEngineDelegate* delegate) { | |
458 tts_engine_delegate_ = delegate; | |
459 } | |
OLD | NEW |