OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <math.h> | 5 #include <math.h> |
6 #include <sapi.h> | 6 #include <sapi.h> |
7 #include <sphelper.h> | |
7 | 8 |
8 #include "base/memory/singleton.h" | 9 #include "base/memory/singleton.h" |
9 #include "base/strings/string_number_conversions.h" | 10 #include "base/strings/string_number_conversions.h" |
11 #include "base/strings/string_piece.h" | |
10 #include "base/strings/utf_string_conversions.h" | 12 #include "base/strings/utf_string_conversions.h" |
11 #include "base/values.h" | 13 #include "base/values.h" |
14 #include "base/win/scoped_co_mem.h" | |
12 #include "base/win/scoped_comptr.h" | 15 #include "base/win/scoped_comptr.h" |
13 #include "chrome/browser/speech/tts_controller.h" | 16 #include "chrome/browser/speech/tts_controller.h" |
14 #include "chrome/browser/speech/tts_platform.h" | 17 #include "chrome/browser/speech/tts_platform.h" |
15 | 18 |
namespace {

// Names used when reading voice metadata from an ISpObjectToken:
// the sub-key that holds the voice attributes, and the string values
// inside that sub-key that are looked up by GetVoices().
const wchar_t kAttributesKey[] = L"Attributes";
const wchar_t kGenderValue[] = L"Gender";
const wchar_t kLanguageValue[] = L"Language";

}  // namespace
27 | |
16 class TtsPlatformImplWin : public TtsPlatformImpl { | 28 class TtsPlatformImplWin : public TtsPlatformImpl { |
17 public: | 29 public: |
18 virtual bool PlatformImplAvailable() { | 30 virtual bool PlatformImplAvailable() { |
19 return true; | 31 return true; |
20 } | 32 } |
21 | 33 |
22 virtual bool Speak( | 34 virtual bool Speak( |
23 int utterance_id, | 35 int utterance_id, |
24 const std::string& utterance, | 36 const std::string& utterance, |
25 const std::string& lang, | 37 const std::string& lang, |
(...skipping 14 matching lines...) Expand all Loading... | |
40 static TtsPlatformImplWin* GetInstance(); | 52 static TtsPlatformImplWin* GetInstance(); |
41 | 53 |
42 static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param); | 54 static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param); |
43 | 55 |
44 private: | 56 private: |
45 TtsPlatformImplWin(); | 57 TtsPlatformImplWin(); |
46 virtual ~TtsPlatformImplWin() {} | 58 virtual ~TtsPlatformImplWin() {} |
47 | 59 |
48 void OnSpeechEvent(); | 60 void OnSpeechEvent(); |
49 | 61 |
62 void SetVoiceFromName(const std::string& name); | |
63 | |
50 base::win::ScopedComPtr<ISpVoice> speech_synthesizer_; | 64 base::win::ScopedComPtr<ISpVoice> speech_synthesizer_; |
51 | 65 |
52 // These apply to the current utterance only. | 66 // These apply to the current utterance only. |
53 std::wstring utterance_; | 67 std::wstring utterance_; |
54 int utterance_id_; | 68 int utterance_id_; |
55 int prefix_len_; | 69 int prefix_len_; |
56 ULONG stream_number_; | 70 ULONG stream_number_; |
57 int char_position_; | 71 int char_position_; |
58 bool paused_; | 72 bool paused_; |
59 | 73 |
(...skipping 12 matching lines...) Expand all Loading... | |
72 const std::string& src_utterance, | 86 const std::string& src_utterance, |
73 const std::string& lang, | 87 const std::string& lang, |
74 const VoiceData& voice, | 88 const VoiceData& voice, |
75 const UtteranceContinuousParameters& params) { | 89 const UtteranceContinuousParameters& params) { |
76 std::wstring prefix; | 90 std::wstring prefix; |
77 std::wstring suffix; | 91 std::wstring suffix; |
78 | 92 |
79 if (!speech_synthesizer_.get()) | 93 if (!speech_synthesizer_.get()) |
80 return false; | 94 return false; |
81 | 95 |
82 // TODO(dmazzoni): support languages other than the default: crbug.com/88059 | 96 SetVoiceFromName(voice.name); |
83 | 97 |
84 if (params.rate >= 0.0) { | 98 if (params.rate >= 0.0) { |
85 // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's | 99 // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's |
86 // linear range of -10 to 10: | 100 // linear range of -10 to 10: |
87 // 0.1 -> -10 | 101 // 0.1 -> -10 |
88 // 1.0 -> 0 | 102 // 1.0 -> 0 |
89 // 10.0 -> 10 | 103 // 10.0 -> 10 |
90 speech_synthesizer_->SetRate(static_cast<int32>(10 * log10(params.rate))); | 104 speech_synthesizer_->SetRate(static_cast<int32>(10 * log10(params.rate))); |
91 } | 105 } |
92 | 106 |
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
165 status.dwRunningState == SPRS_IS_SPEAKING) { | 179 status.dwRunningState == SPRS_IS_SPEAKING) { |
166 return true; | 180 return true; |
167 } | 181 } |
168 } | 182 } |
169 } | 183 } |
170 return false; | 184 return false; |
171 } | 185 } |
172 | 186 |
173 void TtsPlatformImplWin::GetVoices( | 187 void TtsPlatformImplWin::GetVoices( |
174 std::vector<VoiceData>* out_voices) { | 188 std::vector<VoiceData>* out_voices) { |
175 // TODO: get all voices, not just default voice. | 189 base::win::ScopedComPtr<IEnumSpObjectTokens> voice_tokens; |
176 // http://crbug.com/88059 | 190 unsigned long voice_count; |
177 out_voices->push_back(VoiceData()); | 191 if (S_OK != SpEnumTokens(SPCAT_VOICES, NULL, NULL, voice_tokens.Receive())) |
178 VoiceData& voice = out_voices->back(); | 192 return; |
179 voice.native = true; | 193 if (S_OK != voice_tokens->GetCount(&voice_count)) |
180 voice.name = "native"; | 194 return; |
David Tseng
2013/12/07 00:49:39
Do we want to let the js client know there was an
dmazzoni
2013/12/09 17:12:24
We don't have a mechanism for this currently, i.e.
| |
181 voice.events.insert(TTS_EVENT_START); | 195 |
182 voice.events.insert(TTS_EVENT_END); | 196 for (unsigned i = 0; i < voice_count; i++) { |
183 voice.events.insert(TTS_EVENT_MARKER); | 197 VoiceData voice; |
184 voice.events.insert(TTS_EVENT_WORD); | 198 |
185 voice.events.insert(TTS_EVENT_SENTENCE); | 199 base::win::ScopedComPtr<ISpObjectToken> voice_token; |
186 voice.events.insert(TTS_EVENT_PAUSE); | 200 if (S_OK != voice_tokens->Next(1, voice_token.Receive(), NULL)) |
187 voice.events.insert(TTS_EVENT_RESUME); | 201 return; |
202 | |
203 base::win::ScopedCoMem<WCHAR> description; | |
204 if (S_OK != SpGetDescription(voice_token, &description)) | |
205 continue; | |
206 voice.name = WideToUTF8(description.get()); | |
207 | |
208 base::win::ScopedComPtr<ISpDataKey> attributes; | |
209 if (S_OK != voice_token->OpenKey(kAttributesKey, attributes.Receive())) | |
210 continue; | |
211 | |
212 base::win::ScopedCoMem<WCHAR> gender; | |
213 if (S_OK == attributes->GetStringValue(kGenderValue, &gender)) { | |
214 if (0 == _wcsicmp(gender.get(), L"male")) | |
215 voice.gender = TTS_GENDER_MALE; | |
216 else if (0 == _wcsicmp(gender.get(), L"female")) | |
217 voice.gender = TTS_GENDER_FEMALE; | |
218 } | |
219 | |
220 base::win::ScopedCoMem<WCHAR> language; | |
221 if (S_OK == attributes->GetStringValue(kLanguageValue, &language)) { | |
222 int lcid_value; | |
223 base::HexStringToInt(WideToUTF8(language.get()), &lcid_value); | |
224 LCID lcid = MAKELCID(lcid_value, SORT_DEFAULT); | |
225 WCHAR locale_name[LOCALE_NAME_MAX_LENGTH] = {0}; | |
226 LCIDToLocaleName(lcid, locale_name, LOCALE_NAME_MAX_LENGTH, 0); | |
227 voice.lang = WideToUTF8(locale_name); | |
228 } | |
229 | |
230 voice.native = true; | |
231 voice.events.insert(TTS_EVENT_START); | |
232 voice.events.insert(TTS_EVENT_END); | |
233 voice.events.insert(TTS_EVENT_MARKER); | |
234 voice.events.insert(TTS_EVENT_WORD); | |
235 voice.events.insert(TTS_EVENT_SENTENCE); | |
236 voice.events.insert(TTS_EVENT_PAUSE); | |
237 voice.events.insert(TTS_EVENT_RESUME); | |
238 out_voices->push_back(voice); | |
239 } | |
David Tseng
2013/12/07 00:49:39
Does this fetch voices across multiple tts engines
dmazzoni
2013/12/09 17:12:24
SPCAT_VOICES returns voices from all engines in on
| |
188 } | 240 } |
189 | 241 |
190 void TtsPlatformImplWin::OnSpeechEvent() { | 242 void TtsPlatformImplWin::OnSpeechEvent() { |
191 TtsController* controller = TtsController::GetInstance(); | 243 TtsController* controller = TtsController::GetInstance(); |
192 SPEVENT event; | 244 SPEVENT event; |
193 while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) { | 245 while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) { |
194 if (event.ulStreamNum != stream_number_) | 246 if (event.ulStreamNum != stream_number_) |
195 continue; | 247 continue; |
196 | 248 |
197 switch (event.eEventId) { | 249 switch (event.eEventId) { |
(...skipping 19 matching lines...) Expand all Loading... | |
217 case SPEI_SENTENCE_BOUNDARY: | 269 case SPEI_SENTENCE_BOUNDARY: |
218 char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_; | 270 char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_; |
219 controller->OnTtsEvent( | 271 controller->OnTtsEvent( |
220 utterance_id_, TTS_EVENT_SENTENCE, char_position_, | 272 utterance_id_, TTS_EVENT_SENTENCE, char_position_, |
221 std::string()); | 273 std::string()); |
222 break; | 274 break; |
223 } | 275 } |
224 } | 276 } |
225 } | 277 } |
226 | 278 |
279 void TtsPlatformImplWin::SetVoiceFromName(const std::string& name) { | |
280 if (name.empty()) | |
281 return; | |
282 | |
283 base::win::ScopedComPtr<IEnumSpObjectTokens> voice_tokens; | |
284 unsigned long voice_count; | |
285 if (S_OK != SpEnumTokens(SPCAT_VOICES, NULL, NULL, voice_tokens.Receive())) | |
286 return; | |
287 if (S_OK != voice_tokens->GetCount(&voice_count)) | |
288 return; | |
289 | |
290 for (unsigned i = 0; i < voice_count; i++) { | |
291 base::win::ScopedComPtr<ISpObjectToken> voice_token; | |
292 if (S_OK != voice_tokens->Next(1, voice_token.Receive(), NULL)) | |
293 return; | |
294 | |
295 base::win::ScopedCoMem<WCHAR> description; | |
296 if (S_OK != SpGetDescription(voice_token, &description)) | |
297 continue; | |
298 if (name == WideToUTF8(description.get())) { | |
299 speech_synthesizer_->SetVoice(voice_token); | |
David Tseng
2013/12/07 00:49:39
Did you notice any performance drawbacks with doin
dmazzoni
2013/12/09 17:12:24
I didn't notice anything, but that's a good idea.
| |
300 break; | |
301 } | |
302 } | |
303 } | |
304 | |
227 TtsPlatformImplWin::TtsPlatformImplWin() | 305 TtsPlatformImplWin::TtsPlatformImplWin() |
228 : utterance_id_(0), | 306 : utterance_id_(0), |
229 prefix_len_(0), | 307 prefix_len_(0), |
230 stream_number_(0), | 308 stream_number_(0), |
231 char_position_(0), | 309 char_position_(0), |
232 paused_(false) { | 310 paused_(false) { |
233 speech_synthesizer_.CreateInstance(CLSID_SpVoice); | 311 speech_synthesizer_.CreateInstance(CLSID_SpVoice); |
234 if (speech_synthesizer_.get()) { | 312 if (speech_synthesizer_.get()) { |
235 ULONGLONG event_mask = | 313 ULONGLONG event_mask = |
236 SPFEI(SPEI_START_INPUT_STREAM) | | 314 SPFEI(SPEI_START_INPUT_STREAM) | |
(...skipping 11 matching lines...) Expand all Loading... | |
248 TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() { | 326 TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() { |
249 return Singleton<TtsPlatformImplWin, | 327 return Singleton<TtsPlatformImplWin, |
250 LeakySingletonTraits<TtsPlatformImplWin> >::get(); | 328 LeakySingletonTraits<TtsPlatformImplWin> >::get(); |
251 } | 329 } |
252 | 330 |
253 // static | 331 // static |
254 void TtsPlatformImplWin::SpeechEventCallback( | 332 void TtsPlatformImplWin::SpeechEventCallback( |
255 WPARAM w_param, LPARAM l_param) { | 333 WPARAM w_param, LPARAM l_param) { |
256 GetInstance()->OnSpeechEvent(); | 334 GetInstance()->OnSpeechEvent(); |
257 } | 335 } |
OLD | NEW |