OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <math.h> | 5 #include <math.h> |
6 #include <sapi.h> | 6 #include <sapi.h> |
| 7 #include <sphelper.h> |
7 #include <stdint.h> | 8 #include <stdint.h> |
8 | 9 |
9 #include "base/macros.h" | 10 #include "base/macros.h" |
10 #include "base/memory/singleton.h" | 11 #include "base/memory/singleton.h" |
11 #include "base/strings/string_number_conversions.h" | 12 #include "base/strings/string_number_conversions.h" |
| 13 #include "base/strings/string_piece.h" |
12 #include "base/strings/utf_string_conversions.h" | 14 #include "base/strings/utf_string_conversions.h" |
13 #include "base/values.h" | 15 #include "base/values.h" |
| 16 #include "base/win/scoped_co_mem.h" |
14 #include "base/win/scoped_comptr.h" | 17 #include "base/win/scoped_comptr.h" |
15 #include "chrome/browser/speech/tts_controller.h" | 18 #include "chrome/browser/speech/tts_controller.h" |
16 #include "chrome/browser/speech/tts_platform.h" | 19 #include "chrome/browser/speech/tts_platform.h" |
17 | 20 |
namespace {

// Names used when reading voice metadata from an ISpObjectToken.

// Sub-key of a voice token under which its attributes are stored.
const wchar_t kAttributesKey[] = L"Attributes";

// Attribute value names looked up inside the attributes sub-key.
const wchar_t kGenderValue[] = L"Gender";
const wchar_t kLanguageValue[] = L"Language";

}  // anonymous namespace.
| 29 |
18 class TtsPlatformImplWin : public TtsPlatformImpl { | 30 class TtsPlatformImplWin : public TtsPlatformImpl { |
19 public: | 31 public: |
20 bool PlatformImplAvailable() override { | 32 bool PlatformImplAvailable() override { |
21 return true; | 33 return true; |
22 } | 34 } |
23 | 35 |
24 bool Speak( | 36 bool Speak( |
25 int utterance_id, | 37 int utterance_id, |
26 const std::string& utterance, | 38 const std::string& utterance, |
27 const std::string& lang, | 39 const std::string& lang, |
(...skipping 14 matching lines...) Expand all Loading... |
42 static TtsPlatformImplWin* GetInstance(); | 54 static TtsPlatformImplWin* GetInstance(); |
43 | 55 |
44 static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param); | 56 static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param); |
45 | 57 |
46 private: | 58 private: |
47 TtsPlatformImplWin(); | 59 TtsPlatformImplWin(); |
48 ~TtsPlatformImplWin() override {} | 60 ~TtsPlatformImplWin() override {} |
49 | 61 |
50 void OnSpeechEvent(); | 62 void OnSpeechEvent(); |
51 | 63 |
| 64 void SetVoiceFromName(const std::string& name); |
| 65 |
52 base::win::ScopedComPtr<ISpVoice> speech_synthesizer_; | 66 base::win::ScopedComPtr<ISpVoice> speech_synthesizer_; |
53 | 67 |
54 // These apply to the current utterance only. | 68 // These apply to the current utterance only. |
55 std::wstring utterance_; | 69 std::wstring utterance_; |
56 int utterance_id_; | 70 int utterance_id_; |
57 int prefix_len_; | 71 int prefix_len_; |
58 ULONG stream_number_; | 72 ULONG stream_number_; |
59 int char_position_; | 73 int char_position_; |
60 bool paused_; | 74 bool paused_; |
| 75 std::string last_voice_name_; |
61 | 76 |
62 friend struct base::DefaultSingletonTraits<TtsPlatformImplWin>; | 77 friend struct base::DefaultSingletonTraits<TtsPlatformImplWin>; |
63 | 78 |
64 DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplWin); | 79 DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplWin); |
65 }; | 80 }; |
66 | 81 |
67 // static | 82 // static |
68 TtsPlatformImpl* TtsPlatformImpl::GetInstance() { | 83 TtsPlatformImpl* TtsPlatformImpl::GetInstance() { |
69 return TtsPlatformImplWin::GetInstance(); | 84 return TtsPlatformImplWin::GetInstance(); |
70 } | 85 } |
71 | 86 |
72 bool TtsPlatformImplWin::Speak( | 87 bool TtsPlatformImplWin::Speak( |
73 int utterance_id, | 88 int utterance_id, |
74 const std::string& src_utterance, | 89 const std::string& src_utterance, |
75 const std::string& lang, | 90 const std::string& lang, |
76 const VoiceData& voice, | 91 const VoiceData& voice, |
77 const UtteranceContinuousParameters& params) { | 92 const UtteranceContinuousParameters& params) { |
78 std::wstring prefix; | 93 std::wstring prefix; |
79 std::wstring suffix; | 94 std::wstring suffix; |
80 | 95 |
81 if (!speech_synthesizer_.get()) | 96 if (!speech_synthesizer_.get()) |
82 return false; | 97 return false; |
83 | 98 |
84 // TODO(dmazzoni): support languages other than the default: crbug.com/88059 | 99 SetVoiceFromName(voice.name); |
85 | 100 |
86 if (params.rate >= 0.0) { | 101 if (params.rate >= 0.0) { |
87 // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's | 102 // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's |
88 // linear range of -10 to 10: | 103 // linear range of -10 to 10: |
89 // 0.1 -> -10 | 104 // 0.1 -> -10 |
90 // 1.0 -> 0 | 105 // 1.0 -> 0 |
91 // 10.0 -> 10 | 106 // 10.0 -> 10 |
92 speech_synthesizer_->SetRate(static_cast<int32_t>(10 * log10(params.rate))); | 107 speech_synthesizer_->SetRate(static_cast<int32_t>(10 * log10(params.rate))); |
93 } | 108 } |
94 | 109 |
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
167 status.dwRunningState == SPRS_IS_SPEAKING) { | 182 status.dwRunningState == SPRS_IS_SPEAKING) { |
168 return true; | 183 return true; |
169 } | 184 } |
170 } | 185 } |
171 } | 186 } |
172 return false; | 187 return false; |
173 } | 188 } |
174 | 189 |
175 void TtsPlatformImplWin::GetVoices( | 190 void TtsPlatformImplWin::GetVoices( |
176 std::vector<VoiceData>* out_voices) { | 191 std::vector<VoiceData>* out_voices) { |
177 // TODO: get all voices, not just default voice. | 192 base::win::ScopedComPtr<IEnumSpObjectTokens> voice_tokens; |
178 // http://crbug.com/88059 | 193 unsigned long voice_count; |
179 out_voices->push_back(VoiceData()); | 194 if (S_OK != SpEnumTokens(SPCAT_VOICES, NULL, NULL, voice_tokens.Receive())) |
180 VoiceData& voice = out_voices->back(); | 195 return; |
181 voice.native = true; | 196 if (S_OK != voice_tokens->GetCount(&voice_count)) |
182 voice.name = "native"; | 197 return; |
183 voice.events.insert(TTS_EVENT_START); | 198 |
184 voice.events.insert(TTS_EVENT_END); | 199 for (unsigned i = 0; i < voice_count; i++) { |
185 voice.events.insert(TTS_EVENT_MARKER); | 200 VoiceData voice; |
186 voice.events.insert(TTS_EVENT_WORD); | 201 |
187 voice.events.insert(TTS_EVENT_SENTENCE); | 202 base::win::ScopedComPtr<ISpObjectToken> voice_token; |
188 voice.events.insert(TTS_EVENT_PAUSE); | 203 if (S_OK != voice_tokens->Next(1, voice_token.Receive(), NULL)) |
189 voice.events.insert(TTS_EVENT_RESUME); | 204 return; |
| 205 |
| 206 base::win::ScopedCoMem<WCHAR> description; |
| 207 if (S_OK != SpGetDescription(voice_token.get(), &description)) |
| 208 continue; |
| 209 voice.name = base::WideToUTF8(description.get()); |
| 210 |
| 211 base::win::ScopedComPtr<ISpDataKey> attributes; |
| 212 if (S_OK != voice_token->OpenKey(kAttributesKey, attributes.Receive())) |
| 213 continue; |
| 214 |
| 215 base::win::ScopedCoMem<WCHAR> gender; |
| 216 if (S_OK == attributes->GetStringValue(kGenderValue, &gender)) { |
| 217 if (0 == _wcsicmp(gender.get(), L"male")) |
| 218 voice.gender = TTS_GENDER_MALE; |
| 219 else if (0 == _wcsicmp(gender.get(), L"female")) |
| 220 voice.gender = TTS_GENDER_FEMALE; |
| 221 } |
| 222 |
| 223 base::win::ScopedCoMem<WCHAR> language; |
| 224 if (S_OK == attributes->GetStringValue(kLanguageValue, &language)) { |
| 225 int lcid_value; |
| 226 base::HexStringToInt(base::WideToUTF8(language.get()), &lcid_value); |
| 227 LCID lcid = MAKELCID(lcid_value, SORT_DEFAULT); |
| 228 WCHAR locale_name[LOCALE_NAME_MAX_LENGTH] = {0}; |
| 229 LCIDToLocaleName(lcid, locale_name, LOCALE_NAME_MAX_LENGTH, 0); |
| 230 voice.lang = base::WideToUTF8(locale_name); |
| 231 } |
| 232 |
| 233 voice.native = true; |
| 234 voice.events.insert(TTS_EVENT_START); |
| 235 voice.events.insert(TTS_EVENT_END); |
| 236 voice.events.insert(TTS_EVENT_MARKER); |
| 237 voice.events.insert(TTS_EVENT_WORD); |
| 238 voice.events.insert(TTS_EVENT_SENTENCE); |
| 239 voice.events.insert(TTS_EVENT_PAUSE); |
| 240 voice.events.insert(TTS_EVENT_RESUME); |
| 241 out_voices->push_back(voice); |
| 242 } |
190 } | 243 } |
191 | 244 |
192 void TtsPlatformImplWin::OnSpeechEvent() { | 245 void TtsPlatformImplWin::OnSpeechEvent() { |
193 TtsController* controller = TtsController::GetInstance(); | 246 TtsController* controller = TtsController::GetInstance(); |
194 SPEVENT event; | 247 SPEVENT event; |
195 while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) { | 248 while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) { |
196 if (event.ulStreamNum != stream_number_) | 249 if (event.ulStreamNum != stream_number_) |
197 continue; | 250 continue; |
198 | 251 |
199 switch (event.eEventId) { | 252 switch (event.eEventId) { |
(...skipping 21 matching lines...) Expand all Loading... |
221 controller->OnTtsEvent( | 274 controller->OnTtsEvent( |
222 utterance_id_, TTS_EVENT_SENTENCE, char_position_, | 275 utterance_id_, TTS_EVENT_SENTENCE, char_position_, |
223 std::string()); | 276 std::string()); |
224 break; | 277 break; |
225 default: | 278 default: |
226 break; | 279 break; |
227 } | 280 } |
228 } | 281 } |
229 } | 282 } |
230 | 283 |
| 284 void TtsPlatformImplWin::SetVoiceFromName(const std::string& name) { |
| 285 if (name.empty() || name == last_voice_name_) |
| 286 return; |
| 287 |
| 288 last_voice_name_ = name; |
| 289 |
| 290 base::win::ScopedComPtr<IEnumSpObjectTokens> voice_tokens; |
| 291 unsigned long voice_count; |
| 292 if (S_OK != SpEnumTokens(SPCAT_VOICES, NULL, NULL, voice_tokens.Receive())) |
| 293 return; |
| 294 if (S_OK != voice_tokens->GetCount(&voice_count)) |
| 295 return; |
| 296 |
| 297 for (unsigned i = 0; i < voice_count; i++) { |
| 298 base::win::ScopedComPtr<ISpObjectToken> voice_token; |
| 299 if (S_OK != voice_tokens->Next(1, voice_token.Receive(), NULL)) |
| 300 return; |
| 301 |
| 302 base::win::ScopedCoMem<WCHAR> description; |
| 303 if (S_OK != SpGetDescription(voice_token.get(), &description)) |
| 304 continue; |
| 305 if (name == base::WideToUTF8(description.get())) { |
| 306 speech_synthesizer_->SetVoice(voice_token.get()); |
| 307 break; |
| 308 } |
| 309 } |
| 310 } |
| 311 |
231 TtsPlatformImplWin::TtsPlatformImplWin() | 312 TtsPlatformImplWin::TtsPlatformImplWin() |
232 : utterance_id_(0), | 313 : utterance_id_(0), |
233 prefix_len_(0), | 314 prefix_len_(0), |
234 stream_number_(0), | 315 stream_number_(0), |
235 char_position_(0), | 316 char_position_(0), |
236 paused_(false) { | 317 paused_(false) { |
237 speech_synthesizer_.CreateInstance(CLSID_SpVoice); | 318 speech_synthesizer_.CreateInstance(CLSID_SpVoice); |
238 if (speech_synthesizer_.get()) { | 319 if (speech_synthesizer_.get()) { |
239 ULONGLONG event_mask = | 320 ULONGLONG event_mask = |
240 SPFEI(SPEI_START_INPUT_STREAM) | | 321 SPFEI(SPEI_START_INPUT_STREAM) | |
(...skipping 11 matching lines...) Expand all Loading... |
252 TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() { | 333 TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() { |
253 return base::Singleton<TtsPlatformImplWin, | 334 return base::Singleton<TtsPlatformImplWin, |
254 base::LeakySingletonTraits<TtsPlatformImplWin>>::get(); | 335 base::LeakySingletonTraits<TtsPlatformImplWin>>::get(); |
255 } | 336 } |
256 | 337 |
257 // static | 338 // static |
258 void TtsPlatformImplWin::SpeechEventCallback( | 339 void TtsPlatformImplWin::SpeechEventCallback( |
259 WPARAM w_param, LPARAM l_param) { | 340 WPARAM w_param, LPARAM l_param) { |
260 GetInstance()->OnSpeechEvent(); | 341 GetInstance()->OnSpeechEvent(); |
261 } | 342 } |
OLD | NEW |