Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include <math.h> | 5 #include <math.h> |
| 6 #include <sapi.h> | 6 #include <sapi.h> |
| 7 #include <sphelper.h> | |
| 7 | 8 |
| 8 #include "base/memory/singleton.h" | 9 #include "base/memory/singleton.h" |
| 9 #include "base/strings/string_number_conversions.h" | 10 #include "base/strings/string_number_conversions.h" |
| 11 #include "base/strings/string_piece.h" | |
| 10 #include "base/strings/utf_string_conversions.h" | 12 #include "base/strings/utf_string_conversions.h" |
| 11 #include "base/values.h" | 13 #include "base/values.h" |
| 14 #include "base/win/scoped_co_mem.h" | |
| 12 #include "base/win/scoped_comptr.h" | 15 #include "base/win/scoped_comptr.h" |
| 13 #include "chrome/browser/speech/tts_controller.h" | 16 #include "chrome/browser/speech/tts_controller.h" |
| 14 #include "chrome/browser/speech/tts_platform.h" | 17 #include "chrome/browser/speech/tts_platform.h" |
| 15 | 18 |
| 19 namespace { | |
| 20 | |
| 21 // ISpObjectToken key and value names. | |
| 22 const wchar_t kAttributesKey[] = L"Attributes"; | |
| 23 const wchar_t kGenderValue[] = L"Gender"; | |
| 24 const wchar_t kLanguageValue[] = L"Language"; | |
| 25 | |
| 26 } // anonymous namespace. | |
| 27 | |
| 16 class TtsPlatformImplWin : public TtsPlatformImpl { | 28 class TtsPlatformImplWin : public TtsPlatformImpl { |
| 17 public: | 29 public: |
| 18 virtual bool PlatformImplAvailable() { | 30 virtual bool PlatformImplAvailable() { |
| 19 return true; | 31 return true; |
| 20 } | 32 } |
| 21 | 33 |
| 22 virtual bool Speak( | 34 virtual bool Speak( |
| 23 int utterance_id, | 35 int utterance_id, |
| 24 const std::string& utterance, | 36 const std::string& utterance, |
| 25 const std::string& lang, | 37 const std::string& lang, |
| (...skipping 14 matching lines...) Expand all Loading... | |
| 40 static TtsPlatformImplWin* GetInstance(); | 52 static TtsPlatformImplWin* GetInstance(); |
| 41 | 53 |
| 42 static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param); | 54 static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param); |
| 43 | 55 |
| 44 private: | 56 private: |
| 45 TtsPlatformImplWin(); | 57 TtsPlatformImplWin(); |
| 46 virtual ~TtsPlatformImplWin() {} | 58 virtual ~TtsPlatformImplWin() {} |
| 47 | 59 |
| 48 void OnSpeechEvent(); | 60 void OnSpeechEvent(); |
| 49 | 61 |
| 62 void SetVoiceFromName(const std::string& name); | |
| 63 | |
| 50 base::win::ScopedComPtr<ISpVoice> speech_synthesizer_; | 64 base::win::ScopedComPtr<ISpVoice> speech_synthesizer_; |
| 51 | 65 |
| 52 // These apply to the current utterance only. | 66 // These apply to the current utterance only. |
| 53 std::wstring utterance_; | 67 std::wstring utterance_; |
| 54 int utterance_id_; | 68 int utterance_id_; |
| 55 int prefix_len_; | 69 int prefix_len_; |
| 56 ULONG stream_number_; | 70 ULONG stream_number_; |
| 57 int char_position_; | 71 int char_position_; |
| 58 bool paused_; | 72 bool paused_; |
| 59 | 73 |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 72 const std::string& src_utterance, | 86 const std::string& src_utterance, |
| 73 const std::string& lang, | 87 const std::string& lang, |
| 74 const VoiceData& voice, | 88 const VoiceData& voice, |
| 75 const UtteranceContinuousParameters& params) { | 89 const UtteranceContinuousParameters& params) { |
| 76 std::wstring prefix; | 90 std::wstring prefix; |
| 77 std::wstring suffix; | 91 std::wstring suffix; |
| 78 | 92 |
| 79 if (!speech_synthesizer_.get()) | 93 if (!speech_synthesizer_.get()) |
| 80 return false; | 94 return false; |
| 81 | 95 |
| 82 // TODO(dmazzoni): support languages other than the default: crbug.com/88059 | 96 SetVoiceFromName(voice.name); |
| 83 | 97 |
| 84 if (params.rate >= 0.0) { | 98 if (params.rate >= 0.0) { |
| 85 // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's | 99 // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's |
| 86 // linear range of -10 to 10: | 100 // linear range of -10 to 10: |
| 87 // 0.1 -> -10 | 101 // 0.1 -> -10 |
| 88 // 1.0 -> 0 | 102 // 1.0 -> 0 |
| 89 // 10.0 -> 10 | 103 // 10.0 -> 10 |
| 90 speech_synthesizer_->SetRate(static_cast<int32>(10 * log10(params.rate))); | 104 speech_synthesizer_->SetRate(static_cast<int32>(10 * log10(params.rate))); |
| 91 } | 105 } |
| 92 | 106 |
| (...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 165 status.dwRunningState == SPRS_IS_SPEAKING) { | 179 status.dwRunningState == SPRS_IS_SPEAKING) { |
| 166 return true; | 180 return true; |
| 167 } | 181 } |
| 168 } | 182 } |
| 169 } | 183 } |
| 170 return false; | 184 return false; |
| 171 } | 185 } |
| 172 | 186 |
| 173 void TtsPlatformImplWin::GetVoices( | 187 void TtsPlatformImplWin::GetVoices( |
| 174 std::vector<VoiceData>* out_voices) { | 188 std::vector<VoiceData>* out_voices) { |
| 175 // TODO: get all voices, not just default voice. | 189 base::win::ScopedComPtr<IEnumSpObjectTokens> voice_tokens; |
| 176 // http://crbug.com/88059 | 190 unsigned long voice_count; |
| 177 out_voices->push_back(VoiceData()); | 191 if (S_OK != SpEnumTokens(SPCAT_VOICES, NULL, NULL, voice_tokens.Receive())) |
| 178 VoiceData& voice = out_voices->back(); | 192 return; |
| 179 voice.native = true; | 193 if (S_OK != voice_tokens->GetCount(&voice_count)) |
| 180 voice.name = "native"; | 194 return; |
|
> **David Tseng** (2013/12/07 00:49:39): Do we want to let the js client know there was an…
>
> **dmazzoni** (2013/12/09 17:12:24): We don't have a mechanism for this currently, i.e.…
| |
| 181 voice.events.insert(TTS_EVENT_START); | 195 |
| 182 voice.events.insert(TTS_EVENT_END); | 196 for (unsigned i = 0; i < voice_count; i++) { |
| 183 voice.events.insert(TTS_EVENT_MARKER); | 197 VoiceData voice; |
| 184 voice.events.insert(TTS_EVENT_WORD); | 198 |
| 185 voice.events.insert(TTS_EVENT_SENTENCE); | 199 base::win::ScopedComPtr<ISpObjectToken> voice_token; |
| 186 voice.events.insert(TTS_EVENT_PAUSE); | 200 if (S_OK != voice_tokens->Next(1, voice_token.Receive(), NULL)) |
| 187 voice.events.insert(TTS_EVENT_RESUME); | 201 return; |
| 202 | |
| 203 base::win::ScopedCoMem<WCHAR> description; | |
| 204 if (S_OK != SpGetDescription(voice_token, &description)) | |
| 205 continue; | |
| 206 voice.name = WideToUTF8(description.get()); | |
| 207 | |
| 208 base::win::ScopedComPtr<ISpDataKey> attributes; | |
| 209 if (S_OK != voice_token->OpenKey(kAttributesKey, attributes.Receive())) | |
| 210 continue; | |
| 211 | |
| 212 base::win::ScopedCoMem<WCHAR> gender; | |
| 213 if (S_OK == attributes->GetStringValue(kGenderValue, &gender)) { | |
| 214 if (0 == _wcsicmp(gender.get(), L"male")) | |
| 215 voice.gender = TTS_GENDER_MALE; | |
| 216 else if (0 == _wcsicmp(gender.get(), L"female")) | |
| 217 voice.gender = TTS_GENDER_FEMALE; | |
| 218 } | |
| 219 | |
| 220 base::win::ScopedCoMem<WCHAR> language; | |
| 221 if (S_OK == attributes->GetStringValue(kLanguageValue, &language)) { | |
| 222 int lcid_value; | |
| 223 base::HexStringToInt(WideToUTF8(language.get()), &lcid_value); | |
| 224 LCID lcid = MAKELCID(lcid_value, SORT_DEFAULT); | |
| 225 WCHAR locale_name[LOCALE_NAME_MAX_LENGTH] = {0}; | |
| 226 LCIDToLocaleName(lcid, locale_name, LOCALE_NAME_MAX_LENGTH, 0); | |
| 227 voice.lang = WideToUTF8(locale_name); | |
| 228 } | |
| 229 | |
| 230 voice.native = true; | |
| 231 voice.events.insert(TTS_EVENT_START); | |
| 232 voice.events.insert(TTS_EVENT_END); | |
| 233 voice.events.insert(TTS_EVENT_MARKER); | |
| 234 voice.events.insert(TTS_EVENT_WORD); | |
| 235 voice.events.insert(TTS_EVENT_SENTENCE); | |
| 236 voice.events.insert(TTS_EVENT_PAUSE); | |
| 237 voice.events.insert(TTS_EVENT_RESUME); | |
| 238 out_voices->push_back(voice); | |
| 239 } | |
|
> **David Tseng** (2013/12/07 00:49:39): Does this fetch voices across multiple tts engines…
>
> **dmazzoni** (2013/12/09 17:12:24): SPCAT_VOICES returns voices from all engines in on…
| |
| 188 } | 240 } |
| 189 | 241 |
| 190 void TtsPlatformImplWin::OnSpeechEvent() { | 242 void TtsPlatformImplWin::OnSpeechEvent() { |
| 191 TtsController* controller = TtsController::GetInstance(); | 243 TtsController* controller = TtsController::GetInstance(); |
| 192 SPEVENT event; | 244 SPEVENT event; |
| 193 while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) { | 245 while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) { |
| 194 if (event.ulStreamNum != stream_number_) | 246 if (event.ulStreamNum != stream_number_) |
| 195 continue; | 247 continue; |
| 196 | 248 |
| 197 switch (event.eEventId) { | 249 switch (event.eEventId) { |
| (...skipping 19 matching lines...) Expand all Loading... | |
| 217 case SPEI_SENTENCE_BOUNDARY: | 269 case SPEI_SENTENCE_BOUNDARY: |
| 218 char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_; | 270 char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_; |
| 219 controller->OnTtsEvent( | 271 controller->OnTtsEvent( |
| 220 utterance_id_, TTS_EVENT_SENTENCE, char_position_, | 272 utterance_id_, TTS_EVENT_SENTENCE, char_position_, |
| 221 std::string()); | 273 std::string()); |
| 222 break; | 274 break; |
| 223 } | 275 } |
| 224 } | 276 } |
| 225 } | 277 } |
| 226 | 278 |
| 279 void TtsPlatformImplWin::SetVoiceFromName(const std::string& name) { | |
| 280 if (name.empty()) | |
| 281 return; | |
| 282 | |
| 283 base::win::ScopedComPtr<IEnumSpObjectTokens> voice_tokens; | |
| 284 unsigned long voice_count; | |
| 285 if (S_OK != SpEnumTokens(SPCAT_VOICES, NULL, NULL, voice_tokens.Receive())) | |
| 286 return; | |
| 287 if (S_OK != voice_tokens->GetCount(&voice_count)) | |
| 288 return; | |
| 289 | |
| 290 for (unsigned i = 0; i < voice_count; i++) { | |
| 291 base::win::ScopedComPtr<ISpObjectToken> voice_token; | |
| 292 if (S_OK != voice_tokens->Next(1, voice_token.Receive(), NULL)) | |
| 293 return; | |
| 294 | |
| 295 base::win::ScopedCoMem<WCHAR> description; | |
| 296 if (S_OK != SpGetDescription(voice_token, &description)) | |
| 297 continue; | |
| 298 if (name == WideToUTF8(description.get())) { | |
| 299 speech_synthesizer_->SetVoice(voice_token); | |
|
> **David Tseng** (2013/12/07 00:49:39): Did you notice any performance drawbacks with doin…
>
> **dmazzoni** (2013/12/09 17:12:24): I didn't notice anything, but that's a good idea.
| |
| 300 break; | |
| 301 } | |
| 302 } | |
| 303 } | |
| 304 | |
| 227 TtsPlatformImplWin::TtsPlatformImplWin() | 305 TtsPlatformImplWin::TtsPlatformImplWin() |
| 228 : utterance_id_(0), | 306 : utterance_id_(0), |
| 229 prefix_len_(0), | 307 prefix_len_(0), |
| 230 stream_number_(0), | 308 stream_number_(0), |
| 231 char_position_(0), | 309 char_position_(0), |
| 232 paused_(false) { | 310 paused_(false) { |
| 233 speech_synthesizer_.CreateInstance(CLSID_SpVoice); | 311 speech_synthesizer_.CreateInstance(CLSID_SpVoice); |
| 234 if (speech_synthesizer_.get()) { | 312 if (speech_synthesizer_.get()) { |
| 235 ULONGLONG event_mask = | 313 ULONGLONG event_mask = |
| 236 SPFEI(SPEI_START_INPUT_STREAM) | | 314 SPFEI(SPEI_START_INPUT_STREAM) | |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 248 TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() { | 326 TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() { |
| 249 return Singleton<TtsPlatformImplWin, | 327 return Singleton<TtsPlatformImplWin, |
| 250 LeakySingletonTraits<TtsPlatformImplWin> >::get(); | 328 LeakySingletonTraits<TtsPlatformImplWin> >::get(); |
| 251 } | 329 } |
| 252 | 330 |
| 253 // static | 331 // static |
| 254 void TtsPlatformImplWin::SpeechEventCallback( | 332 void TtsPlatformImplWin::SpeechEventCallback( |
| 255 WPARAM w_param, LPARAM l_param) { | 333 WPARAM w_param, LPARAM l_param) { |
| 256 GetInstance()->OnSpeechEvent(); | 334 GetInstance()->OnSpeechEvent(); |
| 257 } | 335 } |
| OLD | NEW |