Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(143)

Side by Side Diff: chrome/browser/speech/tts_win.cc

Issue 97793002: Support multiple TTS voices on Windows. (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Don't change voice if it hasn't changed Created 7 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <math.h> 5 #include <math.h>
6 #include <sapi.h> 6 #include <sapi.h>
7 #include <sphelper.h>
7 8
8 #include "base/memory/singleton.h" 9 #include "base/memory/singleton.h"
9 #include "base/strings/string_number_conversions.h" 10 #include "base/strings/string_number_conversions.h"
11 #include "base/strings/string_piece.h"
10 #include "base/strings/utf_string_conversions.h" 12 #include "base/strings/utf_string_conversions.h"
11 #include "base/values.h" 13 #include "base/values.h"
14 #include "base/win/scoped_co_mem.h"
12 #include "base/win/scoped_comptr.h" 15 #include "base/win/scoped_comptr.h"
13 #include "chrome/browser/speech/tts_controller.h" 16 #include "chrome/browser/speech/tts_controller.h"
14 #include "chrome/browser/speech/tts_platform.h" 17 #include "chrome/browser/speech/tts_platform.h"
15 18
19 namespace {
20
21 // ISpObjectToken key and value names.
22 const wchar_t kAttributesKey[] = L"Attributes";
23 const wchar_t kGenderValue[] = L"Gender";
24 const wchar_t kLanguageValue[] = L"Language";
25
26 } // anonymous namespace.
27
16 class TtsPlatformImplWin : public TtsPlatformImpl { 28 class TtsPlatformImplWin : public TtsPlatformImpl {
17 public: 29 public:
18 virtual bool PlatformImplAvailable() { 30 virtual bool PlatformImplAvailable() {
19 return true; 31 return true;
20 } 32 }
21 33
22 virtual bool Speak( 34 virtual bool Speak(
23 int utterance_id, 35 int utterance_id,
24 const std::string& utterance, 36 const std::string& utterance,
25 const std::string& lang, 37 const std::string& lang,
(...skipping 14 matching lines...) Expand all
40 static TtsPlatformImplWin* GetInstance(); 52 static TtsPlatformImplWin* GetInstance();
41 53
42 static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param); 54 static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param);
43 55
44 private: 56 private:
45 TtsPlatformImplWin(); 57 TtsPlatformImplWin();
46 virtual ~TtsPlatformImplWin() {} 58 virtual ~TtsPlatformImplWin() {}
47 59
48 void OnSpeechEvent(); 60 void OnSpeechEvent();
49 61
62 void SetVoiceFromName(const std::string& name);
63
50 base::win::ScopedComPtr<ISpVoice> speech_synthesizer_; 64 base::win::ScopedComPtr<ISpVoice> speech_synthesizer_;
51 65
52 // These apply to the current utterance only. 66 // These apply to the current utterance only.
53 std::wstring utterance_; 67 std::wstring utterance_;
54 int utterance_id_; 68 int utterance_id_;
55 int prefix_len_; 69 int prefix_len_;
56 ULONG stream_number_; 70 ULONG stream_number_;
57 int char_position_; 71 int char_position_;
58 bool paused_; 72 bool paused_;
73 std::string last_voice_name_;
59 74
60 friend struct DefaultSingletonTraits<TtsPlatformImplWin>; 75 friend struct DefaultSingletonTraits<TtsPlatformImplWin>;
61 76
62 DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplWin); 77 DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplWin);
63 }; 78 };
64 79
65 // static 80 // static
66 TtsPlatformImpl* TtsPlatformImpl::GetInstance() { 81 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
67 return TtsPlatformImplWin::GetInstance(); 82 return TtsPlatformImplWin::GetInstance();
68 } 83 }
69 84
70 bool TtsPlatformImplWin::Speak( 85 bool TtsPlatformImplWin::Speak(
71 int utterance_id, 86 int utterance_id,
72 const std::string& src_utterance, 87 const std::string& src_utterance,
73 const std::string& lang, 88 const std::string& lang,
74 const VoiceData& voice, 89 const VoiceData& voice,
75 const UtteranceContinuousParameters& params) { 90 const UtteranceContinuousParameters& params) {
76 std::wstring prefix; 91 std::wstring prefix;
77 std::wstring suffix; 92 std::wstring suffix;
78 93
79 if (!speech_synthesizer_.get()) 94 if (!speech_synthesizer_.get())
80 return false; 95 return false;
81 96
82 // TODO(dmazzoni): support languages other than the default: crbug.com/88059 97 SetVoiceFromName(voice.name);
83 98
84 if (params.rate >= 0.0) { 99 if (params.rate >= 0.0) {
85 // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's 100 // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's
86 // linear range of -10 to 10: 101 // linear range of -10 to 10:
87 // 0.1 -> -10 102 // 0.1 -> -10
88 // 1.0 -> 0 103 // 1.0 -> 0
89 // 10.0 -> 10 104 // 10.0 -> 10
90 speech_synthesizer_->SetRate(static_cast<int32>(10 * log10(params.rate))); 105 speech_synthesizer_->SetRate(static_cast<int32>(10 * log10(params.rate)));
91 } 106 }
92 107
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
165 status.dwRunningState == SPRS_IS_SPEAKING) { 180 status.dwRunningState == SPRS_IS_SPEAKING) {
166 return true; 181 return true;
167 } 182 }
168 } 183 }
169 } 184 }
170 return false; 185 return false;
171 } 186 }
172 187
173 void TtsPlatformImplWin::GetVoices( 188 void TtsPlatformImplWin::GetVoices(
174 std::vector<VoiceData>* out_voices) { 189 std::vector<VoiceData>* out_voices) {
175 // TODO: get all voices, not just default voice. 190 base::win::ScopedComPtr<IEnumSpObjectTokens> voice_tokens;
176 // http://crbug.com/88059 191 unsigned long voice_count;
177 out_voices->push_back(VoiceData()); 192 if (S_OK != SpEnumTokens(SPCAT_VOICES, NULL, NULL, voice_tokens.Receive()))
178 VoiceData& voice = out_voices->back(); 193 return;
179 voice.native = true; 194 if (S_OK != voice_tokens->GetCount(&voice_count))
180 voice.name = "native"; 195 return;
181 voice.events.insert(TTS_EVENT_START); 196
182 voice.events.insert(TTS_EVENT_END); 197 for (unsigned i = 0; i < voice_count; i++) {
183 voice.events.insert(TTS_EVENT_MARKER); 198 VoiceData voice;
184 voice.events.insert(TTS_EVENT_WORD); 199
185 voice.events.insert(TTS_EVENT_SENTENCE); 200 base::win::ScopedComPtr<ISpObjectToken> voice_token;
186 voice.events.insert(TTS_EVENT_PAUSE); 201 if (S_OK != voice_tokens->Next(1, voice_token.Receive(), NULL))
187 voice.events.insert(TTS_EVENT_RESUME); 202 return;
203
204 base::win::ScopedCoMem<WCHAR> description;
205 if (S_OK != SpGetDescription(voice_token, &description))
206 continue;
207 voice.name = WideToUTF8(description.get());
208
209 base::win::ScopedComPtr<ISpDataKey> attributes;
210 if (S_OK != voice_token->OpenKey(kAttributesKey, attributes.Receive()))
211 continue;
212
213 base::win::ScopedCoMem<WCHAR> gender;
214 if (S_OK == attributes->GetStringValue(kGenderValue, &gender)) {
215 if (0 == _wcsicmp(gender.get(), L"male"))
216 voice.gender = TTS_GENDER_MALE;
217 else if (0 == _wcsicmp(gender.get(), L"female"))
218 voice.gender = TTS_GENDER_FEMALE;
219 }
220
221 base::win::ScopedCoMem<WCHAR> language;
222 if (S_OK == attributes->GetStringValue(kLanguageValue, &language)) {
223 int lcid_value;
224 base::HexStringToInt(WideToUTF8(language.get()), &lcid_value);
225 LCID lcid = MAKELCID(lcid_value, SORT_DEFAULT);
226 WCHAR locale_name[LOCALE_NAME_MAX_LENGTH] = {0};
227 LCIDToLocaleName(lcid, locale_name, LOCALE_NAME_MAX_LENGTH, 0);
228 voice.lang = WideToUTF8(locale_name);
229 }
230
231 voice.native = true;
232 voice.events.insert(TTS_EVENT_START);
233 voice.events.insert(TTS_EVENT_END);
234 voice.events.insert(TTS_EVENT_MARKER);
235 voice.events.insert(TTS_EVENT_WORD);
236 voice.events.insert(TTS_EVENT_SENTENCE);
237 voice.events.insert(TTS_EVENT_PAUSE);
238 voice.events.insert(TTS_EVENT_RESUME);
239 out_voices->push_back(voice);
240 }
188 } 241 }
189 242
190 void TtsPlatformImplWin::OnSpeechEvent() { 243 void TtsPlatformImplWin::OnSpeechEvent() {
191 TtsController* controller = TtsController::GetInstance(); 244 TtsController* controller = TtsController::GetInstance();
192 SPEVENT event; 245 SPEVENT event;
193 while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) { 246 while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) {
194 if (event.ulStreamNum != stream_number_) 247 if (event.ulStreamNum != stream_number_)
195 continue; 248 continue;
196 249
197 switch (event.eEventId) { 250 switch (event.eEventId) {
(...skipping 19 matching lines...) Expand all
217 case SPEI_SENTENCE_BOUNDARY: 270 case SPEI_SENTENCE_BOUNDARY:
218 char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_; 271 char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;
219 controller->OnTtsEvent( 272 controller->OnTtsEvent(
220 utterance_id_, TTS_EVENT_SENTENCE, char_position_, 273 utterance_id_, TTS_EVENT_SENTENCE, char_position_,
221 std::string()); 274 std::string());
222 break; 275 break;
223 } 276 }
224 } 277 }
225 } 278 }
226 279
280 void TtsPlatformImplWin::SetVoiceFromName(const std::string& name) {
281 if (name.empty() || name == last_voice_name_)
282 return;
283
284 last_voice_name_ = name;
285
286 base::win::ScopedComPtr<IEnumSpObjectTokens> voice_tokens;
287 unsigned long voice_count;
288 if (S_OK != SpEnumTokens(SPCAT_VOICES, NULL, NULL, voice_tokens.Receive()))
289 return;
290 if (S_OK != voice_tokens->GetCount(&voice_count))
291 return;
292
293 for (unsigned i = 0; i < voice_count; i++) {
294 base::win::ScopedComPtr<ISpObjectToken> voice_token;
295 if (S_OK != voice_tokens->Next(1, voice_token.Receive(), NULL))
296 return;
297
298 base::win::ScopedCoMem<WCHAR> description;
299 if (S_OK != SpGetDescription(voice_token, &description))
300 continue;
301 if (name == WideToUTF8(description.get())) {
302 speech_synthesizer_->SetVoice(voice_token);
303 break;
304 }
305 }
306 }
307
227 TtsPlatformImplWin::TtsPlatformImplWin() 308 TtsPlatformImplWin::TtsPlatformImplWin()
228 : utterance_id_(0), 309 : utterance_id_(0),
229 prefix_len_(0), 310 prefix_len_(0),
230 stream_number_(0), 311 stream_number_(0),
231 char_position_(0), 312 char_position_(0),
232 paused_(false) { 313 paused_(false) {
233 speech_synthesizer_.CreateInstance(CLSID_SpVoice); 314 speech_synthesizer_.CreateInstance(CLSID_SpVoice);
234 if (speech_synthesizer_.get()) { 315 if (speech_synthesizer_.get()) {
235 ULONGLONG event_mask = 316 ULONGLONG event_mask =
236 SPFEI(SPEI_START_INPUT_STREAM) | 317 SPFEI(SPEI_START_INPUT_STREAM) |
(...skipping 11 matching lines...) Expand all
248 TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() { 329 TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() {
249 return Singleton<TtsPlatformImplWin, 330 return Singleton<TtsPlatformImplWin,
250 LeakySingletonTraits<TtsPlatformImplWin> >::get(); 331 LeakySingletonTraits<TtsPlatformImplWin> >::get();
251 } 332 }
252 333
253 // static 334 // static
254 void TtsPlatformImplWin::SpeechEventCallback( 335 void TtsPlatformImplWin::SpeechEventCallback(
255 WPARAM w_param, LPARAM l_param) { 336 WPARAM w_param, LPARAM l_param) {
256 GetInstance()->OnSpeechEvent(); 337 GetInstance()->OnSpeechEvent();
257 } 338 }
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698