Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(576)

Side by Side Diff: chrome/browser/speech/tts_win.cc

Issue 2695813007: Reland Support multiple TTS voices on Windows. (Closed)
Patch Set: Fix syntax error Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <math.h> 5 #include <math.h>
6 #include <sapi.h> 6 #include <sapi.h>
7 #include <sphelper.h>
7 #include <stdint.h> 8 #include <stdint.h>
8 9
9 #include "base/macros.h" 10 #include "base/macros.h"
10 #include "base/memory/singleton.h" 11 #include "base/memory/singleton.h"
11 #include "base/strings/string_number_conversions.h" 12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/string_piece.h"
12 #include "base/strings/utf_string_conversions.h" 14 #include "base/strings/utf_string_conversions.h"
13 #include "base/values.h" 15 #include "base/values.h"
16 #include "base/win/scoped_co_mem.h"
14 #include "base/win/scoped_comptr.h" 17 #include "base/win/scoped_comptr.h"
15 #include "chrome/browser/speech/tts_controller.h" 18 #include "chrome/browser/speech/tts_controller.h"
16 #include "chrome/browser/speech/tts_platform.h" 19 #include "chrome/browser/speech/tts_platform.h"
17 20
21 namespace {
22
23 // ISpObjectToken key and value names.
24 const wchar_t kAttributesKey[] = L"Attributes";
25 const wchar_t kGenderValue[] = L"Gender";
26 const wchar_t kLanguageValue[] = L"Language";
27
28 } // anonymous namespace.
29
18 class TtsPlatformImplWin : public TtsPlatformImpl { 30 class TtsPlatformImplWin : public TtsPlatformImpl {
19 public: 31 public:
20 bool PlatformImplAvailable() override { 32 bool PlatformImplAvailable() override {
21 return true; 33 return true;
22 } 34 }
23 35
24 bool Speak( 36 bool Speak(
25 int utterance_id, 37 int utterance_id,
26 const std::string& utterance, 38 const std::string& utterance,
27 const std::string& lang, 39 const std::string& lang,
(...skipping 14 matching lines...) Expand all
42 static TtsPlatformImplWin* GetInstance(); 54 static TtsPlatformImplWin* GetInstance();
43 55
44 static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param); 56 static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param);
45 57
46 private: 58 private:
47 TtsPlatformImplWin(); 59 TtsPlatformImplWin();
48 ~TtsPlatformImplWin() override {} 60 ~TtsPlatformImplWin() override {}
49 61
50 void OnSpeechEvent(); 62 void OnSpeechEvent();
51 63
64 void SetVoiceFromName(const std::string& name);
65
52 base::win::ScopedComPtr<ISpVoice> speech_synthesizer_; 66 base::win::ScopedComPtr<ISpVoice> speech_synthesizer_;
53 67
54 // These apply to the current utterance only. 68 // These apply to the current utterance only.
55 std::wstring utterance_; 69 std::wstring utterance_;
56 int utterance_id_; 70 int utterance_id_;
57 int prefix_len_; 71 int prefix_len_;
58 ULONG stream_number_; 72 ULONG stream_number_;
59 int char_position_; 73 int char_position_;
60 bool paused_; 74 bool paused_;
75 std::string last_voice_name_;
61 76
62 friend struct base::DefaultSingletonTraits<TtsPlatformImplWin>; 77 friend struct base::DefaultSingletonTraits<TtsPlatformImplWin>;
63 78
64 DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplWin); 79 DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplWin);
65 }; 80 };
66 81
67 // static 82 // static
68 TtsPlatformImpl* TtsPlatformImpl::GetInstance() { 83 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
69 return TtsPlatformImplWin::GetInstance(); 84 return TtsPlatformImplWin::GetInstance();
70 } 85 }
71 86
72 bool TtsPlatformImplWin::Speak( 87 bool TtsPlatformImplWin::Speak(
73 int utterance_id, 88 int utterance_id,
74 const std::string& src_utterance, 89 const std::string& src_utterance,
75 const std::string& lang, 90 const std::string& lang,
76 const VoiceData& voice, 91 const VoiceData& voice,
77 const UtteranceContinuousParameters& params) { 92 const UtteranceContinuousParameters& params) {
78 std::wstring prefix; 93 std::wstring prefix;
79 std::wstring suffix; 94 std::wstring suffix;
80 95
81 if (!speech_synthesizer_.get()) 96 if (!speech_synthesizer_.get())
82 return false; 97 return false;
83 98
84 // TODO(dmazzoni): support languages other than the default: crbug.com/88059 99 SetVoiceFromName(voice.name);
85 100
86 if (params.rate >= 0.0) { 101 if (params.rate >= 0.0) {
87 // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's 102 // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's
88 // linear range of -10 to 10: 103 // linear range of -10 to 10:
89 // 0.1 -> -10 104 // 0.1 -> -10
90 // 1.0 -> 0 105 // 1.0 -> 0
91 // 10.0 -> 10 106 // 10.0 -> 10
92 speech_synthesizer_->SetRate(static_cast<int32_t>(10 * log10(params.rate))); 107 speech_synthesizer_->SetRate(static_cast<int32_t>(10 * log10(params.rate)));
93 } 108 }
94 109
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
167 status.dwRunningState == SPRS_IS_SPEAKING) { 182 status.dwRunningState == SPRS_IS_SPEAKING) {
168 return true; 183 return true;
169 } 184 }
170 } 185 }
171 } 186 }
172 return false; 187 return false;
173 } 188 }
174 189
175 void TtsPlatformImplWin::GetVoices( 190 void TtsPlatformImplWin::GetVoices(
176 std::vector<VoiceData>* out_voices) { 191 std::vector<VoiceData>* out_voices) {
177 // TODO: get all voices, not just default voice. 192 base::win::ScopedComPtr<IEnumSpObjectTokens> voice_tokens;
178 // http://crbug.com/88059 193 unsigned long voice_count;
179 out_voices->push_back(VoiceData()); 194 if (S_OK != SpEnumTokens(SPCAT_VOICES, NULL, NULL, voice_tokens.Receive()))
180 VoiceData& voice = out_voices->back(); 195 return;
181 voice.native = true; 196 if (S_OK != voice_tokens->GetCount(&voice_count))
182 voice.name = "native"; 197 return;
183 voice.events.insert(TTS_EVENT_START); 198
184 voice.events.insert(TTS_EVENT_END); 199 for (unsigned i = 0; i < voice_count; i++) {
185 voice.events.insert(TTS_EVENT_MARKER); 200 VoiceData voice;
186 voice.events.insert(TTS_EVENT_WORD); 201
187 voice.events.insert(TTS_EVENT_SENTENCE); 202 base::win::ScopedComPtr<ISpObjectToken> voice_token;
188 voice.events.insert(TTS_EVENT_PAUSE); 203 if (S_OK != voice_tokens->Next(1, voice_token.Receive(), NULL))
189 voice.events.insert(TTS_EVENT_RESUME); 204 return;
205
206 base::win::ScopedCoMem<WCHAR> description;
207 if (S_OK != SpGetDescription(voice_token.get(), &description))
208 continue;
209 voice.name = base::WideToUTF8(description.get());
210
211 base::win::ScopedComPtr<ISpDataKey> attributes;
212 if (S_OK != voice_token->OpenKey(kAttributesKey, attributes.Receive()))
213 continue;
214
215 base::win::ScopedCoMem<WCHAR> gender;
216 if (S_OK == attributes->GetStringValue(kGenderValue, &gender)) {
217 if (0 == _wcsicmp(gender.get(), L"male"))
218 voice.gender = TTS_GENDER_MALE;
219 else if (0 == _wcsicmp(gender.get(), L"female"))
220 voice.gender = TTS_GENDER_FEMALE;
221 }
222
223 base::win::ScopedCoMem<WCHAR> language;
224 if (S_OK == attributes->GetStringValue(kLanguageValue, &language)) {
225 int lcid_value;
226 base::HexStringToInt(base::WideToUTF8(language.get()), &lcid_value);
227 LCID lcid = MAKELCID(lcid_value, SORT_DEFAULT);
228 WCHAR locale_name[LOCALE_NAME_MAX_LENGTH] = {0};
229 LCIDToLocaleName(lcid, locale_name, LOCALE_NAME_MAX_LENGTH, 0);
230 voice.lang = base::WideToUTF8(locale_name);
231 }
232
233 voice.native = true;
234 voice.events.insert(TTS_EVENT_START);
235 voice.events.insert(TTS_EVENT_END);
236 voice.events.insert(TTS_EVENT_MARKER);
237 voice.events.insert(TTS_EVENT_WORD);
238 voice.events.insert(TTS_EVENT_SENTENCE);
239 voice.events.insert(TTS_EVENT_PAUSE);
240 voice.events.insert(TTS_EVENT_RESUME);
241 out_voices->push_back(voice);
242 }
190 } 243 }
191 244
192 void TtsPlatformImplWin::OnSpeechEvent() { 245 void TtsPlatformImplWin::OnSpeechEvent() {
193 TtsController* controller = TtsController::GetInstance(); 246 TtsController* controller = TtsController::GetInstance();
194 SPEVENT event; 247 SPEVENT event;
195 while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) { 248 while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) {
196 if (event.ulStreamNum != stream_number_) 249 if (event.ulStreamNum != stream_number_)
197 continue; 250 continue;
198 251
199 switch (event.eEventId) { 252 switch (event.eEventId) {
(...skipping 21 matching lines...) Expand all
221 controller->OnTtsEvent( 274 controller->OnTtsEvent(
222 utterance_id_, TTS_EVENT_SENTENCE, char_position_, 275 utterance_id_, TTS_EVENT_SENTENCE, char_position_,
223 std::string()); 276 std::string());
224 break; 277 break;
225 default: 278 default:
226 break; 279 break;
227 } 280 }
228 } 281 }
229 } 282 }
230 283
284 void TtsPlatformImplWin::SetVoiceFromName(const std::string& name) {
285 if (name.empty() || name == last_voice_name_)
286 return;
287
288 last_voice_name_ = name;
289
290 base::win::ScopedComPtr<IEnumSpObjectTokens> voice_tokens;
291 unsigned long voice_count;
292 if (S_OK != SpEnumTokens(SPCAT_VOICES, NULL, NULL, voice_tokens.Receive()))
293 return;
294 if (S_OK != voice_tokens->GetCount(&voice_count))
295 return;
296
297 for (unsigned i = 0; i < voice_count; i++) {
298 base::win::ScopedComPtr<ISpObjectToken> voice_token;
299 if (S_OK != voice_tokens->Next(1, voice_token.Receive(), NULL))
300 return;
301
302 base::win::ScopedCoMem<WCHAR> description;
303 if (S_OK != SpGetDescription(voice_token.get(), &description))
304 continue;
305 if (name == base::WideToUTF8(description.get())) {
306 speech_synthesizer_->SetVoice(voice_token.get());
307 break;
308 }
309 }
310 }
311
231 TtsPlatformImplWin::TtsPlatformImplWin() 312 TtsPlatformImplWin::TtsPlatformImplWin()
232 : utterance_id_(0), 313 : utterance_id_(0),
233 prefix_len_(0), 314 prefix_len_(0),
234 stream_number_(0), 315 stream_number_(0),
235 char_position_(0), 316 char_position_(0),
236 paused_(false) { 317 paused_(false) {
237 speech_synthesizer_.CreateInstance(CLSID_SpVoice); 318 speech_synthesizer_.CreateInstance(CLSID_SpVoice);
238 if (speech_synthesizer_.get()) { 319 if (speech_synthesizer_.get()) {
239 ULONGLONG event_mask = 320 ULONGLONG event_mask =
240 SPFEI(SPEI_START_INPUT_STREAM) | 321 SPFEI(SPEI_START_INPUT_STREAM) |
(...skipping 11 matching lines...) Expand all
252 TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() { 333 TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() {
253 return base::Singleton<TtsPlatformImplWin, 334 return base::Singleton<TtsPlatformImplWin,
254 base::LeakySingletonTraits<TtsPlatformImplWin>>::get(); 335 base::LeakySingletonTraits<TtsPlatformImplWin>>::get();
255 } 336 }
256 337
257 // static 338 // static
258 void TtsPlatformImplWin::SpeechEventCallback( 339 void TtsPlatformImplWin::SpeechEventCallback(
259 WPARAM w_param, LPARAM l_param) { 340 WPARAM w_param, LPARAM l_param) {
260 GetInstance()->OnSpeechEvent(); 341 GetInstance()->OnSpeechEvent();
261 } 342 }
OLD | NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698