chrome/browser/speech/tts_win.cc - Issue 97793002: Support multiple TTS voices on Windows.

Side by Side Diff: chrome/browser/speech/tts_win.cc

Issue 97793002: Support multiple TTS voices on Windows. (Closed) Base URL: http://git.chromium.org/chromium/src.git@master

Patch Set: Created 7 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include <math.h>	5 #include <math.h>

6 #include <sapi.h>	6 #include <sapi.h>

	7 #include <sphelper.h>

7	8

8 #include "base/memory/singleton.h"	9 #include "base/memory/singleton.h"

9 #include "base/strings/string_number_conversions.h"	10 #include "base/strings/string_number_conversions.h"

	11 #include "base/strings/string_piece.h"

10 #include "base/strings/utf_string_conversions.h"	12 #include "base/strings/utf_string_conversions.h"

11 #include "base/values.h"	13 #include "base/values.h"

	14 #include "base/win/scoped_co_mem.h"

12 #include "base/win/scoped_comptr.h"	15 #include "base/win/scoped_comptr.h"

13 #include "chrome/browser/speech/tts_controller.h"	16 #include "chrome/browser/speech/tts_controller.h"

14 #include "chrome/browser/speech/tts_platform.h"	17 #include "chrome/browser/speech/tts_platform.h"

15	18

	19 namespace {

	20

	21 // ISpObjectToken key and value names.

	22 const wchar_t kAttributesKey[] = L"Attributes";

	23 const wchar_t kGenderValue[] = L"Gender";

	24 const wchar_t kLanguageValue[] = L"Language";

	25

	26 } // anonymous namespace.

	27

16 class TtsPlatformImplWin : public TtsPlatformImpl {	28 class TtsPlatformImplWin : public TtsPlatformImpl {

17 public:	29 public:

18 virtual bool PlatformImplAvailable() {	30 virtual bool PlatformImplAvailable() {

19 return true;	31 return true;

20 }	32 }

21	33

22 virtual bool Speak(	34 virtual bool Speak(

23 int utterance_id,	35 int utterance_id,

24 const std::string& utterance,	36 const std::string& utterance,

25 const std::string& lang,	37 const std::string& lang,

(...skipping 14 matching lines...) Expand all Loading...
40 static TtsPlatformImplWin* GetInstance();	52 static TtsPlatformImplWin* GetInstance();

41	53

42 static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param);	54 static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param);

43	55

44 private:	56 private:

45 TtsPlatformImplWin();	57 TtsPlatformImplWin();

46 virtual ~TtsPlatformImplWin() {}	58 virtual ~TtsPlatformImplWin() {}

47	59

48 void OnSpeechEvent();	60 void OnSpeechEvent();

49	61

	62 void SetVoiceFromName(const std::string& name);

	63

50 base::win::ScopedComPtr<ISpVoice> speech_synthesizer_;	64 base::win::ScopedComPtr<ISpVoice> speech_synthesizer_;

51	65

52 // These apply to the current utterance only.	66 // These apply to the current utterance only.

53 std::wstring utterance_;	67 std::wstring utterance_;

54 int utterance_id_;	68 int utterance_id_;

55 int prefix_len_;	69 int prefix_len_;

56 ULONG stream_number_;	70 ULONG stream_number_;

57 int char_position_;	71 int char_position_;

58 bool paused_;	72 bool paused_;

59	73

(...skipping 12 matching lines...) Expand all Loading...
72 const std::string& src_utterance,	86 const std::string& src_utterance,

73 const std::string& lang,	87 const std::string& lang,

74 const VoiceData& voice,	88 const VoiceData& voice,

75 const UtteranceContinuousParameters& params) {	89 const UtteranceContinuousParameters& params) {

76 std::wstring prefix;	90 std::wstring prefix;

77 std::wstring suffix;	91 std::wstring suffix;

78	92

79 if (!speech_synthesizer_.get())	93 if (!speech_synthesizer_.get())

80 return false;	94 return false;

81	95

82 // TODO(dmazzoni): support languages other than the default: crbug.com/88059	96 SetVoiceFromName(voice.name);

83	97

84 if (params.rate >= 0.0) {	98 if (params.rate >= 0.0) {

85 // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's	99 // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's

86 // linear range of -10 to 10:	100 // linear range of -10 to 10:

87 // 0.1 -> -10	101 // 0.1 -> -10

88 // 1.0 -> 0	102 // 1.0 -> 0

89 // 10.0 -> 10	103 // 10.0 -> 10

90 speech_synthesizer_->SetRate(static_cast<int32>(10 * log10(params.rate)));	104 speech_synthesizer_->SetRate(static_cast<int32>(10 * log10(params.rate)));

91 }	105 }

92	106

(...skipping 72 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
165 status.dwRunningState == SPRS_IS_SPEAKING) {	179 status.dwRunningState == SPRS_IS_SPEAKING) {

166 return true;	180 return true;

167 }	181 }

168 }	182 }

169 }	183 }

170 return false;	184 return false;

171 }	185 }

172	186

173 void TtsPlatformImplWin::GetVoices(	187 void TtsPlatformImplWin::GetVoices(

174 std::vector<VoiceData>* out_voices) {	188 std::vector<VoiceData>* out_voices) {

175 // TODO: get all voices, not just default voice.	189 base::win::ScopedComPtr<IEnumSpObjectTokens> voice_tokens;

176 // http://crbug.com/88059	190 unsigned long voice_count;

177 out_voices->push_back(VoiceData());	191 if (S_OK != SpEnumTokens(SPCAT_VOICES, NULL, NULL, voice_tokens.Receive()))

178 VoiceData& voice = out_voices->back();	192 return;

179 voice.native = true;	193 if (S_OK != voice_tokens->GetCount(&voice_count))

180 voice.name = "native";	194 return;
	David Tseng 2013/12/07 00:49:39 Do we want to let the js client know there was an error retrieving voices? dmazzoni 2013/12/09 17:12:24 On 2013/12/07 00:49:39, David Tseng wrote: > Do we want to let the js client know there was an error retrieving voices? We don't have a mechanism for this currently, i.e. getVoices just returns a list of voices from all sources (native and extensions); it doesn't have a way to say that one of the two failed for some reason. In practice I've never seen this fail, even when speech is in use by another app. I think it'd be worth considering passing information through if we encounter a specific error in the wild that app developers might want to handle - but absent any specific example I think it's fine to just not return any native voices, and let the app use Google Network Speech.
181 voice.events.insert(TTS_EVENT_START);	195

182 voice.events.insert(TTS_EVENT_END);	196 for (unsigned i = 0; i < voice_count; i++) {

183 voice.events.insert(TTS_EVENT_MARKER);	197 VoiceData voice;

184 voice.events.insert(TTS_EVENT_WORD);	198

185 voice.events.insert(TTS_EVENT_SENTENCE);	199 base::win::ScopedComPtr<ISpObjectToken> voice_token;

186 voice.events.insert(TTS_EVENT_PAUSE);	200 if (S_OK != voice_tokens->Next(1, voice_token.Receive(), NULL))

187 voice.events.insert(TTS_EVENT_RESUME);	201 return;

	202

	203 base::win::ScopedCoMem<WCHAR> description;

	204 if (S_OK != SpGetDescription(voice_token, &description))

	205 continue;

	206 voice.name = WideToUTF8(description.get());

	207

	208 base::win::ScopedComPtr<ISpDataKey> attributes;

	209 if (S_OK != voice_token->OpenKey(kAttributesKey, attributes.Receive()))

	210 continue;

	211

	212 base::win::ScopedCoMem<WCHAR> gender;

	213 if (S_OK == attributes->GetStringValue(kGenderValue, &gender)) {

	214 if (0 == _wcsicmp(gender.get(), L"male"))

	215 voice.gender = TTS_GENDER_MALE;

	216 else if (0 == _wcsicmp(gender.get(), L"female"))

	217 voice.gender = TTS_GENDER_FEMALE;

	218 }

	219

	220 base::win::ScopedCoMem<WCHAR> language;

	221 if (S_OK == attributes->GetStringValue(kLanguageValue, &language)) {

	222 int lcid_value;

	223 base::HexStringToInt(WideToUTF8(language.get()), &lcid_value);

	224 LCID lcid = MAKELCID(lcid_value, SORT_DEFAULT);

	225 WCHAR locale_name[LOCALE_NAME_MAX_LENGTH] = {0};

	226 LCIDToLocaleName(lcid, locale_name, LOCALE_NAME_MAX_LENGTH, 0);

	227 voice.lang = WideToUTF8(locale_name);

	228 }

	229

	230 voice.native = true;

	231 voice.events.insert(TTS_EVENT_START);

	232 voice.events.insert(TTS_EVENT_END);

	233 voice.events.insert(TTS_EVENT_MARKER);

	234 voice.events.insert(TTS_EVENT_WORD);

	235 voice.events.insert(TTS_EVENT_SENTENCE);

	236 voice.events.insert(TTS_EVENT_PAUSE);

	237 voice.events.insert(TTS_EVENT_RESUME);

	238 out_voices->push_back(voice);

	239 }
	David Tseng 2013/12/07 00:49:39 Does this fetch voices across multiple tts engines? From my cursory reading of this code, it doesn't seem to? dmazzoni 2013/12/09 17:12:24 On 2013/12/07 00:49:39, David Tseng wrote: > Does this fetch voices across multiple tts engines? From my cursory reading of this code, it doesn't seem to? SPCAT_VOICES returns voices from all engines in one big list. It's basically just iterating over certain types of tokens in the registry.
188 }	240 }

189	241

190 void TtsPlatformImplWin::OnSpeechEvent() {	242 void TtsPlatformImplWin::OnSpeechEvent() {

191 TtsController* controller = TtsController::GetInstance();	243 TtsController* controller = TtsController::GetInstance();

192 SPEVENT event;	244 SPEVENT event;

193 while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) {	245 while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) {

194 if (event.ulStreamNum != stream_number_)	246 if (event.ulStreamNum != stream_number_)

195 continue;	247 continue;

196	248

197 switch (event.eEventId) {	249 switch (event.eEventId) {

(...skipping 19 matching lines...) Expand all Loading...
217 case SPEI_SENTENCE_BOUNDARY:	269 case SPEI_SENTENCE_BOUNDARY:

218 char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;	270 char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;

219 controller->OnTtsEvent(	271 controller->OnTtsEvent(

220 utterance_id_, TTS_EVENT_SENTENCE, char_position_,	272 utterance_id_, TTS_EVENT_SENTENCE, char_position_,

221 std::string());	273 std::string());

222 break;	274 break;

223 }	275 }

224 }	276 }

225 }	277 }

226	278

	279 void TtsPlatformImplWin::SetVoiceFromName(const std::string& name) {

	280 if (name.empty())

	281 return;

	282

	283 base::win::ScopedComPtr<IEnumSpObjectTokens> voice_tokens;

	284 unsigned long voice_count;

	285 if (S_OK != SpEnumTokens(SPCAT_VOICES, NULL, NULL, voice_tokens.Receive()))

	286 return;

	287 if (S_OK != voice_tokens->GetCount(&voice_count))

	288 return;

	289

	290 for (unsigned i = 0; i < voice_count; i++) {

	291 base::win::ScopedComPtr<ISpObjectToken> voice_token;

	292 if (S_OK != voice_tokens->Next(1, voice_token.Receive(), NULL))

	293 return;

	294

	295 base::win::ScopedCoMem<WCHAR> description;

	296 if (S_OK != SpGetDescription(voice_token, &description))

	297 continue;

	298 if (name == WideToUTF8(description.get())) {

	299 speech_synthesizer_->SetVoice(voice_token);
	David Tseng 2013/12/07 00:49:39 Did you notice any performance drawbacks with doing a voice lookup every time we speak? Perhaps return earlier if the current voice is the requested voice? dmazzoni 2013/12/09 17:12:24 On 2013/12/07 00:49:39, David Tseng wrote: > Did you notice any performance drawbacks with doing a voice lookup every time we speak? Perhaps return earlier if the current voice is the requested voice? I didn't notice anything, but that's a good idea. Done.
	300 break;

	301 }

	302 }

	303 }

	304

227 TtsPlatformImplWin::TtsPlatformImplWin()	305 TtsPlatformImplWin::TtsPlatformImplWin()

228 : utterance_id_(0),	306 : utterance_id_(0),

229 prefix_len_(0),	307 prefix_len_(0),

230 stream_number_(0),	308 stream_number_(0),

231 char_position_(0),	309 char_position_(0),

232 paused_(false) {	310 paused_(false) {

233 speech_synthesizer_.CreateInstance(CLSID_SpVoice);	311 speech_synthesizer_.CreateInstance(CLSID_SpVoice);

234 if (speech_synthesizer_.get()) {	312 if (speech_synthesizer_.get()) {

235 ULONGLONG event_mask =	313 ULONGLONG event_mask =

236 SPFEI(SPEI_START_INPUT_STREAM) \|	314 SPFEI(SPEI_START_INPUT_STREAM) \|

(...skipping 11 matching lines...) Expand all Loading...
248 TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() {	326 TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() {

249 return Singleton<TtsPlatformImplWin,	327 return Singleton<TtsPlatformImplWin,

250 LeakySingletonTraits<TtsPlatformImplWin> >::get();	328 LeakySingletonTraits<TtsPlatformImplWin> >::get();

251 }	329 }

252	330

253 // static	331 // static

254 void TtsPlatformImplWin::SpeechEventCallback(	332 void TtsPlatformImplWin::SpeechEventCallback(

255 WPARAM w_param, LPARAM l_param) {	333 WPARAM w_param, LPARAM l_param) {

256 GetInstance()->OnSpeechEvent();	334 GetInstance()->OnSpeechEvent();

257 }	335 }

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »