chrome/browser/speech/tts_win.cc - Issue 97793002: Support multiple TTS voices on Windows.

Side by Side Diff: chrome/browser/speech/tts_win.cc

Issue 97793002: Support multiple TTS voices on Windows. (Closed) Base URL: http://git.chromium.org/chromium/src.git@master

Patch Set: Don't change voice if it hasn't changed Created 7 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include <math.h>	5 #include <math.h>

6 #include <sapi.h>	6 #include <sapi.h>

	7 #include <sphelper.h>

7	8

8 #include "base/memory/singleton.h"	9 #include "base/memory/singleton.h"

9 #include "base/strings/string_number_conversions.h"	10 #include "base/strings/string_number_conversions.h"

	11 #include "base/strings/string_piece.h"

10 #include "base/strings/utf_string_conversions.h"	12 #include "base/strings/utf_string_conversions.h"

11 #include "base/values.h"	13 #include "base/values.h"

	14 #include "base/win/scoped_co_mem.h"

12 #include "base/win/scoped_comptr.h"	15 #include "base/win/scoped_comptr.h"

13 #include "chrome/browser/speech/tts_controller.h"	16 #include "chrome/browser/speech/tts_controller.h"

14 #include "chrome/browser/speech/tts_platform.h"	17 #include "chrome/browser/speech/tts_platform.h"

15	18

	19 namespace {

	20

	21 // ISpObjectToken key and value names.

	22 const wchar_t kAttributesKey[] = L"Attributes";

	23 const wchar_t kGenderValue[] = L"Gender";

	24 const wchar_t kLanguageValue[] = L"Language";

	25

	26 } // anonymous namespace.

	27

16 class TtsPlatformImplWin : public TtsPlatformImpl {	28 class TtsPlatformImplWin : public TtsPlatformImpl {

17 public:	29 public:

18 virtual bool PlatformImplAvailable() {	30 virtual bool PlatformImplAvailable() {

19 return true;	31 return true;

20 }	32 }

21	33

22 virtual bool Speak(	34 virtual bool Speak(

23 int utterance_id,	35 int utterance_id,

24 const std::string& utterance,	36 const std::string& utterance,

25 const std::string& lang,	37 const std::string& lang,

(...skipping 14 matching lines...) Expand all Loading...
40 static TtsPlatformImplWin* GetInstance();	52 static TtsPlatformImplWin* GetInstance();

41	53

42 static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param);	54 static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param);

43	55

44 private:	56 private:

45 TtsPlatformImplWin();	57 TtsPlatformImplWin();

46 virtual ~TtsPlatformImplWin() {}	58 virtual ~TtsPlatformImplWin() {}

47	59

48 void OnSpeechEvent();	60 void OnSpeechEvent();

49	61

	62 void SetVoiceFromName(const std::string& name);

	63

50 base::win::ScopedComPtr<ISpVoice> speech_synthesizer_;	64 base::win::ScopedComPtr<ISpVoice> speech_synthesizer_;

51	65

52 // These apply to the current utterance only.	66 // These apply to the current utterance only.

53 std::wstring utterance_;	67 std::wstring utterance_;

54 int utterance_id_;	68 int utterance_id_;

55 int prefix_len_;	69 int prefix_len_;

56 ULONG stream_number_;	70 ULONG stream_number_;

57 int char_position_;	71 int char_position_;

58 bool paused_;	72 bool paused_;

	73 std::string last_voice_name_;

59	74

60 friend struct DefaultSingletonTraits<TtsPlatformImplWin>;	75 friend struct DefaultSingletonTraits<TtsPlatformImplWin>;

61	76

62 DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplWin);	77 DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplWin);

63 };	78 };

64	79

65 // static	80 // static

66 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {	81 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {

67 return TtsPlatformImplWin::GetInstance();	82 return TtsPlatformImplWin::GetInstance();

68 }	83 }

69	84

70 bool TtsPlatformImplWin::Speak(	85 bool TtsPlatformImplWin::Speak(

71 int utterance_id,	86 int utterance_id,

72 const std::string& src_utterance,	87 const std::string& src_utterance,

73 const std::string& lang,	88 const std::string& lang,

74 const VoiceData& voice,	89 const VoiceData& voice,

75 const UtteranceContinuousParameters& params) {	90 const UtteranceContinuousParameters& params) {

76 std::wstring prefix;	91 std::wstring prefix;

77 std::wstring suffix;	92 std::wstring suffix;

78	93

79 if (!speech_synthesizer_.get())	94 if (!speech_synthesizer_.get())

80 return false;	95 return false;

81	96

82 // TODO(dmazzoni): support languages other than the default: crbug.com/88059	97 SetVoiceFromName(voice.name);

83	98

84 if (params.rate >= 0.0) {	99 if (params.rate >= 0.0) {

85 // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's	100 // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's

86 // linear range of -10 to 10:	101 // linear range of -10 to 10:

87 // 0.1 -> -10	102 // 0.1 -> -10

88 // 1.0 -> 0	103 // 1.0 -> 0

89 // 10.0 -> 10	104 // 10.0 -> 10

90 speech_synthesizer_->SetRate(static_cast<int32>(10 * log10(params.rate)));	105 speech_synthesizer_->SetRate(static_cast<int32>(10 * log10(params.rate)));

91 }	106 }

92	107

(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
165 status.dwRunningState == SPRS_IS_SPEAKING) {	180 status.dwRunningState == SPRS_IS_SPEAKING) {

166 return true;	181 return true;

167 }	182 }

168 }	183 }

169 }	184 }

170 return false;	185 return false;

171 }	186 }

172	187

173 void TtsPlatformImplWin::GetVoices(	188 void TtsPlatformImplWin::GetVoices(

174 std::vector<VoiceData>* out_voices) {	189 std::vector<VoiceData>* out_voices) {

175 // TODO: get all voices, not just default voice.	190 base::win::ScopedComPtr<IEnumSpObjectTokens> voice_tokens;

176 // http://crbug.com/88059	191 unsigned long voice_count;

177 out_voices->push_back(VoiceData());	192 if (S_OK != SpEnumTokens(SPCAT_VOICES, NULL, NULL, voice_tokens.Receive()))

178 VoiceData& voice = out_voices->back();	193 return;

179 voice.native = true;	194 if (S_OK != voice_tokens->GetCount(&voice_count))

180 voice.name = "native";	195 return;

181 voice.events.insert(TTS_EVENT_START);	196

182 voice.events.insert(TTS_EVENT_END);	197 for (unsigned i = 0; i < voice_count; i++) {

183 voice.events.insert(TTS_EVENT_MARKER);	198 VoiceData voice;

184 voice.events.insert(TTS_EVENT_WORD);	199

185 voice.events.insert(TTS_EVENT_SENTENCE);	200 base::win::ScopedComPtr<ISpObjectToken> voice_token;

186 voice.events.insert(TTS_EVENT_PAUSE);	201 if (S_OK != voice_tokens->Next(1, voice_token.Receive(), NULL))

187 voice.events.insert(TTS_EVENT_RESUME);	202 return;

	203

	204 base::win::ScopedCoMem<WCHAR> description;

	205 if (S_OK != SpGetDescription(voice_token, &description))

	206 continue;

	207 voice.name = WideToUTF8(description.get());

	208

	209 base::win::ScopedComPtr<ISpDataKey> attributes;

	210 if (S_OK != voice_token->OpenKey(kAttributesKey, attributes.Receive()))

	211 continue;

	212

	213 base::win::ScopedCoMem<WCHAR> gender;

	214 if (S_OK == attributes->GetStringValue(kGenderValue, &gender)) {

	215 if (0 == _wcsicmp(gender.get(), L"male"))

	216 voice.gender = TTS_GENDER_MALE;

	217 else if (0 == _wcsicmp(gender.get(), L"female"))

	218 voice.gender = TTS_GENDER_FEMALE;

	219 }

	220

	221 base::win::ScopedCoMem<WCHAR> language;

	222 if (S_OK == attributes->GetStringValue(kLanguageValue, &language)) {

	223 int lcid_value;

	224 base::HexStringToInt(WideToUTF8(language.get()), &lcid_value);

	225 LCID lcid = MAKELCID(lcid_value, SORT_DEFAULT);

	226 WCHAR locale_name[LOCALE_NAME_MAX_LENGTH] = {0};

	227 LCIDToLocaleName(lcid, locale_name, LOCALE_NAME_MAX_LENGTH, 0);

	228 voice.lang = WideToUTF8(locale_name);

	229 }

	230

	231 voice.native = true;

	232 voice.events.insert(TTS_EVENT_START);

	233 voice.events.insert(TTS_EVENT_END);

	234 voice.events.insert(TTS_EVENT_MARKER);

	235 voice.events.insert(TTS_EVENT_WORD);

	236 voice.events.insert(TTS_EVENT_SENTENCE);

	237 voice.events.insert(TTS_EVENT_PAUSE);

	238 voice.events.insert(TTS_EVENT_RESUME);

	239 out_voices->push_back(voice);

	240 }

188 }	241 }

189	242

190 void TtsPlatformImplWin::OnSpeechEvent() {	243 void TtsPlatformImplWin::OnSpeechEvent() {

191 TtsController* controller = TtsController::GetInstance();	244 TtsController* controller = TtsController::GetInstance();

192 SPEVENT event;	245 SPEVENT event;

193 while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) {	246 while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) {

194 if (event.ulStreamNum != stream_number_)	247 if (event.ulStreamNum != stream_number_)

195 continue;	248 continue;

196	249

197 switch (event.eEventId) {	250 switch (event.eEventId) {

(...skipping 19 matching lines...) Expand all Loading...
217 case SPEI_SENTENCE_BOUNDARY:	270 case SPEI_SENTENCE_BOUNDARY:

218 char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;	271 char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;

219 controller->OnTtsEvent(	272 controller->OnTtsEvent(

220 utterance_id_, TTS_EVENT_SENTENCE, char_position_,	273 utterance_id_, TTS_EVENT_SENTENCE, char_position_,

221 std::string());	274 std::string());

222 break;	275 break;

223 }	276 }

224 }	277 }

225 }	278 }

226	279

	280 void TtsPlatformImplWin::SetVoiceFromName(const std::string& name) {

	281 if (name.empty() || name == last_voice_name_)

	282 return;

	283

	284 last_voice_name_ = name;

	285

	286 base::win::ScopedComPtr<IEnumSpObjectTokens> voice_tokens;

	287 unsigned long voice_count;

	288 if (S_OK != SpEnumTokens(SPCAT_VOICES, NULL, NULL, voice_tokens.Receive()))

	289 return;

	290 if (S_OK != voice_tokens->GetCount(&voice_count))

	291 return;

	292

	293 for (unsigned i = 0; i < voice_count; i++) {

	294 base::win::ScopedComPtr<ISpObjectToken> voice_token;

	295 if (S_OK != voice_tokens->Next(1, voice_token.Receive(), NULL))

	296 return;

	297

	298 base::win::ScopedCoMem<WCHAR> description;

	299 if (S_OK != SpGetDescription(voice_token, &description))

	300 continue;

	301 if (name == WideToUTF8(description.get())) {

	302 speech_synthesizer_->SetVoice(voice_token);

	303 break;

	304 }

	305 }

	306 }

	307

227 TtsPlatformImplWin::TtsPlatformImplWin()	308 TtsPlatformImplWin::TtsPlatformImplWin()

228 : utterance_id_(0),	309 : utterance_id_(0),

229 prefix_len_(0),	310 prefix_len_(0),

230 stream_number_(0),	311 stream_number_(0),

231 char_position_(0),	312 char_position_(0),

232 paused_(false) {	313 paused_(false) {

233 speech_synthesizer_.CreateInstance(CLSID_SpVoice);	314 speech_synthesizer_.CreateInstance(CLSID_SpVoice);

234 if (speech_synthesizer_.get()) {	315 if (speech_synthesizer_.get()) {

235 ULONGLONG event_mask =	316 ULONGLONG event_mask =

236 SPFEI(SPEI_START_INPUT_STREAM) |	317 SPFEI(SPEI_START_INPUT_STREAM) |

(...skipping 11 matching lines...) Expand all Loading...
248 TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() {	329 TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() {

249 return Singleton<TtsPlatformImplWin,	330 return Singleton<TtsPlatformImplWin,

250 LeakySingletonTraits<TtsPlatformImplWin> >::get();	331 LeakySingletonTraits<TtsPlatformImplWin> >::get();

251 }	332 }

252	333

253 // static	334 // static

254 void TtsPlatformImplWin::SpeechEventCallback(	335 void TtsPlatformImplWin::SpeechEventCallback(

255 WPARAM w_param, LPARAM l_param) {	336 WPARAM w_param, LPARAM l_param) {

256 GetInstance()->OnSpeechEvent();	337 GetInstance()->OnSpeechEvent();

257 }	338 }

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »