chrome/browser/speech/tts_win.cc - Issue 2695813007: Reland Support multiple TTS voices on Windows.

Side by Side Diff: chrome/browser/speech/tts_win.cc

Issue 2695813007: Reland Support multiple TTS voices on Windows. (Closed)

Patch Set: Fix syntax error Created 3 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include <math.h>	5 #include <math.h>

6 #include <sapi.h>	6 #include <sapi.h>

	7 #include <sphelper.h>

7 #include <stdint.h>	8 #include <stdint.h>

8	9

9 #include "base/macros.h"	10 #include "base/macros.h"

10 #include "base/memory/singleton.h"	11 #include "base/memory/singleton.h"

11 #include "base/strings/string_number_conversions.h"	12 #include "base/strings/string_number_conversions.h"

	13 #include "base/strings/string_piece.h"

12 #include "base/strings/utf_string_conversions.h"	14 #include "base/strings/utf_string_conversions.h"

13 #include "base/values.h"	15 #include "base/values.h"

	16 #include "base/win/scoped_co_mem.h"

14 #include "base/win/scoped_comptr.h"	17 #include "base/win/scoped_comptr.h"

15 #include "chrome/browser/speech/tts_controller.h"	18 #include "chrome/browser/speech/tts_controller.h"

16 #include "chrome/browser/speech/tts_platform.h"	19 #include "chrome/browser/speech/tts_platform.h"

17	20

	21 namespace {

	22

	23 // ISpObjectToken key and value names.

	24 const wchar_t kAttributesKey[] = L"Attributes";

	25 const wchar_t kGenderValue[] = L"Gender";

	26 const wchar_t kLanguageValue[] = L"Language";

	27

	28 } // anonymous namespace.

	29

18 class TtsPlatformImplWin : public TtsPlatformImpl {	30 class TtsPlatformImplWin : public TtsPlatformImpl {

19 public:	31 public:

20 bool PlatformImplAvailable() override {	32 bool PlatformImplAvailable() override {

21 return true;	33 return true;

22 }	34 }

23	35

24 bool Speak(	36 bool Speak(

25 int utterance_id,	37 int utterance_id,

26 const std::string& utterance,	38 const std::string& utterance,

27 const std::string& lang,	39 const std::string& lang,

(...skipping 14 matching lines...) Expand all Loading...
42 static TtsPlatformImplWin* GetInstance();	54 static TtsPlatformImplWin* GetInstance();

43	55

44 static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param);	56 static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param);

45	57

46 private:	58 private:

47 TtsPlatformImplWin();	59 TtsPlatformImplWin();

48 ~TtsPlatformImplWin() override {}	60 ~TtsPlatformImplWin() override {}

49	61

50 void OnSpeechEvent();	62 void OnSpeechEvent();

51	63

	64 void SetVoiceFromName(const std::string& name);

	65

52 base::win::ScopedComPtr<ISpVoice> speech_synthesizer_;	66 base::win::ScopedComPtr<ISpVoice> speech_synthesizer_;

53	67

54 // These apply to the current utterance only.	68 // These apply to the current utterance only.

55 std::wstring utterance_;	69 std::wstring utterance_;

56 int utterance_id_;	70 int utterance_id_;

57 int prefix_len_;	71 int prefix_len_;

58 ULONG stream_number_;	72 ULONG stream_number_;

59 int char_position_;	73 int char_position_;

60 bool paused_;	74 bool paused_;

	75 std::string last_voice_name_;

61	76

62 friend struct base::DefaultSingletonTraits<TtsPlatformImplWin>;	77 friend struct base::DefaultSingletonTraits<TtsPlatformImplWin>;

63	78

64 DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplWin);	79 DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplWin);

65 };	80 };

66	81

67 // static	82 // static

68 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {	83 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {

69 return TtsPlatformImplWin::GetInstance();	84 return TtsPlatformImplWin::GetInstance();

70 }	85 }

71	86

72 bool TtsPlatformImplWin::Speak(	87 bool TtsPlatformImplWin::Speak(

73 int utterance_id,	88 int utterance_id,

74 const std::string& src_utterance,	89 const std::string& src_utterance,

75 const std::string& lang,	90 const std::string& lang,

76 const VoiceData& voice,	91 const VoiceData& voice,

77 const UtteranceContinuousParameters& params) {	92 const UtteranceContinuousParameters& params) {

78 std::wstring prefix;	93 std::wstring prefix;

79 std::wstring suffix;	94 std::wstring suffix;

80	95

81 if (!speech_synthesizer_.get())	96 if (!speech_synthesizer_.get())

82 return false;	97 return false;

83	98

84 // TODO(dmazzoni): support languages other than the default: crbug.com/88059	99 SetVoiceFromName(voice.name);

85	100

86 if (params.rate >= 0.0) {	101 if (params.rate >= 0.0) {

87 // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's	102 // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's

88 // linear range of -10 to 10:	103 // linear range of -10 to 10:

89 // 0.1 -> -10	104 // 0.1 -> -10

90 // 1.0 -> 0	105 // 1.0 -> 0

91 // 10.0 -> 10	106 // 10.0 -> 10

92 speech_synthesizer_->SetRate(static_cast<int32_t>(10 * log10(params.rate)));	107 speech_synthesizer_->SetRate(static_cast<int32_t>(10 * log10(params.rate)));

93 }	108 }

94	109

(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
167 status.dwRunningState == SPRS_IS_SPEAKING) {	182 status.dwRunningState == SPRS_IS_SPEAKING) {

168 return true;	183 return true;

169 }	184 }

170 }	185 }

171 }	186 }

172 return false;	187 return false;

173 }	188 }

174	189

175 void TtsPlatformImplWin::GetVoices(	190 void TtsPlatformImplWin::GetVoices(

176 std::vector<VoiceData>* out_voices) {	191 std::vector<VoiceData>* out_voices) {

177 // TODO: get all voices, not just default voice.	192 base::win::ScopedComPtr<IEnumSpObjectTokens> voice_tokens;

178 // http://crbug.com/88059	193 unsigned long voice_count;

179 out_voices->push_back(VoiceData());	194 if (S_OK != SpEnumTokens(SPCAT_VOICES, NULL, NULL, voice_tokens.Receive()))

180 VoiceData& voice = out_voices->back();	195 return;

181 voice.native = true;	196 if (S_OK != voice_tokens->GetCount(&voice_count))

182 voice.name = "native";	197 return;

183 voice.events.insert(TTS_EVENT_START);	198

184 voice.events.insert(TTS_EVENT_END);	199 for (unsigned i = 0; i < voice_count; i++) {

185 voice.events.insert(TTS_EVENT_MARKER);	200 VoiceData voice;

186 voice.events.insert(TTS_EVENT_WORD);	201

187 voice.events.insert(TTS_EVENT_SENTENCE);	202 base::win::ScopedComPtr<ISpObjectToken> voice_token;

188 voice.events.insert(TTS_EVENT_PAUSE);	203 if (S_OK != voice_tokens->Next(1, voice_token.Receive(), NULL))

189 voice.events.insert(TTS_EVENT_RESUME);	204 return;

	205

	206 base::win::ScopedCoMem<WCHAR> description;

	207 if (S_OK != SpGetDescription(voice_token.get(), &description))

	208 continue;

	209 voice.name = base::WideToUTF8(description.get());

	210

	211 base::win::ScopedComPtr<ISpDataKey> attributes;

	212 if (S_OK != voice_token->OpenKey(kAttributesKey, attributes.Receive()))

	213 continue;

	214

	215 base::win::ScopedCoMem<WCHAR> gender;

	216 if (S_OK == attributes->GetStringValue(kGenderValue, &gender)) {

	217 if (0 == _wcsicmp(gender.get(), L"male"))

	218 voice.gender = TTS_GENDER_MALE;

	219 else if (0 == _wcsicmp(gender.get(), L"female"))

	220 voice.gender = TTS_GENDER_FEMALE;

	221 }

	222

	223 base::win::ScopedCoMem<WCHAR> language;

	224 if (S_OK == attributes->GetStringValue(kLanguageValue, &language)) {

	225 int lcid_value;

	226 base::HexStringToInt(base::WideToUTF8(language.get()), &lcid_value);

	227 LCID lcid = MAKELCID(lcid_value, SORT_DEFAULT);

	228 WCHAR locale_name[LOCALE_NAME_MAX_LENGTH] = {0};

	229 LCIDToLocaleName(lcid, locale_name, LOCALE_NAME_MAX_LENGTH, 0);

	230 voice.lang = base::WideToUTF8(locale_name);

	231 }

	232

	233 voice.native = true;

	234 voice.events.insert(TTS_EVENT_START);

	235 voice.events.insert(TTS_EVENT_END);

	236 voice.events.insert(TTS_EVENT_MARKER);

	237 voice.events.insert(TTS_EVENT_WORD);

	238 voice.events.insert(TTS_EVENT_SENTENCE);

	239 voice.events.insert(TTS_EVENT_PAUSE);

	240 voice.events.insert(TTS_EVENT_RESUME);

	241 out_voices->push_back(voice);

	242 }

190 }	243 }

191	244

192 void TtsPlatformImplWin::OnSpeechEvent() {	245 void TtsPlatformImplWin::OnSpeechEvent() {

193 TtsController* controller = TtsController::GetInstance();	246 TtsController* controller = TtsController::GetInstance();

194 SPEVENT event;	247 SPEVENT event;

195 while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) {	248 while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) {

196 if (event.ulStreamNum != stream_number_)	249 if (event.ulStreamNum != stream_number_)

197 continue;	250 continue;

198	251

199 switch (event.eEventId) {	252 switch (event.eEventId) {

(...skipping 21 matching lines...) Expand all Loading...
221 controller->OnTtsEvent(	274 controller->OnTtsEvent(

222 utterance_id_, TTS_EVENT_SENTENCE, char_position_,	275 utterance_id_, TTS_EVENT_SENTENCE, char_position_,

223 std::string());	276 std::string());

224 break;	277 break;

225 default:	278 default:

226 break;	279 break;

227 }	280 }

228 }	281 }

229 }	282 }

230	283

	284 void TtsPlatformImplWin::SetVoiceFromName(const std::string& name) {

	285 if (name.empty() || name == last_voice_name_)

	286 return;

	287

	288 last_voice_name_ = name;

	289

	290 base::win::ScopedComPtr<IEnumSpObjectTokens> voice_tokens;

	291 unsigned long voice_count;

	292 if (S_OK != SpEnumTokens(SPCAT_VOICES, NULL, NULL, voice_tokens.Receive()))

	293 return;

	294 if (S_OK != voice_tokens->GetCount(&voice_count))

	295 return;

	296

	297 for (unsigned i = 0; i < voice_count; i++) {

	298 base::win::ScopedComPtr<ISpObjectToken> voice_token;

	299 if (S_OK != voice_tokens->Next(1, voice_token.Receive(), NULL))

	300 return;

	301

	302 base::win::ScopedCoMem<WCHAR> description;

	303 if (S_OK != SpGetDescription(voice_token.get(), &description))

	304 continue;

	305 if (name == base::WideToUTF8(description.get())) {

	306 speech_synthesizer_->SetVoice(voice_token.get());

	307 break;

	308 }

	309 }

	310 }

	311

231 TtsPlatformImplWin::TtsPlatformImplWin()	312 TtsPlatformImplWin::TtsPlatformImplWin()

232 : utterance_id_(0),	313 : utterance_id_(0),

233 prefix_len_(0),	314 prefix_len_(0),

234 stream_number_(0),	315 stream_number_(0),

235 char_position_(0),	316 char_position_(0),

236 paused_(false) {	317 paused_(false) {

237 speech_synthesizer_.CreateInstance(CLSID_SpVoice);	318 speech_synthesizer_.CreateInstance(CLSID_SpVoice);

238 if (speech_synthesizer_.get()) {	319 if (speech_synthesizer_.get()) {

239 ULONGLONG event_mask =	320 ULONGLONG event_mask =

239 SPFEI(SPEI_START_INPUT_STREAM) |	320 SPFEI(SPEI_START_INPUT_STREAM) |

(...skipping 11 matching lines...) Expand all Loading...
252 TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() {	333 TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() {

253 return base::Singleton<TtsPlatformImplWin,	334 return base::Singleton<TtsPlatformImplWin,

254 base::LeakySingletonTraits<TtsPlatformImplWin>>::get();	335 base::LeakySingletonTraits<TtsPlatformImplWin>>::get();

255 }	336 }

256	337

257 // static	338 // static

258 void TtsPlatformImplWin::SpeechEventCallback(	339 void TtsPlatformImplWin::SpeechEventCallback(

259 WPARAM w_param, LPARAM l_param) {	340 WPARAM w_param, LPARAM l_param) {

260 GetInstance()->OnSpeechEvent();	341 GetInstance()->OnSpeechEvent();

261 }	342 }

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »