OLD | NEW |
| (Empty) |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 /** | |
6 * @fileoverview | |
7 * This is a component extension that implements a text-to-speech (TTS) | |
8 * engine powered by Google's speech synthesis API. | |
9 * | |
10 * This is an "event page", so it's not loaded when the API isn't being used, | |
11 * and doesn't waste resources. When a web page or web app makes a speech | |
12 * request and the parameters match one of the voices in this extension's | |
13 * manifest, it makes a request to Google's API using Chrome's private key | |
14 * and plays the resulting speech using HTML5 audio. | |
15 */ | |
16 | |
/**
 * The main class for this extension. Adds listeners to
 * chrome.ttsEngine.onSpeak, onStop, onPause and onResume and implements
 * them using Google's speech synthesis API and an HTML5 audio element.
 * @constructor
 */
function TtsExtension() {
  /**
   * A mapping from voice name to language and gender, derived from the
   * manifest file. This is used in case the speech synthesis request
   * specifies a voice name but doesn't specify a language code or gender.
   * Created per instance so that instances never share (and mutate) a
   * single map through the prototype.
   * @type {Object.<string, {lang: string, gender: string}>}
   * @private
   */
  this.voiceNameToLangAndGender_ = {};
}

TtsExtension.prototype = {
  /**
   * The url prefix of the speech server, including static query
   * parameters that don't change.
   * @type {string}
   * @const
   * @private
   */
  SPEECH_SERVER_URL_:
      'https://www.google.com/speech-api/v2/synthesize?' +
      'enc=mpeg&client=chromium',

  /**
   * The maximum number of UTF-16 code units of an utterance that is sent
   * to the server; longer utterances are truncated to this length.
   * @type {number}
   * @const
   * @private
   */
  MAX_UTTERANCE_LENGTH_: 32768,

  /**
   * A mapping from language and gender to voice name, hardcoded for now
   * until the speech synthesis server capabilities response provides this.
   * The key of this map is of the form '<lang>-<gender>'.
   * @type {Object.<string, string>}
   * @private
   */
  LANG_AND_GENDER_TO_VOICE_NAME_: {
    'en-gb-male': 'rjs',
    'en-gb-female': 'fis',
  },

  /**
   * The arguments passed to the onSpeak event handler for the utterance
   * that's currently being spoken. Should be null when no utterance is
   * pending.
   *
   * @type {?{utterance: string, options: Object, callback: Function}}
   * @private
   */
  currentUtterance_: null,

  /**
   * The HTML5 audio element we use for playing the sound served by the
   * speech server. Created by init().
   * @type {HTMLAudioElement}
   * @private
   */
  audioElement_: null,

  /**
   * This is the main function called to initialize this extension.
   * Reads the voices from the manifest, creates the audio element and
   * adds the audio and ttsEngine event listeners.
   */
  init: function() {
    // Get voices from manifest.
    var voices = chrome.app.getDetails().tts_engine.voices;
    for (var i = 0; i < voices.length; i++) {
      this.voiceNameToLangAndGender_[voices[i].voice_name] = {
        lang: voices[i].lang,
        gender: voices[i].gender
      };
    }

    // Initialize the audio element and event listeners on it.
    this.audioElement_ = document.createElement('audio');
    document.body.appendChild(this.audioElement_);
    this.audioElement_.addEventListener(
        'ended', this.onStop_.bind(this), false);
    this.audioElement_.addEventListener(
        'canplaythrough', this.onStart_.bind(this), false);
    // Without this listener a failed load would never produce a terminal
    // event for the pending utterance.
    this.audioElement_.addEventListener(
        'error', this.onError_.bind(this), false);

    // Install event listeners for the ttsEngine API.
    chrome.ttsEngine.onSpeak.addListener(this.onSpeak_.bind(this));
    chrome.ttsEngine.onStop.addListener(this.onStop_.bind(this));
    chrome.ttsEngine.onPause.addListener(this.onPause_.bind(this));
    chrome.ttsEngine.onResume.addListener(this.onResume_.bind(this));
  },

  /**
   * Converts utterance text into a value that is safe to append to the
   * query string. Unpaired UTF-16 surrogates (which can also be produced
   * by truncating in the middle of a surrogate pair) are dropped first,
   * because encodeURIComponent throws a URIError on them.
   * @param {string} text The text to encode.
   * @return {string} The percent-encoded (UTF-8) text.
   * @private
   */
  encodeText_: function(text) {
    // Valid pairs are matched first and kept; any surrogate that is still
    // matched on its own is unpaired and removed.
    var wellFormed = text.replace(
        /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDFFF]/g,
        function(match) {
          return match.length === 2 ? match : '';
        });
    return encodeURIComponent(wellFormed);
  },

  /**
   * Handler for the chrome.ttsEngine.onSpeak interface.
   * Gets Chrome's Google API key and then uses it to generate a request
   * url for the requested speech utterance. Sets that url as the source
   * of the HTML5 audio element. Any exception raised while preparing the
   * request is reported to |callback| as an 'error' event.
   * @param {string} utterance The text to be spoken.
   * @param {Object} options Options to control the speech, as defined
   *     in the Chrome ttsEngine extension API.
   * @param {function(Object)} callback Function that receives the
   *     'start', 'end' and 'error' events for this utterance.
   * @private
   */
  onSpeak_: function(utterance, options, callback) {
    // Truncate the utterance if it's too long. Both Chrome's tts
    // extension api and the web speech api specify 32k as the
    // maximum limit for an utterance.
    if (utterance.length > this.MAX_UTTERANCE_LENGTH_)
      utterance = utterance.substring(0, this.MAX_UTTERANCE_LENGTH_);

    try {
      // First, stop any pending audio.
      this.onStop_();

      var current = {
        utterance: utterance,
        options: options,
        callback: callback
      };
      this.currentUtterance_ = current;

      var lang = options.lang;
      var gender = options.gender;
      if (options.voiceName) {
        var voice = this.voiceNameToLangAndGender_[options.voiceName];
        if (!voice)
          throw new Error('Unknown voice name: ' + options.voiceName);
        lang = voice.lang;
        gender = voice.gender;
      }
      if (typeof lang !== 'string')
        throw new Error('No language specified for the utterance');

      // Look up the specific voice name for this language and gender.
      // If it's not in the map, it doesn't matter - the language will
      // be used directly. This is only used for languages where more
      // than one gender is actually available.
      var langParam = lang.toLowerCase();
      var voiceName =
          this.LANG_AND_GENDER_TO_VOICE_NAME_[langParam + '-' + gender];

      // Encode the text here, inside the try block, so that any encoding
      // problem is reported through |callback| rather than being thrown
      // from the asynchronous API key callback below.
      var encodedText = this.encodeText_(utterance);

      var url = this.SPEECH_SERVER_URL_;
      chrome.systemPrivate.getApiKey((function(key) {
        // The utterance may have been stopped or replaced while the key
        // was being fetched; don't start loading audio for it then.
        if (this.currentUtterance_ !== current)
          return;

        url += '&key=' + encodeURIComponent(key);
        url += '&text=' + encodedText;
        url += '&lang=' + encodeURIComponent(langParam);

        if (voiceName)
          url += '&name=' + voiceName;

        if (options.rate) {
          // Input rate is between 0.1 and 10.0 with a default of 1.0.
          // Output speed is between 0.0 and 1.0 with a default of 0.5.
          url += '&speed=' + (options.rate / 2.0);
        }

        if (options.pitch) {
          // Input pitch is between 0.0 and 2.0 with a default of 1.0.
          // Output pitch is between 0.0 and 1.0 with a default of 0.5.
          url += '&pitch=' + (options.pitch / 2.0);
        }

        // This begins loading the audio but does not play it.
        // When enough of the audio has loaded to begin playback,
        // the 'canplaythrough' handler will call this.onStart_,
        // which sends a start event to the ttsEngine callback and
        // then begins playing audio.
        this.audioElement_.src = url;
      }).bind(this));
    } catch (err) {
      console.error(String(err));
      callback({
        'type': 'error',
        'errorMessage': String(err)
      });
      this.currentUtterance_ = null;
    }
  },

  /**
   * Handler for the chrome.ttsEngine.onStop interface.
   * Called either when the ttsEngine API requests us to stop, or when
   * we reach the end of the audio stream. Pause the audio element to
   * silence it, and send a callback to the ttsEngine API to let it know
   * that we've completed. Note that the ttsEngine API manages callback
   * messages and will automatically replace the 'end' event with a
   * more specific callback like 'interrupted' when sending it to the
   * TTS client.
   * @private
   */
  onStop_: function() {
    if (this.currentUtterance_) {
      this.audioElement_.pause();
      this.currentUtterance_.callback({
        'type': 'end',
        'charIndex': this.currentUtterance_.utterance.length
      });
    }
    this.currentUtterance_ = null;
  },

  /**
   * Handler for the error event on the audio element.
   * If an utterance is pending, reports an 'error' event to its callback
   * and clears it, so the utterance does not stay pending forever when
   * the audio cannot be loaded or played.
   * @private
   */
  onError_: function() {
    if (this.currentUtterance_) {
      this.audioElement_.pause();
      this.currentUtterance_.callback({
        'type': 'error',
        'errorMessage': 'Unable to load or play the synthesized speech'
      });
    }
    this.currentUtterance_ = null;
  },

  /**
   * Handler for the canplaythrough event on the audio element.
   * Called when the audio element has buffered enough audio to begin
   * playback. Applies the requested volume, begins playing the audio
   * element and sends the 'start' event to the ttsEngine callback.
   * @private
   */
  onStart_: function() {
    if (this.currentUtterance_) {
      // Both APIs use the same range for volume, between 0.0 and 1.0.
      // Fall back to full volume when none is requested, so that the
      // volume of a previous utterance doesn't carry over to this one.
      var volume = this.currentUtterance_.options.volume;
      this.audioElement_.volume = (volume !== undefined) ? volume : 1.0;
      this.audioElement_.play();
      this.currentUtterance_.callback({
        'type': 'start',
        'charIndex': 0
      });
    }
  },

  /**
   * Handler for the chrome.ttsEngine.onPause interface.
   * Pauses audio if we're in the middle of an utterance.
   * @private
   */
  onPause_: function() {
    if (this.currentUtterance_) {
      this.audioElement_.pause();
    }
  },

  /**
   * Handler for the chrome.ttsEngine.onResume interface.
   * Resumes audio if we're in the middle of an utterance.
   * @private
   */
  onResume_: function() {
    if (this.currentUtterance_) {
      this.audioElement_.play();
    }
  }

};
250 | |
// Create the engine and register its listeners as soon as this event page
// is loaded.
new TtsExtension().init();
OLD | NEW |