OLD | NEW |
(Empty) | |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 /** |
| 6 * @fileoverview |
| 7 * This is a component extension that implements a text-to-speech (TTS) |
| 8 * engine powered by Google's speech synthesis API. |
| 9 * |
| 10 * This is an "event page", so it's not loaded when the API isn't being used, |
| 11 * and doesn't waste resources. When a web page or web app makes a speech |
| 12 * request and the parameters match one of the voices in this extension's |
| 13 * manifest, it makes a request to Google's API using Chrome's private key |
| 14 * and plays the resulting speech using HTML5 audio. |
| 15 */ |
| 16 |
/**
 * The main class for this extension. Adds listeners to
 * chrome.ttsEngine.onSpeak, onStop, onPause and onResume and implements
 * them using Google's speech synthesis API and an HTML5 audio element.
 * @constructor
 */
function TtsExtension() {}

TtsExtension.prototype = {
  /**
   * The url prefix of the speech server, including static query
   * parameters that don't change.
   * @type {string}
   * @const
   * @private
   */
  SPEECH_SERVER_URL_:
      'https://www.google.com/speech-api/v2/synthesize?' +
      'enc=mpeg&client=chromium',

  /**
   * Maximum utterance length, in UTF-16 code units. Both Chrome's tts
   * extension api and the web speech api specify 32k as the limit.
   * @type {number}
   * @const
   * @private
   */
  MAX_UTTERANCE_LENGTH_: 32768,

  /**
   * A mapping from language and gender to voice name, hardcoded for now
   * until the speech synthesis server capabilities response provides this.
   * The key of this map is of the form '<lang>-<gender>'.
   * @type {Object.<string, string>}
   * @private
   */
  LANG_AND_GENDER_TO_VOICE_NAME_: {
    'en-gb-male': 'rjs',
    'en-gb-female': 'fis'
  },

  /**
   * The arguments passed to the onSpeak event handler for the utterance
   * that's currently being spoken, plus a flag recording whether the audio
   * element has been pointed at this utterance's audio yet. Should be null
   * when no utterance is pending.
   *
   * @type {?{utterance: string, options: Object, callback: Function,
   *     audioRequested: boolean}}
   * @private
   */
  currentUtterance_: null,

  /**
   * The HTML5 audio element we use for playing the sound served by the
   * speech server.
   * @type {HTMLAudioElement}
   * @private
   */
  audioElement_: null,

  /**
   * A mapping from voice name to language and gender, derived from the
   * manifest file. This is used when the speech synthesis request
   * specifies a voice name. Replaced by a per-instance map in init().
   * @type {Object.<string, {lang: string, gender: string}>}
   * @private
   */
  voiceNameToLangAndGender_: {},

  /**
   * This is the main function called to initialize this extension.
   * Initializes data structures and adds event listeners.
   */
  init: function() {
    // Use a map owned by this instance instead of mutating the object
    // literal that lives on the shared prototype.
    this.voiceNameToLangAndGender_ = {};

    // Get voices from manifest.
    var voices = chrome.app.getDetails().tts_engine.voices;
    for (var i = 0; i < voices.length; i++) {
      this.voiceNameToLangAndGender_[voices[i].voice_name] = {
        lang: voices[i].lang,
        gender: voices[i].gender
      };
    }

    // Initialize the audio element and event listeners on it.
    this.audioElement_ = document.createElement('audio');
    document.body.appendChild(this.audioElement_);
    this.audioElement_.addEventListener(
        'ended', this.onStop_.bind(this), false);
    this.audioElement_.addEventListener(
        'canplaythrough', this.onStart_.bind(this), false);
    // Without this, a failed download would leave the utterance pending
    // forever because neither 'canplaythrough' nor 'ended' ever fires.
    this.audioElement_.addEventListener(
        'error', this.onError_.bind(this), false);

    // Install event listeners for the ttsEngine API.
    chrome.ttsEngine.onSpeak.addListener(this.onSpeak_.bind(this));
    chrome.ttsEngine.onStop.addListener(this.onStop_.bind(this));
    chrome.ttsEngine.onPause.addListener(this.onPause_.bind(this));
    chrome.ttsEngine.onResume.addListener(this.onResume_.bind(this));
  },

  /**
   * Handler for the chrome.ttsEngine.onSpeak interface.
   * Gets Chrome's Google API key and then uses it to generate a request
   * url for the requested speech utterance. Sets that url as the source
   * of the HTML5 audio element.
   * @param {string} utterance The text to be spoken.
   * @param {Object} options Options to control the speech, as defined
   *     in the Chrome ttsEngine extension API.
   * @param {Function} callback Function that receives the TTS events
   *     ('start', 'end', 'error') for this utterance.
   * @private
   */
  onSpeak_: function(utterance, options, callback) {
    utterance = this.truncateUtterance_(utterance);

    try {
      // First, stop any pending audio.
      this.onStop_();

      var request = {
        utterance: utterance,
        options: options,
        callback: callback,
        audioRequested: false
      };
      this.currentUtterance_ = request;

      var lang = options.lang;
      var gender = options.gender;
      var knownVoices = this.voiceNameToLangAndGender_;
      // A voice name from the manifest overrides lang and gender. An
      // unknown name falls back to the options instead of crashing.
      if (options.voiceName &&
          Object.prototype.hasOwnProperty.call(
              knownVoices, options.voiceName)) {
        lang = knownVoices[options.voiceName].lang;
        gender = knownVoices[options.voiceName].gender;
      }
      if (!lang)
        throw new Error('No language specified for the utterance.');

      // Look up the specific voice name for this language and gender.
      // If it's not in the map, it doesn't matter - the language will
      // be used directly. This is only used for languages where more
      // than one gender is actually available.
      var voiceKey = lang.toLowerCase() + '-' + gender;
      var voiceName = this.LANG_AND_GENDER_TO_VOICE_NAME_[voiceKey];

      // Build the query synchronously so that encoding failures (for
      // example a lone surrogate in the text) are reported by the catch
      // block below rather than thrown from the asynchronous callback.
      var query = this.buildQuery_(utterance, lang, voiceName, options);
      var baseUrl = this.SPEECH_SERVER_URL_;

      chrome.systemPrivate.getApiKey((function(apiKey) {
        // The utterance may have been stopped or replaced while the key
        // was being fetched; don't start a request nobody is waiting for.
        if (this.currentUtterance_ !== request)
          return;

        // This begins loading the audio but does not play it.
        // When enough of the audio has loaded to begin playback,
        // the 'canplaythrough' handler will call this.onStart_,
        // which sends a start event to the ttsEngine callback and
        // then begins playing audio.
        request.audioRequested = true;
        this.audioElement_.src =
            baseUrl + '&key=' + encodeURIComponent(apiKey) + query;
      }).bind(this));
    } catch (err) {
      console.error(String(err));
      this.currentUtterance_ = null;
      callback({
        'type': 'error',
        'errorMessage': String(err)
      });
    }
  },

  /**
   * Limits an utterance to MAX_UTTERANCE_LENGTH_ code units without
   * cutting a surrogate pair in half.
   * @param {string} utterance The text to be spoken.
   * @return {string} The utterance, truncated if it was too long.
   * @private
   */
  truncateUtterance_: function(utterance) {
    var limit = this.MAX_UTTERANCE_LENGTH_;
    if (utterance.length <= limit)
      return utterance;

    // If the last kept unit is a high surrogate, its partner would be
    // cut off; drop it too so the text stays well-formed.
    var lastCode = utterance.charCodeAt(limit - 1);
    if (lastCode >= 0xD800 && lastCode <= 0xDBFF)
      limit--;
    return utterance.substring(0, limit);
  },

  /**
   * Builds the per-utterance part of the request url (everything after
   * the API key), with each value percent-encoded.
   * @param {string} utterance The text to be spoken.
   * @param {string} lang The language code for the request.
   * @param {string|undefined} voiceName The server voice name, if any.
   * @param {Object} options The ttsEngine speech options.
   * @return {string} Query fragment beginning with '&text='.
   * @private
   */
  buildQuery_: function(utterance, lang, voiceName, options) {
    var isUsableNumber = function(value) {
      return typeof value === 'number' && isFinite(value);
    };

    var query = '&text=' + encodeURIComponent(utterance) +
        '&lang=' + encodeURIComponent(lang.toLowerCase());

    if (voiceName)
      query += '&name=' + encodeURIComponent(voiceName);

    // Input rate is between 0.1 and 10.0 with a default of 1.0.
    // Output speed is between 0.0 and 1.0 with a default of 0.5.
    if (isUsableNumber(options.rate))
      query += '&speed=' + this.toServerScale_(options.rate);

    // Input pitch is between 0.0 and 2.0 with a default of 1.0.
    // Output pitch is between 0.0 and 1.0 with a default of 0.5.
    // Note that 0 is a valid pitch, so test for presence, not truthiness.
    if (isUsableNumber(options.pitch))
      query += '&pitch=' + this.toServerScale_(options.pitch);

    return query;
  },

  /**
   * Converts a ttsEngine rate or pitch (default 1.0) to the server's
   * scale (default 0.5), clamped to the range 0.0 to 1.0.
   * @param {number} value The ttsEngine rate or pitch.
   * @return {number} The value on the server's scale.
   * @private
   */
  toServerScale_: function(value) {
    return Math.min(1.0, Math.max(0.0, value / 2.0));
  },

  /**
   * Handler for the chrome.ttsEngine.onStop interface.
   * Called either when the ttsEngine API requests us to stop, or when
   * we reach the end of the audio stream. Pause the audio element to
   * silence it, and send a callback to the ttsEngine API to let it know
   * that we've completed. Note that the ttsEngine API manages callback
   * messages and will automatically replace the 'end' event with a
   * more specific callback like 'interrupted' when sending it to the
   * TTS client.
   * @private
   */
  onStop_: function() {
    var finished = this.currentUtterance_;
    // Clear the state first so it is consistent even if the callback
    // throws or re-enters this object.
    this.currentUtterance_ = null;
    if (!finished)
      return;

    this.audioElement_.pause();
    finished.callback({
      'type': 'end',
      'charIndex': finished.utterance.length
    });
  },

  /**
   * Handler for the canplaythrough event on the audio element.
   * Called when the audio element has buffered enough audio to begin
   * playback. Send the 'start' event to the ttsEngine callback and
   * then begin playing the audio element.
   * @private
   */
  onStart_: function() {
    var current = this.currentUtterance_;
    // Ignore events from a previous utterance's audio that arrive before
    // the current utterance's url has been assigned.
    if (!current || !current.audioRequested)
      return;

    // Both APIs use the same range for volume, between 0.0 and 1.0.
    // Reset to full volume when none is given so that a previous
    // utterance's volume doesn't carry over.
    var volume = current.options.volume;
    this.audioElement_.volume = (typeof volume === 'number') ? volume : 1.0;

    this.audioElement_.play();
    current.callback({
      'type': 'start',
      'charIndex': 0
    });
  },

  /**
   * Handler for the error event on the audio element.
   * Reports the failure to the ttsEngine callback and clears the current
   * utterance so that it doesn't stay pending.
   * @private
   */
  onError_: function() {
    var failed = this.currentUtterance_;
    if (!failed || !failed.audioRequested)
      return;

    this.currentUtterance_ = null;
    failed.callback({
      'type': 'error',
      'errorMessage': 'Unable to load or play the synthesized speech audio.'
    });
  },

  /**
   * Handler for the chrome.ttsEngine.onPause interface.
   * Pauses audio if we're in the middle of an utterance.
   * @private
   */
  onPause_: function() {
    if (this.currentUtterance_) {
      this.audioElement_.pause();
    }
  },

  /**
   * Handler for the chrome.ttsEngine.onResume interface.
   * Resumes audio if we're in the middle of an utterance.
   * @private
   */
  onResume_: function() {
    if (this.currentUtterance_) {
      this.audioElement_.play();
    }
  }

};
| 250 |
// Entry point: create the single extension instance and register its
// ttsEngine and audio listeners as soon as the event page loads.
new TtsExtension().init();
OLD | NEW |