chrome/browser/resources/network_speech_synthesis/tts_extension.js - Issue 27034009: Implement Google network speech synthesis

Unified Diff: chrome/browser/resources/network_speech_synthesis/tts_extension.js

Issue 27034009: Implement Google network speech synthesis (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Revert change to register_engine for now, rebase for relanding Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « chrome/browser/resources/network_speech_synthesis/manifest.json ('k') | chrome/browser/speech/extension_api/tts_engine_extension_api.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: chrome/browser/resources/network_speech_synthesis/tts_extension.js

diff --git a/chrome/browser/resources/network_speech_synthesis/tts_extension.js b/chrome/browser/resources/network_speech_synthesis/tts_extension.js

new file mode 100644

index 0000000000000000000000000000000000000000..b2a3e7173b92a6a3be89657eb289e4d65613a5aa

--- /dev/null

+++ b/chrome/browser/resources/network_speech_synthesis/tts_extension.js

@@ -0,0 +1,251 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+/**

+ * @fileoverview

+ * This is a component extension that implements a text-to-speech (TTS)

+ * engine powered by Google's speech synthesis API.

+ *

+ * This is an "event page", so it's not loaded when the API isn't being used,

+ * and doesn't waste resources. When a web page or web app makes a speech

+ * request and the parameters match one of the voices in this extension's

+ * manifest, it makes a request to Google's API using Chrome's private key

+ * and plays the resulting speech using HTML5 audio.

+ */

/**
 * The main class for this extension. Adds listeners to
 * chrome.ttsEngine.onSpeak, onStop, onPause and onResume and implements
 * them using Google's speech synthesis API.
 * @constructor
 */
function TtsExtension() {}

TtsExtension.prototype = {
  /**
   * The url prefix of the speech server, including static query
   * parameters that don't change.
   * @type {string}
   * @const
   * @private
   */
  SPEECH_SERVER_URL_:
      'https://www.google.com/speech-api/v2/synthesize?' +
      'enc=mpeg&client=chromium',

  /**
   * The maximum number of characters of an utterance that are sent to the
   * speech server; anything beyond this is dropped by onSpeak_.
   * @type {number}
   * @const
   * @private
   */
  MAX_UTTERANCE_LENGTH_: 32768,

  /**
   * A mapping from language and gender to voice name, hardcoded for now
   * until the speech synthesis server capabilities response provides this.
   * The key of this map is of the form '<lang>-<gender>', with the
   * language code in lower case.
   * @type {Object.<string, string>}
   * @private
   */
  LANG_AND_GENDER_TO_VOICE_NAME_: {
    'en-gb-male': 'rjs',
    'en-gb-female': 'fis',
  },

  /**
   * The arguments passed to the onSpeak event handler for the utterance
   * that's currently being spoken. Should be null when no utterance is
   * pending.
   *
   * @type {?{utterance: string, options: Object, callback: Function}}
   * @private
   */
  currentUtterance_: null,

  /**
   * The HTML5 audio element we use for playing the sound served by the
   * speech server. Created by init().
   * @type {HTMLAudioElement}
   * @private
   */
  audioElement_: null,

  /**
   * A mapping from voice name to language and gender, derived from the
   * manifest file. This is used in case the speech synthesis request
   * specifies a voice name but doesn't specify a language code or gender.
   * init() replaces this with a per-instance map, so the object on the
   * prototype itself is never written to.
   * @type {Object.<string, {lang: string, gender: string}>}
   * @private
   */
  voiceNameToLangAndGender_: {},

  /**
   * This is the main function called to initialize this extension.
   * Initializes data structures and adds event listeners.
   */
  init: function() {
    // Get voices from manifest. Build a fresh map on the instance rather
    // than filling in the object shared through the prototype.
    var voices = chrome.app.getDetails().tts_engine.voices;
    var voiceMap = {};
    for (var i = 0; i < voices.length; i++) {
      voiceMap[voices[i].voice_name] = {
        lang: voices[i].lang,
        gender: voices[i].gender
      };
    }
    this.voiceNameToLangAndGender_ = voiceMap;

    // Initialize the audio element and event listeners on it.
    this.audioElement_ = document.createElement('audio');
    document.body.appendChild(this.audioElement_);
    this.audioElement_.addEventListener(
        'ended', this.onStop_.bind(this), false);
    this.audioElement_.addEventListener(
        'canplaythrough', this.onStart_.bind(this), false);

    // Install event listeners for the ttsEngine API.
    chrome.ttsEngine.onSpeak.addListener(this.onSpeak_.bind(this));
    chrome.ttsEngine.onStop.addListener(this.onStop_.bind(this));
    chrome.ttsEngine.onPause.addListener(this.onPause_.bind(this));
    chrome.ttsEngine.onResume.addListener(this.onResume_.bind(this));
  },

  /**
   * Handler for the chrome.ttsEngine.onSpeak interface.
   * Gets Chrome's Google API key and then uses it to generate a request
   * url for the requested speech utterance. Sets that url as the source
   * of the HTML5 audio element.
   * @param {string} utterance The text to be spoken.
   * @param {Object} options Options to control the speech, as defined
   *     in the Chrome ttsEngine extension API. This handler reads lang,
   *     gender, voiceName, rate and pitch; volume is read by onStart_.
   * @param {function(Object)} callback Receives event objects for this
   *     utterance: 'start' and 'end' events carrying a charIndex, or an
   *     'error' event carrying an errorMessage.
   * @private
   */
  onSpeak_: function(utterance, options, callback) {
    // Truncate the utterance if it's too long. Both Chrome's tts
    // extension api and the web speech api specify 32k as the
    // maximum limit for an utterance.
    if (utterance.length > this.MAX_UTTERANCE_LENGTH_)
      utterance = utterance.substring(0, this.MAX_UTTERANCE_LENGTH_);

    try {
      // First, stop any pending audio.
      this.onStop_();

      var current = {
        utterance: utterance,
        options: options,
        callback: callback
      };
      this.currentUtterance_ = current;

      var lang = options.lang;
      var gender = options.gender;
      if (options.voiceName) {
        // A voice name takes precedence over any lang/gender in options.
        // A name that is not in the manifest-derived map makes the
        // property access below throw, which is reported through the
        // catch block as an 'error' event.
        var voice = this.voiceNameToLangAndGender_[options.voiceName];
        lang = voice.lang;
        gender = voice.gender;
      }

      // Look up the specific voice name for this language and gender.
      // If it's not in the map, it doesn't matter - the language will
      // be used directly. This is only used for languages where more
      // than one gender is actually available.
      var langCode = lang.toLowerCase();
      var voiceName =
          this.LANG_AND_GENDER_TO_VOICE_NAME_[langCode + '-' + gender];
      var baseUrl = this.SPEECH_SERVER_URL_;

      chrome.systemPrivate.getApiKey((function(apiKey) {
        // The key arrives asynchronously. If this utterance was stopped
        // or replaced in the meantime, don't load audio for it.
        if (this.currentUtterance_ !== current)
          return;

        // encodeURIComponent (unlike the deprecated escape()) emits
        // UTF-8 percent-encoding, so non-ASCII text survives the trip.
        var url = baseUrl;
        url += '&key=' + encodeURIComponent(apiKey);
        url += '&text=' + encodeURIComponent(utterance);
        url += '&lang=' + encodeURIComponent(langCode);

        if (voiceName)
          url += '&name=' + encodeURIComponent(voiceName);

        if (options.rate) {
          // Input rate is between 0.1 and 10.0 with a default of 1.0.
          // Output speed is between 0.0 and 1.0 with a default of 0.5.
          // NOTE(review): rates above 2.0 map to a speed above 1.0;
          // confirm how the server treats out-of-range values.
          url += '&speed=' + (options.rate / 2.0);
        }

        if (options.pitch) {
          // Input pitch is between 0.0 and 2.0 with a default of 1.0.
          // Output pitch is between 0.0 and 1.0 with a default of 0.5.
          url += '&pitch=' + (options.pitch / 2.0);
        }

        // This begins loading the audio but does not play it.
        // When enough of the audio has loaded to begin playback,
        // the 'canplaythrough' handler will call this.onStart_,
        // which sends a start event to the ttsEngine callback and
        // then begins playing audio.
        this.audioElement_.src = url;
      }).bind(this));
    } catch (err) {
      console.error(String(err));
      callback({
        'type': 'error',
        'errorMessage': String(err)
      });
      this.currentUtterance_ = null;
    }
  },

  /**
   * Handler for the chrome.ttsEngine.onStop interface.
   * Called either when the ttsEngine API requests us to stop, or when
   * we reach the end of the audio stream. Pause the audio element to
   * silence it, and send a callback to the ttsEngine API to let it know
   * that we've completed. Note that the ttsEngine API manages callback
   * messages and will automatically replace the 'end' event with a
   * more specific callback like 'interrupted' when sending it to the
   * TTS client.
   * @private
   */
  onStop_: function() {
    if (this.currentUtterance_) {
      this.audioElement_.pause();
      this.currentUtterance_.callback({
        'type': 'end',
        'charIndex': this.currentUtterance_.utterance.length
      });
    }
    this.currentUtterance_ = null;
  },

  /**
   * Handler for the canplaythrough event on the audio element.
   * Called when the audio element has buffered enough audio to begin
   * playback. Send the 'start' event to the ttsEngine callback and
   * then begin playing the audio element.
   * @private
   */
  onStart_: function() {
    if (this.currentUtterance_) {
      if (this.currentUtterance_.options.volume !== undefined) {
        // Both APIs use the same range for volume, between 0.0 and 1.0.
        this.audioElement_.volume = this.currentUtterance_.options.volume;
      }
      this.audioElement_.play();
      this.currentUtterance_.callback({
        'type': 'start',
        'charIndex': 0
      });
    }
  },

  /**
   * Handler for the chrome.ttsEngine.onPause interface.
   * Pauses audio if we're in the middle of an utterance.
   * @private
   */
  onPause_: function() {
    if (this.currentUtterance_) {
      this.audioElement_.pause();
    }
  },

  /**
   * Handler for the chrome.ttsEngine.onResume interface.
   * Resumes audio if we're in the middle of an utterance.
   * @private
   */
  onResume_: function() {
    if (this.currentUtterance_) {
      this.audioElement_.play();
    }
  }
};

// Create the extension object and register its ttsEngine and audio
// listeners as soon as this script is evaluated.
new TtsExtension().init();