| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "content/renderer/speech_recognition_dispatcher.h" | 5 #include "content/renderer/speech_recognition_dispatcher.h" |
| 6 | 6 |
| 7 #include "base/basictypes.h" | 7 #include "base/basictypes.h" |
| 8 #include "base/strings/utf_string_conversions.h" | 8 #include "base/strings/utf_string_conversions.h" |
| 9 #include "content/common/speech_recognition_messages.h" | 9 #include "content/common/speech_recognition_messages.h" |
| 10 #include "content/renderer/render_view_impl.h" | 10 #include "content/renderer/render_view_impl.h" |
| 11 #include "third_party/WebKit/public/platform/WebString.h" | 11 #include "third_party/WebKit/public/platform/WebString.h" |
| 12 #include "third_party/WebKit/public/platform/WebVector.h" | 12 #include "third_party/WebKit/public/platform/WebVector.h" |
| 13 #include "third_party/WebKit/public/web/WebSpeechGrammar.h" | 13 #include "third_party/WebKit/public/web/WebSpeechGrammar.h" |
| 14 #include "third_party/WebKit/public/web/WebSpeechRecognitionParams.h" | 14 #include "third_party/WebKit/public/web/WebSpeechRecognitionParams.h" |
| 15 #include "third_party/WebKit/public/web/WebSpeechRecognitionResult.h" | 15 #include "third_party/WebKit/public/web/WebSpeechRecognitionResult.h" |
| 16 #include "third_party/WebKit/public/web/WebSpeechRecognizerClient.h" | 16 #include "third_party/WebKit/public/web/WebSpeechRecognizerClient.h" |
| 17 | 17 |
| 18 #if defined(ENABLE_WEBRTC) |
| 19 #include "content/renderer/media/speech_recognition_audio_sink.h" |
| 20 #endif |
| 21 |
| 18 using blink::WebVector; | 22 using blink::WebVector; |
| 19 using blink::WebString; | 23 using blink::WebString; |
| 20 using blink::WebSpeechGrammar; | 24 using blink::WebSpeechGrammar; |
| 21 using blink::WebSpeechRecognitionHandle; | 25 using blink::WebSpeechRecognitionHandle; |
| 22 using blink::WebSpeechRecognitionResult; | 26 using blink::WebSpeechRecognitionResult; |
| 23 using blink::WebSpeechRecognitionParams; | 27 using blink::WebSpeechRecognitionParams; |
| 24 using blink::WebSpeechRecognizerClient; | 28 using blink::WebSpeechRecognizerClient; |
| 25 | 29 |
| 26 namespace content { | 30 namespace content { |
| 27 | 31 |
| 28 SpeechRecognitionDispatcher::SpeechRecognitionDispatcher( | 32 SpeechRecognitionDispatcher::SpeechRecognitionDispatcher( |
| 29 RenderViewImpl* render_view) | 33 RenderViewImpl* render_view) |
| 30 : RenderViewObserver(render_view), | 34 : RenderViewObserver(render_view), |
| 31 recognizer_client_(NULL), | 35 recognizer_client_(NULL), |
| 32 next_id_(1) { | 36 next_id_(1) {} |
| 33 } | |
| 34 | 37 |
| 35 SpeechRecognitionDispatcher::~SpeechRecognitionDispatcher() { | 38 SpeechRecognitionDispatcher::~SpeechRecognitionDispatcher() {} |
| 36 } | |
| 37 | 39 |
| 38 void SpeechRecognitionDispatcher::AbortAllRecognitions() { | 40 void SpeechRecognitionDispatcher::AbortAllRecognitions() { |
| 41 ResetAudioSink(); |
| 39 Send(new SpeechRecognitionHostMsg_AbortAllRequests( | 42 Send(new SpeechRecognitionHostMsg_AbortAllRequests( |
| 40 routing_id())); | 43 routing_id())); |
| 41 } | 44 } |
| 42 | 45 |
| 43 bool SpeechRecognitionDispatcher::OnMessageReceived( | 46 bool SpeechRecognitionDispatcher::OnMessageReceived( |
| 44 const IPC::Message& message) { | 47 const IPC::Message& message) { |
| 45 bool handled = true; | 48 bool handled = true; |
| 46 IPC_BEGIN_MESSAGE_MAP(SpeechRecognitionDispatcher, message) | 49 IPC_BEGIN_MESSAGE_MAP(SpeechRecognitionDispatcher, message) |
| 47 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Started, OnRecognitionStarted) | 50 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Started, OnRecognitionStarted) |
| 48 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioStarted, OnAudioStarted) | 51 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioStarted, OnAudioStarted) |
| 49 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundStarted, OnSoundStarted) | 52 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundStarted, OnSoundStarted) |
| 50 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundEnded, OnSoundEnded) | 53 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundEnded, OnSoundEnded) |
| 51 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioEnded, OnAudioEnded) | 54 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioEnded, OnAudioEnded) |
| 52 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ErrorOccurred, OnErrorOccurred) | 55 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ErrorOccurred, OnErrorOccurred) |
| 53 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Ended, OnRecognitionEnded) | 56 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Ended, OnRecognitionEnded) |
| 54 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ResultRetrieved, | 57 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ResultRetrieved, |
| 55 OnResultsRetrieved) | 58 OnResultsRetrieved) |
| 59 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioReceiverReady, |
| 60 OnAudioReceiverReady) |
| 56 IPC_MESSAGE_UNHANDLED(handled = false) | 61 IPC_MESSAGE_UNHANDLED(handled = false) |
| 57 IPC_END_MESSAGE_MAP() | 62 IPC_END_MESSAGE_MAP() |
| 58 return handled; | 63 return handled; |
| 59 } | 64 } |
| 60 | 65 |
| 61 void SpeechRecognitionDispatcher::start( | 66 void SpeechRecognitionDispatcher::start( |
| 62 const WebSpeechRecognitionHandle& handle, | 67 const WebSpeechRecognitionHandle& handle, |
| 63 const WebSpeechRecognitionParams& params, | 68 const WebSpeechRecognitionParams& params, |
| 64 WebSpeechRecognizerClient* recognizer_client) { | 69 WebSpeechRecognizerClient* recognizer_client) { |
| 65 DCHECK(!recognizer_client_ || recognizer_client_ == recognizer_client); | 70 DCHECK(!recognizer_client_ || recognizer_client_ == recognizer_client); |
| 66 recognizer_client_ = recognizer_client; | 71 recognizer_client_ = recognizer_client; |
| 67 | 72 |
| 73 #if defined(ENABLE_WEBRTC) |
| 74 const blink::WebMediaStreamTrack track = params.audioTrack(); |
| 75 if (!track.isNull()) { |
| 76 // Check if this type of track is allowed by implemented policy. |
| 77 if (SpeechRecognitionAudioSink::IsSupportedTrack(track)) { |
| 78 audio_track_.assign(track); |
| 79 } else { |
| 80 audio_track_.reset(); |
| 81 // Notify user that the track used is not supported. |
| 82 recognizer_client_->didReceiveError( |
| 83 handle, |
| 84 WebString("Provided audioTrack is not supported."), |
| 85 WebSpeechRecognizerClient::AudioCaptureError); |
| 86 |
| 87 return; |
| 88 } |
| 89 } |
| 90 |
| 91 // Destroy any previous instance to detach from the audio track. |
| 92 // Each new session should reinstantiate the provider once the track is ready. |
| 93 ResetAudioSink(); |
| 94 #endif |
| 95 |
| 68 SpeechRecognitionHostMsg_StartRequest_Params msg_params; | 96 SpeechRecognitionHostMsg_StartRequest_Params msg_params; |
| 69 for (size_t i = 0; i < params.grammars().size(); ++i) { | 97 for (size_t i = 0; i < params.grammars().size(); ++i) { |
| 70 const WebSpeechGrammar& grammar = params.grammars()[i]; | 98 const WebSpeechGrammar& grammar = params.grammars()[i]; |
| 71 msg_params.grammars.push_back( | 99 msg_params.grammars.push_back( |
| 72 SpeechRecognitionGrammar(grammar.src().spec(), grammar.weight())); | 100 SpeechRecognitionGrammar(grammar.src().spec(), grammar.weight())); |
| 73 } | 101 } |
| 74 msg_params.language = base::UTF16ToUTF8(params.language()); | 102 msg_params.language = base::UTF16ToUTF8(params.language()); |
| 75 msg_params.max_hypotheses = static_cast<uint32>(params.maxAlternatives()); | 103 msg_params.max_hypotheses = static_cast<uint32>(params.maxAlternatives()); |
| 76 msg_params.continuous = params.continuous(); | 104 msg_params.continuous = params.continuous(); |
| 77 msg_params.interim_results = params.interimResults(); | 105 msg_params.interim_results = params.interimResults(); |
| 78 msg_params.origin_url = params.origin().toString().utf8(); | 106 msg_params.origin_url = params.origin().toString().utf8(); |
| 79 msg_params.render_view_id = routing_id(); | 107 msg_params.render_view_id = routing_id(); |
| 80 msg_params.request_id = GetOrCreateIDForHandle(handle); | 108 msg_params.request_id = GetOrCreateIDForHandle(handle); |
| 109 #if defined(ENABLE_WEBRTC) |
| 110 // Fall back to default input when the track is not allowed. |
| 111 msg_params.using_audio_track = !audio_track_.isNull(); |
| 112 #else |
| 113 msg_params.using_audio_track = false; |
| 114 #endif |
| 81 // The handle mapping will be removed in |OnRecognitionEnd|. | 115 // The handle mapping will be removed in |OnRecognitionEnd|. |
| 82 Send(new SpeechRecognitionHostMsg_StartRequest(msg_params)); | 116 Send(new SpeechRecognitionHostMsg_StartRequest(msg_params)); |
| 83 } | 117 } |
| 84 | 118 |
| 85 void SpeechRecognitionDispatcher::stop( | 119 void SpeechRecognitionDispatcher::stop( |
| 86 const WebSpeechRecognitionHandle& handle, | 120 const WebSpeechRecognitionHandle& handle, |
| 87 WebSpeechRecognizerClient* recognizer_client) { | 121 WebSpeechRecognizerClient* recognizer_client) { |
| 122 ResetAudioSink(); |
| 88 // Ignore a |stop| issued without a matching |start|. | 123 // Ignore a |stop| issued without a matching |start|. |
| 89 if (recognizer_client_ != recognizer_client || !HandleExists(handle)) | 124 if (recognizer_client_ != recognizer_client || !HandleExists(handle)) |
| 90 return; | 125 return; |
| 91 Send(new SpeechRecognitionHostMsg_StopCaptureRequest( | 126 Send(new SpeechRecognitionHostMsg_StopCaptureRequest( |
| 92 routing_id(), GetOrCreateIDForHandle(handle))); | 127 routing_id(), GetOrCreateIDForHandle(handle))); |
| 93 } | 128 } |
| 94 | 129 |
| 95 void SpeechRecognitionDispatcher::abort( | 130 void SpeechRecognitionDispatcher::abort( |
| 96 const WebSpeechRecognitionHandle& handle, | 131 const WebSpeechRecognitionHandle& handle, |
| 97 WebSpeechRecognizerClient* recognizer_client) { | 132 WebSpeechRecognizerClient* recognizer_client) { |
| 133 ResetAudioSink(); |
| 98 // Ignore an |abort| issued without a matching |start|. | 134 // Ignore an |abort| issued without a matching |start|. |
| 99 if (recognizer_client_ != recognizer_client || !HandleExists(handle)) | 135 if (recognizer_client_ != recognizer_client || !HandleExists(handle)) |
| 100 return; | 136 return; |
| 101 Send(new SpeechRecognitionHostMsg_AbortRequest( | 137 Send(new SpeechRecognitionHostMsg_AbortRequest( |
| 102 routing_id(), GetOrCreateIDForHandle(handle))); | 138 routing_id(), GetOrCreateIDForHandle(handle))); |
| 103 } | 139 } |
| 104 | 140 |
| 105 void SpeechRecognitionDispatcher::OnRecognitionStarted(int request_id) { | 141 void SpeechRecognitionDispatcher::OnRecognitionStarted(int request_id) { |
| 106 recognizer_client_->didStart(GetHandleFromID(request_id)); | 142 recognizer_client_->didStart(GetHandleFromID(request_id)); |
| 107 } | 143 } |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 147 NOTREACHED(); | 183 NOTREACHED(); |
| 148 return WebSpeechRecognizerClient::OtherError; | 184 return WebSpeechRecognizerClient::OtherError; |
| 149 } | 185 } |
| 150 | 186 |
| 151 void SpeechRecognitionDispatcher::OnErrorOccurred( | 187 void SpeechRecognitionDispatcher::OnErrorOccurred( |
| 152 int request_id, const SpeechRecognitionError& error) { | 188 int request_id, const SpeechRecognitionError& error) { |
| 153 if (error.code == SPEECH_RECOGNITION_ERROR_NO_MATCH) { | 189 if (error.code == SPEECH_RECOGNITION_ERROR_NO_MATCH) { |
| 154 recognizer_client_->didReceiveNoMatch(GetHandleFromID(request_id), | 190 recognizer_client_->didReceiveNoMatch(GetHandleFromID(request_id), |
| 155 WebSpeechRecognitionResult()); | 191 WebSpeechRecognitionResult()); |
| 156 } else { | 192 } else { |
| 193 ResetAudioSink(); |
| 157 recognizer_client_->didReceiveError( | 194 recognizer_client_->didReceiveError( |
| 158 GetHandleFromID(request_id), | 195 GetHandleFromID(request_id), |
| 159 WebString(), // TODO(primiano): message? | 196 WebString(), // TODO(primiano): message? |
| 160 WebKitErrorCode(error.code)); | 197 WebKitErrorCode(error.code)); |
| 161 } | 198 } |
| 162 } | 199 } |
| 163 | 200 |
| 164 void SpeechRecognitionDispatcher::OnRecognitionEnded(int request_id) { | 201 void SpeechRecognitionDispatcher::OnRecognitionEnded(int request_id) { |
| 165 // TODO(tommi): It is possible that the handle isn't found in the array if | 202 // TODO(tommi): It is possible that the handle isn't found in the array if |
| 166 // the user just refreshed the page. It seems that we then get a notification | 203 // the user just refreshed the page. It seems that we then get a notification |
| 167 // for the previously loaded instance of the page. | 204 // for the previously loaded instance of the page. |
| 168 HandleMap::iterator iter = handle_map_.find(request_id); | 205 HandleMap::iterator iter = handle_map_.find(request_id); |
| 169 if (iter == handle_map_.end()) { | 206 if (iter == handle_map_.end()) { |
| 170 DLOG(ERROR) << "OnRecognitionEnded called for a handle that doesn't exist"; | 207 DLOG(ERROR) << "OnRecognitionEnded called for a handle that doesn't exist"; |
| 171 } else { | 208 } else { |
| 172 WebSpeechRecognitionHandle handle = iter->second; | 209 WebSpeechRecognitionHandle handle = iter->second; |
| 173 // Note: we need to erase the handle from the map *before* calling didEnd. | 210 // Note: we need to erase the handle from the map *before* calling didEnd. |
| 174 // didEnd may call back synchronously to start a new recognition session, | 211 // didEnd may call back synchronously to start a new recognition session, |
| 175 // and we don't want to delete the handle from the map after that happens. | 212 // and we don't want to delete the handle from the map after that happens. |
| 176 handle_map_.erase(request_id); | 213 handle_map_.erase(request_id); |
| 214 ResetAudioSink(); |
| 177 recognizer_client_->didEnd(handle); | 215 recognizer_client_->didEnd(handle); |
| 178 } | 216 } |
| 179 } | 217 } |
| 180 | 218 |
| 181 void SpeechRecognitionDispatcher::OnResultsRetrieved( | 219 void SpeechRecognitionDispatcher::OnResultsRetrieved( |
| 182 int request_id, const SpeechRecognitionResults& results) { | 220 int request_id, const SpeechRecognitionResults& results) { |
| 183 size_t provisional_count = 0; | 221 size_t provisional_count = 0; |
| 184 SpeechRecognitionResults::const_iterator it = results.begin(); | 222 SpeechRecognitionResults::const_iterator it = results.begin(); |
| 185 for (; it != results.end(); ++it) { | 223 for (; it != results.end(); ++it) { |
| 186 if (it->is_provisional) | 224 if (it->is_provisional) |
| (...skipping 17 matching lines...) Expand all Loading... |
| 204 transcripts[i] = result.hypotheses[i].utterance; | 242 transcripts[i] = result.hypotheses[i].utterance; |
| 205 confidences[i] = static_cast<float>(result.hypotheses[i].confidence); | 243 confidences[i] = static_cast<float>(result.hypotheses[i].confidence); |
| 206 } | 244 } |
| 207 webkit_result->assign(transcripts, confidences, !result.is_provisional); | 245 webkit_result->assign(transcripts, confidences, !result.is_provisional); |
| 208 } | 246 } |
| 209 | 247 |
| 210 recognizer_client_->didReceiveResults( | 248 recognizer_client_->didReceiveResults( |
| 211 GetHandleFromID(request_id), final, provisional); | 249 GetHandleFromID(request_id), final, provisional); |
| 212 } | 250 } |
| 213 | 251 |
| 252 void SpeechRecognitionDispatcher::OnAudioReceiverReady( |
| 253 int request_id, |
| 254 const media::AudioParameters& params, |
| 255 const base::SharedMemoryHandle memory, |
| 256 const base::SyncSocket::TransitDescriptor descriptor) { |
| 257 #if defined(ENABLE_WEBRTC) |
| 258 DCHECK(!speech_audio_sink_.get()); |
| 259 if (audio_track_.isNull()) { |
| 260 ResetAudioSink(); |
| 261 return; |
| 262 } |
| 263 |
| 264 // The instantiation and type of SyncSocket is up to the client since it |
| 265 // is dependency injected to the SpeechRecognitionAudioSink. |
| 266 scoped_ptr<base::SyncSocket> socket(new base::CancelableSyncSocket( |
| 267 base::SyncSocket::UnwrapHandle(descriptor))); |
| 268 |
| 269 speech_audio_sink_.reset(new SpeechRecognitionAudioSink( |
| 270 audio_track_, params, memory, socket.Pass(), |
| 271 base::Bind(&SpeechRecognitionDispatcher::ResetAudioSink, |
| 272 base::Unretained(this)))); |
| 273 #endif |
| 274 } |
| 214 | 275 |
| 215 int SpeechRecognitionDispatcher::GetOrCreateIDForHandle( | 276 int SpeechRecognitionDispatcher::GetOrCreateIDForHandle( |
| 216 const WebSpeechRecognitionHandle& handle) { | 277 const WebSpeechRecognitionHandle& handle) { |
| 217 // Search first for an existing mapping. | 278 // Search first for an existing mapping. |
| 218 for (HandleMap::iterator iter = handle_map_.begin(); | 279 for (HandleMap::iterator iter = handle_map_.begin(); |
| 219 iter != handle_map_.end(); | 280 iter != handle_map_.end(); |
| 220 ++iter) { | 281 ++iter) { |
| 221 if (iter->second.equals(handle)) | 282 if (iter->second.equals(handle)) |
| 222 return iter->first; | 283 return iter->first; |
| 223 } | 284 } |
| 224 // If no existing mapping found, create a new one. | 285 // If no existing mapping found, create a new one. |
| 225 const int new_id = next_id_; | 286 const int new_id = next_id_; |
| 226 handle_map_[new_id] = handle; | 287 handle_map_[new_id] = handle; |
| 227 ++next_id_; | 288 ++next_id_; |
| 228 return new_id; | 289 return new_id; |
| 229 } | 290 } |
| 230 | 291 |
| 231 bool SpeechRecognitionDispatcher::HandleExists( | 292 bool SpeechRecognitionDispatcher::HandleExists( |
| 232 const WebSpeechRecognitionHandle& handle) { | 293 const WebSpeechRecognitionHandle& handle) { |
| 233 for (HandleMap::iterator iter = handle_map_.begin(); | 294 for (HandleMap::iterator iter = handle_map_.begin(); |
| 234 iter != handle_map_.end(); | 295 iter != handle_map_.end(); |
| 235 ++iter) { | 296 ++iter) { |
| 236 if (iter->second.equals(handle)) | 297 if (iter->second.equals(handle)) |
| 237 return true; | 298 return true; |
| 238 } | 299 } |
| 239 return false; | 300 return false; |
| 240 } | 301 } |
| 241 | 302 |
| 303 void SpeechRecognitionDispatcher::ResetAudioSink() { |
| 304 #if defined(ENABLE_WEBRTC) |
| 305 speech_audio_sink_.reset(); |
| 306 #endif |
| 307 } |
| 308 |
| 242 const WebSpeechRecognitionHandle& SpeechRecognitionDispatcher::GetHandleFromID( | 309 const WebSpeechRecognitionHandle& SpeechRecognitionDispatcher::GetHandleFromID( |
| 243 int request_id) { | 310 int request_id) { |
| 244 HandleMap::iterator iter = handle_map_.find(request_id); | 311 HandleMap::iterator iter = handle_map_.find(request_id); |
| 245 DCHECK(iter != handle_map_.end()); | 312 DCHECK(iter != handle_map_.end()); |
| 246 return iter->second; | 313 return iter->second; |
| 247 } | 314 } |
| 248 | 315 |
| 249 } // namespace content | 316 } // namespace content |
| OLD | NEW |