OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/renderer/speech_recognition_dispatcher.h" | 5 #include "content/renderer/speech_recognition_dispatcher.h" |
6 | 6 |
7 #include "base/basictypes.h" | 7 #include "base/basictypes.h" |
8 #include "base/strings/utf_string_conversions.h" | 8 #include "base/strings/utf_string_conversions.h" |
9 #include "content/common/speech_recognition_messages.h" | 9 #include "content/common/speech_recognition_messages.h" |
| 10 #include "content/renderer/media/media_stream_audio_source.h" |
10 #include "content/renderer/render_view_impl.h" | 11 #include "content/renderer/render_view_impl.h" |
| 12 #include "content/renderer/speech_recognition_audio_source_provider.h" |
11 #include "third_party/WebKit/public/platform/WebString.h" | 13 #include "third_party/WebKit/public/platform/WebString.h" |
12 #include "third_party/WebKit/public/platform/WebVector.h" | 14 #include "third_party/WebKit/public/platform/WebVector.h" |
13 #include "third_party/WebKit/public/web/WebSpeechGrammar.h" | 15 #include "third_party/WebKit/public/web/WebSpeechGrammar.h" |
14 #include "third_party/WebKit/public/web/WebSpeechRecognitionParams.h" | 16 #include "third_party/WebKit/public/web/WebSpeechRecognitionParams.h" |
15 #include "third_party/WebKit/public/web/WebSpeechRecognitionResult.h" | 17 #include "third_party/WebKit/public/web/WebSpeechRecognitionResult.h" |
16 #include "third_party/WebKit/public/web/WebSpeechRecognizerClient.h" | 18 #include "third_party/WebKit/public/web/WebSpeechRecognizerClient.h" |
17 | 19 |
18 using blink::WebVector; | 20 using blink::WebVector; |
19 using blink::WebString; | 21 using blink::WebString; |
20 using blink::WebSpeechGrammar; | 22 using blink::WebSpeechGrammar; |
21 using blink::WebSpeechRecognitionHandle; | 23 using blink::WebSpeechRecognitionHandle; |
22 using blink::WebSpeechRecognitionResult; | 24 using blink::WebSpeechRecognitionResult; |
23 using blink::WebSpeechRecognitionParams; | 25 using blink::WebSpeechRecognitionParams; |
24 using blink::WebSpeechRecognizerClient; | 26 using blink::WebSpeechRecognizerClient; |
25 | 27 |
26 namespace content { | 28 namespace content { |
27 | 29 |
28 SpeechRecognitionDispatcher::SpeechRecognitionDispatcher( | 30 SpeechRecognitionDispatcher::SpeechRecognitionDispatcher( |
29 RenderViewImpl* render_view) | 31 RenderViewImpl* render_view) |
30 : RenderViewObserver(render_view), | 32 : RenderViewObserver(render_view), |
31 recognizer_client_(NULL), | 33 recognizer_client_(NULL), |
| 34 audio_track_set_(false), |
| 35 is_allowed_audio_track_(false), |
| 36 render_loop_(base::MessageLoopProxy::current()), |
32 next_id_(1) { | 37 next_id_(1) { |
33 } | 38 } |
34 | 39 |
35 SpeechRecognitionDispatcher::~SpeechRecognitionDispatcher() { | 40 SpeechRecognitionDispatcher::~SpeechRecognitionDispatcher() { |
36 } | 41 } |
37 | 42 |
38 void SpeechRecognitionDispatcher::AbortAllRecognitions() { | 43 void SpeechRecognitionDispatcher::AbortAllRecognitions() { |
| 44 audio_source_provider_.reset(); |
39 Send(new SpeechRecognitionHostMsg_AbortAllRequests( | 45 Send(new SpeechRecognitionHostMsg_AbortAllRequests( |
40 routing_id())); | 46 routing_id())); |
41 } | 47 } |
42 | 48 |
43 bool SpeechRecognitionDispatcher::OnMessageReceived( | 49 bool SpeechRecognitionDispatcher::OnMessageReceived( |
44 const IPC::Message& message) { | 50 const IPC::Message& message) { |
45 bool handled = true; | 51 bool handled = true; |
46 IPC_BEGIN_MESSAGE_MAP(SpeechRecognitionDispatcher, message) | 52 IPC_BEGIN_MESSAGE_MAP(SpeechRecognitionDispatcher, message) |
47 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Started, OnRecognitionStarted) | 53 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Started, OnRecognitionStarted) |
48 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioStarted, OnAudioStarted) | 54 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioStarted, OnAudioStarted) |
49 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundStarted, OnSoundStarted) | 55 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundStarted, OnSoundStarted) |
50 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundEnded, OnSoundEnded) | 56 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundEnded, OnSoundEnded) |
51 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioEnded, OnAudioEnded) | 57 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioEnded, OnAudioEnded) |
52 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ErrorOccurred, OnErrorOccurred) | 58 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ErrorOccurred, OnErrorOccurred) |
53 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Ended, OnRecognitionEnded) | 59 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Ended, OnRecognitionEnded) |
54 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ResultRetrieved, | 60 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ResultRetrieved, |
55 OnResultsRetrieved) | 61 OnResultsRetrieved) |
| 62 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioTrackReady, |
| 63 OnAudioTrackReady) |
| 64 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioChunkProcessed, |
| 65 OnAudioChunkProcessed) |
56 IPC_MESSAGE_UNHANDLED(handled = false) | 66 IPC_MESSAGE_UNHANDLED(handled = false) |
57 IPC_END_MESSAGE_MAP() | 67 IPC_END_MESSAGE_MAP() |
58 return handled; | 68 return handled; |
59 } | 69 } |
60 | 70 |
| 71 void SpeechRecognitionDispatcher::attach( |
| 72 const blink::WebSpeechRecognitionHandle& handle, |
| 73 const blink::WebMediaStreamTrack& audio_track, |
| 74 blink::WebSpeechRecognizerClient* recognizer_client) { |
| 75 |
| 76 // Check whether the track comes from an allowed source (microphone only for now). |
| 77 // TODO(burnik): Externalize the policy of allowed track types from the dispatcher. |
| 78 DCHECK(audio_track.source().type() == blink::WebMediaStreamSource::TypeAudio); |
| 79 MediaStreamAudioSource* native_source = |
| 80 static_cast<MediaStreamAudioSource*>(audio_track.source().extraData()); |
| 81 StreamDeviceInfo device_info = native_source->device_info(); |
| 82 is_allowed_audio_track_ = (device_info.device.type == |
| 83 content::MEDIA_DEVICE_AUDIO_CAPTURE); |
| 84 |
| 85 audio_track_ = audio_track; |
| 86 audio_track_set_ = true; |
| 87 } |
| 88 |
| 89 void SpeechRecognitionDispatcher::detach( |
| 90 const blink::WebSpeechRecognitionHandle& handle, |
| 91 blink::WebSpeechRecognizerClient* recognizer_client) { |
| 92 audio_track_set_ = false; |
| 93 } |
| 94 |
61 void SpeechRecognitionDispatcher::start( | 95 void SpeechRecognitionDispatcher::start( |
62 const WebSpeechRecognitionHandle& handle, | 96 const WebSpeechRecognitionHandle& handle, |
63 const WebSpeechRecognitionParams& params, | 97 const WebSpeechRecognitionParams& params, |
64 WebSpeechRecognizerClient* recognizer_client) { | 98 WebSpeechRecognizerClient* recognizer_client) { |
65 DCHECK(!recognizer_client_ || recognizer_client_ == recognizer_client); | 99 DCHECK(!recognizer_client_ || recognizer_client_ == recognizer_client); |
66 recognizer_client_ = recognizer_client; | 100 recognizer_client_ = recognizer_client; |
67 | 101 |
| 102 // Destroy any previous instance so it is not left waiting indefinitely on chunk ACKs. |
| 103 audio_source_provider_.reset(); |
| 104 |
| 105 if (audio_track_set_ && !is_allowed_audio_track_) { |
| 106 // Notify the client that the provided audio track is not supported. |
| 107 recognizer_client_->didReceiveError( |
| 108 handle, |
| 109 WebString("Provided audioTrack is not supported. Ignoring track."), |
| 110 WebSpeechRecognizerClient::NotAllowedError); |
| 111 } |
| 112 |
68 SpeechRecognitionHostMsg_StartRequest_Params msg_params; | 113 SpeechRecognitionHostMsg_StartRequest_Params msg_params; |
69 for (size_t i = 0; i < params.grammars().size(); ++i) { | 114 for (size_t i = 0; i < params.grammars().size(); ++i) { |
70 const WebSpeechGrammar& grammar = params.grammars()[i]; | 115 const WebSpeechGrammar& grammar = params.grammars()[i]; |
71 msg_params.grammars.push_back( | 116 msg_params.grammars.push_back( |
72 SpeechRecognitionGrammar(grammar.src().spec(), grammar.weight())); | 117 SpeechRecognitionGrammar(grammar.src().spec(), grammar.weight())); |
73 } | 118 } |
74 msg_params.language = base::UTF16ToUTF8(params.language()); | 119 msg_params.language = base::UTF16ToUTF8(params.language()); |
75 msg_params.max_hypotheses = static_cast<uint32>(params.maxAlternatives()); | 120 msg_params.max_hypotheses = static_cast<uint32>(params.maxAlternatives()); |
76 msg_params.continuous = params.continuous(); | 121 msg_params.continuous = params.continuous(); |
77 msg_params.interim_results = params.interimResults(); | 122 msg_params.interim_results = params.interimResults(); |
78 msg_params.origin_url = params.origin().toString().utf8(); | 123 msg_params.origin_url = params.origin().toString().utf8(); |
79 msg_params.render_view_id = routing_id(); | 124 msg_params.render_view_id = routing_id(); |
80 msg_params.request_id = GetOrCreateIDForHandle(handle); | 125 msg_params.request_id = GetOrCreateIDForHandle(handle); |
| 126 // Fall back to the default audio input when the track is not allowed. |
| 127 msg_params.using_audio_track = (audio_track_set_ && is_allowed_audio_track_); |
81 // The handle mapping will be removed in |OnRecognitionEnd|. | 128 // The handle mapping will be removed in |OnRecognitionEnd|. |
82 Send(new SpeechRecognitionHostMsg_StartRequest(msg_params)); | 129 Send(new SpeechRecognitionHostMsg_StartRequest(msg_params)); |
83 } | 130 } |
84 | 131 |
85 void SpeechRecognitionDispatcher::stop( | 132 void SpeechRecognitionDispatcher::stop( |
86 const WebSpeechRecognitionHandle& handle, | 133 const WebSpeechRecognitionHandle& handle, |
87 WebSpeechRecognizerClient* recognizer_client) { | 134 WebSpeechRecognizerClient* recognizer_client) { |
| 135 audio_source_provider_.reset(); |
88 // Ignore a |stop| issued without a matching |start|. | 136 // Ignore a |stop| issued without a matching |start|. |
89 if (recognizer_client_ != recognizer_client || !HandleExists(handle)) | 137 if (recognizer_client_ != recognizer_client || !HandleExists(handle)) |
90 return; | 138 return; |
91 Send(new SpeechRecognitionHostMsg_StopCaptureRequest( | 139 Send(new SpeechRecognitionHostMsg_StopCaptureRequest( |
92 routing_id(), GetOrCreateIDForHandle(handle))); | 140 routing_id(), GetOrCreateIDForHandle(handle))); |
93 } | 141 } |
94 | 142 |
95 void SpeechRecognitionDispatcher::abort( | 143 void SpeechRecognitionDispatcher::abort( |
96 const WebSpeechRecognitionHandle& handle, | 144 const WebSpeechRecognitionHandle& handle, |
97 WebSpeechRecognizerClient* recognizer_client) { | 145 WebSpeechRecognizerClient* recognizer_client) { |
| 146 audio_source_provider_.reset(); |
98 // Ignore an |abort| issued without a matching |start|. | 147 // Ignore an |abort| issued without a matching |start|. |
99 if (recognizer_client_ != recognizer_client || !HandleExists(handle)) | 148 if (recognizer_client_ != recognizer_client || !HandleExists(handle)) |
100 return; | 149 return; |
101 Send(new SpeechRecognitionHostMsg_AbortRequest( | 150 Send(new SpeechRecognitionHostMsg_AbortRequest( |
102 routing_id(), GetOrCreateIDForHandle(handle))); | 151 routing_id(), GetOrCreateIDForHandle(handle))); |
103 } | 152 } |
104 | 153 |
105 void SpeechRecognitionDispatcher::OnRecognitionStarted(int request_id) { | 154 void SpeechRecognitionDispatcher::OnRecognitionStarted(int request_id) { |
106 recognizer_client_->didStart(GetHandleFromID(request_id)); | 155 recognizer_client_->didStart(GetHandleFromID(request_id)); |
107 } | 156 } |
(...skipping 39 matching lines...)
147 NOTREACHED(); | 196 NOTREACHED(); |
148 return WebSpeechRecognizerClient::OtherError; | 197 return WebSpeechRecognizerClient::OtherError; |
149 } | 198 } |
150 | 199 |
151 void SpeechRecognitionDispatcher::OnErrorOccurred( | 200 void SpeechRecognitionDispatcher::OnErrorOccurred( |
152 int request_id, const SpeechRecognitionError& error) { | 201 int request_id, const SpeechRecognitionError& error) { |
153 if (error.code == SPEECH_RECOGNITION_ERROR_NO_MATCH) { | 202 if (error.code == SPEECH_RECOGNITION_ERROR_NO_MATCH) { |
154 recognizer_client_->didReceiveNoMatch(GetHandleFromID(request_id), | 203 recognizer_client_->didReceiveNoMatch(GetHandleFromID(request_id), |
155 WebSpeechRecognitionResult()); | 204 WebSpeechRecognitionResult()); |
156 } else { | 205 } else { |
| 206 audio_source_provider_.reset(); |
157 recognizer_client_->didReceiveError( | 207 recognizer_client_->didReceiveError( |
158 GetHandleFromID(request_id), | 208 GetHandleFromID(request_id), |
159 WebString(), // TODO(primiano): message? | 209 WebString(), // TODO(primiano): message? |
160 WebKitErrorCode(error.code)); | 210 WebKitErrorCode(error.code)); |
161 } | 211 } |
162 } | 212 } |
163 | 213 |
164 void SpeechRecognitionDispatcher::OnRecognitionEnded(int request_id) { | 214 void SpeechRecognitionDispatcher::OnRecognitionEnded(int request_id) { |
165 // TODO(tommi): It is possible that the handle isn't found in the array if | 215 // TODO(tommi): It is possible that the handle isn't found in the array if |
166 // the user just refreshed the page. It seems that we then get a notification | 216 // the user just refreshed the page. It seems that we then get a notification |
167 // for the previously loaded instance of the page. | 217 // for the previously loaded instance of the page. |
168 HandleMap::iterator iter = handle_map_.find(request_id); | 218 HandleMap::iterator iter = handle_map_.find(request_id); |
169 if (iter == handle_map_.end()) { | 219 if (iter == handle_map_.end()) { |
170 DLOG(ERROR) << "OnRecognitionEnded called for a handle that doesn't exist"; | 220 DLOG(ERROR) << "OnRecognitionEnded called for a handle that doesn't exist"; |
171 } else { | 221 } else { |
172 WebSpeechRecognitionHandle handle = iter->second; | 222 WebSpeechRecognitionHandle handle = iter->second; |
173 // Note: we need to erase the handle from the map *before* calling didEnd. | 223 // Note: we need to erase the handle from the map *before* calling didEnd. |
174 // didEnd may call back synchronously to start a new recognition session, | 224 // didEnd may call back synchronously to start a new recognition session, |
175 // and we don't want to delete the handle from the map after that happens. | 225 // and we don't want to delete the handle from the map after that happens. |
176 handle_map_.erase(request_id); | 226 handle_map_.erase(request_id); |
| 227 audio_source_provider_.reset(); |
177 recognizer_client_->didEnd(handle); | 228 recognizer_client_->didEnd(handle); |
178 } | 229 } |
179 } | 230 } |
180 | 231 |
181 void SpeechRecognitionDispatcher::OnResultsRetrieved( | 232 void SpeechRecognitionDispatcher::OnResultsRetrieved( |
182 int request_id, const SpeechRecognitionResults& results) { | 233 int request_id, const SpeechRecognitionResults& results) { |
183 size_t provisional_count = 0; | 234 size_t provisional_count = 0; |
184 SpeechRecognitionResults::const_iterator it = results.begin(); | 235 SpeechRecognitionResults::const_iterator it = results.begin(); |
185 for (; it != results.end(); ++it) { | 236 for (; it != results.end(); ++it) { |
186 if (it->is_provisional) | 237 if (it->is_provisional) |
(...skipping 17 matching lines...)
204 transcripts[i] = result.hypotheses[i].utterance; | 255 transcripts[i] = result.hypotheses[i].utterance; |
205 confidences[i] = static_cast<float>(result.hypotheses[i].confidence); | 256 confidences[i] = static_cast<float>(result.hypotheses[i].confidence); |
206 } | 257 } |
207 webkit_result->assign(transcripts, confidences, !result.is_provisional); | 258 webkit_result->assign(transcripts, confidences, !result.is_provisional); |
208 } | 259 } |
209 | 260 |
210 recognizer_client_->didReceiveResults( | 261 recognizer_client_->didReceiveResults( |
211 GetHandleFromID(request_id), final, provisional); | 262 GetHandleFromID(request_id), final, provisional); |
212 } | 263 } |
213 | 264 |
| 265 void SpeechRecognitionDispatcher::OnAudioError(int request_id) { |
| 266 // The browser must be notified on the render thread. |
| 267 if (!render_loop_->BelongsToCurrentThread()) { |
| 268 render_loop_->PostTask( |
| 269 FROM_HERE, |
| 270 base::Bind(&SpeechRecognitionDispatcher::OnAudioError, |
| 271 base::Unretained(this), request_id)); |
| 272 return; |
| 273 } |
| 274 audio_source_provider_.reset(); |
| 275 } |
| 276 |
| 277 void SpeechRecognitionDispatcher::OnAudioTrackReady( |
| 278 int request_id, |
| 279 const media::AudioParameters& params, |
| 280 base::SharedMemoryHandle handle, |
| 281 uint32 length) { |
| 282 |
| 283 // TODO(burnik): Log and DCHECK(!audio_source_provider_). |
| 284 if (audio_track_.isNull()) { |
| 285 audio_source_provider_.reset(); |
| 286 return; |
| 287 } |
| 288 |
| 289 audio_source_provider_.reset( |
| 290 new SpeechRecognitionAudioSourceProvider( |
| 291 audio_track_, params, handle, length, |
| 292 base::Bind(&SpeechRecognitionDispatcher::OnAudioData, |
| 293 base::Unretained(this), request_id), |
| 294 base::Bind(&SpeechRecognitionDispatcher::OnAudioError, |
| 295 base::Unretained(this), request_id))); |
| 296 } |
| 297 |
| 298 void SpeechRecognitionDispatcher::OnAudioChunkProcessed( |
| 299 int request_id) { |
| 300 |
| 301 // TODO(burnik): Log and DCHECK(!audio_source_provider_). |
| 302 if (audio_track_.isNull()) |
| 303 return; |
| 304 |
| 305 // Discard any message addressed to a destroyed instance. |
| 306 if (!audio_source_provider_.get()) |
| 307 return; |
| 308 |
| 309 audio_source_provider_->NotifyAudioBusConsumed(); |
| 310 } |
| 311 |
| 312 |
| 313 // TODO(burnik): Consider using a sync_socket. |
| 314 void SpeechRecognitionDispatcher::OnAudioData(int request_id) { |
| 315 // The browser must be notified on the render thread. |
| 316 if (!render_loop_->BelongsToCurrentThread()) { |
| 317 render_loop_->PostTask( |
| 318 FROM_HERE, |
| 319 base::Bind(&SpeechRecognitionDispatcher::OnAudioData, |
| 320 base::Unretained(this), request_id)); |
| 321 return; |
| 322 } |
| 323 // If the handle isn't found in the array, which might happen if the |
| 324 // recognition has been ended by the browser, delete the |
| 325 // |audio_source_provider_|. |
| 326 HandleMap::iterator iter = handle_map_.find(request_id); |
| 327 if (iter == handle_map_.end()) { |
| 328 audio_source_provider_.reset(); |
| 329 return; |
| 330 } |
| 331 |
| 332 Send(new SpeechRecognitionHostMsg_OnAudioTrackData(routing_id(), request_id)); |
| 333 } |
214 | 334 |
215 int SpeechRecognitionDispatcher::GetOrCreateIDForHandle( | 335 int SpeechRecognitionDispatcher::GetOrCreateIDForHandle( |
216 const WebSpeechRecognitionHandle& handle) { | 336 const WebSpeechRecognitionHandle& handle) { |
217 // Search first for an existing mapping. | 337 // Search first for an existing mapping. |
218 for (HandleMap::iterator iter = handle_map_.begin(); | 338 for (HandleMap::iterator iter = handle_map_.begin(); |
219 iter != handle_map_.end(); | 339 iter != handle_map_.end(); |
220 ++iter) { | 340 ++iter) { |
221 if (iter->second.equals(handle)) | 341 if (iter->second.equals(handle)) |
222 return iter->first; | 342 return iter->first; |
223 } | 343 } |
(...skipping 16 matching lines...)
240 } | 360 } |
241 | 361 |
242 const WebSpeechRecognitionHandle& SpeechRecognitionDispatcher::GetHandleFromID( | 362 const WebSpeechRecognitionHandle& SpeechRecognitionDispatcher::GetHandleFromID( |
243 int request_id) { | 363 int request_id) { |
244 HandleMap::iterator iter = handle_map_.find(request_id); | 364 HandleMap::iterator iter = handle_map_.find(request_id); |
245 DCHECK(iter != handle_map_.end()); | 365 DCHECK(iter != handle_map_.end()); |
246 return iter->second; | 366 return iter->second; |
247 } | 367 } |
248 | 368 |
249 } // namespace content | 369 } // namespace content |