Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(716)

Side by Side Diff: content/renderer/speech_recognition_dispatcher.cc

Issue 499233003: Binding media stream audio track to speech recognition [renderer] (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: style fix Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/renderer/speech_recognition_dispatcher.h" 5 #include "content/renderer/speech_recognition_dispatcher.h"
6 6
7 #include "base/basictypes.h" 7 #include "base/basictypes.h"
8 #include "base/strings/utf_string_conversions.h" 8 #include "base/strings/utf_string_conversions.h"
9 #include "content/common/speech_recognition_messages.h" 9 #include "content/common/speech_recognition_messages.h"
10 #include "content/renderer/media/media_stream_audio_source.h"
10 #include "content/renderer/render_view_impl.h" 11 #include "content/renderer/render_view_impl.h"
12 #include "content/renderer/speech_recognition_audio_source_provider.h"
11 #include "third_party/WebKit/public/platform/WebString.h" 13 #include "third_party/WebKit/public/platform/WebString.h"
12 #include "third_party/WebKit/public/platform/WebVector.h" 14 #include "third_party/WebKit/public/platform/WebVector.h"
13 #include "third_party/WebKit/public/web/WebSpeechGrammar.h" 15 #include "third_party/WebKit/public/web/WebSpeechGrammar.h"
14 #include "third_party/WebKit/public/web/WebSpeechRecognitionParams.h" 16 #include "third_party/WebKit/public/web/WebSpeechRecognitionParams.h"
15 #include "third_party/WebKit/public/web/WebSpeechRecognitionResult.h" 17 #include "third_party/WebKit/public/web/WebSpeechRecognitionResult.h"
16 #include "third_party/WebKit/public/web/WebSpeechRecognizerClient.h" 18 #include "third_party/WebKit/public/web/WebSpeechRecognizerClient.h"
17 19
18 using blink::WebVector; 20 using blink::WebVector;
19 using blink::WebString; 21 using blink::WebString;
20 using blink::WebSpeechGrammar; 22 using blink::WebSpeechGrammar;
21 using blink::WebSpeechRecognitionHandle; 23 using blink::WebSpeechRecognitionHandle;
22 using blink::WebSpeechRecognitionResult; 24 using blink::WebSpeechRecognitionResult;
23 using blink::WebSpeechRecognitionParams; 25 using blink::WebSpeechRecognitionParams;
24 using blink::WebSpeechRecognizerClient; 26 using blink::WebSpeechRecognizerClient;
25 27
26 namespace content { 28 namespace content {
27 29
// One dispatcher per RenderView. Caches the current (render thread) message
// loop at construction so audio callbacks arriving on other threads can be
// bounced back to it (see OnAudioData/OnAudioError).
SpeechRecognitionDispatcher::SpeechRecognitionDispatcher(
    RenderViewImpl* render_view)
    : RenderViewObserver(render_view),
      recognizer_client_(NULL),
      audio_track_set_(false),
      is_allowed_audio_track_(false),
      render_loop_(base::MessageLoopProxy::current()),
      next_id_(1) {
}
34 39
// Members (including |audio_source_provider_|) are released by their own
// destructors; nothing explicit to do here.
SpeechRecognitionDispatcher::~SpeechRecognitionDispatcher() {
}
37 42
// Aborts every outstanding recognition session for this view. Drops the
// audio source provider first so no further audio chunks are produced, then
// tells the browser to abort all requests for this routing id.
void SpeechRecognitionDispatcher::AbortAllRecognitions() {
  audio_source_provider_.reset();
  Send(new SpeechRecognitionHostMsg_AbortAllRequests(
      routing_id()));
}
42 48
// Dispatches IPC messages from the browser-side speech recognition host to
// the On* handlers below. Returns true if |message| was handled here.
bool SpeechRecognitionDispatcher::OnMessageReceived(
    const IPC::Message& message) {
  bool handled = true;
  IPC_BEGIN_MESSAGE_MAP(SpeechRecognitionDispatcher, message)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Started, OnRecognitionStarted)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioStarted, OnAudioStarted)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundStarted, OnSoundStarted)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundEnded, OnSoundEnded)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioEnded, OnAudioEnded)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ErrorOccurred, OnErrorOccurred)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Ended, OnRecognitionEnded)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ResultRetrieved,
                        OnResultsRetrieved)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioTrackReady,
                        OnAudioTrackReady)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioChunkProcessed,
                        OnAudioChunkProcessed)
    IPC_MESSAGE_UNHANDLED(handled = false)
  IPC_END_MESSAGE_MAP()
  return handled;
}
60 70
71 void SpeechRecognitionDispatcher::attach(
72 const blink::WebSpeechRecognitionHandle& handle,
73 const blink::WebMediaStreamTrack& audio_track,
74 blink::WebSpeechRecognizerClient* recognizer_client) {
75
76 // Check if track is from an allowed source (microphone only for now)
77 // TODO(burnik): externalize the policy of allowed track types from dispatcher
78 DCHECK(audio_track.source().type() == blink::WebMediaStreamSource::TypeAudio);
79 MediaStreamAudioSource* native_source =
80 static_cast <MediaStreamAudioSource*>(audio_track.source().extraData());
81 StreamDeviceInfo device_info = native_source->device_info();
82 is_allowed_audio_track_ = (device_info.device.type ==
83 content::MEDIA_DEVICE_AUDIO_CAPTURE);
84
85 audio_track_ = audio_track;
86 audio_track_set_ = true;
87 }
88
// Unbinds the previously attached audio track; later sessions use the
// default audio input again.
// NOTE(review): Only the flag is cleared -- |audio_track_| itself still
// references the old track (OnAudioTrackReady checks audio_track_.isNull()).
// Confirm whether the track should also be reset here.
void SpeechRecognitionDispatcher::detach(
    const blink::WebSpeechRecognitionHandle& handle,
    blink::WebSpeechRecognizerClient* recognizer_client) {
  audio_track_set_ = false;
}
94
// Starts a recognition session for |handle|. If an attached audio track is
// allowed (microphone capture), the browser is asked to use it via
// |using_audio_track|; otherwise the client is notified with a
// NotAllowedError and the default input is used instead.
void SpeechRecognitionDispatcher::start(
    const WebSpeechRecognitionHandle& handle,
    const WebSpeechRecognitionParams& params,
    WebSpeechRecognizerClient* recognizer_client) {
  DCHECK(!recognizer_client_ || recognizer_client_ == recognizer_client);
  recognizer_client_ = recognizer_client;

  // Destroy any previous provider instance so it is not starved waiting on
  // chunk ACKs that will never arrive.
  audio_source_provider_.reset();

  if (audio_track_set_ && !is_allowed_audio_track_) {
    // Notify the user that the track used is not supported.
    recognizer_client_->didReceiveError(
        handle,
        WebString("Provided audioTrack is not supported. Ignoring track."),
        WebSpeechRecognizerClient::NotAllowedError);
  }

  SpeechRecognitionHostMsg_StartRequest_Params msg_params;
  for (size_t i = 0; i < params.grammars().size(); ++i) {
    const WebSpeechGrammar& grammar = params.grammars()[i];
    msg_params.grammars.push_back(
        SpeechRecognitionGrammar(grammar.src().spec(), grammar.weight()));
  }
  msg_params.language = base::UTF16ToUTF8(params.language());
  msg_params.max_hypotheses = static_cast<uint32>(params.maxAlternatives());
  msg_params.continuous = params.continuous();
  msg_params.interim_results = params.interimResults();
  msg_params.origin_url = params.origin().toString().utf8();
  msg_params.render_view_id = routing_id();
  msg_params.request_id = GetOrCreateIDForHandle(handle);
  // Fall back to the default input when the track is not allowed.
  msg_params.using_audio_track = (audio_track_set_ && is_allowed_audio_track_);
  // The handle mapping will be removed in |OnRecognitionEnd|.
  Send(new SpeechRecognitionHostMsg_StartRequest(msg_params));
}
84 131
// Stops audio capture for the session identified by |handle|.
// NOTE(review): The audio source provider is torn down even when the stop is
// ignored below as unmatched -- confirm this unconditional reset is intended.
void SpeechRecognitionDispatcher::stop(
    const WebSpeechRecognitionHandle& handle,
    WebSpeechRecognizerClient* recognizer_client) {
  audio_source_provider_.reset();
  // Ignore a |stop| issued without a matching |start|.
  if (recognizer_client_ != recognizer_client || !HandleExists(handle))
    return;
  Send(new SpeechRecognitionHostMsg_StopCaptureRequest(
      routing_id(), GetOrCreateIDForHandle(handle)));
}
94 142
// Aborts the session identified by |handle|.
// NOTE(review): As in stop(), the provider is reset even when the abort is
// ignored as unmatched -- confirm this unconditional reset is intended.
void SpeechRecognitionDispatcher::abort(
    const WebSpeechRecognitionHandle& handle,
    WebSpeechRecognizerClient* recognizer_client) {
  audio_source_provider_.reset();
  // Ignore an |abort| issued without a matching |start|.
  if (recognizer_client_ != recognizer_client || !HandleExists(handle))
    return;
  Send(new SpeechRecognitionHostMsg_AbortRequest(
      routing_id(), GetOrCreateIDForHandle(handle)));
}
104 153
// Browser signalled that the recognition session started; forward to Blink.
void SpeechRecognitionDispatcher::OnRecognitionStarted(int request_id) {
  recognizer_client_->didStart(GetHandleFromID(request_id));
}
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
147 NOTREACHED(); 196 NOTREACHED();
148 return WebSpeechRecognizerClient::OtherError; 197 return WebSpeechRecognizerClient::OtherError;
149 } 198 }
150 199
// Routes a recognition error from the browser to the Blink client. "No
// match" is reported via didReceiveNoMatch(); any other error tears down the
// audio source provider and is surfaced as a WebKit error code.
void SpeechRecognitionDispatcher::OnErrorOccurred(
    int request_id, const SpeechRecognitionError& error) {
  if (error.code == SPEECH_RECOGNITION_ERROR_NO_MATCH) {
    recognizer_client_->didReceiveNoMatch(GetHandleFromID(request_id),
                                          WebSpeechRecognitionResult());
  } else {
    audio_source_provider_.reset();
    recognizer_client_->didReceiveError(
        GetHandleFromID(request_id),
        WebString(),  // TODO(primiano): message?
        WebKitErrorCode(error.code));
  }
}
163 213
// Browser signalled the end of a session: drop the request-id mapping and
// the audio provider, then notify Blink.
void SpeechRecognitionDispatcher::OnRecognitionEnded(int request_id) {
  // TODO(tommi): It is possible that the handle isn't found in the map if
  // the user just refreshed the page. It seems that we then get a
  // notification for the previously loaded instance of the page.
  HandleMap::iterator iter = handle_map_.find(request_id);
  if (iter == handle_map_.end()) {
    DLOG(ERROR) << "OnRecognitionEnded called for a handle that doesn't exist";
  } else {
    WebSpeechRecognitionHandle handle = iter->second;
    // Note: we need to erase the handle from the map *before* calling didEnd.
    // didEnd may call back synchronously to start a new recognition session,
    // and we don't want to delete the handle from the map after that happens.
    handle_map_.erase(request_id);
    audio_source_provider_.reset();
    recognizer_client_->didEnd(handle);
  }
}
180 231
181 void SpeechRecognitionDispatcher::OnResultsRetrieved( 232 void SpeechRecognitionDispatcher::OnResultsRetrieved(
182 int request_id, const SpeechRecognitionResults& results) { 233 int request_id, const SpeechRecognitionResults& results) {
183 size_t provisional_count = 0; 234 size_t provisional_count = 0;
184 SpeechRecognitionResults::const_iterator it = results.begin(); 235 SpeechRecognitionResults::const_iterator it = results.begin();
185 for (; it != results.end(); ++it) { 236 for (; it != results.end(); ++it) {
186 if (it->is_provisional) 237 if (it->is_provisional)
(...skipping 17 matching lines...) Expand all
204 transcripts[i] = result.hypotheses[i].utterance; 255 transcripts[i] = result.hypotheses[i].utterance;
205 confidences[i] = static_cast<float>(result.hypotheses[i].confidence); 256 confidences[i] = static_cast<float>(result.hypotheses[i].confidence);
206 } 257 }
207 webkit_result->assign(transcripts, confidences, !result.is_provisional); 258 webkit_result->assign(transcripts, confidences, !result.is_provisional);
208 } 259 }
209 260
210 recognizer_client_->didReceiveResults( 261 recognizer_client_->didReceiveResults(
211 GetHandleFromID(request_id), final, provisional); 262 GetHandleFromID(request_id), final, provisional);
212 } 263 }
213 264
265 void SpeechRecognitionDispatcher::OnAudioError(int request_id){
266 // Browser gets notified on render thread
267 if (!render_loop_->BelongsToCurrentThread()) {
268 render_loop_->PostTask(
269 FROM_HERE,
270 base::Bind(&SpeechRecognitionDispatcher::OnAudioError,
271 base::Unretained(this), request_id));
272 return;
273 }
274 audio_source_provider_.reset();
275 }
276
// Browser allocated the shared-memory audio buffer for |request_id|; create
// the provider that pumps data from |audio_track_| into it. |handle| and
// |length| describe the shared-memory region; |params| the audio format.
void SpeechRecognitionDispatcher::OnAudioTrackReady(
    int request_id,
    const media::AudioParameters& params,
    base::SharedMemoryHandle handle,
    uint32 length) {

  // TODO(burnik): Log and DCHECK(!audio_source_provider_).
  // The track may have been detached/cleared since the start request; in
  // that case drop any provider and bail out.
  if (audio_track_.isNull()) {
    audio_source_provider_.reset();
    return;
  }

  // The provider calls OnAudioData when a chunk is ready and OnAudioError on
  // failure; base::Unretained is safe as this dispatcher owns the provider.
  audio_source_provider_.reset(
      new SpeechRecognitionAudioSourceProvider(
          audio_track_, params, handle, length,
          base::Bind(&SpeechRecognitionDispatcher::OnAudioData,
                     base::Unretained(this), request_id),
          base::Bind(&SpeechRecognitionDispatcher::OnAudioError,
                     base::Unretained(this), request_id)));
}
297
298 void SpeechRecognitionDispatcher::OnAudioChunkProcessed(
299 int request_id) {
300
301 // TODO(burnik): Log and DCHECK(!audio_source_provider_).
302 if (audio_track_.isNull())
303 return;
304
305 // discard any message to a destroyed instance
306 if(!audio_source_provider_.get())
307 return;
308
309 audio_source_provider_->NotifyAudioBusConsumed();
310 }
311
312
// TODO(burnik): Consider using sync_socket.
// Called by |audio_source_provider_| when a new audio chunk is available in
// shared memory; notifies the browser, always from the render thread.
void SpeechRecognitionDispatcher::OnAudioData(int request_id) {
  // The browser gets notified on the render thread.
  if (!render_loop_->BelongsToCurrentThread()) {
    render_loop_->PostTask(
        FROM_HERE,
        base::Bind(&SpeechRecognitionDispatcher::OnAudioData,
                   base::Unretained(this), request_id));
    return;
  }
  // If the handle isn't found in the map, which might happen if the
  // recognition has been ended by the browser, delete the
  // |audio_source_provider_|.
  HandleMap::iterator iter = handle_map_.find(request_id);
  if (iter == handle_map_.end()) {
    audio_source_provider_.reset();
    return;
  }

  Send(new SpeechRecognitionHostMsg_OnAudioTrackData(routing_id(), request_id));
}
214 334
215 int SpeechRecognitionDispatcher::GetOrCreateIDForHandle( 335 int SpeechRecognitionDispatcher::GetOrCreateIDForHandle(
216 const WebSpeechRecognitionHandle& handle) { 336 const WebSpeechRecognitionHandle& handle) {
217 // Search first for an existing mapping. 337 // Search first for an existing mapping.
218 for (HandleMap::iterator iter = handle_map_.begin(); 338 for (HandleMap::iterator iter = handle_map_.begin();
219 iter != handle_map_.end(); 339 iter != handle_map_.end();
220 ++iter) { 340 ++iter) {
221 if (iter->second.equals(handle)) 341 if (iter->second.equals(handle))
222 return iter->first; 342 return iter->first;
223 } 343 }
(...skipping 16 matching lines...) Expand all
240 } 360 }
241 361
242 const WebSpeechRecognitionHandle& SpeechRecognitionDispatcher::GetHandleFromID( 362 const WebSpeechRecognitionHandle& SpeechRecognitionDispatcher::GetHandleFromID(
243 int request_id) { 363 int request_id) {
244 HandleMap::iterator iter = handle_map_.find(request_id); 364 HandleMap::iterator iter = handle_map_.find(request_id);
245 DCHECK(iter != handle_map_.end()); 365 DCHECK(iter != handle_map_.end());
246 return iter->second; 366 return iter->second;
247 } 367 }
248 368
249 } // namespace content 369 } // namespace content
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698