Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1370)

Side by Side Diff: content/renderer/speech_recognition_dispatcher.cc

Issue 499233003: Binding media stream audio track to speech recognition [renderer] (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Rebase on master - merge fix Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/renderer/speech_recognition_dispatcher.h" 5 #include "content/renderer/speech_recognition_dispatcher.h"
6 6
7 #include "base/basictypes.h" 7 #include "base/basictypes.h"
8 #include "base/strings/utf_string_conversions.h" 8 #include "base/strings/utf_string_conversions.h"
9 #include "content/common/speech_recognition_messages.h" 9 #include "content/common/speech_recognition_messages.h"
10 #if defined(ENABLE_WEBRTC)
11 #include "content/renderer/media/speech_recognition_audio_sink.h"
12 #endif
10 #include "content/renderer/render_view_impl.h" 13 #include "content/renderer/render_view_impl.h"
11 #include "third_party/WebKit/public/platform/WebString.h" 14 #include "third_party/WebKit/public/platform/WebString.h"
12 #include "third_party/WebKit/public/platform/WebVector.h" 15 #include "third_party/WebKit/public/platform/WebVector.h"
13 #include "third_party/WebKit/public/web/WebSpeechGrammar.h" 16 #include "third_party/WebKit/public/web/WebSpeechGrammar.h"
14 #include "third_party/WebKit/public/web/WebSpeechRecognitionParams.h" 17 #include "third_party/WebKit/public/web/WebSpeechRecognitionParams.h"
15 #include "third_party/WebKit/public/web/WebSpeechRecognitionResult.h" 18 #include "third_party/WebKit/public/web/WebSpeechRecognitionResult.h"
16 #include "third_party/WebKit/public/web/WebSpeechRecognizerClient.h" 19 #include "third_party/WebKit/public/web/WebSpeechRecognizerClient.h"
17 20
18 using blink::WebVector; 21 using blink::WebVector;
19 using blink::WebString; 22 using blink::WebString;
20 using blink::WebSpeechGrammar; 23 using blink::WebSpeechGrammar;
21 using blink::WebSpeechRecognitionHandle; 24 using blink::WebSpeechRecognitionHandle;
22 using blink::WebSpeechRecognitionResult; 25 using blink::WebSpeechRecognitionResult;
23 using blink::WebSpeechRecognitionParams; 26 using blink::WebSpeechRecognitionParams;
24 using blink::WebSpeechRecognizerClient; 27 using blink::WebSpeechRecognizerClient;
25 28
26 namespace content { 29 namespace content {
27 30
28 SpeechRecognitionDispatcher::SpeechRecognitionDispatcher( 31 SpeechRecognitionDispatcher::SpeechRecognitionDispatcher(
29 RenderViewImpl* render_view) 32 RenderViewImpl* render_view)
30 : RenderViewObserver(render_view), 33 : RenderViewObserver(render_view),
31 recognizer_client_(NULL), 34 recognizer_client_(NULL),
32 next_id_(1) { 35 next_id_(1) {}
33 }
34 36
35 SpeechRecognitionDispatcher::~SpeechRecognitionDispatcher() { 37 SpeechRecognitionDispatcher::~SpeechRecognitionDispatcher() {}
36 }
37 38
38 void SpeechRecognitionDispatcher::AbortAllRecognitions() { 39 void SpeechRecognitionDispatcher::AbortAllRecognitions() {
40 ResetAudioSink();
39 Send(new SpeechRecognitionHostMsg_AbortAllRequests( 41 Send(new SpeechRecognitionHostMsg_AbortAllRequests(
40 routing_id())); 42 routing_id()));
41 } 43 }
42 44
43 bool SpeechRecognitionDispatcher::OnMessageReceived( 45 bool SpeechRecognitionDispatcher::OnMessageReceived(
44 const IPC::Message& message) { 46 const IPC::Message& message) {
45 bool handled = true; 47 bool handled = true;
46 IPC_BEGIN_MESSAGE_MAP(SpeechRecognitionDispatcher, message) 48 IPC_BEGIN_MESSAGE_MAP(SpeechRecognitionDispatcher, message)
47 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Started, OnRecognitionStarted) 49 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Started, OnRecognitionStarted)
48 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioStarted, OnAudioStarted) 50 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioStarted, OnAudioStarted)
49 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundStarted, OnSoundStarted) 51 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundStarted, OnSoundStarted)
50 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundEnded, OnSoundEnded) 52 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundEnded, OnSoundEnded)
51 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioEnded, OnAudioEnded) 53 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioEnded, OnAudioEnded)
52 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ErrorOccurred, OnErrorOccurred) 54 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ErrorOccurred, OnErrorOccurred)
53 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Ended, OnRecognitionEnded) 55 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Ended, OnRecognitionEnded)
54 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ResultRetrieved, 56 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ResultRetrieved,
55 OnResultsRetrieved) 57 OnResultsRetrieved)
58 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SharedAudioBusReady,
no longer working on chromium 2014/10/09 12:19:02 Is SharedAudioBusReady a suitable name here? I thi… [comment preview truncated]
burnik 2014/10/09 13:13:04 Done.
59 OnSharedAudioBusReady)
56 IPC_MESSAGE_UNHANDLED(handled = false) 60 IPC_MESSAGE_UNHANDLED(handled = false)
57 IPC_END_MESSAGE_MAP() 61 IPC_END_MESSAGE_MAP()
58 return handled; 62 return handled;
59 } 63 }
60 64
61 void SpeechRecognitionDispatcher::start( 65 void SpeechRecognitionDispatcher::start(
62 const WebSpeechRecognitionHandle& handle, 66 const WebSpeechRecognitionHandle& handle,
63 const WebSpeechRecognitionParams& params, 67 const WebSpeechRecognitionParams& params,
64 WebSpeechRecognizerClient* recognizer_client) { 68 WebSpeechRecognizerClient* recognizer_client) {
65 DCHECK(!recognizer_client_ || recognizer_client_ == recognizer_client); 69 DCHECK(!recognizer_client_ || recognizer_client_ == recognizer_client);
66 recognizer_client_ = recognizer_client; 70 recognizer_client_ = recognizer_client;
67 71
72 #if defined(ENABLE_WEBRTC)
73 const blink::WebMediaStreamTrack track = params.audioTrack();
74 if (!track.isNull()) {
75 // Check if this type of track is allowed by implemented policy.
76 if (SpeechRecognitionAudioSink::IsSupportedTrack(track)) {
77 audio_track_.assign(track);
78 } else {
79 audio_track_.reset();
80 // Notify user that the track used is not supported.
81 recognizer_client_->didReceiveError(
82 handle,
83 WebString("Provided audioTrack is not supported."),
84 WebSpeechRecognizerClient::AudioCaptureError);
85
86 return;
87 }
88 }
89
90 // Destroy any previous instance to detach from the audio track.
91 // Each new session should reinstantiate the provider once the track is ready.
92 ResetAudioSink();
93 #endif
94
68 SpeechRecognitionHostMsg_StartRequest_Params msg_params; 95 SpeechRecognitionHostMsg_StartRequest_Params msg_params;
69 for (size_t i = 0; i < params.grammars().size(); ++i) { 96 for (size_t i = 0; i < params.grammars().size(); ++i) {
70 const WebSpeechGrammar& grammar = params.grammars()[i]; 97 const WebSpeechGrammar& grammar = params.grammars()[i];
71 msg_params.grammars.push_back( 98 msg_params.grammars.push_back(
72 SpeechRecognitionGrammar(grammar.src().spec(), grammar.weight())); 99 SpeechRecognitionGrammar(grammar.src().spec(), grammar.weight()));
73 } 100 }
74 msg_params.language = base::UTF16ToUTF8(params.language()); 101 msg_params.language = base::UTF16ToUTF8(params.language());
75 msg_params.max_hypotheses = static_cast<uint32>(params.maxAlternatives()); 102 msg_params.max_hypotheses = static_cast<uint32>(params.maxAlternatives());
76 msg_params.continuous = params.continuous(); 103 msg_params.continuous = params.continuous();
77 msg_params.interim_results = params.interimResults(); 104 msg_params.interim_results = params.interimResults();
78 msg_params.origin_url = params.origin().toString().utf8(); 105 msg_params.origin_url = params.origin().toString().utf8();
79 msg_params.render_view_id = routing_id(); 106 msg_params.render_view_id = routing_id();
80 msg_params.request_id = GetOrCreateIDForHandle(handle); 107 msg_params.request_id = GetOrCreateIDForHandle(handle);
108 #if defined(ENABLE_WEBRTC)
109 // fall back to default input when the track is not allowed
no longer working on chromium 2014/10/09 12:19:02 nit, s/fall/Fall/g, and end with period.
burnik 2014/10/09 13:13:04 Done.
110 msg_params.using_audio_track = !audio_track_.isNull();
111 #else
112 msg_params.using_audio_track = false;
113 #endif
81 // The handle mapping will be removed in |OnRecognitionEnd|. 114 // The handle mapping will be removed in |OnRecognitionEnd|.
82 Send(new SpeechRecognitionHostMsg_StartRequest(msg_params)); 115 Send(new SpeechRecognitionHostMsg_StartRequest(msg_params));
83 } 116 }
84 117
85 void SpeechRecognitionDispatcher::stop( 118 void SpeechRecognitionDispatcher::stop(
86 const WebSpeechRecognitionHandle& handle, 119 const WebSpeechRecognitionHandle& handle,
87 WebSpeechRecognizerClient* recognizer_client) { 120 WebSpeechRecognizerClient* recognizer_client) {
121 ResetAudioSink();
88 // Ignore a |stop| issued without a matching |start|. 122 // Ignore a |stop| issued without a matching |start|.
89 if (recognizer_client_ != recognizer_client || !HandleExists(handle)) 123 if (recognizer_client_ != recognizer_client || !HandleExists(handle))
90 return; 124 return;
91 Send(new SpeechRecognitionHostMsg_StopCaptureRequest( 125 Send(new SpeechRecognitionHostMsg_StopCaptureRequest(
92 routing_id(), GetOrCreateIDForHandle(handle))); 126 routing_id(), GetOrCreateIDForHandle(handle)));
93 } 127 }
94 128
95 void SpeechRecognitionDispatcher::abort( 129 void SpeechRecognitionDispatcher::abort(
96 const WebSpeechRecognitionHandle& handle, 130 const WebSpeechRecognitionHandle& handle,
97 WebSpeechRecognizerClient* recognizer_client) { 131 WebSpeechRecognizerClient* recognizer_client) {
132 ResetAudioSink();
98 // Ignore an |abort| issued without a matching |start|. 133 // Ignore an |abort| issued without a matching |start|.
99 if (recognizer_client_ != recognizer_client || !HandleExists(handle)) 134 if (recognizer_client_ != recognizer_client || !HandleExists(handle))
100 return; 135 return;
101 Send(new SpeechRecognitionHostMsg_AbortRequest( 136 Send(new SpeechRecognitionHostMsg_AbortRequest(
102 routing_id(), GetOrCreateIDForHandle(handle))); 137 routing_id(), GetOrCreateIDForHandle(handle)));
103 } 138 }
104 139
105 void SpeechRecognitionDispatcher::OnRecognitionStarted(int request_id) { 140 void SpeechRecognitionDispatcher::OnRecognitionStarted(int request_id) {
106 recognizer_client_->didStart(GetHandleFromID(request_id)); 141 recognizer_client_->didStart(GetHandleFromID(request_id));
107 } 142 }
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
147 NOTREACHED(); 182 NOTREACHED();
148 return WebSpeechRecognizerClient::OtherError; 183 return WebSpeechRecognizerClient::OtherError;
149 } 184 }
150 185
151 void SpeechRecognitionDispatcher::OnErrorOccurred( 186 void SpeechRecognitionDispatcher::OnErrorOccurred(
152 int request_id, const SpeechRecognitionError& error) { 187 int request_id, const SpeechRecognitionError& error) {
153 if (error.code == SPEECH_RECOGNITION_ERROR_NO_MATCH) { 188 if (error.code == SPEECH_RECOGNITION_ERROR_NO_MATCH) {
154 recognizer_client_->didReceiveNoMatch(GetHandleFromID(request_id), 189 recognizer_client_->didReceiveNoMatch(GetHandleFromID(request_id),
155 WebSpeechRecognitionResult()); 190 WebSpeechRecognitionResult());
156 } else { 191 } else {
192 ResetAudioSink();
157 recognizer_client_->didReceiveError( 193 recognizer_client_->didReceiveError(
158 GetHandleFromID(request_id), 194 GetHandleFromID(request_id),
159 WebString(), // TODO(primiano): message? 195 WebString(), // TODO(primiano): message?
160 WebKitErrorCode(error.code)); 196 WebKitErrorCode(error.code));
161 } 197 }
162 } 198 }
163 199
164 void SpeechRecognitionDispatcher::OnRecognitionEnded(int request_id) { 200 void SpeechRecognitionDispatcher::OnRecognitionEnded(int request_id) {
165 // TODO(tommi): It is possible that the handle isn't found in the array if 201 // TODO(tommi): It is possible that the handle isn't found in the array if
166 // the user just refreshed the page. It seems that we then get a notification 202 // the user just refreshed the page. It seems that we then get a notification
167 // for the previously loaded instance of the page. 203 // for the previously loaded instance of the page.
168 HandleMap::iterator iter = handle_map_.find(request_id); 204 HandleMap::iterator iter = handle_map_.find(request_id);
169 if (iter == handle_map_.end()) { 205 if (iter == handle_map_.end()) {
170 DLOG(ERROR) << "OnRecognitionEnded called for a handle that doesn't exist"; 206 DLOG(ERROR) << "OnRecognitionEnded called for a handle that doesn't exist";
171 } else { 207 } else {
172 WebSpeechRecognitionHandle handle = iter->second; 208 WebSpeechRecognitionHandle handle = iter->second;
173 // Note: we need to erase the handle from the map *before* calling didEnd. 209 // Note: we need to erase the handle from the map *before* calling didEnd.
174 // didEnd may call back synchronously to start a new recognition session, 210 // didEnd may call back synchronously to start a new recognition session,
175 // and we don't want to delete the handle from the map after that happens. 211 // and we don't want to delete the handle from the map after that happens.
176 handle_map_.erase(request_id); 212 handle_map_.erase(request_id);
213 ResetAudioSink();
177 recognizer_client_->didEnd(handle); 214 recognizer_client_->didEnd(handle);
178 } 215 }
179 } 216 }
180 217
181 void SpeechRecognitionDispatcher::OnResultsRetrieved( 218 void SpeechRecognitionDispatcher::OnResultsRetrieved(
182 int request_id, const SpeechRecognitionResults& results) { 219 int request_id, const SpeechRecognitionResults& results) {
183 size_t provisional_count = 0; 220 size_t provisional_count = 0;
184 SpeechRecognitionResults::const_iterator it = results.begin(); 221 SpeechRecognitionResults::const_iterator it = results.begin();
185 for (; it != results.end(); ++it) { 222 for (; it != results.end(); ++it) {
186 if (it->is_provisional) 223 if (it->is_provisional)
(...skipping 17 matching lines...) Expand all
204 transcripts[i] = result.hypotheses[i].utterance; 241 transcripts[i] = result.hypotheses[i].utterance;
205 confidences[i] = static_cast<float>(result.hypotheses[i].confidence); 242 confidences[i] = static_cast<float>(result.hypotheses[i].confidence);
206 } 243 }
207 webkit_result->assign(transcripts, confidences, !result.is_provisional); 244 webkit_result->assign(transcripts, confidences, !result.is_provisional);
208 } 245 }
209 246
210 recognizer_client_->didReceiveResults( 247 recognizer_client_->didReceiveResults(
211 GetHandleFromID(request_id), final, provisional); 248 GetHandleFromID(request_id), final, provisional);
212 } 249 }
213 250
251 void SpeechRecognitionDispatcher::OnSharedAudioBusReady(
252 int request_id,
253 const media::AudioParameters& params,
254 const base::SharedMemoryHandle memory,
255 const base::SyncSocket::TransitDescriptor descriptor) {
256 #if defined(ENABLE_WEBRTC)
257 DCHECK(!speech_audio_sink_.get());
258 if (audio_track_.isNull()) {
259 speech_audio_sink_.reset();
no longer working on chromium 2014/10/09 12:19:02 call ResetAudioSink() instead
burnik 2014/10/09 13:13:03 Done.
260 return;
261 }
262
263 // Create socket here and pass ownership to the |speech_audio_sink_|.
264 scoped_ptr<base::SyncSocket> socket(new base::CancelableSyncSocket(
265 base::SyncSocket::UnwrapHandle(descriptor)));
266
267 speech_audio_sink_.reset(new SpeechRecognitionAudioSink(
268 audio_track_, params, memory, socket.Pass(),
269 base::Bind(&SpeechRecognitionDispatcher::ResetAudioSink,
270 base::Unretained(this))));
271 #endif
272 }
214 273
215 int SpeechRecognitionDispatcher::GetOrCreateIDForHandle( 274 int SpeechRecognitionDispatcher::GetOrCreateIDForHandle(
216 const WebSpeechRecognitionHandle& handle) { 275 const WebSpeechRecognitionHandle& handle) {
217 // Search first for an existing mapping. 276 // Search first for an existing mapping.
218 for (HandleMap::iterator iter = handle_map_.begin(); 277 for (HandleMap::iterator iter = handle_map_.begin();
219 iter != handle_map_.end(); 278 iter != handle_map_.end();
220 ++iter) { 279 ++iter) {
221 if (iter->second.equals(handle)) 280 if (iter->second.equals(handle))
222 return iter->first; 281 return iter->first;
223 } 282 }
224 // If no existing mapping found, create a new one. 283 // If no existing mapping found, create a new one.
225 const int new_id = next_id_; 284 const int new_id = next_id_;
226 handle_map_[new_id] = handle; 285 handle_map_[new_id] = handle;
227 ++next_id_; 286 ++next_id_;
228 return new_id; 287 return new_id;
229 } 288 }
230 289
231 bool SpeechRecognitionDispatcher::HandleExists( 290 bool SpeechRecognitionDispatcher::HandleExists(
232 const WebSpeechRecognitionHandle& handle) { 291 const WebSpeechRecognitionHandle& handle) {
233 for (HandleMap::iterator iter = handle_map_.begin(); 292 for (HandleMap::iterator iter = handle_map_.begin();
234 iter != handle_map_.end(); 293 iter != handle_map_.end();
235 ++iter) { 294 ++iter) {
236 if (iter->second.equals(handle)) 295 if (iter->second.equals(handle))
237 return true; 296 return true;
238 } 297 }
239 return false; 298 return false;
240 } 299 }
241 300
301 void SpeechRecognitionDispatcher::ResetAudioSink() {
302 #if defined(ENABLE_WEBRTC)
303 speech_audio_sink_.reset();
304 #endif
305 }
306
242 const WebSpeechRecognitionHandle& SpeechRecognitionDispatcher::GetHandleFromID( 307 const WebSpeechRecognitionHandle& SpeechRecognitionDispatcher::GetHandleFromID(
243 int request_id) { 308 int request_id) {
244 HandleMap::iterator iter = handle_map_.find(request_id); 309 HandleMap::iterator iter = handle_map_.find(request_id);
245 DCHECK(iter != handle_map_.end()); 310 DCHECK(iter != handle_map_.end());
246 return iter->second; 311 return iter->second;
247 } 312 }
248 313
249 } // namespace content 314 } // namespace content
OLDNEW
« content/content_renderer.gypi ('K') | « content/renderer/speech_recognition_dispatcher.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698