Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "content/renderer/speech_recognition_dispatcher.h" | 5 #include "content/renderer/speech_recognition_dispatcher.h" |
| 6 | 6 |
| 7 #include "base/basictypes.h" | 7 #include "base/basictypes.h" |
| 8 #include "base/strings/utf_string_conversions.h" | 8 #include "base/strings/utf_string_conversions.h" |
| 9 #include "content/common/speech_recognition_messages.h" | 9 #include "content/common/speech_recognition_messages.h" |
| 10 #include "content/renderer/media/speech_recognition_audio_sink.h" | |
| 10 #include "content/renderer/render_view_impl.h" | 11 #include "content/renderer/render_view_impl.h" |
| 11 #include "third_party/WebKit/public/platform/WebString.h" | 12 #include "third_party/WebKit/public/platform/WebString.h" |
| 12 #include "third_party/WebKit/public/platform/WebVector.h" | 13 #include "third_party/WebKit/public/platform/WebVector.h" |
| 13 #include "third_party/WebKit/public/web/WebSpeechGrammar.h" | 14 #include "third_party/WebKit/public/web/WebSpeechGrammar.h" |
| 14 #include "third_party/WebKit/public/web/WebSpeechRecognitionParams.h" | 15 #include "third_party/WebKit/public/web/WebSpeechRecognitionParams.h" |
| 15 #include "third_party/WebKit/public/web/WebSpeechRecognitionResult.h" | 16 #include "third_party/WebKit/public/web/WebSpeechRecognitionResult.h" |
| 16 #include "third_party/WebKit/public/web/WebSpeechRecognizerClient.h" | 17 #include "third_party/WebKit/public/web/WebSpeechRecognizerClient.h" |
| 17 | 18 |
| 18 using blink::WebVector; | 19 using blink::WebVector; |
| 19 using blink::WebString; | 20 using blink::WebString; |
| 20 using blink::WebSpeechGrammar; | 21 using blink::WebSpeechGrammar; |
| 21 using blink::WebSpeechRecognitionHandle; | 22 using blink::WebSpeechRecognitionHandle; |
| 22 using blink::WebSpeechRecognitionResult; | 23 using blink::WebSpeechRecognitionResult; |
| 23 using blink::WebSpeechRecognitionParams; | 24 using blink::WebSpeechRecognitionParams; |
| 24 using blink::WebSpeechRecognizerClient; | 25 using blink::WebSpeechRecognizerClient; |
| 25 | 26 |
| 26 namespace content { | 27 namespace content { |
| 27 | 28 |
| 28 SpeechRecognitionDispatcher::SpeechRecognitionDispatcher( | 29 SpeechRecognitionDispatcher::SpeechRecognitionDispatcher( |
| 29 RenderViewImpl* render_view) | 30 RenderViewImpl* render_view) |
| 30 : RenderViewObserver(render_view), | 31 : RenderViewObserver(render_view), |
| 31 recognizer_client_(NULL), | 32 recognizer_client_(NULL), |
| 32 next_id_(1) { | 33 next_id_(1) {} |
| 33 } | |
| 34 | 34 |
| 35 SpeechRecognitionDispatcher::~SpeechRecognitionDispatcher() { | 35 SpeechRecognitionDispatcher::~SpeechRecognitionDispatcher() {} |
| 36 } | |
| 37 | 36 |
| 38 void SpeechRecognitionDispatcher::AbortAllRecognitions() { | 37 void SpeechRecognitionDispatcher::AbortAllRecognitions() { |
| | 38 speech_audio_sink_.reset(); |
| 39 Send(new SpeechRecognitionHostMsg_AbortAllRequests( | 39 Send(new SpeechRecognitionHostMsg_AbortAllRequests( |
| 40 routing_id())); | 40 routing_id())); |
| 41 } | 41 } |
| 42 | 42 |
| 43 bool SpeechRecognitionDispatcher::OnMessageReceived( | 43 bool SpeechRecognitionDispatcher::OnMessageReceived( |
| 44 const IPC::Message& message) { | 44 const IPC::Message& message) { |
| 45 bool handled = true; | 45 bool handled = true; |
| 46 IPC_BEGIN_MESSAGE_MAP(SpeechRecognitionDispatcher, message) | 46 IPC_BEGIN_MESSAGE_MAP(SpeechRecognitionDispatcher, message) |
| 47 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Started, OnRecognitionStarted) | 47 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Started, OnRecognitionStarted) |
| 48 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioStarted, OnAudioStarted) | 48 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioStarted, OnAudioStarted) |
| 49 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundStarted, OnSoundStarted) | 49 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundStarted, OnSoundStarted) |
| 50 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundEnded, OnSoundEnded) | 50 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundEnded, OnSoundEnded) |
| 51 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioEnded, OnAudioEnded) | 51 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioEnded, OnAudioEnded) |
| 52 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ErrorOccurred, OnErrorOccurred) | 52 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ErrorOccurred, OnErrorOccurred) |
| 53 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Ended, OnRecognitionEnded) | 53 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Ended, OnRecognitionEnded) |
| 54 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ResultRetrieved, | 54 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ResultRetrieved, |
| 55 OnResultsRetrieved) | 55 OnResultsRetrieved) |
| | 56 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioTrackReady, OnAudioTrackReady) |
| 56 IPC_MESSAGE_UNHANDLED(handled = false) | 57 IPC_MESSAGE_UNHANDLED(handled = false) |
| 57 IPC_END_MESSAGE_MAP() | 58 IPC_END_MESSAGE_MAP() |
| 58 return handled; | 59 return handled; |
| 59 } | 60 } |
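
For readers unfamiliar with the IPC plumbing: the IPC_BEGIN_MESSAGE_MAP / IPC_MESSAGE_HANDLER macros above expand, roughly, to a switch on the message's type id that routes each message to a member handler and reports whether anything matched. Below is a minimal standalone model of that dispatch shape — illustrative names only, not the real macro expansion, which also deserializes each message's payload:

```cpp
#include <cstdint>
#include <iostream>

// Stand-in for IPC::Message; real messages carry serialized payloads.
struct Message {
  uint32_t type;
  int request_id;
};

enum MsgType : uint32_t { kStarted = 1, kAudioStarted = 2, kEnded = 3 };

class Dispatcher {
 public:
  bool OnMessageReceived(const Message& message) {
    bool handled = true;
    switch (message.type) {  // IPC_BEGIN_MESSAGE_MAP: switch on type id.
      case kStarted:      OnRecognitionStarted(message.request_id); break;
      case kAudioStarted: OnAudioStarted(message.request_id);       break;
      case kEnded:        OnRecognitionEnded(message.request_id);   break;
      default:            handled = false;                          break;  // IPC_MESSAGE_UNHANDLED
    }
    return handled;
  }

 private:
  void OnRecognitionStarted(int id) { std::cout << "started " << id << "\n"; }
  void OnAudioStarted(int id)       { std::cout << "audio "   << id << "\n"; }
  void OnRecognitionEnded(int id)   { std::cout << "ended "   << id << "\n"; }
};

int main() {
  Dispatcher d;
  d.OnMessageReceived({kStarted, 7});  // handled
  d.OnMessageReceived({42, 7});        // falls through: handled == false
}
```
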
| 60 | 61 |
| 61 void SpeechRecognitionDispatcher::start( | 62 void SpeechRecognitionDispatcher::start( |
| 62 const WebSpeechRecognitionHandle& handle, | 63 const WebSpeechRecognitionHandle& handle, |
| 63 const WebSpeechRecognitionParams& params, | 64 const WebSpeechRecognitionParams& params, |
| 64 WebSpeechRecognizerClient* recognizer_client) { | 65 WebSpeechRecognizerClient* recognizer_client) { |
| 65 DCHECK(!recognizer_client_ || recognizer_client_ == recognizer_client); | 66 DCHECK(!recognizer_client_ || recognizer_client_ == recognizer_client); |
| 66 recognizer_client_ = recognizer_client; | 67 recognizer_client_ = recognizer_client; |
| 67 | 68 |
| | 69 const blink::WebMediaStreamTrack track = params.audioTrack(); |
| | 70 if (!track.isNull()) { |
| | 71 // Check if this type of track is allowed by the implemented policy. |
| | 72 if (SpeechRecognitionAudioSink::IsSupportedTrack(track)) { |
| | 73 audio_track_.assign(track); |
| | 74 } else { |
| | 75 audio_track_.reset(); |
| | 76 // Notify the user that the track used is not supported. |
| | 77 recognizer_client_->didReceiveError( |
| | 78 handle, |
| | 79 WebString("Provided audioTrack is not supported."), |
| | 80 WebSpeechRecognizerClient::AudioCaptureError); |
| | 81 |
| | 82 return; |
| | 83 } |
| | 84 } |
| | 85 |
| | 86 // Destroy any previous instance to detach from the audio track. |
| | 87 // Each new session should reinstantiate the provider once the track is ready. |
| | 88 speech_audio_sink_.reset(); |
| | 89 |
| 68 SpeechRecognitionHostMsg_StartRequest_Params msg_params; | 90 SpeechRecognitionHostMsg_StartRequest_Params msg_params; |
| 69 for (size_t i = 0; i < params.grammars().size(); ++i) { | 91 for (size_t i = 0; i < params.grammars().size(); ++i) { |
| 70 const WebSpeechGrammar& grammar = params.grammars()[i]; | 92 const WebSpeechGrammar& grammar = params.grammars()[i]; |
| 71 msg_params.grammars.push_back( | 93 msg_params.grammars.push_back( |
| 72 SpeechRecognitionGrammar(grammar.src().spec(), grammar.weight())); | 94 SpeechRecognitionGrammar(grammar.src().spec(), grammar.weight())); |
| 73 } | 95 } |
| 74 msg_params.language = base::UTF16ToUTF8(params.language()); | 96 msg_params.language = base::UTF16ToUTF8(params.language()); |
| 75 msg_params.max_hypotheses = static_cast<uint32>(params.maxAlternatives()); | 97 msg_params.max_hypotheses = static_cast<uint32>(params.maxAlternatives()); |
| 76 msg_params.continuous = params.continuous(); | 98 msg_params.continuous = params.continuous(); |
| 77 msg_params.interim_results = params.interimResults(); | 99 msg_params.interim_results = params.interimResults(); |
| 78 msg_params.origin_url = params.origin().toString().utf8(); | 100 msg_params.origin_url = params.origin().toString().utf8(); |
| 79 msg_params.render_view_id = routing_id(); | 101 msg_params.render_view_id = routing_id(); |
| 80 msg_params.request_id = GetOrCreateIDForHandle(handle); | 102 msg_params.request_id = GetOrCreateIDForHandle(handle); |
| | 103 // Fall back to the default audio input when the track is not allowed. |
| | 104 msg_params.using_audio_track = !audio_track_.isNull(); |
| 81 // The handle mapping will be removed in |OnRecognitionEnded|. | 105 // The handle mapping will be removed in |OnRecognitionEnded|. |
| 82 Send(new SpeechRecognitionHostMsg_StartRequest(msg_params)); | 106 Send(new SpeechRecognitionHostMsg_StartRequest(msg_params)); |
| 83 } | 107 } |
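
The interesting addition to start() is the gating on params.audioTrack(): an unsupported track is rejected with AudioCaptureError before any IPC is sent, and msg_params.using_audio_track later tells the browser whether to consume audio from the renderer-provided track or fall back to the default input. A self-contained sketch of that control flow, with placeholder stand-ins for the Blink track type and the IsSupportedTrack() policy check:

```cpp
#include <iostream>
#include <optional>
#include <string>

// Simplified stand-in for blink::WebMediaStreamTrack.
struct Track { bool null = true; bool audio_only = false; };

// Stand-in for SpeechRecognitionAudioSink::IsSupportedTrack().
bool IsSupportedTrack(const Track& t) { return t.audio_only; }

struct Session {
  std::optional<Track> audio_track;  // the accepted track, if any
  bool using_audio_track = false;

  bool Start(const Track& track, std::string* error) {
    if (!track.null) {
      if (IsSupportedTrack(track)) {
        audio_track = track;   // audio_track_.assign(track)
      } else {
        audio_track.reset();   // audio_track_.reset()
        *error = "Provided audioTrack is not supported.";
        return false;          // didReceiveError(...); return;
      }
    }
    // Fall back to default input when no (allowed) track was provided.
    using_audio_track = audio_track.has_value();
    return true;               // Send(SpeechRecognitionHostMsg_StartRequest)
  }
};

int main() {
  Session s;
  std::string err;
  Track video{/*null=*/false, /*audio_only=*/false};
  std::cout << s.Start(video, &err) << " " << err << "\n";                // rejected
  Track audio{/*null=*/false, /*audio_only=*/true};
  std::cout << s.Start(audio, &err) << " " << s.using_audio_track << "\n";  // 1 1
}
```
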
| 84 | 108 |
| 85 void SpeechRecognitionDispatcher::stop( | 109 void SpeechRecognitionDispatcher::stop( |
| 86 const WebSpeechRecognitionHandle& handle, | 110 const WebSpeechRecognitionHandle& handle, |
| 87 WebSpeechRecognizerClient* recognizer_client) { | 111 WebSpeechRecognizerClient* recognizer_client) { |
| | 112 speech_audio_sink_.reset(); |
| 88 // Ignore a |stop| issued without a matching |start|. | 113 // Ignore a |stop| issued without a matching |start|. |
| 89 if (recognizer_client_ != recognizer_client || !HandleExists(handle)) | 114 if (recognizer_client_ != recognizer_client || !HandleExists(handle)) |
| 90 return; | 115 return; |
| 91 Send(new SpeechRecognitionHostMsg_StopCaptureRequest( | 116 Send(new SpeechRecognitionHostMsg_StopCaptureRequest( |
| 92 routing_id(), GetOrCreateIDForHandle(handle))); | 117 routing_id(), GetOrCreateIDForHandle(handle))); |
| 93 } | 118 } |
| 94 | 119 |
| 95 void SpeechRecognitionDispatcher::abort( | 120 void SpeechRecognitionDispatcher::abort( |
| 96 const WebSpeechRecognitionHandle& handle, | 121 const WebSpeechRecognitionHandle& handle, |
| 97 WebSpeechRecognizerClient* recognizer_client) { | 122 WebSpeechRecognizerClient* recognizer_client) { |
| | 123 speech_audio_sink_.reset(); |
| 98 // Ignore an |abort| issued without a matching |start|. | 124 // Ignore an |abort| issued without a matching |start|. |
| 99 if (recognizer_client_ != recognizer_client || !HandleExists(handle)) | 125 if (recognizer_client_ != recognizer_client || !HandleExists(handle)) |
| 100 return; | 126 return; |
| 101 Send(new SpeechRecognitionHostMsg_AbortRequest( | 127 Send(new SpeechRecognitionHostMsg_AbortRequest( |
| 102 routing_id(), GetOrCreateIDForHandle(handle))); | 128 routing_id(), GetOrCreateIDForHandle(handle))); |
| 103 } | 129 } |
| 104 | 130 |
| 105 void SpeechRecognitionDispatcher::OnRecognitionStarted(int request_id) { | 131 void SpeechRecognitionDispatcher::OnRecognitionStarted(int request_id) { |
| 106 recognizer_client_->didStart(GetHandleFromID(request_id)); | 132 recognizer_client_->didStart(GetHandleFromID(request_id)); |
| 107 } | 133 } |
| (...skipping 39 matching lines...) | |
| 147 NOTREACHED(); | 173 NOTREACHED(); |
| 148 return WebSpeechRecognizerClient::OtherError; | 174 return WebSpeechRecognizerClient::OtherError; |
| 149 } | 175 } |
| 150 | 176 |
| 151 void SpeechRecognitionDispatcher::OnErrorOccurred( | 177 void SpeechRecognitionDispatcher::OnErrorOccurred( |
| 152 int request_id, const SpeechRecognitionError& error) { | 178 int request_id, const SpeechRecognitionError& error) { |
| 153 if (error.code == SPEECH_RECOGNITION_ERROR_NO_MATCH) { | 179 if (error.code == SPEECH_RECOGNITION_ERROR_NO_MATCH) { |
| 154 recognizer_client_->didReceiveNoMatch(GetHandleFromID(request_id), | 180 recognizer_client_->didReceiveNoMatch(GetHandleFromID(request_id), |
| 155 WebSpeechRecognitionResult()); | 181 WebSpeechRecognitionResult()); |
| 156 } else { | 182 } else { |
| | 183 speech_audio_sink_.reset(); |
| 157 recognizer_client_->didReceiveError( | 184 recognizer_client_->didReceiveError( |
| 158 GetHandleFromID(request_id), | 185 GetHandleFromID(request_id), |
| 159 WebString(), // TODO(primiano): message? | 186 WebString(), // TODO(primiano): message? |
| 160 WebKitErrorCode(error.code)); | 187 WebKitErrorCode(error.code)); |
| 161 } | 188 } |
| 162 } | 189 } |
| 163 | 190 |
| 164 void SpeechRecognitionDispatcher::OnRecognitionEnded(int request_id) { | 191 void SpeechRecognitionDispatcher::OnRecognitionEnded(int request_id) { |
| 165 // TODO(tommi): It is possible that the handle isn't found in the array if | 192 // TODO(tommi): It is possible that the handle isn't found in the array if |
| 166 // the user just refreshed the page. It seems that we then get a notification | 193 // the user just refreshed the page. It seems that we then get a notification |
| 167 // for the previously loaded instance of the page. | 194 // for the previously loaded instance of the page. |
| 168 HandleMap::iterator iter = handle_map_.find(request_id); | 195 HandleMap::iterator iter = handle_map_.find(request_id); |
| 169 if (iter == handle_map_.end()) { | 196 if (iter == handle_map_.end()) { |
| 170 DLOG(ERROR) << "OnRecognitionEnded called for a handle that doesn't exist"; | 197 DLOG(ERROR) << "OnRecognitionEnded called for a handle that doesn't exist"; |
| 171 } else { | 198 } else { |
| 172 WebSpeechRecognitionHandle handle = iter->second; | 199 WebSpeechRecognitionHandle handle = iter->second; |
| 173 // Note: we need to erase the handle from the map *before* calling didEnd. | 200 // Note: we need to erase the handle from the map *before* calling didEnd. |
| 174 // didEnd may call back synchronously to start a new recognition session, | 201 // didEnd may call back synchronously to start a new recognition session, |
| 175 // and we don't want to delete the handle from the map after that happens. | 202 // and we don't want to delete the handle from the map after that happens. |
| 176 handle_map_.erase(request_id); | 203 handle_map_.erase(request_id); |
| | 204 speech_audio_sink_.reset(); |
| 177 recognizer_client_->didEnd(handle); | 205 recognizer_client_->didEnd(handle); |
| 178 } | 206 } |
| 179 } | 207 } |
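
Note the ordering constraint the comment calls out: the id-to-handle entry is erased before didEnd() runs, because didEnd() may synchronously start a new session that re-registers the same handle under a fresh id. A compilable miniature of the handle map and that ordering, with std::map and std::string standing in for HandleMap and WebSpeechRecognitionHandle:

```cpp
#include <functional>
#include <iostream>
#include <map>
#include <string>

class HandleMap {
 public:
  int GetOrCreateId(const std::string& handle) {
    for (const auto& [id, h] : map_)  // linear search, as in the CL
      if (h == handle) return id;
    map_[next_id_] = handle;
    return next_id_++;
  }

  void OnRecognitionEnded(int id,
                          const std::function<void(std::string)>& did_end) {
    auto it = map_.find(id);
    if (it == map_.end()) return;  // e.g. the page was refreshed
    std::string handle = it->second;
    map_.erase(it);     // erase first...
    did_end(handle);    // ...then notify; may re-enter GetOrCreateId()
  }

 private:
  std::map<int, std::string> map_;
  int next_id_ = 1;
};

int main() {
  HandleMap m;
  int id = m.GetOrCreateId("h1");
  m.OnRecognitionEnded(id, [&](const std::string& h) {
    // A synchronous restart from didEnd gets a fresh id, not the stale one.
    std::cout << "restarted " << h << " as id " << m.GetOrCreateId(h) << "\n";
  });
}
```
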
| 180 | 208 |
| 181 void SpeechRecognitionDispatcher::OnResultsRetrieved( | 209 void SpeechRecognitionDispatcher::OnResultsRetrieved( |
| 182 int request_id, const SpeechRecognitionResults& results) { | 210 int request_id, const SpeechRecognitionResults& results) { |
| 183 size_t provisional_count = 0; | 211 size_t provisional_count = 0; |
| 184 SpeechRecognitionResults::const_iterator it = results.begin(); | 212 SpeechRecognitionResults::const_iterator it = results.begin(); |
| 185 for (; it != results.end(); ++it) { | 213 for (; it != results.end(); ++it) { |
| 186 if (it->is_provisional) | 214 if (it->is_provisional) |
| (...skipping 17 matching lines...) | |
| 204 transcripts[i] = result.hypotheses[i].utterance; | 232 transcripts[i] = result.hypotheses[i].utterance; |
| 205 confidences[i] = static_cast<float>(result.hypotheses[i].confidence); | 233 confidences[i] = static_cast<float>(result.hypotheses[i].confidence); |
| 206 } | 234 } |
| 207 webkit_result->assign(transcripts, confidences, !result.is_provisional); | 235 webkit_result->assign(transcripts, confidences, !result.is_provisional); |
| 208 } | 236 } |
| 209 | 237 |
| 210 recognizer_client_->didReceiveResults( | 238 recognizer_client_->didReceiveResults( |
| 211 GetHandleFromID(request_id), final, provisional); | 239 GetHandleFromID(request_id), final, provisional); |
| 212 } | 240 } |
| 213 | 241 |
| | 242 void SpeechRecognitionDispatcher::OnAudioTrackReady( |
| | 243 int request_id, |
| | 244 const media::AudioParameters& params, |
| | 245 base::SharedMemoryHandle memory, |
| | 246 base::SyncSocket::TransitDescriptor descriptor) { |
| | 247 DCHECK(!speech_audio_sink_.get()); |
| | 248 if (audio_track_.isNull()) { |
| | 249 speech_audio_sink_.reset(); |
| | 250 return; |
| | 251 } |
| | 252 |
| | 253 // Create socket here and pass ownership to the |speech_audio_sink_|. |
| | 254 scoped_ptr<base::SyncSocket> socket(new base::CancelableSyncSocket( |
| | 255 base::SyncSocket::UnwrapHandle(descriptor))); |
| | 256 |
| | 257 speech_audio_sink_.reset(new SpeechRecognitionAudioSink( |
| | 258 audio_track_, params, memory, socket.Pass(), |
| | 259 base::Bind(&SpeechRecognitionDispatcher::ResetAudioSourceProvider, |
| | 260 base::Unretained(this)))); |
| | 261 } |
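
OnAudioTrackReady chains ownership: the transit descriptor is wrapped in a socket, the socket is handed to the newly created sink, and the sink receives a callback (bound with base::Unretained) that clears audio_track_ when the track stops. A rough standalone model of that lifecycle, with placeholder types in place of base::CancelableSyncSocket and SpeechRecognitionAudioSink:

```cpp
#include <functional>
#include <iostream>
#include <memory>
#include <utility>

// Stand-in for base::CancelableSyncSocket wrapping a transit descriptor.
struct SyncSocket {
  explicit SyncSocket(int fd) : fd(fd) {}
  int fd;
};

// Stand-in for SpeechRecognitionAudioSink: owns the socket, holds a
// "track stopped" callback supplied by its owner.
class AudioSink {
 public:
  AudioSink(std::unique_ptr<SyncSocket> socket, std::function<void()> on_stopped)
      : socket_(std::move(socket)), on_stopped_(std::move(on_stopped)) {}
  void TrackStopped() { on_stopped_(); }  // fired when the track ends

 private:
  std::unique_ptr<SyncSocket> socket_;
  std::function<void()> on_stopped_;
};

class Dispatcher {
 public:
  void OnAudioTrackReady(int transit_fd) {
    if (!has_track_) return;  // track was reset before the browser replied
    auto socket = std::make_unique<SyncSocket>(transit_fd);
    sink_ = std::make_unique<AudioSink>(
        std::move(socket), [this] { has_track_ = false; });  // ~ResetAudioSourceProvider()
  }
  void Stop() { sink_.reset(); }  // stop()/abort()/errors drop the sink

  bool has_track_ = true;

 private:
  std::unique_ptr<AudioSink> sink_;
};

int main() {
  Dispatcher d;
  d.OnAudioTrackReady(3);
  d.Stop();
  std::cout << d.has_track_ << "\n";  // still 1; only a track stop clears it
}
```
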
| 214 | 262 |
| 215 int SpeechRecognitionDispatcher::GetOrCreateIDForHandle( | 263 int SpeechRecognitionDispatcher::GetOrCreateIDForHandle( |
| 216 const WebSpeechRecognitionHandle& handle) { | 264 const WebSpeechRecognitionHandle& handle) { |
| 217 // Search first for an existing mapping. | 265 // Search first for an existing mapping. |
| 218 for (HandleMap::iterator iter = handle_map_.begin(); | 266 for (HandleMap::iterator iter = handle_map_.begin(); |
| 219 iter != handle_map_.end(); | 267 iter != handle_map_.end(); |
| 220 ++iter) { | 268 ++iter) { |
| 221 if (iter->second.equals(handle)) | 269 if (iter->second.equals(handle)) |
| 222 return iter->first; | 270 return iter->first; |
| 223 } | 271 } |
| 224 // If no existing mapping found, create a new one. | 272 // If no existing mapping found, create a new one. |
| 225 const int new_id = next_id_; | 273 const int new_id = next_id_; |
| 226 handle_map_[new_id] = handle; | 274 handle_map_[new_id] = handle; |
| 227 ++next_id_; | 275 ++next_id_; |
| 228 return new_id; | 276 return new_id; |
| 229 } | 277 } |
| 230 | 278 |
| 231 bool SpeechRecognitionDispatcher::HandleExists( | 279 bool SpeechRecognitionDispatcher::HandleExists( |
| 232 const WebSpeechRecognitionHandle& handle) { | 280 const WebSpeechRecognitionHandle& handle) { |
| 233 for (HandleMap::iterator iter = handle_map_.begin(); | 281 for (HandleMap::iterator iter = handle_map_.begin(); |
| 234 iter != handle_map_.end(); | 282 iter != handle_map_.end(); |
| 235 ++iter) { | 283 ++iter) { |
| 236 if (iter->second.equals(handle)) | 284 if (iter->second.equals(handle)) |
| 237 return true; | 285 return true; |
| 238 } | 286 } |
| 239 return false; | 287 return false; |
| 240 } | 288 } |
| 241 | 289 |
| | 290 void SpeechRecognitionDispatcher::ResetAudioSourceProvider() { |

no longer working on chromium (2014/09/29 09:28:57): There is no AudioSourceProvider any more, how abou…

burnik (2014/09/29 10:38:12): This has been discussed earlier. It would be regar…
| | 291 audio_track_.reset(); |
| | 292 } |
| | 293 |
| 242 const WebSpeechRecognitionHandle& SpeechRecognitionDispatcher::GetHandleFromID( | 294 const WebSpeechRecognitionHandle& SpeechRecognitionDispatcher::GetHandleFromID( |
| 243 int request_id) { | 295 int request_id) { |
| 244 HandleMap::iterator iter = handle_map_.find(request_id); | 296 HandleMap::iterator iter = handle_map_.find(request_id); |
| 245 DCHECK(iter != handle_map_.end()); | 297 DCHECK(iter != handle_map_.end()); |
| 246 return iter->second; | 298 return iter->second; |
| 247 } | 299 } |
| 248 | 300 |
| 249 } // namespace content | 301 } // namespace content |