content/renderer/speech_recognition_dispatcher.cc - Issue 499233003: Binding media stream audio track to speech recognition [renderer]

Side by Side Diff: content/renderer/speech_recognition_dispatcher.cc

Issue 499233003: Binding media stream audio track to speech recognition [renderer] (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Add unit test and refactor Created 6 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "content/renderer/speech_recognition_dispatcher.h"	5 #include "content/renderer/speech_recognition_dispatcher.h"

6	6

7 #include "base/basictypes.h"	7 #include "base/basictypes.h"

8 #include "base/strings/utf_string_conversions.h"	8 #include "base/strings/utf_string_conversions.h"

9 #include "content/common/speech_recognition_messages.h"	9 #include "content/common/speech_recognition_messages.h"

10 #include "content/renderer/render_view_impl.h"	10 #include "content/renderer/render_view_impl.h"

(...skipping 11 matching lines...) Expand all Loading...
22 using blink::WebSpeechRecognitionResult;	22 using blink::WebSpeechRecognitionResult;

23 using blink::WebSpeechRecognitionParams;	23 using blink::WebSpeechRecognitionParams;

24 using blink::WebSpeechRecognizerClient;	24 using blink::WebSpeechRecognizerClient;

25	25

26 namespace content {	26 namespace content {

27	27

28 SpeechRecognitionDispatcher::SpeechRecognitionDispatcher(	28 SpeechRecognitionDispatcher::SpeechRecognitionDispatcher(

29 RenderViewImpl* render_view)	29 RenderViewImpl* render_view)

30 : RenderViewObserver(render_view),	30 : RenderViewObserver(render_view),

31 recognizer_client_(NULL),	31 recognizer_client_(NULL),

32 next_id_(1) {	32 audio_track_set_(false),

33 }	33 is_allowed_audio_track_(false),

	34 next_id_(1) {}

34	35

35 SpeechRecognitionDispatcher::~SpeechRecognitionDispatcher() {	36 SpeechRecognitionDispatcher::~SpeechRecognitionDispatcher() {

36 }	37 }

37	38

38 void SpeechRecognitionDispatcher::AbortAllRecognitions() {	39 void SpeechRecognitionDispatcher::AbortAllRecognitions() {

	40 audio_source_provider_.reset();

39 Send(new SpeechRecognitionHostMsg_AbortAllRequests(	41 Send(new SpeechRecognitionHostMsg_AbortAllRequests(

40 routing_id()));	42 routing_id()));

41 }	43 }

42	44

43 bool SpeechRecognitionDispatcher::OnMessageReceived(	45 bool SpeechRecognitionDispatcher::OnMessageReceived(

44 const IPC::Message& message) {	46 const IPC::Message& message) {

45 bool handled = true;	47 bool handled = true;

46 IPC_BEGIN_MESSAGE_MAP(SpeechRecognitionDispatcher, message)	48 IPC_BEGIN_MESSAGE_MAP(SpeechRecognitionDispatcher, message)

47 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Started, OnRecognitionStarted)	49 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Started, OnRecognitionStarted)

48 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioStarted, OnAudioStarted)	50 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioStarted, OnAudioStarted)

49 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundStarted, OnSoundStarted)	51 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundStarted, OnSoundStarted)

50 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundEnded, OnSoundEnded)	52 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundEnded, OnSoundEnded)

51 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioEnded, OnAudioEnded)	53 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioEnded, OnAudioEnded)

52 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ErrorOccurred, OnErrorOccurred)	54 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ErrorOccurred, OnErrorOccurred)

53 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Ended, OnRecognitionEnded)	55 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Ended, OnRecognitionEnded)

54 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ResultRetrieved,	56 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ResultRetrieved,

55 OnResultsRetrieved)	57 OnResultsRetrieved)

	58 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioTrackReady, OnAudioTrackReady)

56 IPC_MESSAGE_UNHANDLED(handled = false)	59 IPC_MESSAGE_UNHANDLED(handled = false)

57 IPC_END_MESSAGE_MAP()	60 IPC_END_MESSAGE_MAP()

58 return handled;	61 return handled;

59 }	62 }

60	63

	64 void SpeechRecognitionDispatcher::attach(

	65 const blink::WebSpeechRecognitionHandle& handle,

	66 const blink::WebMediaStreamTrack& audio_track,

	67 blink::WebSpeechRecognizerClient* recognizer_client) {

	68 // Check if track is from an allowed source (microphone only for now)

	69 is_allowed_audio_track_ =

	70 SpeechRecognitionAudioSourceProvider::IsAllowedAudioTrack(audio_track);

	71 audio_track_ = audio_track;

	72 audio_track_set_ = true;

	73 }

	74

	75 void SpeechRecognitionDispatcher::detach(

	76 const blink::WebSpeechRecognitionHandle& handle,

	77 blink::WebSpeechRecognizerClient* recognizer_client) {

	78 audio_track_set_ = false;

	79 }

	80

61 void SpeechRecognitionDispatcher::start(	81 void SpeechRecognitionDispatcher::start(

62 const WebSpeechRecognitionHandle& handle,	82 const WebSpeechRecognitionHandle& handle,

63 const WebSpeechRecognitionParams& params,	83 const WebSpeechRecognitionParams& params,

64 WebSpeechRecognizerClient* recognizer_client) {	84 WebSpeechRecognizerClient* recognizer_client) {

65 DCHECK(!recognizer_client_ || recognizer_client_ == recognizer_client);	85 DCHECK(!recognizer_client_ || recognizer_client_ == recognizer_client);

66 recognizer_client_ = recognizer_client;	86 recognizer_client_ = recognizer_client;

67	87

	88 // Destroy any previous instance not to starve it waiting on chunk ACKs.

	89 audio_source_provider_.reset();

	90

	91 if (audio_track_set_ && !is_allowed_audio_track_) {

	92 // Notify user that the track used is not supported.

	93 recognizer_client_->didReceiveError(

	94 handle,

	95 WebString("Provided audioTrack is not supported. Ignoring track."),

	96 WebSpeechRecognizerClient::NotAllowedError);

	97 }

	98

68 SpeechRecognitionHostMsg_StartRequest_Params msg_params;	99 SpeechRecognitionHostMsg_StartRequest_Params msg_params;

69 for (size_t i = 0; i < params.grammars().size(); ++i) {	100 for (size_t i = 0; i < params.grammars().size(); ++i) {

70 const WebSpeechGrammar& grammar = params.grammars()[i];	101 const WebSpeechGrammar& grammar = params.grammars()[i];

71 msg_params.grammars.push_back(	102 msg_params.grammars.push_back(

72 SpeechRecognitionGrammar(grammar.src().spec(), grammar.weight()));	103 SpeechRecognitionGrammar(grammar.src().spec(), grammar.weight()));

73 }	104 }

74 msg_params.language = base::UTF16ToUTF8(params.language());	105 msg_params.language = base::UTF16ToUTF8(params.language());

75 msg_params.max_hypotheses = static_cast<uint32>(params.maxAlternatives());	106 msg_params.max_hypotheses = static_cast<uint32>(params.maxAlternatives());

76 msg_params.continuous = params.continuous();	107 msg_params.continuous = params.continuous();

77 msg_params.interim_results = params.interimResults();	108 msg_params.interim_results = params.interimResults();

78 msg_params.origin_url = params.origin().toString().utf8();	109 msg_params.origin_url = params.origin().toString().utf8();

79 msg_params.render_view_id = routing_id();	110 msg_params.render_view_id = routing_id();

80 msg_params.request_id = GetOrCreateIDForHandle(handle);	111 msg_params.request_id = GetOrCreateIDForHandle(handle);

	112 // fall back to default input when the track is not allowed

	113 msg_params.using_audio_track = (audio_track_set_ && is_allowed_audio_track_);

81 // The handle mapping will be removed in |OnRecognitionEnd|.	114 // The handle mapping will be removed in |OnRecognitionEnd|.

82 Send(new SpeechRecognitionHostMsg_StartRequest(msg_params));	115 Send(new SpeechRecognitionHostMsg_StartRequest(msg_params));

83 }	116 }

84	117

85 void SpeechRecognitionDispatcher::stop(	118 void SpeechRecognitionDispatcher::stop(

86 const WebSpeechRecognitionHandle& handle,	119 const WebSpeechRecognitionHandle& handle,

87 WebSpeechRecognizerClient* recognizer_client) {	120 WebSpeechRecognizerClient* recognizer_client) {

	121 audio_source_provider_.reset();

88 // Ignore a |stop| issued without a matching |start|.	122 // Ignore a |stop| issued without a matching |start|.

89 if (recognizer_client_ != recognizer_client || !HandleExists(handle))	123 if (recognizer_client_ != recognizer_client || !HandleExists(handle))

90 return;	124 return;

91 Send(new SpeechRecognitionHostMsg_StopCaptureRequest(	125 Send(new SpeechRecognitionHostMsg_StopCaptureRequest(

92 routing_id(), GetOrCreateIDForHandle(handle)));	126 routing_id(), GetOrCreateIDForHandle(handle)));

93 }	127 }

94	128

95 void SpeechRecognitionDispatcher::abort(	129 void SpeechRecognitionDispatcher::abort(

96 const WebSpeechRecognitionHandle& handle,	130 const WebSpeechRecognitionHandle& handle,

97 WebSpeechRecognizerClient* recognizer_client) {	131 WebSpeechRecognizerClient* recognizer_client) {

	132 audio_source_provider_.reset();

98 // Ignore an |abort| issued without a matching |start|.	133 // Ignore an |abort| issued without a matching |start|.

99 if (recognizer_client_ != recognizer_client || !HandleExists(handle))	134 if (recognizer_client_ != recognizer_client || !HandleExists(handle))

100 return;	135 return;

101 Send(new SpeechRecognitionHostMsg_AbortRequest(	136 Send(new SpeechRecognitionHostMsg_AbortRequest(

102 routing_id(), GetOrCreateIDForHandle(handle)));	137 routing_id(), GetOrCreateIDForHandle(handle)));

103 }	138 }

104	139

105 void SpeechRecognitionDispatcher::OnRecognitionStarted(int request_id) {	140 void SpeechRecognitionDispatcher::OnRecognitionStarted(int request_id) {

106 recognizer_client_->didStart(GetHandleFromID(request_id));	141 recognizer_client_->didStart(GetHandleFromID(request_id));

107 }	142 }

(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
147 NOTREACHED();	182 NOTREACHED();

148 return WebSpeechRecognizerClient::OtherError;	183 return WebSpeechRecognizerClient::OtherError;

149 }	184 }

150	185

151 void SpeechRecognitionDispatcher::OnErrorOccurred(	186 void SpeechRecognitionDispatcher::OnErrorOccurred(

152 int request_id, const SpeechRecognitionError& error) {	187 int request_id, const SpeechRecognitionError& error) {

153 if (error.code == SPEECH_RECOGNITION_ERROR_NO_MATCH) {	188 if (error.code == SPEECH_RECOGNITION_ERROR_NO_MATCH) {

154 recognizer_client_->didReceiveNoMatch(GetHandleFromID(request_id),	189 recognizer_client_->didReceiveNoMatch(GetHandleFromID(request_id),

155 WebSpeechRecognitionResult());	190 WebSpeechRecognitionResult());

156 } else {	191 } else {

	192 audio_source_provider_.reset();

157 recognizer_client_->didReceiveError(	193 recognizer_client_->didReceiveError(

158 GetHandleFromID(request_id),	194 GetHandleFromID(request_id),

159 WebString(), // TODO(primiano): message?	195 WebString(), // TODO(primiano): message?

160 WebKitErrorCode(error.code));	196 WebKitErrorCode(error.code));

161 }	197 }

162 }	198 }

163	199

164 void SpeechRecognitionDispatcher::OnRecognitionEnded(int request_id) {	200 void SpeechRecognitionDispatcher::OnRecognitionEnded(int request_id) {

165 // TODO(tommi): It is possible that the handle isn't found in the array if	201 // TODO(tommi): It is possible that the handle isn't found in the array if

166 // the user just refreshed the page. It seems that we then get a notification	202 // the user just refreshed the page. It seems that we then get a notification

167 // for the previously loaded instance of the page.	203 // for the previously loaded instance of the page.

168 HandleMap::iterator iter = handle_map_.find(request_id);	204 HandleMap::iterator iter = handle_map_.find(request_id);

169 if (iter == handle_map_.end()) {	205 if (iter == handle_map_.end()) {

170 DLOG(ERROR) << "OnRecognitionEnded called for a handle that doesn't exist";	206 DLOG(ERROR) << "OnRecognitionEnded called for a handle that doesn't exist";

171 } else {	207 } else {

172 WebSpeechRecognitionHandle handle = iter->second;	208 WebSpeechRecognitionHandle handle = iter->second;

173 // Note: we need to erase the handle from the map before calling didEnd.	209 // Note: we need to erase the handle from the map before calling didEnd.

174 // didEnd may call back synchronously to start a new recognition session,	210 // didEnd may call back synchronously to start a new recognition session,

175 // and we don't want to delete the handle from the map after that happens.	211 // and we don't want to delete the handle from the map after that happens.

176 handle_map_.erase(request_id);	212 handle_map_.erase(request_id);

	213 audio_source_provider_.reset();

177 recognizer_client_->didEnd(handle);	214 recognizer_client_->didEnd(handle);

178 }	215 }

179 }	216 }

180	217

181 void SpeechRecognitionDispatcher::OnResultsRetrieved(	218 void SpeechRecognitionDispatcher::OnResultsRetrieved(

182 int request_id, const SpeechRecognitionResults& results) {	219 int request_id, const SpeechRecognitionResults& results) {

183 size_t provisional_count = 0;	220 size_t provisional_count = 0;

184 SpeechRecognitionResults::const_iterator it = results.begin();	221 SpeechRecognitionResults::const_iterator it = results.begin();

185 for (; it != results.end(); ++it) {	222 for (; it != results.end(); ++it) {

186 if (it->is_provisional)	223 if (it->is_provisional)

(...skipping 17 matching lines...) Expand all Loading...
204 transcripts[i] = result.hypotheses[i].utterance;	241 transcripts[i] = result.hypotheses[i].utterance;

205 confidences[i] = static_cast<float>(result.hypotheses[i].confidence);	242 confidences[i] = static_cast<float>(result.hypotheses[i].confidence);

206 }	243 }

207 webkit_result->assign(transcripts, confidences, !result.is_provisional);	244 webkit_result->assign(transcripts, confidences, !result.is_provisional);

208 }	245 }

209	246

210 recognizer_client_->didReceiveResults(	247 recognizer_client_->didReceiveResults(

211 GetHandleFromID(request_id), final, provisional);	248 GetHandleFromID(request_id), final, provisional);

212 }	249 }

213	250

	251 // TODO(burnik): Each param on its own line.

	252 void SpeechRecognitionDispatcher::OnAudioTrackReady(

	253 int request_id, const media::AudioParameters& params,

	254 base::SharedMemoryHandle memory,

	255 base::SyncSocket::TransitDescriptor descriptor) {

	256 DCHECK(!audio_source_provider_.get());

	257 if (audio_track_.isNull()) {

	258 audio_source_provider_.reset();

	259 return;

	260 }

	261

	262 scoped_ptr<base::SyncSocket> socket;

	263 socket.reset(

	264 new base::SyncSocket(base::SyncSocket::UnwrapHandle(descriptor)));

	265

	266 audio_source_provider_.reset(new SpeechRecognitionAudioSourceProvider(

	267 audio_track_, params, memory, socket.release(),

	268 base::Bind(&SpeechRecognitionDispatcher::OnAudioTrackError,

	269 base::Unretained(this))));

	270 }

214	271

215 int SpeechRecognitionDispatcher::GetOrCreateIDForHandle(	272 int SpeechRecognitionDispatcher::GetOrCreateIDForHandle(

216 const WebSpeechRecognitionHandle& handle) {	273 const WebSpeechRecognitionHandle& handle) {

217 // Search first for an existing mapping.	274 // Search first for an existing mapping.

218 for (HandleMap::iterator iter = handle_map_.begin();	275 for (HandleMap::iterator iter = handle_map_.begin();

219 iter != handle_map_.end();	276 iter != handle_map_.end();

220 ++iter) {	277 ++iter) {

221 if (iter->second.equals(handle))	278 if (iter->second.equals(handle))

222 return iter->first;	279 return iter->first;

223 }	280 }

224 // If no existing mapping found, create a new one.	281 // If no existing mapping found, create a new one.

225 const int new_id = next_id_;	282 const int new_id = next_id_;

226 handle_map_[new_id] = handle;	283 handle_map_[new_id] = handle;

227 ++next_id_;	284 ++next_id_;

228 return new_id;	285 return new_id;

229 }	286 }

230	287

231 bool SpeechRecognitionDispatcher::HandleExists(	288 bool SpeechRecognitionDispatcher::HandleExists(

232 const WebSpeechRecognitionHandle& handle) {	289 const WebSpeechRecognitionHandle& handle) {

233 for (HandleMap::iterator iter = handle_map_.begin();	290 for (HandleMap::iterator iter = handle_map_.begin();

234 iter != handle_map_.end();	291 iter != handle_map_.end();

235 ++iter) {	292 ++iter) {

236 if (iter->second.equals(handle))	293 if (iter->second.equals(handle))

237 return true;	294 return true;

238 }	295 }

239 return false;	296 return false;

240 }	297 }

241	298

	299 void SpeechRecognitionDispatcher::OnAudioTrackError(
	burnik 2014/09/12 12:09:12 Clearly not useful yet. Consider it a stub. burnik 2014/09/15 15:00:07 On 2014/09/12 12:09:12, burnik wrote: > Clearly not useful yet. Consider it a stub. Refactored stub - just used for detecting a stop. Refactoring further in next iteration.
	300 SpeechRecognitionAudioSourceProvider::ErrorState error) {

	301 // TODO(burnik): handle error state events.

	302 switch (error) {

	303 case SpeechRecognitionAudioSourceProvider::ErrorState::SEND_FAILED:

	304 DLOG(ERROR) << "SEND_FAILED";

	305 break;

	306 case SpeechRecognitionAudioSourceProvider::ErrorState::BUFFER_SYNC_LAG:

	307 DLOG(ERROR) << "BUFFER_SYNC_LAG";

	308 break;

	309 case SpeechRecognitionAudioSourceProvider::ErrorState::AUDIO_FIFO_OVERFLOW:

	310 DLOG(ERROR) << "AUDIO_FIFO_OVERFLOW";

	311 break;

	312 case SpeechRecognitionAudioSourceProvider::ErrorState::TRACK_STOPPED:

	313 DLOG(ERROR) << "TRACK_STOPPED";

	314 break;

	315 }

	316 }

	317

242 const WebSpeechRecognitionHandle& SpeechRecognitionDispatcher::GetHandleFromID(	318 const WebSpeechRecognitionHandle& SpeechRecognitionDispatcher::GetHandleFromID(

243 int request_id) {	319 int request_id) {

244 HandleMap::iterator iter = handle_map_.find(request_id);	320 HandleMap::iterator iter = handle_map_.find(request_id);

245 DCHECK(iter != handle_map_.end());	321 DCHECK(iter != handle_map_.end());

246 return iter->second;	322 return iter->second;

247 }	323 }

248	324

249 } // namespace content	325 } // namespace content

OLD	NEW

« content/renderer/speech_recognition_audio_source_provider_unittest.cc ('K') | « content/renderer/speech_recognition_dispatcher.h ('k') | no next file » | no next file with comments »