| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "content/renderer/speech_recognition_dispatcher.h" | 5 #include "content/renderer/speech_recognition_dispatcher.h" |
| 6 | 6 |
| 7 #include "base/basictypes.h" | 7 #include "base/basictypes.h" |
| 8 #include "base/strings/utf_string_conversions.h" | 8 #include "base/strings/utf_string_conversions.h" |
| 9 #include "content/common/speech_recognition_messages.h" | 9 #include "content/common/speech_recognition_messages.h" |
| 10 #include "content/renderer/media/media_stream_audio_source.h" |
| 10 #include "content/renderer/render_view_impl.h" | 11 #include "content/renderer/render_view_impl.h" |
| 12 #include "content/renderer/speech_recognition_audio_source_provider.h" |
| 11 #include "third_party/WebKit/public/platform/WebString.h" | 13 #include "third_party/WebKit/public/platform/WebString.h" |
| 12 #include "third_party/WebKit/public/platform/WebVector.h" | 14 #include "third_party/WebKit/public/platform/WebVector.h" |
| 13 #include "third_party/WebKit/public/web/WebSpeechGrammar.h" | 15 #include "third_party/WebKit/public/web/WebSpeechGrammar.h" |
| 14 #include "third_party/WebKit/public/web/WebSpeechRecognitionParams.h" | 16 #include "third_party/WebKit/public/web/WebSpeechRecognitionParams.h" |
| 15 #include "third_party/WebKit/public/web/WebSpeechRecognitionResult.h" | 17 #include "third_party/WebKit/public/web/WebSpeechRecognitionResult.h" |
| 16 #include "third_party/WebKit/public/web/WebSpeechRecognizerClient.h" | 18 #include "third_party/WebKit/public/web/WebSpeechRecognizerClient.h" |
| 17 | 19 |
| 18 using blink::WebVector; | 20 using blink::WebVector; |
| 19 using blink::WebString; | 21 using blink::WebString; |
| 20 using blink::WebSpeechGrammar; | 22 using blink::WebSpeechGrammar; |
| 21 using blink::WebSpeechRecognitionHandle; | 23 using blink::WebSpeechRecognitionHandle; |
| 22 using blink::WebSpeechRecognitionResult; | 24 using blink::WebSpeechRecognitionResult; |
| 23 using blink::WebSpeechRecognitionParams; | 25 using blink::WebSpeechRecognitionParams; |
| 24 using blink::WebSpeechRecognizerClient; | 26 using blink::WebSpeechRecognizerClient; |
| 25 | 27 |
| 26 namespace content { | 28 namespace content { |
| 27 | 29 |
| 28 SpeechRecognitionDispatcher::SpeechRecognitionDispatcher( | 30 SpeechRecognitionDispatcher::SpeechRecognitionDispatcher( |
| 29 RenderViewImpl* render_view) | 31 RenderViewImpl* render_view) |
| 30 : RenderViewObserver(render_view), | 32 : RenderViewObserver(render_view), |
| 31 recognizer_client_(NULL), | 33 recognizer_client_(NULL), |
| 34 audio_track_set_(false), |
| 35 is_allowed_audio_track_(false), |
| 36 render_loop_(base::MessageLoopProxy::current()), |
| 32 next_id_(1) { | 37 next_id_(1) { |
| 33 } | 38 } |
| 34 | 39 |
| 35 SpeechRecognitionDispatcher::~SpeechRecognitionDispatcher() { | 40 SpeechRecognitionDispatcher::~SpeechRecognitionDispatcher() { |
| 36 } | 41 } |
| 37 | 42 |
| 38 void SpeechRecognitionDispatcher::AbortAllRecognitions() { | 43 void SpeechRecognitionDispatcher::AbortAllRecognitions() { |
| 44 audio_source_provider_.reset(); |
| 39 Send(new SpeechRecognitionHostMsg_AbortAllRequests( | 45 Send(new SpeechRecognitionHostMsg_AbortAllRequests( |
| 40 routing_id())); | 46 routing_id())); |
| 41 } | 47 } |
| 42 | 48 |
| 43 bool SpeechRecognitionDispatcher::OnMessageReceived( | 49 bool SpeechRecognitionDispatcher::OnMessageReceived( |
| 44 const IPC::Message& message) { | 50 const IPC::Message& message) { |
| 45 bool handled = true; | 51 bool handled = true; |
| 46 IPC_BEGIN_MESSAGE_MAP(SpeechRecognitionDispatcher, message) | 52 IPC_BEGIN_MESSAGE_MAP(SpeechRecognitionDispatcher, message) |
| 47 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Started, OnRecognitionStarted) | 53 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Started, OnRecognitionStarted) |
| 48 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioStarted, OnAudioStarted) | 54 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioStarted, OnAudioStarted) |
| 49 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundStarted, OnSoundStarted) | 55 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundStarted, OnSoundStarted) |
| 50 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundEnded, OnSoundEnded) | 56 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundEnded, OnSoundEnded) |
| 51 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioEnded, OnAudioEnded) | 57 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioEnded, OnAudioEnded) |
| 52 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ErrorOccurred, OnErrorOccurred) | 58 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ErrorOccurred, OnErrorOccurred) |
| 53 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Ended, OnRecognitionEnded) | 59 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Ended, OnRecognitionEnded) |
| 54 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ResultRetrieved, | 60 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ResultRetrieved, |
| 55 OnResultsRetrieved) | 61 OnResultsRetrieved) |
| 62 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioTrackReady, |
| 63 OnAudioTrackReady) |
| 64 IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioChunkProcessed, |
| 65 OnAudioChunkProcessed) |
| 56 IPC_MESSAGE_UNHANDLED(handled = false) | 66 IPC_MESSAGE_UNHANDLED(handled = false) |
| 57 IPC_END_MESSAGE_MAP() | 67 IPC_END_MESSAGE_MAP() |
| 58 return handled; | 68 return handled; |
| 59 } | 69 } |
| 60 | 70 |
| 71 void SpeechRecognitionDispatcher::attach( |
| 72 const blink::WebSpeechRecognitionHandle& handle, |
| 73 const blink::WebMediaStreamTrack& audio_track, |
| 74 blink::WebSpeechRecognizerClient* recognizer_client) { |
| 75 |
| 76 // Check if the track is from an allowed source (for now, microphone only). |
| 77 // TODO(burnik): Externalize the allowed track type policy. |
| 78 DCHECK(audio_track.source().type() == blink::WebMediaStreamSource::TypeAudio); |
| 79 MediaStreamAudioSource* native_source = |
| 80 static_cast<MediaStreamAudioSource*>(audio_track.source().extraData()); |
| 81 StreamDeviceInfo device_info = native_source->device_info(); |
| 82 is_allowed_audio_track_ = (device_info.device.type == |
| 83 MEDIA_DEVICE_AUDIO_CAPTURE); |
| 84 |
| 85 audio_track_ = audio_track; |
| 86 audio_track_set_ = true; |
| 87 } |
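Note: the TODO above asks for the allowed-track policy to be externalized from the dispatcher. A minimal sketch of what such a helper could look like, using only the calls already present in attach(); the name IsTrackAllowedForSpeech and the null check are hypothetical additions, not part of this CL:

  // Hypothetical policy helper: true iff the track's native source is a
  // microphone capture device. Keeps the device-type policy out of the
  // dispatcher.
  static bool IsTrackAllowedForSpeech(
      const blink::WebMediaStreamTrack& track) {
    MediaStreamAudioSource* native_source =
        static_cast<MediaStreamAudioSource*>(track.source().extraData());
    if (!native_source)
      return false;  // Track has no native source yet; reject it.
    return native_source->device_info().device.type ==
           MEDIA_DEVICE_AUDIO_CAPTURE;
  }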
| 88 |
| 89 void SpeechRecognitionDispatcher::detach( |
| 90 const blink::WebSpeechRecognitionHandle& handle, |
| 91 blink::WebSpeechRecognizerClient* recognizer_client) { |
| 92 audio_track_set_ = false; |
| 93 } |
| 94 |
| 61 void SpeechRecognitionDispatcher::start( | 95 void SpeechRecognitionDispatcher::start( |
| 62 const WebSpeechRecognitionHandle& handle, | 96 const WebSpeechRecognitionHandle& handle, |
| 63 const WebSpeechRecognitionParams& params, | 97 const WebSpeechRecognitionParams& params, |
| 64 WebSpeechRecognizerClient* recognizer_client) { | 98 WebSpeechRecognizerClient* recognizer_client) { |
| 65 DCHECK(!recognizer_client_ || recognizer_client_ == recognizer_client); | 99 DCHECK(!recognizer_client_ || recognizer_client_ == recognizer_client); |
| 66 recognizer_client_ = recognizer_client; | 100 recognizer_client_ = recognizer_client; |
| 67 | 101 |
| 102 // Destroy any previous provider so it is not starved waiting on chunk ACKs. |
| 103 audio_source_provider_.reset(); |
| 104 |
| 105 if (audio_track_set_ && !is_allowed_audio_track_) { |
| 106 // Notify the client that the attached track is not supported. |
| 107 recognizer_client_->didReceiveError( |
| 108 handle, |
| 109 WebString("Provided audioTrack is not supported. Ignoring track."), |
| 110 WebSpeechRecognizerClient::NotAllowedError); |
| 111 } |
| 112 |
| 68 SpeechRecognitionHostMsg_StartRequest_Params msg_params; | 113 SpeechRecognitionHostMsg_StartRequest_Params msg_params; |
| 69 for (size_t i = 0; i < params.grammars().size(); ++i) { | 114 for (size_t i = 0; i < params.grammars().size(); ++i) { |
| 70 const WebSpeechGrammar& grammar = params.grammars()[i]; | 115 const WebSpeechGrammar& grammar = params.grammars()[i]; |
| 71 msg_params.grammars.push_back( | 116 msg_params.grammars.push_back( |
| 72 SpeechRecognitionGrammar(grammar.src().spec(), grammar.weight())); | 117 SpeechRecognitionGrammar(grammar.src().spec(), grammar.weight())); |
| 73 } | 118 } |
| 74 msg_params.language = base::UTF16ToUTF8(params.language()); | 119 msg_params.language = base::UTF16ToUTF8(params.language()); |
| 75 msg_params.max_hypotheses = static_cast<uint32>(params.maxAlternatives()); | 120 msg_params.max_hypotheses = static_cast<uint32>(params.maxAlternatives()); |
| 76 msg_params.continuous = params.continuous(); | 121 msg_params.continuous = params.continuous(); |
| 77 msg_params.interim_results = params.interimResults(); | 122 msg_params.interim_results = params.interimResults(); |
| 78 msg_params.origin_url = params.origin().toString().utf8(); | 123 msg_params.origin_url = params.origin().toString().utf8(); |
| 79 msg_params.render_view_id = routing_id(); | 124 msg_params.render_view_id = routing_id(); |
| 80 msg_params.request_id = GetOrCreateIDForHandle(handle); | 125 msg_params.request_id = GetOrCreateIDForHandle(handle); |
| 126 // Fall back to the default audio input if the track is not allowed. |
| 127 msg_params.using_audio_track = (audio_track_set_ && is_allowed_audio_track_); |
| 81 // The handle mapping will be removed in |OnRecognitionEnded|. | 128 // The handle mapping will be removed in |OnRecognitionEnded|. |
| 82 Send(new SpeechRecognitionHostMsg_StartRequest(msg_params)); | 129 Send(new SpeechRecognitionHostMsg_StartRequest(msg_params)); |
| 83 } | 130 } |
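Note: when a disallowed track is attached, start() reports NotAllowedError but still sends the start request with using_audio_track == false, so recognition proceeds on the default input. A hypothetical client-side callback trace under that assumption:

  // Hypothetical sequence seen by the WebSpeechRecognizerClient:
  //   didReceiveError(handle, "Provided audioTrack...", NotAllowedError);
  //   didStart(handle);        // Session continues on the default input.
  //   didReceiveResults(...);  // And eventually didEnd(handle).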
| 84 | 131 |
| 85 void SpeechRecognitionDispatcher::stop( | 132 void SpeechRecognitionDispatcher::stop( |
| 86 const WebSpeechRecognitionHandle& handle, | 133 const WebSpeechRecognitionHandle& handle, |
| 87 WebSpeechRecognizerClient* recognizer_client) { | 134 WebSpeechRecognizerClient* recognizer_client) { |
| 88 // Ignore a |stop| issued without a matching |start|. | 135 // Ignore a |stop| issued without a matching |start|. |
| 89 if (recognizer_client_ != recognizer_client || !HandleExists(handle)) | 136 if (recognizer_client_ != recognizer_client || !HandleExists(handle)) |
| 90 return; | 137 return; |
| 138 audio_source_provider_.reset(); |
| 91 Send(new SpeechRecognitionHostMsg_StopCaptureRequest( | 139 Send(new SpeechRecognitionHostMsg_StopCaptureRequest( |
| 92 routing_id(), GetOrCreateIDForHandle(handle))); | 140 routing_id(), GetOrCreateIDForHandle(handle))); |
| 93 } | 141 } |
| 94 | 142 |
| 95 void SpeechRecognitionDispatcher::abort( | 143 void SpeechRecognitionDispatcher::abort( |
| 96 const WebSpeechRecognitionHandle& handle, | 144 const WebSpeechRecognitionHandle& handle, |
| 97 WebSpeechRecognizerClient* recognizer_client) { | 145 WebSpeechRecognizerClient* recognizer_client) { |
| 98 // Ignore an |abort| issued without a matching |start|. | 146 // Ignore an |abort| issued without a matching |start|. |
| 99 if (recognizer_client_ != recognizer_client || !HandleExists(handle)) | 147 if (recognizer_client_ != recognizer_client || !HandleExists(handle)) |
| 100 return; | 148 return; |
| 149 audio_source_provider_.reset(); |
| 101 Send(new SpeechRecognitionHostMsg_AbortRequest( | 150 Send(new SpeechRecognitionHostMsg_AbortRequest( |
| 102 routing_id(), GetOrCreateIDForHandle(handle))); | 151 routing_id(), GetOrCreateIDForHandle(handle))); |
| 103 } | 152 } |
| 104 | 153 |
| 105 void SpeechRecognitionDispatcher::OnRecognitionStarted(int request_id) { | 154 void SpeechRecognitionDispatcher::OnRecognitionStarted(int request_id) { |
| 106 recognizer_client_->didStart(GetHandleFromID(request_id)); | 155 recognizer_client_->didStart(GetHandleFromID(request_id)); |
| 107 } | 156 } |
| (...skipping 39 matching lines...) |
| 147 NOTREACHED(); | 196 NOTREACHED(); |
| 148 return WebSpeechRecognizerClient::OtherError; | 197 return WebSpeechRecognizerClient::OtherError; |
| 149 } | 198 } |
| 150 | 199 |
| 151 void SpeechRecognitionDispatcher::OnErrorOccurred( | 200 void SpeechRecognitionDispatcher::OnErrorOccurred( |
| 152 int request_id, const SpeechRecognitionError& error) { | 201 int request_id, const SpeechRecognitionError& error) { |
| 153 if (error.code == SPEECH_RECOGNITION_ERROR_NO_MATCH) { | 202 if (error.code == SPEECH_RECOGNITION_ERROR_NO_MATCH) { |
| 154 recognizer_client_->didReceiveNoMatch(GetHandleFromID(request_id), | 203 recognizer_client_->didReceiveNoMatch(GetHandleFromID(request_id), |
| 155 WebSpeechRecognitionResult()); | 204 WebSpeechRecognitionResult()); |
| 156 } else { | 205 } else { |
| 206 audio_source_provider_.reset(); |
| 157 recognizer_client_->didReceiveError( | 207 recognizer_client_->didReceiveError( |
| 158 GetHandleFromID(request_id), | 208 GetHandleFromID(request_id), |
| 159 WebString(), // TODO(primiano): message? | 209 WebString(), // TODO(primiano): message? |
| 160 WebKitErrorCode(error.code)); | 210 WebKitErrorCode(error.code)); |
| 161 } | 211 } |
| 162 } | 212 } |
| 163 | 213 |
| 164 void SpeechRecognitionDispatcher::OnRecognitionEnded(int request_id) { | 214 void SpeechRecognitionDispatcher::OnRecognitionEnded(int request_id) { |
| 165 // TODO(tommi): It is possible that the handle isn't found in the array if | 215 // TODO(tommi): It is possible that the handle isn't found in the array if |
| 166 // the user just refreshed the page. It seems that we then get a notification | 216 // the user just refreshed the page. It seems that we then get a notification |
| 167 // for the previously loaded instance of the page. | 217 // for the previously loaded instance of the page. |
| 168 HandleMap::iterator iter = handle_map_.find(request_id); | 218 HandleMap::iterator iter = handle_map_.find(request_id); |
| 169 if (iter == handle_map_.end()) { | 219 if (iter == handle_map_.end()) { |
| 170 DLOG(ERROR) << "OnRecognitionEnded called for a handle that doesn't exist"; | 220 DLOG(ERROR) << "OnRecognitionEnded called for a handle that doesn't exist"; |
| 171 } else { | 221 } else { |
| 172 WebSpeechRecognitionHandle handle = iter->second; | 222 WebSpeechRecognitionHandle handle = iter->second; |
| 173 // Note: we need to erase the handle from the map *before* calling didEnd. | 223 // Note: we need to erase the handle from the map *before* calling didEnd. |
| 174 // didEnd may call back synchronously to start a new recognition session, | 224 // didEnd may call back synchronously to start a new recognition session, |
| 175 // and we don't want to delete the handle from the map after that happens. | 225 // and we don't want to delete the handle from the map after that happens. |
| 176 handle_map_.erase(request_id); | 226 handle_map_.erase(request_id); |
| 227 audio_source_provider_.reset(); |
| 177 recognizer_client_->didEnd(handle); | 228 recognizer_client_->didEnd(handle); |
| 178 } | 229 } |
| 179 } | 230 } |
| 180 | 231 |
| 181 void SpeechRecognitionDispatcher::OnResultsRetrieved( | 232 void SpeechRecognitionDispatcher::OnResultsRetrieved( |
| 182 int request_id, const SpeechRecognitionResults& results) { | 233 int request_id, const SpeechRecognitionResults& results) { |
| 183 size_t provisional_count = 0; | 234 size_t provisional_count = 0; |
| 184 SpeechRecognitionResults::const_iterator it = results.begin(); | 235 SpeechRecognitionResults::const_iterator it = results.begin(); |
| 185 for (; it != results.end(); ++it) { | 236 for (; it != results.end(); ++it) { |
| 186 if (it->is_provisional) | 237 if (it->is_provisional) |
| (...skipping 17 matching lines...) |
| 204 transcripts[i] = result.hypotheses[i].utterance; | 255 transcripts[i] = result.hypotheses[i].utterance; |
| 205 confidences[i] = static_cast<float>(result.hypotheses[i].confidence); | 256 confidences[i] = static_cast<float>(result.hypotheses[i].confidence); |
| 206 } | 257 } |
| 207 webkit_result->assign(transcripts, confidences, !result.is_provisional); | 258 webkit_result->assign(transcripts, confidences, !result.is_provisional); |
| 208 } | 259 } |
| 209 | 260 |
| 210 recognizer_client_->didReceiveResults( | 261 recognizer_client_->didReceiveResults( |
| 211 GetHandleFromID(request_id), final, provisional); | 262 GetHandleFromID(request_id), final, provisional); |
| 212 } | 263 } |
| 213 | 264 |
| 265 void SpeechRecognitionDispatcher::OnAudioError(int request_id) { |
| 266 // Must run on the render thread; re-post if called from another thread. |
| 267 if (!render_loop_->BelongsToCurrentThread()) { |
| 268 render_loop_->PostTask( |
| 269 FROM_HERE, |
| 270 base::Bind(&SpeechRecognitionDispatcher::OnAudioError, |
| 271 base::Unretained(this), request_id)); |
| 272 return; |
| 273 } |
| 274 audio_source_provider_.reset(); |
| 275 } |
| 276 |
| 277 void SpeechRecognitionDispatcher::OnAudioTrackReady( |
| 278 int request_id, |
| 279 const media::AudioParameters& params, |
| 280 base::SharedMemoryHandle handle, |
| 281 uint32 length) { |
| 282 |
| 283 // TODO(burnik): Log and DCHECK(!audio_source_provider_). |
| 284 if (audio_track_.isNull()) { |
| 285 audio_source_provider_.reset(); |
| 286 return; |
| 287 } |
| 288 |
| 289 audio_source_provider_.reset( |
| 290 new SpeechRecognitionAudioSourceProvider( |
| 291 audio_track_, params, handle, length, |
| 292 base::Bind(&SpeechRecognitionDispatcher::OnAudioData, |
| 293 base::Unretained(this), request_id), |
| 294 base::Bind(&SpeechRecognitionDispatcher::OnAudioError, |
| 295 base::Unretained(this), request_id))); |
| 296 } |
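Note: the construction above hands the provider a browser-allocated shared memory region plus the audio parameters. The SpeechRecognitionAudioSourceProvider implementation is not part of this excerpt; a minimal sketch of what it is assumed to do with |handle|, |params| and |length|:

  // Sketch (assumption): map the browser-owned buffer and wrap it as an
  // AudioBus that the renderer fills with audio from the track.
  base::SharedMemory shared_memory(handle, false /* read_only */);
  CHECK(shared_memory.Map(length));  // |length| bytes, sized by the browser.
  scoped_ptr<media::AudioBus> audio_bus =
      media::AudioBus::WrapMemory(params, shared_memory.memory());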
| 297 |
| 298 void SpeechRecognitionDispatcher::OnAudioChunkProcessed( |
| 299 int request_id) { |
| 300 |
| 301 // TODO(burnik): Log and DCHECK(!audio_source_provider_). |
| 302 if (audio_track_.isNull()) |
| 303 return; |
| 304 |
| 305 // Discard any message addressed to a destroyed instance. |
| 306 if (!audio_source_provider_.get()) |
| 307 return; |
| 308 |
| 309 audio_source_provider_->NotifyAudioBusConsumed(); |
| 310 } |
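Note: together with OnAudioData() below, this ACK appears to implement single-buffer flow control between the two processes; the assumed round trip:

  // Assumed protocol, one AudioBus in flight at a time:
  //   renderer: fills the shared bus  -> SpeechRecognitionHostMsg_OnAudioTrackData
  //   browser:  consumes the bus      -> SpeechRecognitionMsg_AudioChunkProcessed
  //   renderer: NotifyAudioBusConsumed() frees the bus for refilling.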
| 311 |
| 312 |
| 313 // TODO(burnik): Consider using a sync_socket. |
| 314 void SpeechRecognitionDispatcher::OnAudioData(int request_id) { |
| 315 // The browser is notified on the render thread; re-post if needed. |
| 316 if (!render_loop_->BelongsToCurrentThread()) { |
| 317 render_loop_->PostTask( |
| 318 FROM_HERE, |
| 319 base::Bind(&SpeechRecognitionDispatcher::OnAudioData, |
| 320 base::Unretained(this), request_id)); |
| 321 return; |
| 322 } |
| 323 // If the handle isn't found in the map, which can happen if the |
| 324 // recognition has already been ended by the browser, delete the |
| 325 // |audio_source_provider_|. |
| 326 HandleMap::iterator iter = handle_map_.find(request_id); |
| 327 if (iter == handle_map_.end()) { |
| 328 audio_source_provider_.reset(); |
| 329 return; |
| 330 } |
| 331 |
| 332 Send(new SpeechRecognitionHostMsg_OnAudioTrackData(routing_id(), request_id)); |
| 333 } |
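Note: OnAudioData() and OnAudioError() share the same trampoline idiom: a callback that may fire on the capture thread re-posts itself to the render loop and returns. The generic shape, with hypothetical names (Dispatcher, owner_loop_, OnEvent); base::Unretained is only safe while |this| is guaranteed to outlive the posted task:

  void Dispatcher::OnEvent(int id) {
    if (!owner_loop_->BelongsToCurrentThread()) {
      // Bounce to the owning thread; the re-posted call does the real work.
      owner_loop_->PostTask(
          FROM_HERE,
          base::Bind(&Dispatcher::OnEvent, base::Unretained(this), id));
      return;
    }
    // Thread-affine work happens here.
  }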
| 214 | 334 |
| 215 int SpeechRecognitionDispatcher::GetOrCreateIDForHandle( | 335 int SpeechRecognitionDispatcher::GetOrCreateIDForHandle( |
| 216 const WebSpeechRecognitionHandle& handle) { | 336 const WebSpeechRecognitionHandle& handle) { |
| 217 // Search first for an existing mapping. | 337 // Search first for an existing mapping. |
| 218 for (HandleMap::iterator iter = handle_map_.begin(); | 338 for (HandleMap::iterator iter = handle_map_.begin(); |
| 219 iter != handle_map_.end(); | 339 iter != handle_map_.end(); |
| 220 ++iter) { | 340 ++iter) { |
| 221 if (iter->second.equals(handle)) | 341 if (iter->second.equals(handle)) |
| 222 return iter->first; | 342 return iter->first; |
| 223 } | 343 } |
| (...skipping 16 matching lines...) |
| 240 } | 360 } |
| 241 | 361 |
| 242 const WebSpeechRecognitionHandle& SpeechRecognitionDispatcher::GetHandleFromID( | 362 const WebSpeechRecognitionHandle& SpeechRecognitionDispatcher::GetHandleFromID( |
| 243 int request_id) { | 363 int request_id) { |
| 244 HandleMap::iterator iter = handle_map_.find(request_id); | 364 HandleMap::iterator iter = handle_map_.find(request_id); |
| 245 DCHECK(iter != handle_map_.end()); | 365 DCHECK(iter != handle_map_.end()); |
| 246 return iter->second; | 366 return iter->second; |
| 247 } | 367 } |
| 248 | 368 |
| 249 } // namespace content | 369 } // namespace content |