content/browser/speech/speech_recognizer_impl.cc - Issue 9663066: Refactoring of chrome speech recognition architecture (CL1.3)

Side by Side Diff: content/browser/speech/speech_recognizer_impl.cc

Issue 9663066: Refactoring of chrome speech recognition architecture (CL1.3) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Minor nit on speech_recognition_engine comments. Created 8 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« content/browser/speech/speech_recognition_engine.h ('K') | « content/browser/speech/speech_recognizer_impl.h ('k') | content/browser/speech/speech_recognizer_impl_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "content/browser/speech/speech_recognizer_impl.h"	5 #include "content/browser/speech/speech_recognizer_impl.h"

6	6

7 #include "base/bind.h"	7 #include "base/bind.h"

8 #include "base/time.h"	8 #include "base/time.h"

9 #include "content/browser/browser_main_loop.h"	9 #include "content/browser/browser_main_loop.h"

10 #include "content/browser/speech/audio_buffer.h"	10 #include "content/browser/speech/audio_buffer.h"

	11 #include "content/browser/speech/google_one_shot_remote_engine.h"

	12 #include "content/public/browser/browser_thread.h"

11 #include "content/public/browser/speech_recognition_event_listener.h"	13 #include "content/public/browser/speech_recognition_event_listener.h"

12 #include "content/public/browser/browser_thread.h"	14 #include "content/public/browser/speech_recognizer.h"

13 #include "content/public/common/speech_recognition_result.h"	15 #include "content/public/common/speech_recognition_result.h"

14 #include "net/url_request/url_request_context_getter.h"	16 #include "net/url_request/url_request_context_getter.h"

	17 #include "chrome/browser/speech/chrome_speech_recognition_preferences.h"

15	18

16 using content::BrowserMainLoop;	19 using content::BrowserMainLoop;

17 using content::BrowserThread;	20 using content::BrowserThread;

	21 using content::SpeechRecognitionError;

18 using content::SpeechRecognitionEventListener;	22 using content::SpeechRecognitionEventListener;

	23 using content::SpeechRecognitionResult;

19 using content::SpeechRecognizer;	24 using content::SpeechRecognizer;

20 using media::AudioInputController;	25 using media::AudioInputController;

21 using std::string;

22	26

23 namespace {	27 namespace {

24	28

25 // The following constants are related to the volume level indicator shown in	29 // The following constants are related to the volume level indicator shown in

26 // the UI for recorded audio.	30 // the UI for recorded audio.

27 // Multiplier used when new volume is greater than previous level.	31 // Multiplier used when new volume is greater than previous level.

28 const float kUpSmoothingFactor = 1.0f;	32 const float kUpSmoothingFactor = 1.0f;

29 // Multiplier used when new volume is lesser than previous level.	33 // Multiplier used when new volume is lesser than previous level.

30 const float kDownSmoothingFactor = 0.7f;	34 const float kDownSmoothingFactor = 0.7f;

31 // RMS dB value of a maximum (unclipped) sine wave for int16 samples.	35 // RMS dB value of a maximum (unclipped) sine wave for int16 samples.

(...skipping 16 matching lines...) Expand all Loading...
48 if (samples[i] <= -32767 \|\| samples[i] >= 32767) {	52 if (samples[i] <= -32767 \|\| samples[i] >= 32767) {

49 if (++clipping_samples > kThreshold)	53 if (++clipping_samples > kThreshold)

50 return true;	54 return true;

51 }	55 }

52 }	56 }

53 return false;	57 return false;

54 }	58 }

55	59

56 } // namespace	60 } // namespace

57	61

	62 // TODO(primiano) transitional, see description in speech_recognizer.h.
	Satish 2012/03/22 12:03:28: This doesn't seem useful, could remove. Primiano Tucci (use gerrit) 2012/03/22 12:39:29: On 2012/03/22 12:03:28, Satish wrote: > this doesn't seem useful, could remove. Done.
58 SpeechRecognizer* SpeechRecognizer::Create(	63 SpeechRecognizer* SpeechRecognizer::Create(

59 SpeechRecognitionEventListener* listener,	64 SpeechRecognitionEventListener* listener,

60 int caller_id,	65 int caller_id,

61 const std::string& language,	66 const std::string& language,

62 const std::string& grammar,	67 const std::string& grammar,

63 net::URLRequestContextGetter* context_getter,	68 net::URLRequestContextGetter* context_getter,

64 bool filter_profanities,	69 bool filter_profanities,

65 const std::string& hardware_info,	70 const std::string& hardware_info,

66 const std::string& origin_url) {	71 const std::string& origin_url) {

67 return new speech::SpeechRecognizerImpl(	72 speech::GoogleOneShotRemoteEngineConfig google_sr_config;

68 listener, caller_id, language, grammar, context_getter,	73 google_sr_config.language = language;

69 filter_profanities, hardware_info, origin_url);	74 google_sr_config.grammar = grammar;

	75 google_sr_config.audio_sample_rate =

	76 speech::SpeechRecognizerImpl::kAudioSampleRate;

	77 google_sr_config.audio_num_bits_per_sample =

	78 speech::SpeechRecognizerImpl::kNumBitsPerAudioSample;

	79 google_sr_config.filter_profanities = filter_profanities;

	80 google_sr_config.hardware_info = hardware_info;

	81 google_sr_config.origin_url = origin_url;

	82

	83 speech::GoogleOneShotRemoteEngine* google_sr_engine =

	84 new speech::GoogleOneShotRemoteEngine(context_getter);
	Satish 2012/03/22 12:03:28: Is this code to create google_sr_engine necessary? I don't see it used anywhere in this function and you are creating this again in HandleOnData. Primiano Tucci (use gerrit) 2012/03/22 12:39:29: On 2012/03/22 12:03:28, Satish wrote: > is this code to create google_sr_engine necessary? I don't see it used anywhere in this function and you are creating this again in HandleOnData. Oops. It was a "remainder" of the old code. Removed.
	85 google_sr_engine->SetConfig(google_sr_config);

	86

	87 return new speech::SpeechRecognizerImpl(listener,

	88 caller_id,

	89 language,

	90 grammar,

	91 context_getter,

	92 filter_profanities,

	93 hardware_info,

	94 origin_url);

70 }	95 }

71	96

72 namespace speech {	97 namespace speech {

73	98

74 const int SpeechRecognizerImpl::kAudioSampleRate = 16000;	99 const int SpeechRecognizerImpl::kAudioSampleRate = 16000;

75 const int SpeechRecognizerImpl::kAudioPacketIntervalMs = 100;

76 const ChannelLayout SpeechRecognizerImpl::kChannelLayout = CHANNEL_LAYOUT_MONO;	100 const ChannelLayout SpeechRecognizerImpl::kChannelLayout = CHANNEL_LAYOUT_MONO;

77 const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16;	101 const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16;

78 const int SpeechRecognizerImpl::kNoSpeechTimeoutSec = 8;	102 const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000;

79 const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300;	103 const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300;

80	104

81 SpeechRecognizerImpl::SpeechRecognizerImpl(	105 SpeechRecognizerImpl::SpeechRecognizerImpl(

82 SpeechRecognitionEventListener* listener,	106 SpeechRecognitionEventListener* listener,

83 int caller_id,	107 int caller_id,

84 const std::string& language,	108 const std::string& language,

85 const std::string& grammar,	109 const std::string& grammar,

86 net::URLRequestContextGetter* context_getter,	110 net::URLRequestContextGetter* context_getter,

87 bool filter_profanities,	111 bool filter_profanities,

88 const std::string& hardware_info,	112 const std::string& hardware_info,

89 const std::string& origin_url)	113 const std::string& origin_url)

90 : listener_(listener),	114 : listener_(listener),

	115 testing_audio_manager_(NULL),

	116 recognition_engine_(NULL),
	Satish 2012/03/22 12:03:28: This isn't required as it is a scoped_ptr. Primiano Tucci (use gerrit) 2012/03/22 12:39:29: On 2012/03/22 12:03:28, Satish wrote: > this isn't required as it is a scoped_ptr. Done.
	117 endpointer_(kAudioSampleRate),

	118 context_getter_(context_getter),

91 caller_id_(caller_id),	119 caller_id_(caller_id),

92 language_(language),	120 language_(language),

93 grammar_(grammar),	121 grammar_(grammar),

94 filter_profanities_(filter_profanities),	122 filter_profanities_(filter_profanities),

95 hardware_info_(hardware_info),	123 hardware_info_(hardware_info),

96 origin_url_(origin_url),	124 origin_url_(origin_url) {

97 context_getter_(context_getter),	125 DCHECK(listener_ != NULL);
	Satish 2012/03/22 12:03:28: Is this DCHECK required? There are other pointer params you don't check for. Primiano Tucci (use gerrit) 2012/03/22 12:39:29: On 2012/03/22 12:03:28, Satish wrote: > is this DCHECK required? There are other pointer params you don't check for. The only other pointer I see is context_getter, which might be NULL (it is, indeed, during tests).
98 codec_(AudioEncoder::CODEC_FLAC),

99 encoder_(NULL),

100 endpointer_(kAudioSampleRate),

101 num_samples_recorded_(0),

102 audio_level_(0.0f),

103 audio_manager_(NULL) {

104 endpointer_.set_speech_input_complete_silence_length(	126 endpointer_.set_speech_input_complete_silence_length(

105 base::Time::kMicrosecondsPerSecond / 2);	127 base::Time::kMicrosecondsPerSecond / 2);

106 endpointer_.set_long_speech_input_complete_silence_length(	128 endpointer_.set_long_speech_input_complete_silence_length(

107 base::Time::kMicrosecondsPerSecond);	129 base::Time::kMicrosecondsPerSecond);

108 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond);	130 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond);

109 endpointer_.StartSession();	131 endpointer_.StartSession();

110 }	132 }

111	133

112 SpeechRecognizerImpl::~SpeechRecognizerImpl() {	134 SpeechRecognizerImpl::~SpeechRecognizerImpl() {

113 // Recording should have stopped earlier due to the endpointer or	135 // Recording should have stopped earlier due to the endpointer or

114 // \|StopRecording\| being called.	136 // \|StopRecording\| being called.

115 DCHECK(!audio_controller_.get());	137 DCHECK(!audio_controller_.get());

116 DCHECK(!request_.get() \|\| !request_->HasPendingRequest());	138 DCHECK(!recognition_engine_.get() \|\|

117 DCHECK(!encoder_.get());	139 !recognition_engine_->IsRecognitionPending());

118 endpointer_.EndSession();	140 endpointer_.EndSession();

119 }	141 }

120	142

121 bool SpeechRecognizerImpl::StartRecognition() {	143 void SpeechRecognizerImpl::StartRecognition() {

122 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));	144 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));

123 DCHECK(!audio_controller_.get());	145 DCHECK(!audio_controller_.get());

124 DCHECK(!request_.get() \|\| !request_->HasPendingRequest());	146 DCHECK(!recognition_engine_.get() \|\|

125 DCHECK(!encoder_.get());	147 !recognition_engine_->IsRecognitionPending());

126	148

127 // The endpointer needs to estimate the environment/background noise before	149 // The endpointer needs to estimate the environment/background noise before

128 // starting to treat the audio as user input. In \|HandleOnData\| we wait until	150 // starting to treat the audio as user input. In \|HandleOnData\| we wait until

129 // such time has passed before switching to user input mode.	151 // such time has passed before switching to user input mode.

130 endpointer_.SetEnvironmentEstimationMode();	152 endpointer_.SetEnvironmentEstimationMode();

131	153

132 encoder_.reset(AudioEncoder::Create(codec_, kAudioSampleRate,	154 AudioManager* audio_manager = (testing_audio_manager_ != NULL) ?

133 kNumBitsPerAudioSample));	155 testing_audio_manager_ :

134 int samples_per_packet = (kAudioSampleRate * kAudioPacketIntervalMs) / 1000;	156 BrowserMainLoop::GetAudioManager();

	157 const int samples_per_packet = kAudioSampleRate *

	158 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs / 1000;
	Satish 2012/03/22 12:03:28: This doesn't seem right, having sampling rate as part of this file but packet interval as part of the one shot reco engine. Can we move that to here as well? Primiano Tucci (use gerrit) 2012/03/22 12:39:29: On 2012/03/22 12:03:28, Satish wrote: > this doesn't seem right, having sampling rate as part of this file but packet interval as part of the one shot reco engine. Can we move that to here as well? Hmm, this is transitional code; I know it is not clean. The clean way is inquiring the recognition_engine through GetDesiredAudioChunkDuration(). However, in this intermediate CL we instantiate the engine on-demand (it is not injected as it should be), thus it is NULL at this point. Therefore I need this (very temporary) dirty patch, just in this CL.
135 AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout,	159 AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout,

136 kAudioSampleRate, kNumBitsPerAudioSample,	160 kAudioSampleRate, kNumBitsPerAudioSample,

137 samples_per_packet);	161 samples_per_packet);

138 audio_controller_ = AudioInputController::Create(	162 audio_controller_ = AudioInputController::Create(audio_manager, this, params);

139 audio_manager_ ? audio_manager_ : BrowserMainLoop::GetAudioManager(),

140 this, params);

141 DCHECK(audio_controller_.get());	163 DCHECK(audio_controller_.get());

142 VLOG(1) << "SpeechRecognizer starting record.";	164 VLOG(1) << "SpeechRecognizer starting record.";

143 num_samples_recorded_ = 0;	165 num_samples_recorded_ = 0;

144 audio_controller_->Record();	166 audio_controller_->Record();

145

146 return true;

147 }	167 }

148	168

149 void SpeechRecognizerImpl::AbortRecognition() {	169 void SpeechRecognizerImpl::AbortRecognition() {

150 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));	170 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));

151 DCHECK(audio_controller_.get() \|\| request_.get());	171 DCHECK(audio_controller_.get() \|\| recognition_engine_.get());

152	172

153 // Stop recording if required.	173 // Stop recording if required.

154 if (audio_controller_.get()) {	174 if (audio_controller_.get()) {

155 CloseAudioControllerSynchronously();	175 CloseAudioControllerSynchronously();

156 }	176 }

157	177

158 VLOG(1) << "SpeechRecognizer canceling recognition.";	178 VLOG(1) << "SpeechRecognizer canceling recognition.";

159 encoder_.reset();	179 recognition_engine_.reset();
	Satish 2012/03/22 12:03:28: Is it required to call EndRecognition here or does .reset cleanly shutdown things? Primiano Tucci (use gerrit) 2012/03/22 12:39:29: On 2012/03/22 12:03:28, Satish wrote: > is it required to call EndRecognition here or does .reset cleanly shutdown things? I didn't make a lot of changes in this CL as this code is temporary. BTW, currently the RecognitionEnd just destroys the url_request in the engine, and calling a dtor has the same effect. So in this (temporary) situation, which indeed violates the speech_recognition_engine contract, we get the same effect.
160 request_.reset();

161 }	180 }

162	181

163 void SpeechRecognizerImpl::StopAudioCapture() {	182 void SpeechRecognizerImpl::StopAudioCapture() {

164 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));	183 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));

165	184

166 // If audio recording has already stopped and we are in recognition phase,	185 // If audio recording has already stopped and we are in recognition phase,

167 // silently ignore any more calls to stop recording.	186 // silently ignore any more calls to stop recording.

168 if (!audio_controller_.get())	187 if (!audio_controller_.get())

169 return;	188 return;

170	189

171 CloseAudioControllerSynchronously();	190 CloseAudioControllerSynchronously();

172	191

173 listener_->OnSoundEnd(caller_id_);
Satish 2012/03/22 12:03:28: I don't see OnSoundEnd called from this class anymore. Should this deleted line be added back? Primiano Tucci (use gerrit) 2012/03/22 12:39:29: On 2012/03/22 12:03:28, Satish wrote: > I don't see OnSoundEnd called from this class anymore. Should this deleted line be added back? Done.
174 listener_->OnAudioEnd(caller_id_);	192 listener_->OnAudioEnd(caller_id_);

175	193

176 // UploadAudioChunk requires a non-empty final buffer. So we encode a packet

177 // of silence in case encoder had no data already.

178 std::vector<short> samples((kAudioSampleRate * kAudioPacketIntervalMs) /

179 1000);

180 AudioChunk dummy_chunk(reinterpret_cast<uint8*>(&samples[0]),

181 samples.size() * sizeof(short),

182 encoder_->bits_per_sample() / 8);

183 encoder_->Encode(dummy_chunk);

184 encoder_->Flush();

185 scoped_ptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());

186 DCHECK(!encoded_data->IsEmpty());

187 encoder_.reset();

188

189 // If we haven't got any audio yet end the recognition sequence here.	194 // If we haven't got any audio yet end the recognition sequence here.

190 if (request_ == NULL) {	195 if (recognition_engine_ == NULL) {

191 // Guard against the listener freeing us until we finish our job.	196 // Guard against the listener freeing us until we finish our job.

192 scoped_refptr<SpeechRecognizerImpl> me(this);	197 scoped_refptr<SpeechRecognizerImpl> me(this);

193 listener_->OnRecognitionEnd(caller_id_);	198 listener_->OnRecognitionEnd(caller_id_);

194 } else {	199 } else {

195 request_->UploadAudioChunk(encoded_data, true /* is_last_chunk */);	200 recognition_engine_->AudioChunksEnded();

196 }	201 }

197 }	202 }

198	203

199 // Invoked in the audio thread.	204 // Invoked in the audio thread.

200 void SpeechRecognizerImpl::OnError(AudioInputController* controller,	205 void SpeechRecognizerImpl::OnError(AudioInputController* controller,

201 int error_code) {	206 int error_code) {

202 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,	207 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,

203 base::Bind(&SpeechRecognizerImpl::HandleOnError,	208 base::Bind(&SpeechRecognizerImpl::HandleOnError,

204 this, error_code));	209 this, error_code));

205 }	210 }

(...skipping 24 matching lines...) Expand all Loading...
230 void SpeechRecognizerImpl::HandleOnData(AudioChunk* raw_audio) {	235 void SpeechRecognizerImpl::HandleOnData(AudioChunk* raw_audio) {

231 scoped_ptr<AudioChunk> free_raw_audio_on_return(raw_audio);	236 scoped_ptr<AudioChunk> free_raw_audio_on_return(raw_audio);

232 // Check if we are still recording and if not discard this buffer, as	237 // Check if we are still recording and if not discard this buffer, as

233 // recording might have been stopped after this buffer was posted to the queue	238 // recording might have been stopped after this buffer was posted to the queue

234 // by \|OnData\|.	239 // by \|OnData\|.

235 if (!audio_controller_.get())	240 if (!audio_controller_.get())

236 return;	241 return;

237	242

238 bool speech_was_heard_before_packet = endpointer_.DidStartReceivingSpeech();	243 bool speech_was_heard_before_packet = endpointer_.DidStartReceivingSpeech();

239	244

240 encoder_->Encode(*raw_audio);

241 float rms;	245 float rms;

242 endpointer_.ProcessAudio(*raw_audio, &rms);	246 endpointer_.ProcessAudio(*raw_audio, &rms);

243 bool did_clip = DetectClipping(*raw_audio);	247 bool did_clip = DetectClipping(*raw_audio);

244 num_samples_recorded_ += raw_audio->NumSamples();	248 num_samples_recorded_ += raw_audio->NumSamples();

245	249

246 if (request_ == NULL) {	250 if (recognition_engine_ == NULL) {

247 // This was the first audio packet recorded, so start a request to the	251 // This was the first audio packet recorded, so start a request to the

248 // server to send the data and inform the listener.	252 // server to send the data and inform the listener.

249 listener_->OnAudioStart(caller_id_);	253 listener_->OnAudioStart(caller_id_);

250 request_.reset(new SpeechRecognitionRequest(context_getter_.get(), this));	254 GoogleOneShotRemoteEngineConfig google_sr_config;
	Satish 2012/03/22 12:03:28: Suggest moving all this to a separate helper function and call that from here to initialize recognition_engine_. Primiano Tucci (use gerrit) 2012/03/22 12:39:29: On 2012/03/22 12:03:28, Satish wrote: > suggest moving all this to a separate helper function and call that from here to initialize recognition_engine_. Same as above, this code is temporary and the next CL will completely change it.
251 request_->Start(language_, grammar_, filter_profanities_,	255 google_sr_config.language = language_;

252 hardware_info_, origin_url_, encoder_->mime_type());	256 google_sr_config.grammar = grammar_;

	257 google_sr_config.audio_sample_rate = kAudioSampleRate;

	258 google_sr_config.audio_num_bits_per_sample = kNumBitsPerAudioSample;

	259 google_sr_config.filter_profanities = filter_profanities_;

	260 google_sr_config.hardware_info = hardware_info_;

	261 google_sr_config.origin_url = origin_url_;

	262 GoogleOneShotRemoteEngine* google_sr_engine =

	263 new GoogleOneShotRemoteEngine(context_getter_.get());

	264 google_sr_engine->SetConfig(google_sr_config);

	265 recognition_engine_.reset(google_sr_engine);

	266 recognition_engine_->set_delegate(this);

	267 recognition_engine_->StartRecognition();

253 }	268 }

254	269

255 scoped_ptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());	270 recognition_engine_->TakeAudioChunk(*raw_audio);

256 DCHECK(!encoded_data->IsEmpty());

257 request_->UploadAudioChunk(encoded_data, false /* is_last_chunk */);

258	271

259 if (endpointer_.IsEstimatingEnvironment()) {	272 if (endpointer_.IsEstimatingEnvironment()) {

260 // Check if we have gathered enough audio for the endpointer to do	273 // Check if we have gathered enough audio for the endpointer to do

261 // environment estimation and should move on to detect speech/end of speech.	274 // environment estimation and should move on to detect speech/end of speech.

262 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs *	275 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs *

263 kAudioSampleRate) / 1000) {	276 kAudioSampleRate) / 1000) {

264 endpointer_.SetUserInputMode();	277 endpointer_.SetUserInputMode();

265 listener_->OnEnvironmentEstimationComplete(caller_id_);	278 listener_->OnEnvironmentEstimationComplete(caller_id_);

266 }	279 }

267 return; // No more processing since we are still estimating environment.	280 return; // No more processing since we are still estimating environment.

268 }	281 }

269	282

270 // Check if we have waited too long without hearing any speech.	283 // Check if we have waited too long without hearing any speech.

271 bool speech_was_heard_after_packet = endpointer_.DidStartReceivingSpeech();	284 bool speech_was_heard_after_packet = endpointer_.DidStartReceivingSpeech();

272 if (!speech_was_heard_after_packet &&	285 if (!speech_was_heard_after_packet &&

273 num_samples_recorded_ >= kNoSpeechTimeoutSec * kAudioSampleRate) {	286 num_samples_recorded_ >= (kNoSpeechTimeoutMs / 1000) * kAudioSampleRate) {

274 InformErrorAndAbortRecognition(	287 InformErrorAndAbortRecognition(

275 content::SPEECH_RECOGNITION_ERROR_NO_SPEECH);	288 content::SPEECH_RECOGNITION_ERROR_NO_SPEECH);

276 return;	289 return;

277 }	290 }

278	291

279 if (!speech_was_heard_before_packet && speech_was_heard_after_packet)	292 if (!speech_was_heard_before_packet && speech_was_heard_after_packet)

280 listener_->OnSoundStart(caller_id_);	293 listener_->OnSoundStart(caller_id_);

281	294

282 // Calculate the input volume to display in the UI, smoothing towards the	295 // Calculate the input volume to display in the UI, smoothing towards the

283 // new level.	296 // new level.

(...skipping 11 matching lines...) Expand all Loading...
295 noise_level = std::min(std::max(0.0f, noise_level),	308 noise_level = std::min(std::max(0.0f, noise_level),

296 kAudioMeterRangeMaxUnclipped);	309 kAudioMeterRangeMaxUnclipped);

297	310

298 listener_->OnAudioLevelsChange(caller_id_, did_clip ? 1.0f : audio_level_,	311 listener_->OnAudioLevelsChange(caller_id_, did_clip ? 1.0f : audio_level_,

299 noise_level);	312 noise_level);

300	313

301 if (endpointer_.speech_input_complete())	314 if (endpointer_.speech_input_complete())

302 StopAudioCapture();	315 StopAudioCapture();

303 }	316 }

304	317

305 void SpeechRecognizerImpl::SetRecognitionResult(	318 void SpeechRecognizerImpl::OnSpeechRecognitionEngineResult(

306 const content::SpeechRecognitionResult& result) {	319 const content::SpeechRecognitionResult& result) {

307 if (result.error != content::SPEECH_RECOGNITION_ERROR_NONE) {	320 if (result.error != content::SPEECH_RECOGNITION_ERROR_NONE) {

308 InformErrorAndAbortRecognition(result.error);	321 InformErrorAndAbortRecognition(result.error);

309 return;	322 return;

310 }	323 }

311	324

312 // Guard against the listener freeing us until we finish our job.	325 // Guard against the listener freeing us until we finish our job.

313 scoped_refptr<SpeechRecognizerImpl> me(this);	326 scoped_refptr<SpeechRecognizerImpl> me(this);

314 listener_->OnRecognitionResult(caller_id_, result);	327 listener_->OnRecognitionResult(caller_id_, result);

315 listener_->OnRecognitionEnd(caller_id_);	328 listener_->OnRecognitionEnd(caller_id_);

316 }	329 }

317	330

	331 void SpeechRecognizerImpl::OnSpeechRecognitionEngineError(

	332 const content::SpeechRecognitionError& error) {

	333 InformErrorAndAbortRecognition(error.code);

	334 }

	335

318 void SpeechRecognizerImpl::InformErrorAndAbortRecognition(	336 void SpeechRecognizerImpl::InformErrorAndAbortRecognition(

319 content::SpeechRecognitionErrorCode error) {	337 content::SpeechRecognitionErrorCode error) {

320 DCHECK_NE(error, content::SPEECH_RECOGNITION_ERROR_NONE);	338 DCHECK_NE(error, content::SPEECH_RECOGNITION_ERROR_NONE);

321 AbortRecognition();	339 AbortRecognition();

322	340

323 // Guard against the listener freeing us until we finish our job.	341 // Guard against the listener freeing us until we finish our job.

324 scoped_refptr<SpeechRecognizerImpl> me(this);	342 scoped_refptr<SpeechRecognizerImpl> me(this);

325 listener_->OnRecognitionError(caller_id_, error);	343 listener_->OnRecognitionError(caller_id_, error);

326 }	344 }

327	345

328 void SpeechRecognizerImpl::CloseAudioControllerSynchronously() {	346 void SpeechRecognizerImpl::CloseAudioControllerSynchronously() {

329 VLOG(1) << "SpeechRecognizer stopping record.";	347 VLOG(1) << "SpeechRecognizer stopping record.";

330	348

331 // TODO(satish): investigate the possibility to utilize the closure	349 // TODO(satish): investigate the possibility to utilize the closure

332 // and switch to async. version of this method. Compare with how	350 // and switch to async. version of this method. Compare with how

333 // it's done in e.g. the AudioRendererHost.	351 // it's done in e.g. the AudioRendererHost.

334 base::WaitableEvent closed_event(true, false);	352 base::WaitableEvent closed_event(true, false);

335 audio_controller_->Close(base::Bind(&base::WaitableEvent::Signal,	353 audio_controller_->Close(base::Bind(&base::WaitableEvent::Signal,

336 base::Unretained(&closed_event)));	354 base::Unretained(&closed_event)));

337 closed_event.Wait();	355 closed_event.Wait();

338 audio_controller_ = NULL; // Releases the ref ptr.	356 audio_controller_ = NULL; // Releases the ref ptr.

339 }	357 }

340	358

341 void SpeechRecognizerImpl::SetAudioManagerForTesting(

342 AudioManager* audio_manager) {

343 audio_manager_ = audio_manager;

344 }

345

346 bool SpeechRecognizerImpl::IsActive() const {	359 bool SpeechRecognizerImpl::IsActive() const {

347 return (request_.get() != NULL);	360 return (recognition_engine_.get() != NULL);

348 }	361 }

349	362

350 bool SpeechRecognizerImpl::IsCapturingAudio() const {	363 bool SpeechRecognizerImpl::IsCapturingAudio() const {

351 return (audio_controller_.get() != NULL);	364 return (audio_controller_.get() != NULL);

352 }	365 }

353	366

	367 const SpeechRecognitionEngine&

	368 SpeechRecognizerImpl::recognition_engine() const {

	369 return *(recognition_engine_.get());

	370 }

	371

	372 void SpeechRecognizerImpl::SetAudioManagerForTesting(

	373 AudioManager* audio_manager) {

	374 testing_audio_manager_ = audio_manager;

	375 }

	376

	377

354 } // namespace speech	378 } // namespace speech

OLD	NEW