content/browser/speech/speech_recognizer_impl.cc - Issue 2675713002: Switch Speech Recognition to asynchronous callback-based AudioManager interactions.

Side by Side Diff: content/browser/speech/speech_recognizer_impl.cc

Issue 2675713002: Switch Speech Recognition to asynchronous callback-based AudioManager interactions. (Closed)

Patch Set: review comments addressed Created 3 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "content/browser/speech/speech_recognizer_impl.h"	5 #include "content/browser/speech/speech_recognizer_impl.h"

6	6

7 #include <stdint.h>	7 #include <stdint.h>

8	8

9 #include <algorithm>	9 #include <algorithm>

10	10

11 #include "base/bind.h"	11 #include "base/bind.h"

12 #include "base/macros.h"	12 #include "base/macros.h"

13 #include "base/time/time.h"	13 #include "base/time/time.h"

14 #include "build/build_config.h"	14 #include "build/build_config.h"

15 #include "content/browser/browser_main_loop.h"	15 #include "content/browser/browser_main_loop.h"

16 #include "content/browser/media/media_internals.h"	16 #include "content/browser/media/media_internals.h"

17 #include "content/browser/speech/audio_buffer.h"	17 #include "content/browser/speech/audio_buffer.h"

18 #include "content/public/browser/speech_recognition_event_listener.h"	18 #include "content/public/browser/speech_recognition_event_listener.h"

19 #include "media/audio/audio_file_writer.h"	19 #include "media/audio/audio_file_writer.h"

	20 #include "media/audio/audio_manager.h"

	21 #include "media/audio/audio_system.h"

20 #include "media/base/audio_converter.h"	22 #include "media/base/audio_converter.h"

21	23

22 #if defined(OS_WIN)	24 #if defined(OS_WIN)

23 #include "media/audio/win/core_audio_util_win.h"	25 #include "media/audio/win/core_audio_util_win.h"

24 #endif	26 #endif

25	27

26 using media::AudioBus;	28 using media::AudioBus;

27 using media::AudioConverter;	29 using media::AudioConverter;

28 using media::AudioInputController;	30 using media::AudioInputController;

29 using media::AudioManager;	31 using media::AudioManager;

(...skipping 75 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
105 }	107 }

106	108

107 } // namespace	109 } // namespace

108	110

109 const int SpeechRecognizerImpl::kAudioSampleRate = 16000;	111 const int SpeechRecognizerImpl::kAudioSampleRate = 16000;

110 const ChannelLayout SpeechRecognizerImpl::kChannelLayout =	112 const ChannelLayout SpeechRecognizerImpl::kChannelLayout =

111 media::CHANNEL_LAYOUT_MONO;	113 media::CHANNEL_LAYOUT_MONO;

112 const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16;	114 const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16;

113 const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000;	115 const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000;

114 const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300;	116 const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300;

115 media::AudioManager* SpeechRecognizerImpl::audio_manager_for_tests_ = NULL;	117 media::AudioSystem* SpeechRecognizerImpl::audio_system_for_tests_ = nullptr;

116	118

117 static_assert(SpeechRecognizerImpl::kNumBitsPerAudioSample % 8 == 0,	119 static_assert(SpeechRecognizerImpl::kNumBitsPerAudioSample % 8 == 0,

118 "kNumBitsPerAudioSample must be a multiple of 8");	120 "kNumBitsPerAudioSample must be a multiple of 8");

119	121

120 // SpeechRecognizerImpl::OnDataConverter implementation	122 // SpeechRecognizerImpl::OnDataConverter implementation

121	123

122 SpeechRecognizerImpl::OnDataConverter::OnDataConverter(	124 SpeechRecognizerImpl::OnDataConverter::OnDataConverter(

123 const AudioParameters& input_params,	125 const AudioParameters& input_params,

124 const AudioParameters& output_params)	126 const AudioParameters& output_params)

125 : audio_converter_(input_params, output_params, false),	127 : audio_converter_(input_params, output_params, false),

(...skipping 43 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
169 input_bus_->CopyTo(dest);	171 input_bus_->CopyTo(dest);

170 // Indicate that the recorded audio has in fact been used by the converter.	172 // Indicate that the recorded audio has in fact been used by the converter.

171 data_was_converted_ = true;	173 data_was_converted_ = true;

172 return 1;	174 return 1;

173 }	175 }

174	176

175 // SpeechRecognizerImpl implementation	177 // SpeechRecognizerImpl implementation

176	178

177 SpeechRecognizerImpl::SpeechRecognizerImpl(	179 SpeechRecognizerImpl::SpeechRecognizerImpl(

178 SpeechRecognitionEventListener* listener,	180 SpeechRecognitionEventListener* listener,

	181 media::AudioSystem* audio_system,

179 int session_id,	182 int session_id,

180 bool continuous,	183 bool continuous,

181 bool provisional_results,	184 bool provisional_results,

182 SpeechRecognitionEngine* engine)	185 SpeechRecognitionEngine* engine)

183 : SpeechRecognizer(listener, session_id),	186 : SpeechRecognizer(listener, session_id),

	187 audio_system_(audio_system),

184 recognition_engine_(engine),	188 recognition_engine_(engine),

185 endpointer_(kAudioSampleRate),	189 endpointer_(kAudioSampleRate),

186 audio_log_(MediaInternals::GetInstance()->CreateAudioLog(	190 audio_log_(MediaInternals::GetInstance()->CreateAudioLog(

187 media::AudioLogFactory::AUDIO_INPUT_CONTROLLER)),	191 media::AudioLogFactory::AUDIO_INPUT_CONTROLLER)),

188 is_dispatching_event_(false),	192 is_dispatching_event_(false),

189 provisional_results_(provisional_results),	193 provisional_results_(provisional_results),

190 end_of_utterance_(false),	194 end_of_utterance_(false),

191 state_(STATE_IDLE) {	195 state_(STATE_IDLE),

192 DCHECK(recognition_engine_ != NULL);	196 weak_ptr_factory_(this) {

	197 DCHECK(recognition_engine_ != nullptr);

	198 DCHECK(audio_system_ != nullptr);

193 if (!continuous) {	199 if (!continuous) {

194 // In single shot (non-continuous) recognition,	200 // In single shot (non-continuous) recognition,

195 // the session is automatically ended after:	201 // the session is automatically ended after:

196 // - 0.5 seconds of silence if time < 3 seconds	202 // - 0.5 seconds of silence if time < 3 seconds

197 // - 1 seconds of silence if time >= 3 seconds	203 // - 1 seconds of silence if time >= 3 seconds

198 endpointer_.set_speech_input_complete_silence_length(	204 endpointer_.set_speech_input_complete_silence_length(

199 base::Time::kMicrosecondsPerSecond / 2);	205 base::Time::kMicrosecondsPerSecond / 2);

200 endpointer_.set_long_speech_input_complete_silence_length(	206 endpointer_.set_long_speech_input_complete_silence_length(

201 base::Time::kMicrosecondsPerSecond);	207 base::Time::kMicrosecondsPerSecond);

202 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond);	208 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond);

(...skipping 13 matching lines...) Expand all Loading...
216 // NOTE:all the external events and requests should be enqueued (PostTask), even	222 // NOTE:all the external events and requests should be enqueued (PostTask), even

217 // if they come from the same (IO) thread, in order to preserve the relationship	223 // if they come from the same (IO) thread, in order to preserve the relationship

218 // of causality between events and avoid interleaved event processing due to	224 // of causality between events and avoid interleaved event processing due to

219 // synchronous callbacks.	225 // synchronous callbacks.

220	226

221 void SpeechRecognizerImpl::StartRecognition(const std::string& device_id) {	227 void SpeechRecognizerImpl::StartRecognition(const std::string& device_id) {

222 DCHECK(!device_id.empty());	228 DCHECK(!device_id.empty());

223 device_id_ = device_id;	229 device_id_ = device_id;

224	230

225 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,	231 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,

226 base::Bind(&SpeechRecognizerImpl::DispatchEvent,	232 base::Bind(&SpeechRecognizerImpl::DispatchEvent, this,

227 this, FSMEventArgs(EVENT_START)));	233 FSMEventArgs(EVENT_PREPARE)));

228 }	234 }

229	235

230 void SpeechRecognizerImpl::AbortRecognition() {	236 void SpeechRecognizerImpl::AbortRecognition() {

231 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,	237 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,

232 base::Bind(&SpeechRecognizerImpl::DispatchEvent,	238 base::Bind(&SpeechRecognizerImpl::DispatchEvent,

233 this, FSMEventArgs(EVENT_ABORT)));	239 this, FSMEventArgs(EVENT_ABORT)));

234 }	240 }

235	241

236 void SpeechRecognizerImpl::StopAudioCapture() {	242 void SpeechRecognizerImpl::StopAudioCapture() {

237 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,	243 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,

(...skipping 131 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
369 SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(	375 SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(

370 const FSMEventArgs& event_args) {	376 const FSMEventArgs& event_args) {

371 const FSMEvent event = event_args.event;	377 const FSMEvent event = event_args.event;

372 switch (state_) {	378 switch (state_) {

373 case STATE_IDLE:	379 case STATE_IDLE:

374 switch (event) {	380 switch (event) {

375 // TODO(primiano): restore UNREACHABLE_CONDITION on EVENT_ABORT and	381 // TODO(primiano): restore UNREACHABLE_CONDITION on EVENT_ABORT and

376 // EVENT_STOP_CAPTURE below once speech input extensions are fixed.	382 // EVENT_STOP_CAPTURE below once speech input extensions are fixed.

377 case EVENT_ABORT:	383 case EVENT_ABORT:

378 return AbortSilently(event_args);	384 return AbortSilently(event_args);

	385 case EVENT_PREPARE:

	386 return PrepareRecognition(event_args);

	387 case EVENT_START:

	388 return NotFeasible(event_args);

	389 case EVENT_STOP_CAPTURE:

	390 return AbortSilently(event_args);

	391 case EVENT_AUDIO_DATA: // Corner cases related to queued messages

	392 case EVENT_ENGINE_RESULT: // being lately dispatched.

	393 case EVENT_ENGINE_ERROR:

	394 case EVENT_AUDIO_ERROR:

	395 return DoNothing(event_args);

	396 }

	397 break;

	398 case STATE_PREPARING:

	399 switch (event) {

	400 case EVENT_ABORT:

	401 return AbortSilently(event_args);

	402 case EVENT_PREPARE:

	403 return NotFeasible(event_args);

379 case EVENT_START:	404 case EVENT_START:

380 return StartRecording(event_args);	405 return StartRecording(event_args);

381 case EVENT_STOP_CAPTURE:	406 case EVENT_STOP_CAPTURE:

382 return AbortSilently(event_args);	407 return AbortSilently(event_args);

383 case EVENT_AUDIO_DATA: // Corner cases related to queued messages	408 case EVENT_AUDIO_DATA: // Corner cases related to queued messages

384 case EVENT_ENGINE_RESULT: // being lately dispatched.	409 case EVENT_ENGINE_RESULT: // being lately dispatched.

385 case EVENT_ENGINE_ERROR:	410 case EVENT_ENGINE_ERROR:

386 case EVENT_AUDIO_ERROR:	411 case EVENT_AUDIO_ERROR:

387 return DoNothing(event_args);	412 return DoNothing(event_args);

388 }	413 }

389 break;	414 break;

390 case STATE_STARTING:	415 case STATE_STARTING:

391 switch (event) {	416 switch (event) {

392 case EVENT_ABORT:	417 case EVENT_ABORT:

393 return AbortWithError(event_args);	418 return AbortWithError(event_args);

	419 case EVENT_PREPARE:

	420 return NotFeasible(event_args);

394 case EVENT_START:	421 case EVENT_START:

395 return NotFeasible(event_args);	422 return NotFeasible(event_args);

396 case EVENT_STOP_CAPTURE:	423 case EVENT_STOP_CAPTURE:

397 return AbortSilently(event_args);	424 return AbortSilently(event_args);

398 case EVENT_AUDIO_DATA:	425 case EVENT_AUDIO_DATA:

399 return StartRecognitionEngine(event_args);	426 return StartRecognitionEngine(event_args);

400 case EVENT_ENGINE_RESULT:	427 case EVENT_ENGINE_RESULT:

401 return NotFeasible(event_args);	428 return NotFeasible(event_args);

402 case EVENT_ENGINE_ERROR:	429 case EVENT_ENGINE_ERROR:

403 case EVENT_AUDIO_ERROR:	430 case EVENT_AUDIO_ERROR:

404 return AbortWithError(event_args);	431 return AbortWithError(event_args);

405 }	432 }

406 break;	433 break;

407 case STATE_ESTIMATING_ENVIRONMENT:	434 case STATE_ESTIMATING_ENVIRONMENT:

408 switch (event) {	435 switch (event) {

409 case EVENT_ABORT:	436 case EVENT_ABORT:

410 return AbortWithError(event_args);	437 return AbortWithError(event_args);

	438 case EVENT_PREPARE:

	439 return NotFeasible(event_args);

411 case EVENT_START:	440 case EVENT_START:

412 return NotFeasible(event_args);	441 return NotFeasible(event_args);

413 case EVENT_STOP_CAPTURE:	442 case EVENT_STOP_CAPTURE:

414 return StopCaptureAndWaitForResult(event_args);	443 return StopCaptureAndWaitForResult(event_args);

415 case EVENT_AUDIO_DATA:	444 case EVENT_AUDIO_DATA:

416 return WaitEnvironmentEstimationCompletion(event_args);	445 return WaitEnvironmentEstimationCompletion(event_args);

417 case EVENT_ENGINE_RESULT:	446 case EVENT_ENGINE_RESULT:

418 return ProcessIntermediateResult(event_args);	447 return ProcessIntermediateResult(event_args);

419 case EVENT_ENGINE_ERROR:	448 case EVENT_ENGINE_ERROR:

420 case EVENT_AUDIO_ERROR:	449 case EVENT_AUDIO_ERROR:

421 return AbortWithError(event_args);	450 return AbortWithError(event_args);

422 }	451 }

423 break;	452 break;

424 case STATE_WAITING_FOR_SPEECH:	453 case STATE_WAITING_FOR_SPEECH:

425 switch (event) {	454 switch (event) {

426 case EVENT_ABORT:	455 case EVENT_ABORT:

427 return AbortWithError(event_args);	456 return AbortWithError(event_args);

	457 case EVENT_PREPARE:

	458 return NotFeasible(event_args);

428 case EVENT_START:	459 case EVENT_START:

429 return NotFeasible(event_args);	460 return NotFeasible(event_args);

430 case EVENT_STOP_CAPTURE:	461 case EVENT_STOP_CAPTURE:

431 return StopCaptureAndWaitForResult(event_args);	462 return StopCaptureAndWaitForResult(event_args);

432 case EVENT_AUDIO_DATA:	463 case EVENT_AUDIO_DATA:

433 return DetectUserSpeechOrTimeout(event_args);	464 return DetectUserSpeechOrTimeout(event_args);

434 case EVENT_ENGINE_RESULT:	465 case EVENT_ENGINE_RESULT:

435 return ProcessIntermediateResult(event_args);	466 return ProcessIntermediateResult(event_args);

436 case EVENT_ENGINE_ERROR:	467 case EVENT_ENGINE_ERROR:

437 case EVENT_AUDIO_ERROR:	468 case EVENT_AUDIO_ERROR:

438 return AbortWithError(event_args);	469 return AbortWithError(event_args);

439 }	470 }

440 break;	471 break;

441 case STATE_RECOGNIZING:	472 case STATE_RECOGNIZING:

442 switch (event) {	473 switch (event) {

443 case EVENT_ABORT:	474 case EVENT_ABORT:

444 return AbortWithError(event_args);	475 return AbortWithError(event_args);

	476 case EVENT_PREPARE:

	477 return NotFeasible(event_args);

445 case EVENT_START:	478 case EVENT_START:

446 return NotFeasible(event_args);	479 return NotFeasible(event_args);

447 case EVENT_STOP_CAPTURE:	480 case EVENT_STOP_CAPTURE:

448 return StopCaptureAndWaitForResult(event_args);	481 return StopCaptureAndWaitForResult(event_args);

449 case EVENT_AUDIO_DATA:	482 case EVENT_AUDIO_DATA:

450 return DetectEndOfSpeech(event_args);	483 return DetectEndOfSpeech(event_args);

451 case EVENT_ENGINE_RESULT:	484 case EVENT_ENGINE_RESULT:

452 return ProcessIntermediateResult(event_args);	485 return ProcessIntermediateResult(event_args);

453 case EVENT_ENGINE_ERROR:	486 case EVENT_ENGINE_ERROR:

454 case EVENT_AUDIO_ERROR:	487 case EVENT_AUDIO_ERROR:

455 return AbortWithError(event_args);	488 return AbortWithError(event_args);

456 }	489 }

457 break;	490 break;

458 case STATE_WAITING_FINAL_RESULT:	491 case STATE_WAITING_FINAL_RESULT:

459 switch (event) {	492 switch (event) {

460 case EVENT_ABORT:	493 case EVENT_ABORT:

461 return AbortWithError(event_args);	494 return AbortWithError(event_args);

	495 case EVENT_PREPARE:

	496 return NotFeasible(event_args);

462 case EVENT_START:	497 case EVENT_START:

463 return NotFeasible(event_args);	498 return NotFeasible(event_args);

464 case EVENT_STOP_CAPTURE:	499 case EVENT_STOP_CAPTURE:

465 case EVENT_AUDIO_DATA:	500 case EVENT_AUDIO_DATA:

466 return DoNothing(event_args);	501 return DoNothing(event_args);

467 case EVENT_ENGINE_RESULT:	502 case EVENT_ENGINE_RESULT:

468 return ProcessFinalResult(event_args);	503 return ProcessFinalResult(event_args);

469 case EVENT_ENGINE_ERROR:	504 case EVENT_ENGINE_ERROR:

470 case EVENT_AUDIO_ERROR:	505 case EVENT_AUDIO_ERROR:

471 return AbortWithError(event_args);	506 return AbortWithError(event_args);

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
508 if (route_to_vumeter) {	543 if (route_to_vumeter) {

509 DCHECK(route_to_endpointer); // Depends on endpointer due to |rms|.	544 DCHECK(route_to_endpointer); // Depends on endpointer due to |rms|.

510 UpdateSignalAndNoiseLevels(rms, clip_detected);	545 UpdateSignalAndNoiseLevels(rms, clip_detected);

511 }	546 }

512 if (route_to_sr_engine) {	547 if (route_to_sr_engine) {

513 DCHECK(recognition_engine_.get() != NULL);	548 DCHECK(recognition_engine_.get() != NULL);

514 recognition_engine_->TakeAudioChunk(raw_audio);	549 recognition_engine_->TakeAudioChunk(raw_audio);

515 }	550 }

516 }	551 }

517	552

	553 void SpeechRecognizerImpl::OnDeviceInfo(const media::AudioParameters& params) {

	554 DCHECK_CURRENTLY_ON(BrowserThread::IO);

	555 device_params_ = params;

	556 DVLOG(1) << "Device parameters: " << device_params_.AsHumanReadableString();

	557 DispatchEvent(FSMEventArgs(EVENT_START));

	558 }

	559

	560 SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::PrepareRecognition(

	561 const FSMEventArgs&) {

	562 DCHECK(state_ == STATE_IDLE);

	563 DCHECK(recognition_engine_.get() != NULL);

	564 DCHECK(!IsCapturingAudio());

	565 GetAudioSystem()->GetInputStreamParameters(

	566 device_id_, base::Bind(&SpeechRecognizerImpl::OnDeviceInfo,

	567 weak_ptr_factory_.GetWeakPtr()));

	568

	569 listener()->OnRecognitionStart(session_id());

	570 return STATE_PREPARING;

	571 }

	572

518 SpeechRecognizerImpl::FSMState	573 SpeechRecognizerImpl::FSMState

519 SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) {	574 SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) {

520 DCHECK(state_ == STATE_IDLE);	575 DCHECK(state_ == STATE_PREPARING);

521 DCHECK(recognition_engine_.get() != NULL);	576 DCHECK(recognition_engine_.get() != NULL);

522 DCHECK(!IsCapturingAudio());	577 DCHECK(!IsCapturingAudio());

523 const bool unit_test_is_active = (audio_manager_for_tests_ != NULL);

524 AudioManager* audio_manager = unit_test_is_active ?

525 audio_manager_for_tests_ :

526 AudioManager::Get();

527 DCHECK(audio_manager != NULL);

528	578

529 DVLOG(1) << "SpeechRecognizerImpl starting audio capture.";	579 DVLOG(1) << "SpeechRecognizerImpl starting audio capture.";

530 num_samples_recorded_ = 0;	580 num_samples_recorded_ = 0;

531 audio_level_ = 0;	581 audio_level_ = 0;

532 end_of_utterance_ = false;	582 end_of_utterance_ = false;

533 listener()->OnRecognitionStart(session_id());

534	583

535 // TODO(xians): Check if the OS has the device with |device_id_|, return	584 int chunk_duration_ms = recognition_engine_->GetDesiredAudioChunkDurationMs();

536 // |SPEECH_AUDIO_ERROR_DETAILS_NO_MIC| if the target device does not exist.	585

537 if (!audio_manager->HasAudioInputDevices()) {	586 if (!device_params_.IsValid()) {

	587 DLOG(ERROR) << "Audio input device not found";

538 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE,	588 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE,

539 SPEECH_AUDIO_ERROR_DETAILS_NO_MIC));	589 SPEECH_AUDIO_ERROR_DETAILS_NO_MIC));

540 }	590 }

541	591

542 int chunk_duration_ms = recognition_engine_->GetDesiredAudioChunkDurationMs();

543

544 AudioParameters in_params = audio_manager->GetInputStreamParameters(

545 device_id_);

546 if (!in_params.IsValid() && !unit_test_is_active) {

547 DLOG(ERROR) << "Invalid native audio input parameters";

548 return Abort(

549 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE));

550 }

551

552 // Audio converter shall provide audio based on these parameters as output.	592 // Audio converter shall provide audio based on these parameters as output.

553 // Hard coded, WebSpeech specific parameters are utilized here.	593 // Hard coded, WebSpeech specific parameters are utilized here.

554 int frames_per_buffer = (kAudioSampleRate * chunk_duration_ms) / 1000;	594 int frames_per_buffer = (kAudioSampleRate * chunk_duration_ms) / 1000;

555 AudioParameters output_parameters = AudioParameters(	595 AudioParameters output_parameters = AudioParameters(

556 AudioParameters::AUDIO_PCM_LOW_LATENCY, kChannelLayout, kAudioSampleRate,	596 AudioParameters::AUDIO_PCM_LOW_LATENCY, kChannelLayout, kAudioSampleRate,

557 kNumBitsPerAudioSample, frames_per_buffer);	597 kNumBitsPerAudioSample, frames_per_buffer);

558 DVLOG(1) << "SRI::output_parameters: "	598 DVLOG(1) << "SRI::output_parameters: "

559 << output_parameters.AsHumanReadableString();	599 << output_parameters.AsHumanReadableString();

560	600

561 // Audio converter will receive audio based on these parameters as input.	601 // Audio converter will receive audio based on these parameters as input.

562 // On Windows we start by verifying that Core Audio is supported. If not,	602 // On Windows we start by verifying that Core Audio is supported. If not,

563 // the WaveIn API is used and we might as well avoid all audio conversions	603 // the WaveIn API is used and we might as well avoid all audio conversions

564 // since WaveIn does the conversion for us.	604 // since WaveIn does the conversion for us.

565 // TODO(henrika): this code should be moved to platform dependent audio	605 // TODO(henrika): this code should be moved to platform dependent audio

566 // managers.	606 // managers.

567 bool use_native_audio_params = true;	607 bool use_native_audio_params = true;

568 #if defined(OS_WIN)	608 #if defined(OS_WIN)

569 use_native_audio_params = media::CoreAudioUtil::IsSupported();	609 use_native_audio_params = media::CoreAudioUtil::IsSupported();

570 DVLOG_IF(1, !use_native_audio_params) << "Reverting to WaveIn for WebSpeech";	610 DVLOG_IF(1, !use_native_audio_params) << "Reverting to WaveIn for WebSpeech";

571 #endif	611 #endif

572	612

573 AudioParameters input_parameters = output_parameters;	613 AudioParameters input_parameters = output_parameters;

574 if (use_native_audio_params && !unit_test_is_active) {	614

	615 // AUDIO_FAKE means we are running a test.

	616 if (use_native_audio_params &&

	617 device_params_.format() != media::AudioParameters::AUDIO_FAKE) {

575 // Use native audio parameters but avoid opening up at the native buffer	618 // Use native audio parameters but avoid opening up at the native buffer

576 // size. Instead use same frame size (in milliseconds) as WebSpeech uses.	619 // size. Instead use same frame size (in milliseconds) as WebSpeech uses.

577 // We rely on internal buffers in the audio back-end to fulfill this request	620 // We rely on internal buffers in the audio back-end to fulfill this request

578 // and the idea is to simplify the audio conversion since each Convert()	621 // and the idea is to simplify the audio conversion since each Convert()

579 // call will then render exactly one ProvideInput() call.	622 // call will then render exactly one ProvideInput() call.

580 // in_params.sample_rate()	623 input_parameters = device_params_;

581 input_parameters = in_params;

582 frames_per_buffer =	624 frames_per_buffer =

583 ((in_params.sample_rate() * chunk_duration_ms) / 1000.0) + 0.5;	625 ((input_parameters.sample_rate() * chunk_duration_ms) / 1000.0) + 0.5;

584 input_parameters.set_frames_per_buffer(frames_per_buffer);	626 input_parameters.set_frames_per_buffer(frames_per_buffer);

585 DVLOG(1) << "SRI::input_parameters: "	627 DVLOG(1) << "SRI::input_parameters: "

586 << input_parameters.AsHumanReadableString();	628 << input_parameters.AsHumanReadableString();

587 }	629 }

588	630

589 // Create an audio converter which converts data between native input format	631 // Create an audio converter which converts data between native input format

590 // and WebSpeech specific output format.	632 // and WebSpeech specific output format.

591 audio_converter_.reset(	633 audio_converter_.reset(

592 new OnDataConverter(input_parameters, output_parameters));	634 new OnDataConverter(input_parameters, output_parameters));

593	635

594 audio_controller_ = AudioInputController::Create(	636 audio_controller_ = AudioInputController::Create(

595 audio_manager, this, this, nullptr, nullptr, input_parameters, device_id_,	637 GetAudioSystem()->GetAudioManager(), this, this, nullptr, nullptr,

	638 input_parameters, device_id_,

596 /*agc_is_enabled*/ false);	639 /*agc_is_enabled*/ false);

597	640

598 if (!audio_controller_.get()) {	641 if (!audio_controller_.get()) {

599 return Abort(	642 return Abort(

600 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE));	643 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE));

601 }	644 }

602	645

603 audio_log_->OnCreated(0, input_parameters, device_id_);	646 audio_log_->OnCreated(0, input_parameters, device_id_);

604	647

605 // The endpointer needs to estimate the environment/background noise before	648 // The endpointer needs to estimate the environment/background noise before

(...skipping 79 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
685 return Abort(	728 return Abort(

686 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE));	729 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE));

687 } else if (event_args.event == EVENT_ENGINE_ERROR) {	730 } else if (event_args.event == EVENT_ENGINE_ERROR) {

688 return Abort(event_args.engine_error);	731 return Abort(event_args.engine_error);

689 }	732 }

690 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_ABORTED));	733 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_ABORTED));

691 }	734 }

692	735

693 SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::Abort(	736 SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::Abort(

694 const SpeechRecognitionError& error) {	737 const SpeechRecognitionError& error) {

	738 DCHECK_CURRENTLY_ON(BrowserThread::IO);

	739

695 if (IsCapturingAudio())	740 if (IsCapturingAudio())

696 CloseAudioControllerAsynchronously();	741 CloseAudioControllerAsynchronously();

697	742

698 DVLOG(1) << "SpeechRecognizerImpl canceling recognition. ";	743 DVLOG(1) << "SpeechRecognizerImpl canceling recognition. ";

699	744

	745 if (state_ == STATE_PREPARING) {

	746 // Cancel an outstanding reply from AudioSystem.

	747 weak_ptr_factory_.InvalidateWeakPtrs();

	748 }

	749

700 // The recognition engine is initialized only after STATE_STARTING.	750 // The recognition engine is initialized only after STATE_STARTING.

701 if (state_ > STATE_STARTING) {	751 if (state_ > STATE_STARTING) {

702 DCHECK(recognition_engine_.get() != NULL);	752 DCHECK(recognition_engine_.get() != NULL);

703 recognition_engine_->EndRecognition();	753 recognition_engine_->EndRecognition();

704 }	754 }

705	755

706 if (state_ > STATE_WAITING_FOR_SPEECH && state_ < STATE_WAITING_FINAL_RESULT)	756 if (state_ > STATE_WAITING_FOR_SPEECH && state_ < STATE_WAITING_FINAL_RESULT)

707 listener()->OnSoundEnd(session_id());	757 listener()->OnSoundEnd(session_id());

708	758

709 if (state_ > STATE_STARTING && state_ < STATE_WAITING_FINAL_RESULT)	759 if (state_ > STATE_STARTING && state_ < STATE_WAITING_FINAL_RESULT)

(...skipping 116 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
826	876

827 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) /	877 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) /

828 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped);	878 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped);

829 noise_level = std::min(std::max(0.0f, noise_level),	879 noise_level = std::min(std::max(0.0f, noise_level),

830 kAudioMeterRangeMaxUnclipped);	880 kAudioMeterRangeMaxUnclipped);

831	881

832 listener()->OnAudioLevelsChange(	882 listener()->OnAudioLevelsChange(

833 session_id(), clip_detected ? 1.0f : audio_level_, noise_level);	883 session_id(), clip_detected ? 1.0f : audio_level_, noise_level);

834 }	884 }

835	885

836 void SpeechRecognizerImpl::SetAudioManagerForTesting(	886 void SpeechRecognizerImpl::SetAudioSystemForTesting(

837 AudioManager* audio_manager) {	887 media::AudioSystem* audio_system) {

838 audio_manager_for_tests_ = audio_manager;	888 audio_system_for_tests_ = audio_system;

	889 }

	890

	891 media::AudioSystem* SpeechRecognizerImpl::GetAudioSystem() {

	892 return audio_system_for_tests_ ? audio_system_for_tests_ : audio_system_;

839 }	893 }

840	894

841 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value)	895 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value)

842 : event(event_value),	896 : event(event_value),

843 audio_data(NULL),	897 audio_data(NULL),

844 engine_error(SPEECH_RECOGNITION_ERROR_NONE) {	898 engine_error(SPEECH_RECOGNITION_ERROR_NONE) {

845 }	899 }

846	900

847 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(const FSMEventArgs& other) =	901 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(const FSMEventArgs& other) =

848 default;	902 default;

849	903

850 SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() {	904 SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() {

851 }	905 }

852	906

853 } // namespace content	907 } // namespace content

OLD	NEW