content/browser/speech/speech_recognizer_impl.cc - Issue 2675713002: Switch Speech Recognition to asynchronous callback-based AudioManager interactions.

Side by Side Diff: content/browser/speech/speech_recognizer_impl.cc

Issue 2675713002: Switch Speech Recognition to asynchronous callback-based AudioManager interactions. (Closed)

Patch Set: Created 3 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "content/browser/speech/speech_recognizer_impl.h"	5 #include "content/browser/speech/speech_recognizer_impl.h"

6	6

7 #include <stdint.h>	7 #include <stdint.h>

8	8

9 #include "base/bind.h"	9 #include "base/bind.h"

10 #include "base/macros.h"	10 #include "base/macros.h"

11 #include "base/time/time.h"	11 #include "base/time/time.h"

12 #include "build/build_config.h"	12 #include "build/build_config.h"

13 #include "content/browser/browser_main_loop.h"	13 #include "content/browser/browser_main_loop.h"

14 #include "content/browser/media/media_internals.h"	14 #include "content/browser/media/media_internals.h"

15 #include "content/browser/speech/audio_buffer.h"	15 #include "content/browser/speech/audio_buffer.h"

16 #include "content/public/browser/speech_recognition_event_listener.h"	16 #include "content/public/browser/speech_recognition_event_listener.h"

	17 #include "media/audio/audio_manager.h"

	18 #include "media/audio/audio_system.h"

17 #include "media/base/audio_converter.h"	19 #include "media/base/audio_converter.h"

18	20

19 #if defined(OS_WIN)	21 #if defined(OS_WIN)

20 #include "media/audio/win/core_audio_util_win.h"	22 #include "media/audio/win/core_audio_util_win.h"

21 #endif	23 #endif

22	24

23 using media::AudioBus;	25 using media::AudioBus;

24 using media::AudioConverter;	26 using media::AudioConverter;

25 using media::AudioInputController;	27 using media::AudioInputController;

26 using media::AudioManager;	28 using media::AudioManager;

(...skipping 75 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
102 }	104 }

103	105

104 } // namespace	106 } // namespace

105	107

106 const int SpeechRecognizerImpl::kAudioSampleRate = 16000;	108 const int SpeechRecognizerImpl::kAudioSampleRate = 16000;

107 const ChannelLayout SpeechRecognizerImpl::kChannelLayout =	109 const ChannelLayout SpeechRecognizerImpl::kChannelLayout =

108 media::CHANNEL_LAYOUT_MONO;	110 media::CHANNEL_LAYOUT_MONO;

109 const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16;	111 const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16;

110 const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000;	112 const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000;

111 const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300;	113 const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300;

112 media::AudioManager* SpeechRecognizerImpl::audio_manager_for_tests_ = NULL;	114 media::AudioSystem* SpeechRecognizerImpl::audio_system_for_tests_ = nullptr;

113	115

114 static_assert(SpeechRecognizerImpl::kNumBitsPerAudioSample % 8 == 0,	116 static_assert(SpeechRecognizerImpl::kNumBitsPerAudioSample % 8 == 0,

115 "kNumBitsPerAudioSample must be a multiple of 8");	117 "kNumBitsPerAudioSample must be a multiple of 8");

116	118

117 // SpeechRecognizerImpl::OnDataConverter implementation	119 // SpeechRecognizerImpl::OnDataConverter implementation

118	120

119 SpeechRecognizerImpl::OnDataConverter::OnDataConverter(	121 SpeechRecognizerImpl::OnDataConverter::OnDataConverter(

120 const AudioParameters& input_params,	122 const AudioParameters& input_params,

121 const AudioParameters& output_params)	123 const AudioParameters& output_params)

122 : audio_converter_(input_params, output_params, false),	124 : audio_converter_(input_params, output_params, false),

(...skipping 43 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
166 input_bus_->CopyTo(dest);	168 input_bus_->CopyTo(dest);

167 // Indicate that the recorded audio has in fact been used by the converter.	169 // Indicate that the recorded audio has in fact been used by the converter.

168 data_was_converted_ = true;	170 data_was_converted_ = true;

169 return 1;	171 return 1;

170 }	172 }

171	173

172 // SpeechRecognizerImpl implementation	174 // SpeechRecognizerImpl implementation

173	175

174 SpeechRecognizerImpl::SpeechRecognizerImpl(	176 SpeechRecognizerImpl::SpeechRecognizerImpl(

175 SpeechRecognitionEventListener* listener,	177 SpeechRecognitionEventListener* listener,

	178 media::AudioSystem* audio_system,

176 int session_id,	179 int session_id,

177 bool continuous,	180 bool continuous,

178 bool provisional_results,	181 bool provisional_results,

179 SpeechRecognitionEngine* engine)	182 SpeechRecognitionEngine* engine)

180 : SpeechRecognizer(listener, session_id),	183 : SpeechRecognizer(listener, session_id),

	184 audio_system_(audio_system),

181 recognition_engine_(engine),	185 recognition_engine_(engine),

182 endpointer_(kAudioSampleRate),	186 endpointer_(kAudioSampleRate),

183 audio_log_(MediaInternals::GetInstance()->CreateAudioLog(	187 audio_log_(MediaInternals::GetInstance()->CreateAudioLog(

184 media::AudioLogFactory::AUDIO_INPUT_CONTROLLER)),	188 media::AudioLogFactory::AUDIO_INPUT_CONTROLLER)),

185 is_dispatching_event_(false),	189 is_dispatching_event_(false),

186 provisional_results_(provisional_results),	190 provisional_results_(provisional_results),

187 end_of_utterance_(false),	191 end_of_utterance_(false),

188 state_(STATE_IDLE) {	192 state_(STATE_IDLE),

189 DCHECK(recognition_engine_ != NULL);	193 weak_ptr_factory_(this) {

	194 DCHECK(recognition_engine_ != nullptr);

	195 DCHECK(audio_system_ != nullptr);

190 if (!continuous) {	196 if (!continuous) {

191 // In single shot (non-continous) recognition,	197 // In single shot (non-continous) recognition,

192 // the session is automatically ended after:	198 // the session is automatically ended after:

193 // - 0.5 seconds of silence if time < 3 seconds	199 // - 0.5 seconds of silence if time < 3 seconds

194 // - 1 seconds of silence if time >= 3 seconds	200 // - 1 seconds of silence if time >= 3 seconds

195 endpointer_.set_speech_input_complete_silence_length(	201 endpointer_.set_speech_input_complete_silence_length(

196 base::Time::kMicrosecondsPerSecond / 2);	202 base::Time::kMicrosecondsPerSecond / 2);

197 endpointer_.set_long_speech_input_complete_silence_length(	203 endpointer_.set_long_speech_input_complete_silence_length(

198 base::Time::kMicrosecondsPerSecond);	204 base::Time::kMicrosecondsPerSecond);

199 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond);	205 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond);

(...skipping 13 matching lines...) Expand all Loading...
213 // NOTE:all the external events and requests should be enqueued (PostTask), even	219 // NOTE:all the external events and requests should be enqueued (PostTask), even

214 // if they come from the same (IO) thread, in order to preserve the relationship	220 // if they come from the same (IO) thread, in order to preserve the relationship

215 // of causality between events and avoid interleaved event processing due to	221 // of causality between events and avoid interleaved event processing due to

216 // synchronous callbacks.	222 // synchronous callbacks.

217	223

218 void SpeechRecognizerImpl::StartRecognition(const std::string& device_id) {	224 void SpeechRecognizerImpl::StartRecognition(const std::string& device_id) {

219 DCHECK(!device_id.empty());	225 DCHECK(!device_id.empty());

220 device_id_ = device_id;	226 device_id_ = device_id;

221	227

222 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,	228 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,

223 base::Bind(&SpeechRecognizerImpl::DispatchEvent,	229 base::Bind(&SpeechRecognizerImpl::DispatchEvent, this,

224 this, FSMEventArgs(EVENT_START)));	230 FSMEventArgs(EVENT_PREPARE)));

225 }	231 }

226	232

227 void SpeechRecognizerImpl::AbortRecognition() {	233 void SpeechRecognizerImpl::AbortRecognition() {

228 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,	234 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,

229 base::Bind(&SpeechRecognizerImpl::DispatchEvent,	235 base::Bind(&SpeechRecognizerImpl::DispatchEvent,

230 this, FSMEventArgs(EVENT_ABORT)));	236 this, FSMEventArgs(EVENT_ABORT)));

231 }	237 }

232	238

233 void SpeechRecognizerImpl::StopAudioCapture() {	239 void SpeechRecognizerImpl::StopAudioCapture() {

234 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,	240 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,

(...skipping 131 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
366 SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(	372 SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(

367 const FSMEventArgs& event_args) {	373 const FSMEventArgs& event_args) {

368 const FSMEvent event = event_args.event;	374 const FSMEvent event = event_args.event;

369 switch (state_) {	375 switch (state_) {

370 case STATE_IDLE:	376 case STATE_IDLE:

371 switch (event) {	377 switch (event) {

372 // TODO(primiano): restore UNREACHABLE_CONDITION on EVENT_ABORT and	378 // TODO(primiano): restore UNREACHABLE_CONDITION on EVENT_ABORT and

373 // EVENT_STOP_CAPTURE below once speech input extensions are fixed.	379 // EVENT_STOP_CAPTURE below once speech input extensions are fixed.

374 case EVENT_ABORT:	380 case EVENT_ABORT:

375 return AbortSilently(event_args);	381 return AbortSilently(event_args);

	382 case EVENT_PREPARE:

	383 return PrepareRecognition(event_args);

	384 case EVENT_START:

	385 return NotFeasible(event_args);

	386 case EVENT_STOP_CAPTURE:

	387 return AbortSilently(event_args);

	388 case EVENT_AUDIO_DATA: // Corner cases related to queued messages

	389 case EVENT_ENGINE_RESULT: // being lately dispatched.

	390 case EVENT_ENGINE_ERROR:

	391 case EVENT_AUDIO_ERROR:

	392 return DoNothing(event_args);

	393 }

	394 break;

	395 case STATE_PREPARING:

	396 switch (event) {

	397 case EVENT_ABORT:

	398 return AbortSilently(event_args);

	399 case EVENT_PREPARE:

	400 return NotFeasible(event_args);

376 case EVENT_START:	401 case EVENT_START:

377 return StartRecording(event_args);	402 return StartRecording(event_args);

378 case EVENT_STOP_CAPTURE:	403 case EVENT_STOP_CAPTURE:

379 return AbortSilently(event_args);	404 return AbortSilently(event_args);

380 case EVENT_AUDIO_DATA: // Corner cases related to queued messages	405 case EVENT_AUDIO_DATA: // Corner cases related to queued messages

381 case EVENT_ENGINE_RESULT: // being lately dispatched.	406 case EVENT_ENGINE_RESULT: // being lately dispatched.

382 case EVENT_ENGINE_ERROR:	407 case EVENT_ENGINE_ERROR:

383 case EVENT_AUDIO_ERROR:	408 case EVENT_AUDIO_ERROR:

384 return DoNothing(event_args);	409 return DoNothing(event_args);

385 }	410 }

386 break;	411 break;

387 case STATE_STARTING:	412 case STATE_STARTING:

388 switch (event) {	413 switch (event) {

389 case EVENT_ABORT:	414 case EVENT_ABORT:

390 return AbortWithError(event_args);	415 return AbortWithError(event_args);

	416 case EVENT_PREPARE:

	417 return NotFeasible(event_args);

391 case EVENT_START:	418 case EVENT_START:

392 return NotFeasible(event_args);	419 return NotFeasible(event_args);

393 case EVENT_STOP_CAPTURE:	420 case EVENT_STOP_CAPTURE:

394 return AbortSilently(event_args);	421 return AbortSilently(event_args);

395 case EVENT_AUDIO_DATA:	422 case EVENT_AUDIO_DATA:

396 return StartRecognitionEngine(event_args);	423 return StartRecognitionEngine(event_args);

397 case EVENT_ENGINE_RESULT:	424 case EVENT_ENGINE_RESULT:

398 return NotFeasible(event_args);	425 return NotFeasible(event_args);

399 case EVENT_ENGINE_ERROR:	426 case EVENT_ENGINE_ERROR:

400 case EVENT_AUDIO_ERROR:	427 case EVENT_AUDIO_ERROR:

401 return AbortWithError(event_args);	428 return AbortWithError(event_args);

402 }	429 }

403 break;	430 break;

404 case STATE_ESTIMATING_ENVIRONMENT:	431 case STATE_ESTIMATING_ENVIRONMENT:

405 switch (event) {	432 switch (event) {

406 case EVENT_ABORT:	433 case EVENT_ABORT:

407 return AbortWithError(event_args);	434 return AbortWithError(event_args);

	435 case EVENT_PREPARE:

	436 return NotFeasible(event_args);

408 case EVENT_START:	437 case EVENT_START:

409 return NotFeasible(event_args);	438 return NotFeasible(event_args);

410 case EVENT_STOP_CAPTURE:	439 case EVENT_STOP_CAPTURE:

411 return StopCaptureAndWaitForResult(event_args);	440 return StopCaptureAndWaitForResult(event_args);

412 case EVENT_AUDIO_DATA:	441 case EVENT_AUDIO_DATA:

413 return WaitEnvironmentEstimationCompletion(event_args);	442 return WaitEnvironmentEstimationCompletion(event_args);

414 case EVENT_ENGINE_RESULT:	443 case EVENT_ENGINE_RESULT:

415 return ProcessIntermediateResult(event_args);	444 return ProcessIntermediateResult(event_args);

416 case EVENT_ENGINE_ERROR:	445 case EVENT_ENGINE_ERROR:

417 case EVENT_AUDIO_ERROR:	446 case EVENT_AUDIO_ERROR:

418 return AbortWithError(event_args);	447 return AbortWithError(event_args);

419 }	448 }

420 break;	449 break;

421 case STATE_WAITING_FOR_SPEECH:	450 case STATE_WAITING_FOR_SPEECH:

422 switch (event) {	451 switch (event) {

423 case EVENT_ABORT:	452 case EVENT_ABORT:

424 return AbortWithError(event_args);	453 return AbortWithError(event_args);

	454 case EVENT_PREPARE:

	455 return NotFeasible(event_args);

425 case EVENT_START:	456 case EVENT_START:

426 return NotFeasible(event_args);	457 return NotFeasible(event_args);

427 case EVENT_STOP_CAPTURE:	458 case EVENT_STOP_CAPTURE:

428 return StopCaptureAndWaitForResult(event_args);	459 return StopCaptureAndWaitForResult(event_args);

429 case EVENT_AUDIO_DATA:	460 case EVENT_AUDIO_DATA:

430 return DetectUserSpeechOrTimeout(event_args);	461 return DetectUserSpeechOrTimeout(event_args);

431 case EVENT_ENGINE_RESULT:	462 case EVENT_ENGINE_RESULT:

432 return ProcessIntermediateResult(event_args);	463 return ProcessIntermediateResult(event_args);

433 case EVENT_ENGINE_ERROR:	464 case EVENT_ENGINE_ERROR:

434 case EVENT_AUDIO_ERROR:	465 case EVENT_AUDIO_ERROR:

435 return AbortWithError(event_args);	466 return AbortWithError(event_args);

436 }	467 }

437 break;	468 break;

438 case STATE_RECOGNIZING:	469 case STATE_RECOGNIZING:

439 switch (event) {	470 switch (event) {

440 case EVENT_ABORT:	471 case EVENT_ABORT:

441 return AbortWithError(event_args);	472 return AbortWithError(event_args);

	473 case EVENT_PREPARE:

	474 return NotFeasible(event_args);

442 case EVENT_START:	475 case EVENT_START:

443 return NotFeasible(event_args);	476 return NotFeasible(event_args);

444 case EVENT_STOP_CAPTURE:	477 case EVENT_STOP_CAPTURE:

445 return StopCaptureAndWaitForResult(event_args);	478 return StopCaptureAndWaitForResult(event_args);

446 case EVENT_AUDIO_DATA:	479 case EVENT_AUDIO_DATA:

447 return DetectEndOfSpeech(event_args);	480 return DetectEndOfSpeech(event_args);

448 case EVENT_ENGINE_RESULT:	481 case EVENT_ENGINE_RESULT:

449 return ProcessIntermediateResult(event_args);	482 return ProcessIntermediateResult(event_args);

450 case EVENT_ENGINE_ERROR:	483 case EVENT_ENGINE_ERROR:

451 case EVENT_AUDIO_ERROR:	484 case EVENT_AUDIO_ERROR:

452 return AbortWithError(event_args);	485 return AbortWithError(event_args);

453 }	486 }

454 break;	487 break;

455 case STATE_WAITING_FINAL_RESULT:	488 case STATE_WAITING_FINAL_RESULT:

456 switch (event) {	489 switch (event) {

457 case EVENT_ABORT:	490 case EVENT_ABORT:

458 return AbortWithError(event_args);	491 return AbortWithError(event_args);

	492 case EVENT_PREPARE:

	493 return NotFeasible(event_args);

459 case EVENT_START:	494 case EVENT_START:

460 return NotFeasible(event_args);	495 return NotFeasible(event_args);

461 case EVENT_STOP_CAPTURE:	496 case EVENT_STOP_CAPTURE:

462 case EVENT_AUDIO_DATA:	497 case EVENT_AUDIO_DATA:

463 return DoNothing(event_args);	498 return DoNothing(event_args);

464 case EVENT_ENGINE_RESULT:	499 case EVENT_ENGINE_RESULT:

465 return ProcessFinalResult(event_args);	500 return ProcessFinalResult(event_args);

466 case EVENT_ENGINE_ERROR:	501 case EVENT_ENGINE_ERROR:

467 case EVENT_AUDIO_ERROR:	502 case EVENT_AUDIO_ERROR:

468 return AbortWithError(event_args);	503 return AbortWithError(event_args);

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
505 if (route_to_vumeter) {	540 if (route_to_vumeter) {

506 DCHECK(route_to_endpointer); // Depends on endpointer due to \|rms\|.	541 DCHECK(route_to_endpointer); // Depends on endpointer due to \|rms\|.

507 UpdateSignalAndNoiseLevels(rms, clip_detected);	542 UpdateSignalAndNoiseLevels(rms, clip_detected);

508 }	543 }

509 if (route_to_sr_engine) {	544 if (route_to_sr_engine) {

510 DCHECK(recognition_engine_.get() != NULL);	545 DCHECK(recognition_engine_.get() != NULL);

511 recognition_engine_->TakeAudioChunk(raw_audio);	546 recognition_engine_->TakeAudioChunk(raw_audio);

512 }	547 }

513 }	548 }

514	549

	550 void SpeechRecognizerImpl::OnDeviceInfo(const media::AudioParameters& params) {

	551 DCHECK_CURRENTLY_ON(BrowserThread::IO);

	552 device_params_ = params;

	553 DVLOG(1) << "Device parameters: " << device_params_.AsHumanReadableString();

	554 DispatchEvent(FSMEventArgs(EVENT_START));

	555 }

	556

	557 SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::PrepareRecognition(

	558 const FSMEventArgs&) {

	559 DCHECK(state_ == STATE_IDLE);

	560 DCHECK(recognition_engine_.get() != NULL);

	561 DCHECK(!IsCapturingAudio());

	562 GetAudioSystem()->GetInputStreamParameters(
	tommi (sloooow) - chröme 2017/02/02 16:27:18 Instead of adding AudioSystem etc, could you post to the audio thread here, call GetInputParameters() and post back to the IO thread when you're done to complete the state transition? Maybe I'm missing something but it feels like we're adding a lot of complexity that we can avoid adding.
	563 device_id_, base::Bind(&SpeechRecognizerImpl::OnDeviceInfo,

	564 weak_ptr_factory_.GetWeakPtr()));

	565

	566 listener()->OnRecognitionStart(session_id());

	567 return STATE_PREPARING;

	568 }

	569

515 SpeechRecognizerImpl::FSMState	570 SpeechRecognizerImpl::FSMState

516 SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) {	571 SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) {

517 DCHECK(state_ == STATE_IDLE);	572 DCHECK(state_ == STATE_PREPARING);

518 DCHECK(recognition_engine_.get() != NULL);	573 DCHECK(recognition_engine_.get() != NULL);

519 DCHECK(!IsCapturingAudio());	574 DCHECK(!IsCapturingAudio());

520 const bool unit_test_is_active = (audio_manager_for_tests_ != NULL);

521 AudioManager* audio_manager = unit_test_is_active ?

522 audio_manager_for_tests_ :

523 AudioManager::Get();

524 DCHECK(audio_manager != NULL);

525	575

526 DVLOG(1) << "SpeechRecognizerImpl starting audio capture.";	576 DVLOG(1) << "SpeechRecognizerImpl starting audio capture.";

527 num_samples_recorded_ = 0;	577 num_samples_recorded_ = 0;

528 audio_level_ = 0;	578 audio_level_ = 0;

529 end_of_utterance_ = false;	579 end_of_utterance_ = false;

530 listener()->OnRecognitionStart(session_id());

531	580

532 // TODO(xians): Check if the OS has the device with \|device_id_\|, return	581 int chunk_duration_ms = recognition_engine_->GetDesiredAudioChunkDurationMs();

533 // \|SPEECH_AUDIO_ERROR_DETAILS_NO_MIC\| if the target device does not exist.	582

534 if (!audio_manager->HasAudioInputDevices()) {	583 if (!device_params_.IsValid()) {

	584 DLOG(ERROR) << "Audio input device not found";

535 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE,	585 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE,

536 SPEECH_AUDIO_ERROR_DETAILS_NO_MIC));	586 SPEECH_AUDIO_ERROR_DETAILS_NO_MIC));

537 }	587 }

538	588

539 int chunk_duration_ms = recognition_engine_->GetDesiredAudioChunkDurationMs();

540

541 AudioParameters in_params = audio_manager->GetInputStreamParameters(

542 device_id_);

543 if (!in_params.IsValid() && !unit_test_is_active) {

544 DLOG(ERROR) << "Invalid native audio input parameters";

545 return Abort(

546 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE));

547 }

548

549 // Audio converter shall provide audio based on these parameters as output.	589 // Audio converter shall provide audio based on these parameters as output.

550 // Hard coded, WebSpeech specific parameters are utilized here.	590 // Hard coded, WebSpeech specific parameters are utilized here.

551 int frames_per_buffer = (kAudioSampleRate * chunk_duration_ms) / 1000;	591 int frames_per_buffer = (kAudioSampleRate * chunk_duration_ms) / 1000;

552 AudioParameters output_parameters = AudioParameters(	592 AudioParameters output_parameters = AudioParameters(

553 AudioParameters::AUDIO_PCM_LOW_LATENCY, kChannelLayout, kAudioSampleRate,	593 AudioParameters::AUDIO_PCM_LOW_LATENCY, kChannelLayout, kAudioSampleRate,

554 kNumBitsPerAudioSample, frames_per_buffer);	594 kNumBitsPerAudioSample, frames_per_buffer);

555 DVLOG(1) << "SRI::output_parameters: "	595 DVLOG(1) << "SRI::output_parameters: "

556 << output_parameters.AsHumanReadableString();	596 << output_parameters.AsHumanReadableString();

557	597

558 // Audio converter will receive audio based on these parameters as input.	598 // Audio converter will receive audio based on these parameters as input.

559 // On Windows we start by verifying that Core Audio is supported. If not,	599 // On Windows we start by verifying that Core Audio is supported. If not,

560 // the WaveIn API is used and we might as well avoid all audio conversations	600 // the WaveIn API is used and we might as well avoid all audio conversations

561 // since WaveIn does the conversion for us.	601 // since WaveIn does the conversion for us.

562 // TODO(henrika): this code should be moved to platform dependent audio	602 // TODO(henrika): this code should be moved to platform dependent audio

563 // managers.	603 // managers.

564 bool use_native_audio_params = true;	604 bool use_native_audio_params = true;

565 #if defined(OS_WIN)	605 #if defined(OS_WIN)

566 use_native_audio_params = media::CoreAudioUtil::IsSupported();	606 use_native_audio_params = media::CoreAudioUtil::IsSupported();

567 DVLOG_IF(1, !use_native_audio_params) << "Reverting to WaveIn for WebSpeech";	607 DVLOG_IF(1, !use_native_audio_params) << "Reverting to WaveIn for WebSpeech";

568 #endif	608 #endif

569	609

570 AudioParameters input_parameters = output_parameters;	610 AudioParameters input_parameters = output_parameters;

571 if (use_native_audio_params && !unit_test_is_active) {	611

	612 // AUDIO_FAKE means we are running a test.

	613 if (use_native_audio_params &&

	614 device_params_.format() != media::AudioParameters::AUDIO_FAKE) {

572 // Use native audio parameters but avoid opening up at the native buffer	615 // Use native audio parameters but avoid opening up at the native buffer

573 // size. Instead use same frame size (in milliseconds) as WebSpeech uses.	616 // size. Instead use same frame size (in milliseconds) as WebSpeech uses.

574 // We rely on internal buffers in the audio back-end to fulfill this request	617 // We rely on internal buffers in the audio back-end to fulfill this request

575 // and the idea is to simplify the audio conversion since each Convert()	618 // and the idea is to simplify the audio conversion since each Convert()

576 // call will then render exactly one ProvideInput() call.	619 // call will then render exactly one ProvideInput() call.

577 // in_params.sample_rate()	620 input_parameters = device_params_;

578 input_parameters = in_params;

579 frames_per_buffer =	621 frames_per_buffer =

580 ((in_params.sample_rate() * chunk_duration_ms) / 1000.0) + 0.5;	622 ((input_parameters.sample_rate() * chunk_duration_ms) / 1000.0) + 0.5;

581 input_parameters.set_frames_per_buffer(frames_per_buffer);	623 input_parameters.set_frames_per_buffer(frames_per_buffer);

582 DVLOG(1) << "SRI::input_parameters: "	624 DVLOG(1) << "SRI::input_parameters: "

583 << input_parameters.AsHumanReadableString();	625 << input_parameters.AsHumanReadableString();

584 }	626 }

585	627

586 // Create an audio converter which converts data between native input format	628 // Create an audio converter which converts data between native input format

587 // and WebSpeech specific output format.	629 // and WebSpeech specific output format.

588 audio_converter_.reset(	630 audio_converter_.reset(

589 new OnDataConverter(input_parameters, output_parameters));	631 new OnDataConverter(input_parameters, output_parameters));

590	632

591 audio_controller_ = AudioInputController::Create(	633 audio_controller_ =

592 audio_manager, this, this, input_parameters, device_id_, NULL);	634 AudioInputController::Create(GetAudioSystem()->GetAudioManager(), this,

	635 this, input_parameters, device_id_, NULL);

593	636

594 if (!audio_controller_.get()) {	637 if (!audio_controller_.get()) {

595 return Abort(	638 return Abort(

596 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE));	639 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE));

597 }	640 }

598	641

599 audio_log_->OnCreated(0, input_parameters, device_id_);	642 audio_log_->OnCreated(0, input_parameters, device_id_);

600	643

601 // The endpointer needs to estimate the environment/background noise before	644 // The endpointer needs to estimate the environment/background noise before

602 // starting to treat the audio as user input. We wait in the state	645 // starting to treat the audio as user input. We wait in the state

(...skipping 78 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
681 return Abort(	724 return Abort(

682 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE));	725 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE));

683 } else if (event_args.event == EVENT_ENGINE_ERROR) {	726 } else if (event_args.event == EVENT_ENGINE_ERROR) {

684 return Abort(event_args.engine_error);	727 return Abort(event_args.engine_error);

685 }	728 }

686 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_ABORTED));	729 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_ABORTED));

687 }	730 }

688	731

689 SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::Abort(	732 SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::Abort(

690 const SpeechRecognitionError& error) {	733 const SpeechRecognitionError& error) {

	734 DCHECK_CURRENTLY_ON(BrowserThread::IO);

	735

691 if (IsCapturingAudio())	736 if (IsCapturingAudio())

692 CloseAudioControllerAsynchronously();	737 CloseAudioControllerAsynchronously();

693	738

694 DVLOG(1) << "SpeechRecognizerImpl canceling recognition. ";	739 DVLOG(1) << "SpeechRecognizerImpl canceling recognition. ";

695	740

	741 if (state_ == STATE_PREPARING) {

	742 // Cancel an outstanding reply from AudioSystem.

	743 weak_ptr_factory_.InvalidateWeakPtrs();

	744 }

	745

696 // The recognition engine is initialized only after STATE_STARTING.	746 // The recognition engine is initialized only after STATE_STARTING.

697 if (state_ > STATE_STARTING) {	747 if (state_ > STATE_STARTING) {

698 DCHECK(recognition_engine_.get() != NULL);	748 DCHECK(recognition_engine_.get() != NULL);

699 recognition_engine_->EndRecognition();	749 recognition_engine_->EndRecognition();

700 }	750 }

701	751

702 if (state_ > STATE_WAITING_FOR_SPEECH && state_ < STATE_WAITING_FINAL_RESULT)	752 if (state_ > STATE_WAITING_FOR_SPEECH && state_ < STATE_WAITING_FINAL_RESULT)

703 listener()->OnSoundEnd(session_id());	753 listener()->OnSoundEnd(session_id());

704	754

705 if (state_ > STATE_STARTING && state_ < STATE_WAITING_FINAL_RESULT)	755 if (state_ > STATE_STARTING && state_ < STATE_WAITING_FINAL_RESULT)

(...skipping 116 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
822	872

823 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) /	873 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) /

824 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped);	874 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped);

825 noise_level = std::min(std::max(0.0f, noise_level),	875 noise_level = std::min(std::max(0.0f, noise_level),

826 kAudioMeterRangeMaxUnclipped);	876 kAudioMeterRangeMaxUnclipped);

827	877

828 listener()->OnAudioLevelsChange(	878 listener()->OnAudioLevelsChange(

829 session_id(), clip_detected ? 1.0f : audio_level_, noise_level);	879 session_id(), clip_detected ? 1.0f : audio_level_, noise_level);

830 }	880 }

831	881

832 void SpeechRecognizerImpl::SetAudioManagerForTesting(	882 void SpeechRecognizerImpl::SetAudioSystemForTesting(

833 AudioManager* audio_manager) {	883 media::AudioSystem* audio_system) {

834 audio_manager_for_tests_ = audio_manager;	884 audio_system_for_tests_ = audio_system;

	885 }

	886

	887 media::AudioSystem* SpeechRecognizerImpl::GetAudioSystem() {

	888 return audio_system_for_tests_ ? audio_system_for_tests_ : audio_system_;

835 }	889 }

836	890

837 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value)	891 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value)

838 : event(event_value),	892 : event(event_value),

839 audio_data(NULL),	893 audio_data(NULL),

840 engine_error(SPEECH_RECOGNITION_ERROR_NONE) {	894 engine_error(SPEECH_RECOGNITION_ERROR_NONE) {

841 }	895 }

842	896

843 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(const FSMEventArgs& other) =	897 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(const FSMEventArgs& other) =

844 default;	898 default;

845	899

846 SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() {	900 SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() {

847 }	901 }

848	902

849 } // namespace content	903 } // namespace content

OLD	NEW

« no previous file with comments | « content/browser/speech/speech_recognizer_impl.h ('k') | content/browser/speech/speech_recognizer_impl_unittest.cc » ('j') | media/audio/audio_system.h » ('J')