Chromium Code Reviews

Side by Side Diff: content/browser/speech/speech_recognizer.cc

Issue 11347004: content/browser: Move speech code into content namespace. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 8 years, 1 month ago
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/browser/speech/speech_recognizer.h" 5 #include "content/browser/speech/speech_recognizer.h"
6 6
7 #include "base/basictypes.h" 7 #include "base/basictypes.h"
8 #include "base/bind.h" 8 #include "base/bind.h"
9 #include "base/time.h" 9 #include "base/time.h"
10 #include "content/browser/browser_main_loop.h" 10 #include "content/browser/browser_main_loop.h"
11 #include "content/browser/speech/audio_buffer.h" 11 #include "content/browser/speech/audio_buffer.h"
12 #include "content/browser/speech/google_one_shot_remote_engine.h" 12 #include "content/browser/speech/google_one_shot_remote_engine.h"
13 #include "content/public/browser/browser_thread.h" 13 #include "content/public/browser/browser_thread.h"
14 #include "content/public/browser/speech_recognition_event_listener.h" 14 #include "content/public/browser/speech_recognition_event_listener.h"
15 #include "content/public/common/speech_recognition_error.h" 15 #include "content/public/common/speech_recognition_error.h"
16 #include "content/public/common/speech_recognition_grammar.h" 16 #include "content/public/common/speech_recognition_grammar.h"
17 #include "content/public/common/speech_recognition_result.h" 17 #include "content/public/common/speech_recognition_result.h"
18 #include "net/url_request/url_request_context_getter.h" 18 #include "net/url_request/url_request_context_getter.h"
19 19
20 using content::BrowserMainLoop;
21 using content::BrowserThread;
22 using content::SpeechRecognitionError;
23 using content::SpeechRecognitionEventListener;
24 using content::SpeechRecognitionGrammar;
25 using content::SpeechRecognitionResult;
26 using media::AudioInputController; 20 using media::AudioInputController;
27 using media::AudioManager; 21 using media::AudioManager;
28 using media::AudioParameters; 22 using media::AudioParameters;
29 using media::ChannelLayout; 23 using media::ChannelLayout;
30 24
25 namespace content {
31 namespace { 26 namespace {
32 27
33 // The following constants are related to the volume level indicator shown in 28 // The following constants are related to the volume level indicator shown in
34 // the UI for recorded audio. 29 // the UI for recorded audio.
35 // Multiplier used when new volume is greater than previous level. 30 // Multiplier used when new volume is greater than previous level.
36 const float kUpSmoothingFactor = 1.0f; 31 const float kUpSmoothingFactor = 1.0f;
37 // Multiplier used when new volume is less than previous level. 32 // Multiplier used when new volume is less than previous level.
38 const float kDownSmoothingFactor = 0.7f; 33 const float kDownSmoothingFactor = 0.7f;
39 // RMS dB value of a maximum (unclipped) sine wave for int16 samples. 34 // RMS dB value of a maximum (unclipped) sine wave for int16 samples.
40 const float kAudioMeterMaxDb = 90.31f; 35 const float kAudioMeterMaxDb = 90.31f;
41 // This value corresponds to RMS dB for int16 with 6 most-significant-bits = 0. 36 // This value corresponds to RMS dB for int16 with 6 most-significant-bits = 0.
42 // Values lower than this will display as empty level-meter. 37 // Values lower than this will display as empty level-meter.
43 const float kAudioMeterMinDb = 30.0f; 38 const float kAudioMeterMinDb = 30.0f;
44 const float kAudioMeterDbRange = kAudioMeterMaxDb - kAudioMeterMinDb; 39 const float kAudioMeterDbRange = kAudioMeterMaxDb - kAudioMeterMinDb;
45 40
46 // Maximum level to draw to display unclipped meter. (1.0f displays clipping.) 41 // Maximum level to draw to display unclipped meter. (1.0f displays clipping.)
47 const float kAudioMeterRangeMaxUnclipped = 47.0f / 48.0f; 42 const float kAudioMeterRangeMaxUnclipped = 47.0f / 48.0f;
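The constants above feed the level-meter math in an elided portion of this file. As a rough sketch of how they typically combine (illustrative only; NextAudioLevel is a hypothetical helper name and the real code elsewhere in this file may differ), assuming an RMS reading already expressed in dB:

// Illustrative sketch, not part of this change. Maps an RMS dB reading onto a
// 0..1 meter level and smooths it asymmetrically (instant rise, smoothed
// decay). Requires <algorithm> for std::min/std::max. Numerically, 90.31f is
// about 20 * log10(2^15), the dB value of a full-scale 16-bit sample.
float NextAudioLevel(float rms_db, float previous_level) {
  float level = (rms_db - kAudioMeterMinDb) / kAudioMeterDbRange;
  // 1.0f is reserved for clipping, so clamp to kAudioMeterRangeMaxUnclipped.
  level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped);
  const float smoothing = (level > previous_level) ? kUpSmoothingFactor
                                                   : kDownSmoothingFactor;
  return previous_level + (level - previous_level) * smoothing;
}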
48 43
49 // Returns true if more than 5% of the samples are at min or max value. 44 // Returns true if more than 5% of the samples are at min or max value.
50 bool DetectClipping(const speech::AudioChunk& chunk) { 45 bool DetectClipping(const AudioChunk& chunk) {
51 const int num_samples = chunk.NumSamples(); 46 const int num_samples = chunk.NumSamples();
52 const int16* samples = chunk.SamplesData16(); 47 const int16* samples = chunk.SamplesData16();
53 const int kThreshold = num_samples / 20; 48 const int kThreshold = num_samples / 20;
54 int clipping_samples = 0; 49 int clipping_samples = 0;
55 50
56 for (int i = 0; i < num_samples; ++i) { 51 for (int i = 0; i < num_samples; ++i) {
57 if (samples[i] <= -32767 || samples[i] >= 32767) { 52 if (samples[i] <= -32767 || samples[i] >= 32767) {
58 if (++clipping_samples > kThreshold) 53 if (++clipping_samples > kThreshold)
59 return true; 54 return true;
60 } 55 }
61 } 56 }
62 return false; 57 return false;
63 } 58 }
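For a sense of scale (the packet size depends on the recognition engine, so 100 ms is only an assumed example): a 100 ms packet at the 16 kHz sample rate declared below holds 1600 samples, so kThreshold = 1600 / 20 = 80 and DetectClipping() reports clipping as soon as an 81st sample sits at ±32767.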
64 59
65 void KeepAudioControllerRefcountedForDtor(scoped_refptr<AudioInputController>) { 60 void KeepAudioControllerRefcountedForDtor(scoped_refptr<AudioInputController>) {
66 } 61 }
67 62
68 } // namespace 63 } // namespace
69 64
70 namespace speech {
71
72 const int SpeechRecognizer::kAudioSampleRate = 16000; 65 const int SpeechRecognizer::kAudioSampleRate = 16000;
73 const ChannelLayout SpeechRecognizer::kChannelLayout = 66 const ChannelLayout SpeechRecognizer::kChannelLayout =
74 media::CHANNEL_LAYOUT_MONO; 67 media::CHANNEL_LAYOUT_MONO;
75 const int SpeechRecognizer::kNumBitsPerAudioSample = 16; 68 const int SpeechRecognizer::kNumBitsPerAudioSample = 16;
76 const int SpeechRecognizer::kNoSpeechTimeoutMs = 8000; 69 const int SpeechRecognizer::kNoSpeechTimeoutMs = 8000;
77 const int SpeechRecognizer::kEndpointerEstimationTimeMs = 300; 70 const int SpeechRecognizer::kEndpointerEstimationTimeMs = 300;
78 media::AudioManager* SpeechRecognizer::audio_manager_for_tests_ = NULL; 71 media::AudioManager* SpeechRecognizer::audio_manager_for_tests_ = NULL;
79 72
80 COMPILE_ASSERT(SpeechRecognizer::kNumBitsPerAudioSample % 8 == 0, 73 COMPILE_ASSERT(SpeechRecognizer::kNumBitsPerAudioSample % 8 == 0,
81 kNumBitsPerAudioSample_must_be_a_multiple_of_8); 74 kNumBitsPerAudioSample_must_be_a_multiple_of_8);
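The assert matters because kNumBitsPerAudioSample / 8 is used as a bytes-per-sample count further down (for example when raw capture data is wrapped into an AudioChunk in OnData()); a bit depth that is not a whole number of bytes would silently truncate in that integer division.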
(...skipping 105 matching lines...)
187 event_args.audio_data = new AudioChunk(data, static_cast<size_t>(size), 180 event_args.audio_data = new AudioChunk(data, static_cast<size_t>(size),
188 kNumBitsPerAudioSample / 8); 181 kNumBitsPerAudioSample / 8);
189 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, 182 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
190 base::Bind(&SpeechRecognizer::DispatchEvent, 183 base::Bind(&SpeechRecognizer::DispatchEvent,
191 this, event_args)); 184 this, event_args));
192 } 185 }
193 186
194 void SpeechRecognizer::OnAudioClosed(AudioInputController*) {} 187 void SpeechRecognizer::OnAudioClosed(AudioInputController*) {}
195 188
196 void SpeechRecognizer::OnSpeechRecognitionEngineResult( 189 void SpeechRecognizer::OnSpeechRecognitionEngineResult(
197 const content::SpeechRecognitionResult& result) { 190 const SpeechRecognitionResult& result) {
198 FSMEventArgs event_args(EVENT_ENGINE_RESULT); 191 FSMEventArgs event_args(EVENT_ENGINE_RESULT);
199 event_args.engine_result = result; 192 event_args.engine_result = result;
200 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, 193 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
201 base::Bind(&SpeechRecognizer::DispatchEvent, 194 base::Bind(&SpeechRecognizer::DispatchEvent,
202 this, event_args)); 195 this, event_args));
203 } 196 }
204 197
205 void SpeechRecognizer::OnSpeechRecognitionEngineError( 198 void SpeechRecognizer::OnSpeechRecognitionEngineError(
206 const content::SpeechRecognitionError& error) { 199 const SpeechRecognitionError& error) {
207 FSMEventArgs event_args(EVENT_ENGINE_ERROR); 200 FSMEventArgs event_args(EVENT_ENGINE_ERROR);
208 event_args.engine_error = error; 201 event_args.engine_error = error;
209 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, 202 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
210 base::Bind(&SpeechRecognizer::DispatchEvent, 203 base::Bind(&SpeechRecognizer::DispatchEvent,
211 this, event_args)); 204 this, event_args));
212 } 205 }
213 206
214 // ----------------------- Core FSM implementation --------------------------- 207 // ----------------------- Core FSM implementation ---------------------------
215 // TODO(primiano): After the changes in the media package (r129173), this class 208 // TODO(primiano): After the changes in the media package (r129173), this class
216 // slightly violates the SpeechRecognitionEventListener interface contract. In 209 // slightly violates the SpeechRecognitionEventListener interface contract. In
(...skipping 182 matching lines...)
399 audio_manager_for_tests_ : 392 audio_manager_for_tests_ :
400 BrowserMainLoop::GetAudioManager(); 393 BrowserMainLoop::GetAudioManager();
401 DCHECK(audio_manager != NULL); 394 DCHECK(audio_manager != NULL);
402 395
403 DVLOG(1) << "SpeechRecognizer starting audio capture."; 396 DVLOG(1) << "SpeechRecognizer starting audio capture.";
404 num_samples_recorded_ = 0; 397 num_samples_recorded_ = 0;
405 audio_level_ = 0; 398 audio_level_ = 0;
406 listener_->OnRecognitionStart(session_id_); 399 listener_->OnRecognitionStart(session_id_);
407 400
408 if (!audio_manager->HasAudioInputDevices()) { 401 if (!audio_manager->HasAudioInputDevices()) {
409 return Abort(SpeechRecognitionError( 402 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO,
410 content::SPEECH_RECOGNITION_ERROR_AUDIO, 403 SPEECH_AUDIO_ERROR_DETAILS_NO_MIC));
411 content::SPEECH_AUDIO_ERROR_DETAILS_NO_MIC));
412 } 404 }
413 405
414 if (audio_manager->IsRecordingInProcess()) { 406 if (audio_manager->IsRecordingInProcess()) {
415 return Abort(SpeechRecognitionError( 407 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO,
416 content::SPEECH_RECOGNITION_ERROR_AUDIO, 408 SPEECH_AUDIO_ERROR_DETAILS_IN_USE));
417 content::SPEECH_AUDIO_ERROR_DETAILS_IN_USE));
418 } 409 }
419 410
420 const int samples_per_packet = (kAudioSampleRate * 411 const int samples_per_packet = (kAudioSampleRate *
421 recognition_engine_->GetDesiredAudioChunkDurationMs()) / 1000; 412 recognition_engine_->GetDesiredAudioChunkDurationMs()) / 1000;
422 AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout, 413 AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout,
423 kAudioSampleRate, kNumBitsPerAudioSample, 414 kAudioSampleRate, kNumBitsPerAudioSample,
424 samples_per_packet); 415 samples_per_packet);
425 audio_controller_ = AudioInputController::Create(audio_manager, this, params); 416 audio_controller_ = AudioInputController::Create(audio_manager, this, params);
426 417
427 if (audio_controller_.get() == NULL) { 418 if (audio_controller_.get() == NULL) {
428 return Abort( 419 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO));
429 SpeechRecognitionError(content::SPEECH_RECOGNITION_ERROR_AUDIO));
430 } 420 }
431 421
432 // The endpointer needs to estimate the environment/background noise before 422 // The endpointer needs to estimate the environment/background noise before
433 // starting to treat the audio as user input. We wait in the state 423 // starting to treat the audio as user input. We wait in the state
434 // ESTIMATING_ENVIRONMENT until such interval has elapsed before switching 424 // ESTIMATING_ENVIRONMENT until such interval has elapsed before switching
435 // to user input mode. 425 // to user input mode.
436 endpointer_.SetEnvironmentEstimationMode(); 426 endpointer_.SetEnvironmentEstimationMode();
437 audio_controller_->Record(); 427 audio_controller_->Record();
438 return STATE_STARTING; 428 return STATE_STARTING;
439 } 429 }
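For concreteness on the packet sizing above (the desired chunk duration comes from the recognition engine, so the 100 ms figure is only an assumed example): samples_per_packet = 16000 * 100 / 1000 = 1600 mono samples, i.e. 1600 * (16 / 8) = 3200 bytes of PCM delivered per OnData() callback.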
(...skipping 24 matching lines...)
464 return STATE_ESTIMATING_ENVIRONMENT; 454 return STATE_ESTIMATING_ENVIRONMENT;
465 } 455 }
466 } 456 }
467 457
468 SpeechRecognizer::FSMState 458 SpeechRecognizer::FSMState
469 SpeechRecognizer::DetectUserSpeechOrTimeout(const FSMEventArgs&) { 459 SpeechRecognizer::DetectUserSpeechOrTimeout(const FSMEventArgs&) {
470 if (endpointer_.DidStartReceivingSpeech()) { 460 if (endpointer_.DidStartReceivingSpeech()) {
471 listener_->OnSoundStart(session_id_); 461 listener_->OnSoundStart(session_id_);
472 return STATE_RECOGNIZING; 462 return STATE_RECOGNIZING;
473 } else if (GetElapsedTimeMs() >= kNoSpeechTimeoutMs) { 463 } else if (GetElapsedTimeMs() >= kNoSpeechTimeoutMs) {
474 return Abort( 464 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_NO_SPEECH));
475 SpeechRecognitionError(content::SPEECH_RECOGNITION_ERROR_NO_SPEECH));
476 } 465 }
477 return STATE_WAITING_FOR_SPEECH; 466 return STATE_WAITING_FOR_SPEECH;
478 } 467 }
479 468
480 SpeechRecognizer::FSMState 469 SpeechRecognizer::FSMState
481 SpeechRecognizer::DetectEndOfSpeech(const FSMEventArgs& event_args) { 470 SpeechRecognizer::DetectEndOfSpeech(const FSMEventArgs& event_args) {
482 if (endpointer_.speech_input_complete()) 471 if (endpointer_.speech_input_complete())
483 return StopCaptureAndWaitForResult(event_args); 472 return StopCaptureAndWaitForResult(event_args);
484 return STATE_RECOGNIZING; 473 return STATE_RECOGNIZING;
485 } 474 }
(...skipping 10 matching lines...)
496 listener_->OnSoundEnd(session_id_); 485 listener_->OnSoundEnd(session_id_);
497 486
498 listener_->OnAudioEnd(session_id_); 487 listener_->OnAudioEnd(session_id_);
499 return STATE_WAITING_FINAL_RESULT; 488 return STATE_WAITING_FINAL_RESULT;
500 } 489 }
501 490
502 SpeechRecognizer::FSMState 491 SpeechRecognizer::FSMState
503 SpeechRecognizer::AbortSilently(const FSMEventArgs& event_args) { 492 SpeechRecognizer::AbortSilently(const FSMEventArgs& event_args) {
504 DCHECK_NE(event_args.event, EVENT_AUDIO_ERROR); 493 DCHECK_NE(event_args.event, EVENT_AUDIO_ERROR);
505 DCHECK_NE(event_args.event, EVENT_ENGINE_ERROR); 494 DCHECK_NE(event_args.event, EVENT_ENGINE_ERROR);
506 return Abort( 495 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_NONE));
507 SpeechRecognitionError(content::SPEECH_RECOGNITION_ERROR_NONE));
508 } 496 }
509 497
510 SpeechRecognizer::FSMState 498 SpeechRecognizer::FSMState
511 SpeechRecognizer::AbortWithError(const FSMEventArgs& event_args) { 499 SpeechRecognizer::AbortWithError(const FSMEventArgs& event_args) {
512 if (event_args.event == EVENT_AUDIO_ERROR) { 500 if (event_args.event == EVENT_AUDIO_ERROR) {
513 return Abort( 501 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO));
514 SpeechRecognitionError(content::SPEECH_RECOGNITION_ERROR_AUDIO));
515 } else if (event_args.event == EVENT_ENGINE_ERROR) { 502 } else if (event_args.event == EVENT_ENGINE_ERROR) {
516 return Abort(event_args.engine_error); 503 return Abort(event_args.engine_error);
517 } 504 }
518 return Abort( 505 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_ABORTED));
519 SpeechRecognitionError(content::SPEECH_RECOGNITION_ERROR_ABORTED));
520 } 506 }
521 507
522 SpeechRecognizer::FSMState SpeechRecognizer::Abort( 508 SpeechRecognizer::FSMState SpeechRecognizer::Abort(
523 const SpeechRecognitionError& error) { 509 const SpeechRecognitionError& error) {
524 if (IsCapturingAudio()) 510 if (IsCapturingAudio())
525 CloseAudioControllerAsynchronously(); 511 CloseAudioControllerAsynchronously();
526 512
527 DVLOG(1) << "SpeechRecognizer canceling recognition. "; 513 DVLOG(1) << "SpeechRecognizer canceling recognition. ";
528 514
529 // The recognition engine is initialized only after STATE_STARTING. 515 // The recognition engine is initialized only after STATE_STARTING.
530 if (state_ > STATE_STARTING) { 516 if (state_ > STATE_STARTING) {
531 DCHECK(recognition_engine_.get() != NULL); 517 DCHECK(recognition_engine_.get() != NULL);
532 recognition_engine_->EndRecognition(); 518 recognition_engine_->EndRecognition();
533 } 519 }
534 520
535 if (state_ > STATE_WAITING_FOR_SPEECH && state_ < STATE_WAITING_FINAL_RESULT) 521 if (state_ > STATE_WAITING_FOR_SPEECH && state_ < STATE_WAITING_FINAL_RESULT)
536 listener_->OnSoundEnd(session_id_); 522 listener_->OnSoundEnd(session_id_);
537 523
538 if (state_ > STATE_STARTING && state_ < STATE_WAITING_FINAL_RESULT) 524 if (state_ > STATE_STARTING && state_ < STATE_WAITING_FINAL_RESULT)
539 listener_->OnAudioEnd(session_id_); 525 listener_->OnAudioEnd(session_id_);
540 526
541 if (error.code != content::SPEECH_RECOGNITION_ERROR_NONE) 527 if (error.code != SPEECH_RECOGNITION_ERROR_NONE)
542 listener_->OnRecognitionError(session_id_, error); 528 listener_->OnRecognitionError(session_id_, error);
543 529
544 listener_->OnRecognitionEnd(session_id_); 530 listener_->OnRecognitionEnd(session_id_);
545 531
546 return STATE_IDLE; 532 return STATE_IDLE;
547 } 533 }
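The state-range guards above keep the abort path's callback sequence consistent with a normal session: OnSoundEnd() is reported only if sound onset had already been reported, OnAudioEnd() only if capture had actually started, OnRecognitionError() is skipped for the silent-abort case (SPEECH_RECOGNITION_ERROR_NONE), and OnRecognitionEnd() is always the final callback.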
548 534
549 SpeechRecognizer::FSMState SpeechRecognizer::ProcessIntermediateResult( 535 SpeechRecognizer::FSMState SpeechRecognizer::ProcessIntermediateResult(
550 const FSMEventArgs& event_args) { 536 const FSMEventArgs& event_args) {
551 // Provisional results can occur only during continuous (non one-shot) mode. 537 // Provisional results can occur only during continuous (non one-shot) mode.
(...skipping 101 matching lines...)
653 639
654 void SpeechRecognizer::SetAudioManagerForTests( 640 void SpeechRecognizer::SetAudioManagerForTests(
655 AudioManager* audio_manager) { 641 AudioManager* audio_manager) {
656 audio_manager_for_tests_ = audio_manager; 642 audio_manager_for_tests_ = audio_manager;
657 } 643 }
658 644
659 SpeechRecognizer::FSMEventArgs::FSMEventArgs(FSMEvent event_value) 645 SpeechRecognizer::FSMEventArgs::FSMEventArgs(FSMEvent event_value)
660 : event(event_value), 646 : event(event_value),
661 audio_error_code(0), 647 audio_error_code(0),
662 audio_data(NULL), 648 audio_data(NULL),
663 engine_error(content::SPEECH_RECOGNITION_ERROR_NONE) { 649 engine_error(SPEECH_RECOGNITION_ERROR_NONE) {
664 } 650 }
665 651
666 SpeechRecognizer::FSMEventArgs::~FSMEventArgs() { 652 SpeechRecognizer::FSMEventArgs::~FSMEventArgs() {
667 } 653 }
668 654
669 } // namespace speech 655 } // namespace content