| OLD | NEW |
| 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "content/browser/speech/speech_recognizer_impl.h" | 5 #include "content/browser/speech/speech_recognizer_impl.h" |
| 6 | 6 |
| 7 #include "base/basictypes.h" | 7 #include <stdint.h> |
| 8 |
| 8 #include "base/bind.h" | 9 #include "base/bind.h" |
| 10 #include "base/macros.h" |
| 9 #include "base/time/time.h" | 11 #include "base/time/time.h" |
| 12 #include "build/build_config.h" |
| 10 #include "content/browser/browser_main_loop.h" | 13 #include "content/browser/browser_main_loop.h" |
| 11 #include "content/browser/media/media_internals.h" | 14 #include "content/browser/media/media_internals.h" |
| 12 #include "content/browser/speech/audio_buffer.h" | 15 #include "content/browser/speech/audio_buffer.h" |
| 13 #include "content/browser/speech/google_one_shot_remote_engine.h" | 16 #include "content/browser/speech/google_one_shot_remote_engine.h" |
| 14 #include "content/public/browser/speech_recognition_event_listener.h" | 17 #include "content/public/browser/speech_recognition_event_listener.h" |
| 15 #include "media/base/audio_converter.h" | 18 #include "media/base/audio_converter.h" |
| 16 | 19 |
| 17 #if defined(OS_WIN) | 20 #if defined(OS_WIN) |
| 18 #include "media/audio/win/core_audio_util_win.h" | 21 #include "media/audio/win/core_audio_util_win.h" |
| 19 #endif | 22 #endif |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 62 }; | 65 }; |
| 63 | 66 |
| 64 namespace { | 67 namespace { |
| 65 | 68 |
| 66 // The following constants are related to the volume level indicator shown in | 69 // The following constants are related to the volume level indicator shown in |
| 67 // the UI for recorded audio. | 70 // the UI for recorded audio. |
| 68 // Multiplier used when new volume is greater than previous level. | 71 // Multiplier used when new volume is greater than previous level. |
| 69 const float kUpSmoothingFactor = 1.0f; | 72 const float kUpSmoothingFactor = 1.0f; |
| 70 // Multiplier used when new volume is less than previous level. | 73 // Multiplier used when new volume is less than previous level. |
| 71 const float kDownSmoothingFactor = 0.7f; | 74 const float kDownSmoothingFactor = 0.7f; |
| 72 // RMS dB value of a maximum (unclipped) sine wave for int16 samples. | 75 // RMS dB value of a maximum (unclipped) sine wave for int16_t samples. |
| 73 const float kAudioMeterMaxDb = 90.31f; | 76 const float kAudioMeterMaxDb = 90.31f; |
| 74 // This value corresponds to RMS dB for int16 with 6 most-significant-bits = 0. | 77 // This value corresponds to RMS dB for int16_t with 6 most-significant-bits = |
| 78 // 0. |
| 75 // Values lower than this will display as empty level-meter. | 79 // Values lower than this will display as empty level-meter. |
| 76 const float kAudioMeterMinDb = 30.0f; | 80 const float kAudioMeterMinDb = 30.0f; |
| 77 const float kAudioMeterDbRange = kAudioMeterMaxDb - kAudioMeterMinDb; | 81 const float kAudioMeterDbRange = kAudioMeterMaxDb - kAudioMeterMinDb; |
| 78 | 82 |
| 79 // Maximum level to draw to display unclipped meter. (1.0f displays clipping.) | 83 // Maximum level to draw to display unclipped meter. (1.0f displays clipping.) |
| 80 const float kAudioMeterRangeMaxUnclipped = 47.0f / 48.0f; | 84 const float kAudioMeterRangeMaxUnclipped = 47.0f / 48.0f; |
| 81 | 85 |
| 82 // Returns true if more than 5% of the samples are at min or max value. | 86 // Returns true if more than 5% of the samples are at min or max value. |
| 83 bool DetectClipping(const AudioChunk& chunk) { | 87 bool DetectClipping(const AudioChunk& chunk) { |
| 84 const int num_samples = chunk.NumSamples(); | 88 const int num_samples = chunk.NumSamples(); |
| 85 const int16* samples = chunk.SamplesData16(); | 89 const int16_t* samples = chunk.SamplesData16(); |
| 86 const int kThreshold = num_samples / 20; | 90 const int kThreshold = num_samples / 20; |
| 87 int clipping_samples = 0; | 91 int clipping_samples = 0; |
| 88 | 92 |
| 89 for (int i = 0; i < num_samples; ++i) { | 93 for (int i = 0; i < num_samples; ++i) { |
| 90 if (samples[i] <= -32767 || samples[i] >= 32767) { | 94 if (samples[i] <= -32767 || samples[i] >= 32767) { |
| 91 if (++clipping_samples > kThreshold) | 95 if (++clipping_samples > kThreshold) |
| 92 return true; | 96 return true; |
| 93 } | 97 } |
| 94 } | 98 } |
| 95 return false; | 99 return false; |
| (...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 189 // - 0.5 seconds of silence if time < 3 seconds | 193 // - 0.5 seconds of silence if time < 3 seconds |
| 190 // - 1 second of silence if time >= 3 seconds | 194 // - 1 second of silence if time >= 3 seconds |
| 191 endpointer_.set_speech_input_complete_silence_length( | 195 endpointer_.set_speech_input_complete_silence_length( |
| 192 base::Time::kMicrosecondsPerSecond / 2); | 196 base::Time::kMicrosecondsPerSecond / 2); |
| 193 endpointer_.set_long_speech_input_complete_silence_length( | 197 endpointer_.set_long_speech_input_complete_silence_length( |
| 194 base::Time::kMicrosecondsPerSecond); | 198 base::Time::kMicrosecondsPerSecond); |
| 195 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond); | 199 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond); |
| 196 } else { | 200 } else { |
| 197 // In continuous recognition, the session is automatically ended after 15 | 201 // In continuous recognition, the session is automatically ended after 15 |
| 198 // seconds of silence. | 202 // seconds of silence. |
| 199 const int64 cont_timeout_us = base::Time::kMicrosecondsPerSecond * 15; | 203 const int64_t cont_timeout_us = base::Time::kMicrosecondsPerSecond * 15; |
| 200 endpointer_.set_speech_input_complete_silence_length(cont_timeout_us); | 204 endpointer_.set_speech_input_complete_silence_length(cont_timeout_us); |
| 201 endpointer_.set_long_speech_length(0); // Use only a single timeout. | 205 endpointer_.set_long_speech_length(0); // Use only a single timeout. |
| 202 } | 206 } |
| 203 endpointer_.StartSession(); | 207 endpointer_.StartSession(); |
| 204 recognition_engine_->set_delegate(this); | 208 recognition_engine_->set_delegate(this); |
| 205 } | 209 } |
| 206 | 210 |
| 207 // ------- Methods that trigger Finite State Machine (FSM) events ------------ | 211 // ------- Methods that trigger Finite State Machine (FSM) events ------------ |
| 208 | 212 |
| 209 // NOTE:all the external events and requests should be enqueued (PostTask), even | 213 // NOTE:all the external events and requests should be enqueued (PostTask), even |
| (...skipping 618 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 828 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value) | 832 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value) |
| 829 : event(event_value), | 833 : event(event_value), |
| 830 audio_data(NULL), | 834 audio_data(NULL), |
| 831 engine_error(SPEECH_RECOGNITION_ERROR_NONE) { | 835 engine_error(SPEECH_RECOGNITION_ERROR_NONE) { |
| 832 } | 836 } |
| 833 | 837 |
| 834 SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() { | 838 SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() { |
| 835 } | 839 } |
| 836 | 840 |
| 837 } // namespace content | 841 } // namespace content |
| OLD | NEW |