OLD | NEW |
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/browser/speech/speech_recognizer_impl.h" | 5 #include "content/browser/speech/speech_recognizer_impl.h" |
6 | 6 |
7 #include "base/basictypes.h" | 7 #include <stdint.h> |
| 8 |
8 #include "base/bind.h" | 9 #include "base/bind.h" |
| 10 #include "base/macros.h" |
9 #include "base/time/time.h" | 11 #include "base/time/time.h" |
| 12 #include "build/build_config.h" |
10 #include "content/browser/browser_main_loop.h" | 13 #include "content/browser/browser_main_loop.h" |
11 #include "content/browser/media/media_internals.h" | 14 #include "content/browser/media/media_internals.h" |
12 #include "content/browser/speech/audio_buffer.h" | 15 #include "content/browser/speech/audio_buffer.h" |
13 #include "content/browser/speech/google_one_shot_remote_engine.h" | 16 #include "content/browser/speech/google_one_shot_remote_engine.h" |
14 #include "content/public/browser/speech_recognition_event_listener.h" | 17 #include "content/public/browser/speech_recognition_event_listener.h" |
15 #include "media/base/audio_converter.h" | 18 #include "media/base/audio_converter.h" |
16 | 19 |
17 #if defined(OS_WIN) | 20 #if defined(OS_WIN) |
18 #include "media/audio/win/core_audio_util_win.h" | 21 #include "media/audio/win/core_audio_util_win.h" |
19 #endif | 22 #endif |
(...skipping 42 matching lines...) |
62 }; | 65 }; |
63 | 66 |
64 namespace { | 67 namespace { |
65 | 68 |
66 // The following constants are related to the volume level indicator shown in | 69 // The following constants are related to the volume level indicator shown in |
67 // the UI for recorded audio. | 70 // the UI for recorded audio. |
68 // Multiplier used when new volume is greater than previous level. | 71 // Multiplier used when new volume is greater than previous level. |
69 const float kUpSmoothingFactor = 1.0f; | 72 const float kUpSmoothingFactor = 1.0f; |
70 // Multiplier used when new volume is lesser than previous level. | 73 // Multiplier used when new volume is lesser than previous level. |
71 const float kDownSmoothingFactor = 0.7f; | 74 const float kDownSmoothingFactor = 0.7f; |
72 // RMS dB value of a maximum (unclipped) sine wave for int16 samples. | 75 // RMS dB value of a maximum (unclipped) sine wave for int16_t samples. |
73 const float kAudioMeterMaxDb = 90.31f; | 76 const float kAudioMeterMaxDb = 90.31f; |
74 // This value corresponds to RMS dB for int16 with 6 most-significant-bits = 0. | 77 // This value corresponds to RMS dB for int16_t with 6 most-significant-bits = |
| 78 // 0. |
75 // Values lower than this will display as empty level-meter. | 79 // Values lower than this will display as empty level-meter. |
76 const float kAudioMeterMinDb = 30.0f; | 80 const float kAudioMeterMinDb = 30.0f; |
77 const float kAudioMeterDbRange = kAudioMeterMaxDb - kAudioMeterMinDb; | 81 const float kAudioMeterDbRange = kAudioMeterMaxDb - kAudioMeterMinDb; |
78 | 82 |
79 // Maximum level to draw to display unclipped meter. (1.0f displays clipping.) | 83 // Maximum level to draw to display unclipped meter. (1.0f displays clipping.) |
80 const float kAudioMeterRangeMaxUnclipped = 47.0f / 48.0f; | 84 const float kAudioMeterRangeMaxUnclipped = 47.0f / 48.0f; |
81 | 85 |
82 // Returns true if more than 5% of the samples are at min or max value. | 86 // Returns true if more than 5% of the samples are at min or max value. |
83 bool DetectClipping(const AudioChunk& chunk) { | 87 bool DetectClipping(const AudioChunk& chunk) { |
84 const int num_samples = chunk.NumSamples(); | 88 const int num_samples = chunk.NumSamples(); |
85 const int16* samples = chunk.SamplesData16(); | 89 const int16_t* samples = chunk.SamplesData16(); |
86 const int kThreshold = num_samples / 20; | 90 const int kThreshold = num_samples / 20; |
87 int clipping_samples = 0; | 91 int clipping_samples = 0; |
88 | 92 |
89 for (int i = 0; i < num_samples; ++i) { | 93 for (int i = 0; i < num_samples; ++i) { |
90 if (samples[i] <= -32767 || samples[i] >= 32767) { | 94 if (samples[i] <= -32767 || samples[i] >= 32767) { |
91 if (++clipping_samples > kThreshold) | 95 if (++clipping_samples > kThreshold) |
92 return true; | 96 return true; |
93 } | 97 } |
94 } | 98 } |
95 return false; | 99 return false; |
(...skipping 93 matching lines...) |
189 // - 0.5 seconds of silence if time < 3 seconds | 193 // - 0.5 seconds of silence if time < 3 seconds |
190 // - 1 seconds of silence if time >= 3 seconds | 194 // - 1 seconds of silence if time >= 3 seconds |
191 endpointer_.set_speech_input_complete_silence_length( | 195 endpointer_.set_speech_input_complete_silence_length( |
192 base::Time::kMicrosecondsPerSecond / 2); | 196 base::Time::kMicrosecondsPerSecond / 2); |
193 endpointer_.set_long_speech_input_complete_silence_length( | 197 endpointer_.set_long_speech_input_complete_silence_length( |
194 base::Time::kMicrosecondsPerSecond); | 198 base::Time::kMicrosecondsPerSecond); |
195 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond); | 199 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond); |
196 } else { | 200 } else { |
197 // In continuous recognition, the session is automatically ended after 15 | 201 // In continuous recognition, the session is automatically ended after 15 |
198 // seconds of silence. | 202 // seconds of silence. |
199 const int64 cont_timeout_us = base::Time::kMicrosecondsPerSecond * 15; | 203 const int64_t cont_timeout_us = base::Time::kMicrosecondsPerSecond * 15; |
200 endpointer_.set_speech_input_complete_silence_length(cont_timeout_us); | 204 endpointer_.set_speech_input_complete_silence_length(cont_timeout_us); |
201 endpointer_.set_long_speech_length(0); // Use only a single timeout. | 205 endpointer_.set_long_speech_length(0); // Use only a single timeout. |
202 } | 206 } |
203 endpointer_.StartSession(); | 207 endpointer_.StartSession(); |
204 recognition_engine_->set_delegate(this); | 208 recognition_engine_->set_delegate(this); |
205 } | 209 } |
206 | 210 |
207 // ------- Methods that trigger Finite State Machine (FSM) events ------------ | 211 // ------- Methods that trigger Finite State Machine (FSM) events ------------ |
208 | 212 |
209 // NOTE:all the external events and requests should be enqueued (PostTask), even | 213 // NOTE:all the external events and requests should be enqueued (PostTask), even |
(...skipping 618 matching lines...) |
828 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value) | 832 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value) |
829 : event(event_value), | 833 : event(event_value), |
830 audio_data(NULL), | 834 audio_data(NULL), |
831 engine_error(SPEECH_RECOGNITION_ERROR_NONE) { | 835 engine_error(SPEECH_RECOGNITION_ERROR_NONE) { |
832 } | 836 } |
833 | 837 |
834 SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() { | 838 SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() { |
835 } | 839 } |
836 | 840 |
837 } // namespace content | 841 } // namespace content |
OLD | NEW |