Index: content/browser/speech/speech_recognizer_impl.cc |
diff --git a/content/browser/speech/speech_recognizer_impl.cc b/content/browser/speech/speech_recognizer_impl.cc |
index a08ffe17661af3b5ecaeffa381fa22b8a0c6308f..96b0876bf5cfbb8425c558e168ab469ac75690ac 100644 |
--- a/content/browser/speech/speech_recognizer_impl.cc |
+++ b/content/browser/speech/speech_recognizer_impl.cc |
@@ -55,6 +55,7 @@ class SpeechRecognizerImpl::OnDataConverter |
const AudioParameters input_parameters_; |
const AudioParameters output_parameters_; |
bool waiting_for_input_; |
+ int convert_count_; |
DISALLOW_COPY_AND_ASSIGN(OnDataConverter); |
}; |
@@ -119,8 +120,18 @@ SpeechRecognizerImpl::OnDataConverter::OnDataConverter( |
output_bus_(AudioBus::Create(output_params)), |
input_parameters_(input_params), |
output_parameters_(output_params), |
- waiting_for_input_(false) { |
+ waiting_for_input_(false), |
+ convert_count_(0) { |
audio_converter_.AddInput(this); |
+ DVLOG(1) << "SRI::AudioConverter::ChunkSize " << audio_converter_.ChunkSize(); |
+ |
+ // Initial priming with zeros... |
+ // waiting_for_input_ = true; |
+ // input_bus_->Zero(); |
+ // audio_converter_.Convert(output_bus_.get()); |
+ |
+ audio_converter_.PrimeWithSilence(); |
+ DVLOG(1) << "SRI::AudioConverter::ChunkSize " << audio_converter_.ChunkSize(); |
} |
SpeechRecognizerImpl::OnDataConverter::~OnDataConverter() { |
@@ -132,11 +143,18 @@ SpeechRecognizerImpl::OnDataConverter::~OnDataConverter() { |
scoped_refptr<AudioChunk> SpeechRecognizerImpl::OnDataConverter::Convert( |
const AudioBus* data) { |
CHECK_EQ(data->frames(), input_parameters_.frames_per_buffer()); |
- |
+ DVLOG(1) << "SRI::ODC::Convert..."; |
+ // Data should always have been provided by ProvideInput(). If not, the |
+ // previous call to Convert() could produce converted data using cached |
+ // data. But that means that we will miss one large frame of recorded audio |
+ // samples. |
+ CHECK(!waiting_for_input_); |
data->CopyTo(input_bus_.get()); |
waiting_for_input_ = true; |
+ convert_count_++; |
audio_converter_.Convert(output_bus_.get()); |
DaleCurtis
2015/07/07 16:38:17
As mentioned in the email thread, you should proba… [comment truncated in export]
henrika (OOO until Aug 14)
2015/07/07 19:30:05
Smart. But note that I have modified the input siz… [comment truncated in export]
DaleCurtis
2015/07/07 20:40:44
Well, I'm not confident your modification always e… [comment truncated in export]
henrika (OOO until Aug 14)
2015/07/07 21:14:08
Got it. I will test your scheme using 102 where I… [comment truncated in export]
DaleCurtis
2015/07/07 21:44:15
I think it's fine to use 100ms like you do w/ prim… [comment truncated in export]
henrika (OOO until Aug 14)
2015/07/08 12:22:59
Done. Hope you are OK with how I handle the extra… [comment truncated in export]
|
+ DVLOG(1) << "SRI::ODC::Convert done (" << convert_count_ << ")"; |
tommi (sloooow) - chröme
2015/07/07 12:40:21
seems like this will always log a value higher tha… [comment truncated in export]
henrika (OOO until Aug 14)
2015/07/07 12:56:54
Got it. Plan is to remove this counter. Just wante… [comment truncated in export]
|
scoped_refptr<AudioChunk> chunk( |
new AudioChunk(output_parameters_.GetBytesPerBuffer(), |
@@ -149,6 +167,7 @@ scoped_refptr<AudioChunk> SpeechRecognizerImpl::OnDataConverter::Convert( |
double SpeechRecognizerImpl::OnDataConverter::ProvideInput( |
AudioBus* dest, base::TimeDelta buffer_delay) { |
+ DVLOG(1) << "SRI::ODC::ProvideInput"; |
// The audio converter should never ask for more than one bus in each call |
// to Convert(). If so, we have a serious issue in our design since we might |
// miss recorded chunks of 100 ms audio data. |
@@ -523,6 +542,8 @@ SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) { |
AudioParameters output_parameters = AudioParameters( |
AudioParameters::AUDIO_PCM_LOW_LATENCY, kChannelLayout, kAudioSampleRate, |
kNumBitsPerAudioSample, frames_per_buffer); |
+ DVLOG(1) << "SRI::output_parameters: " |
+ << output_parameters.AsHumanReadableString(); |
// Audio converter will receive audio based on these parameters as input. |
// On Windows we start by verifying that Core Audio is supported. If not, |
@@ -546,14 +567,18 @@ SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) { |
// Due to implementation details in the audio converter, 2 milliseconds |
// are added to the default frame size (100 ms) to ensure there is enough |
// data to generate 100 ms of output when resampling. |
+ // frames_per_buffer = |
+ // ((in_params.sample_rate() * (chunk_duration_ms + 2)) / 1000.0) + 0.5; |
frames_per_buffer = |
- ((in_params.sample_rate() * (chunk_duration_ms + 2)) / 1000.0) + 0.5; |
+ ((in_params.sample_rate() * chunk_duration_ms) / 1000.0) + 0.5; |
input_parameters.Reset(in_params.format(), |
in_params.channel_layout(), |
in_params.channels(), |
in_params.sample_rate(), |
in_params.bits_per_sample(), |
frames_per_buffer); |
+ DVLOG(1) << "SRI::input_parameters: " |
+ << input_parameters.AsHumanReadableString(); |
} |
// Create an audio converter which converts data between native input format |