Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(376)

Unified Diff: chrome/browser/speech/speech_recognizer.cc

Issue 3124009: Adds SpeechRecognizer which provides a simple interface to record and recognize speech. (Closed)
Patch Set: Address comments. Created 10 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « chrome/browser/speech/speech_recognizer.h ('k') | chrome/browser/speech/speech_recognizer_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: chrome/browser/speech/speech_recognizer.cc
diff --git a/chrome/browser/speech/speech_recognizer.cc b/chrome/browser/speech/speech_recognizer.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f83b5af1a63345a7eeac5bf0c9d96d7d41a0198c
--- /dev/null
+++ b/chrome/browser/speech/speech_recognizer.cc
@@ -0,0 +1,180 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/speech/speech_recognizer.h"
+
+#include "base/ref_counted.h"
+#include "base/scoped_ptr.h"
+#include "chrome/browser/chrome_thread.h"
+#include "chrome/browser/profile.h"
+#include "chrome/common/net/url_request_context_getter.h"
+
+using media::AudioInputController;
+using std::list;
+using std::string;
+
+namespace {
+const char* kDefaultSpeechRecognitionUrl =
+ "http://www.google.com/speech-api/v1/recognize?lang=en-us&client=chromium";
+const int kAudioPacketIntervalMs = 100; // Record 100ms long audio packets.
+const int kNumAudioChannels = 1; // Speech is recorded as mono.
+const int kNumBitsPerAudioSample = 16;
+} // namespace
+
+namespace speech_input {
+
+SpeechRecognizer::SpeechRecognizer(Delegate* delegate, int render_view_id)
+ : delegate_(delegate),
+ render_view_id_(render_view_id) {
+}
+
+SpeechRecognizer::~SpeechRecognizer() {
+ // Recording should have stopped earlier due to the endpointer or
+ // |StopRecording| being called.
+ DCHECK(!audio_controller_.get());
+ DCHECK(!request_.get() || !request_->HasPendingRequest());
+ DCHECK(audio_buffers_.empty());
+}
+
+bool SpeechRecognizer::StartRecording() {
+ DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
+ DCHECK(!audio_controller_.get());
+ DCHECK(!request_.get() || !request_->HasPendingRequest());
+
+ audio_controller_ = AudioInputController::Create(this,
+ AudioManager::AUDIO_PCM_LINEAR, kNumAudioChannels,
+ AudioManager::kTelephoneSampleRate, kNumBitsPerAudioSample,
+ (AudioManager::kTelephoneSampleRate * kAudioPacketIntervalMs) / 1000);
+ DCHECK(audio_controller_.get());
+ LOG(INFO) << "SpeechRecognizer starting record.";
+ audio_controller_->Record();
+
+ return true;
+}
+
+void SpeechRecognizer::CancelRecognition() {
+ DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
+ DCHECK(audio_controller_.get() || request_.get());
+
+ // Stop recording if required.
+ if (audio_controller_.get()) {
+ LOG(INFO) << "SpeechRecognizer stopping record.";
+ audio_controller_->Close();
+ audio_controller_ = NULL; // Releases the ref ptr.
+ }
+
+ LOG(INFO) << "SpeechRecognizer canceling recognition.";
+ ReleaseAudioBuffers();
+ request_.reset();
+}
+
+void SpeechRecognizer::StopRecording() {
+ DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));
+
+ // If audio recording has already stopped and we are in recognition phase,
+ // silently ignore any more calls to stop recording.
+ if (!audio_controller_.get())
+ return;
+
+ LOG(INFO) << "SpeechRecognizer stopping record.";
+ audio_controller_->Close();
+ audio_controller_ = NULL; // Releases the ref ptr.
+ delegate_->DidCompleteRecording(render_view_id_);
+
+ // If we haven't got any audio yet end the recognition sequence here.
+ if (audio_buffers_.empty()) {
+ // Guard against the delegate freeing us until we finish our job.
+ scoped_refptr<SpeechRecognizer> me(this);
+ delegate_->DidCompleteRecognition(render_view_id_);
+ return;
+ }
+
+ // We now have recorded audio in our buffers, so start a recognition request.
+ // Since the http request takes a single string as POST data, allocate
+ // one and copy over bytes from the audio buffers to the string.
+ int audio_buffer_length = 0;
+ for (AudioBufferQueue::iterator it = audio_buffers_.begin();
+ it != audio_buffers_.end(); it++) {
+ audio_buffer_length += (*it)->length();
+ }
+ string data;
+ data.reserve(audio_buffer_length);
+ for (AudioBufferQueue::iterator it = audio_buffers_.begin();
+ it != audio_buffers_.end(); it++) {
+ data.append(*(*it));
+ }
+ DCHECK(!request_.get());
+ request_.reset(new SpeechRecognitionRequest(
+ Profile::GetDefaultRequestContext(),
+ GURL(kDefaultSpeechRecognitionUrl),
+ this));
+ request_->Send(data);
+ ReleaseAudioBuffers(); // No need to keep the audio anymore.
+}
+
+void SpeechRecognizer::ReleaseAudioBuffers() {
+ for (AudioBufferQueue::iterator it = audio_buffers_.begin();
+ it != audio_buffers_.end(); it++)
+ delete *it;
+ audio_buffers_.clear();
+}
+
+// Invoked in the audio thread.
+void SpeechRecognizer::OnError(AudioInputController* controller,
+ int error_code) {
+ ChromeThread::PostTask(ChromeThread::IO, FROM_HERE,
+ NewRunnableMethod(this,
+ &SpeechRecognizer::HandleOnError,
+ error_code));
+}
+
+void SpeechRecognizer::HandleOnError(int error_code) {
+ LOG(WARNING) << "SpeechRecognizer::HandleOnError, code=" << error_code;
+
+ // Check if we are still recording before canceling recognition, as
+ // recording might have been stopped after this error was posted to the queue
+ // by |OnError|.
+ if (!audio_controller_.get())
+ return;
+
+ CancelRecognition();
+ delegate_->DidCompleteRecording(render_view_id_);
+ delegate_->DidCompleteRecognition(render_view_id_);
+}
+
+void SpeechRecognizer::OnData(AudioInputController* controller,
+ const uint8* data, uint32 size) {
+ if (size == 0) // This could happen when recording stops and is normal.
+ return;
+
+ string* str_data = new string(reinterpret_cast<const char*>(data), size);
+ ChromeThread::PostTask(ChromeThread::IO, FROM_HERE,
+ NewRunnableMethod(this,
+ &SpeechRecognizer::HandleOnData,
+ str_data));
+}
+
+void SpeechRecognizer::HandleOnData(string* data) {
+ // Check if we are still recording and if not discard this buffer, as
+ // recording might have been stopped after this buffer was posted to the queue
+ // by |OnData|.
+ if (!audio_controller_.get()) {
+ delete data;
+ return;
+ }
+
+ // TODO(satish): Once we have streaming POST, start sending the data received
+ // here as POST chunks.
+ audio_buffers_.push_back(data);
+}
+
+void SpeechRecognizer::SetRecognitionResult(bool error, const string16& value) {
+ delegate_->SetRecognitionResult(render_view_id_, error, value);
+
+ // Guard against the delegate freeing us until we finish our job.
+ scoped_refptr<SpeechRecognizer> me(this);
+ delegate_->DidCompleteRecognition(render_view_id_);
+}
+
+} // namespace speech_input
« no previous file with comments | « chrome/browser/speech/speech_recognizer.h ('k') | chrome/browser/speech/speech_recognizer_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698