chrome/browser/speech/speech_recognizer.cc - Issue 3124009: Adds SpeechRecognizer which provides a simple interface to record and recognize speech.

Side by Side Diff: chrome/browser/speech/speech_recognizer.cc

Issue 3124009: Adds SpeechRecognizer which provides a simple interface to record and recognize speech. (Closed)

Patch Set: Address comments. Created 10 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "chrome/browser/speech/speech_recognizer.h"

	6

	7 #include "base/ref_counted.h"

	8 #include "base/scoped_ptr.h"

	9 #include "chrome/browser/chrome_thread.h"

	10 #include "chrome/browser/profile.h"

	11 #include "chrome/common/net/url_request_context_getter.h"

	12

	13 using media::AudioInputController;

	14 using std::list;

	15 using std::string;

	16

	17 namespace {

	18 const char* kDefaultSpeechRecognitionUrl =

	19 "http://www.google.com/speech-api/v1/recognize?lang=en-us&client=chromium";

	20 const int kAudioPacketIntervalMs = 100; // Record 100ms long audio packets.

	21 const int kNumAudioChannels = 1; // Speech is recorded as mono.

	22 const int kNumBitsPerAudioSample = 16;

	23 } // namespace

	24

	25 namespace speech_input {

	26

	27 SpeechRecognizer::SpeechRecognizer(Delegate* delegate, int render_view_id)

	28 : delegate_(delegate),

	29 render_view_id_(render_view_id) {

	30 }

	31

	32 SpeechRecognizer::~SpeechRecognizer() {

	33 // Recording should have stopped earlier due to the endpointer or

	34 // \|StopRecording\| being called.

	35 DCHECK(!audio_controller_.get());

	36 DCHECK(!request_.get() \|\| !request_->HasPendingRequest());

	37 DCHECK(audio_buffers_.empty());

	38 }

	39

	40 bool SpeechRecognizer::StartRecording() {

	41 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));

	42 DCHECK(!audio_controller_.get());

	43 DCHECK(!request_.get() \|\| !request_->HasPendingRequest());

	44

	45 audio_controller_ = AudioInputController::Create(this,

	46 AudioManager::AUDIO_PCM_LINEAR, kNumAudioChannels,

	47 AudioManager::kTelephoneSampleRate, kNumBitsPerAudioSample,

	48 (AudioManager::kTelephoneSampleRate * kAudioPacketIntervalMs) / 1000);

	49 DCHECK(audio_controller_.get());

	50 LOG(INFO) << "SpeechRecognizer starting record.";

	51 audio_controller_->Record();

	52

	53 return true;

	54 }

	55

	56 void SpeechRecognizer::CancelRecognition() {

	57 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));

	58 DCHECK(audio_controller_.get() \|\| request_.get());

	59

	60 // Stop recording if required.

	61 if (audio_controller_.get()) {

	62 LOG(INFO) << "SpeechRecognizer stopping record.";

	63 audio_controller_->Close();

	64 audio_controller_ = NULL; // Releases the ref ptr.

	65 }

	66

	67 LOG(INFO) << "SpeechRecognizer canceling recognition.";

	68 ReleaseAudioBuffers();

	69 request_.reset();

	70 }

	71

	72 void SpeechRecognizer::StopRecording() {

	73 DCHECK(ChromeThread::CurrentlyOn(ChromeThread::IO));

	74

	75 // If audio recording has already stopped and we are in recognition phase,

	76 // silently ignore any more calls to stop recording.

	77 if (!audio_controller_.get())

	78 return;

	79

	80 LOG(INFO) << "SpeechRecognizer stopping record.";

	81 audio_controller_->Close();

	82 audio_controller_ = NULL; // Releases the ref ptr.

	83 delegate_->DidCompleteRecording(render_view_id_);

	84

	85 // If we haven't got any audio yet end the recognition sequence here.

	86 if (audio_buffers_.empty()) {

	87 // Guard against the delegate freeing us until we finish our job.

	88 scoped_refptr<SpeechRecognizer> me(this);

	89 delegate_->DidCompleteRecognition(render_view_id_);

	90 return;

	91 }

	92

	93 // We now have recorded audio in our buffers, so start a recognition request.

	94 // Since the http request takes a single string as POST data, allocate

	95 // one and copy over bytes from the audio buffers to the string.

	96 int audio_buffer_length = 0;

	97 for (AudioBufferQueue::iterator it = audio_buffers_.begin();

	98 it != audio_buffers_.end(); it++) {

	99 audio_buffer_length += (*it)->length();

	100 }

	101 string data;

	102 data.reserve(audio_buffer_length);

	103 for (AudioBufferQueue::iterator it = audio_buffers_.begin();

	104 it != audio_buffers_.end(); it++) {

	105 data.append((it));

	106 }

	107 DCHECK(!request_.get());

	108 request_.reset(new SpeechRecognitionRequest(

	109 Profile::GetDefaultRequestContext(),

	110 GURL(kDefaultSpeechRecognitionUrl),

	111 this));

	112 request_->Send(data);

	113 ReleaseAudioBuffers(); // No need to keep the audio anymore.

	114 }

	115

	116 void SpeechRecognizer::ReleaseAudioBuffers() {

	117 for (AudioBufferQueue::iterator it = audio_buffers_.begin();

	118 it != audio_buffers_.end(); it++)

	119 delete *it;

	120 audio_buffers_.clear();

	121 }

	122

	123 // Invoked in the audio thread.

	124 void SpeechRecognizer::OnError(AudioInputController* controller,

	125 int error_code) {

	126 ChromeThread::PostTask(ChromeThread::IO, FROM_HERE,

	127 NewRunnableMethod(this,

	128 &SpeechRecognizer::HandleOnError,

	129 error_code));

	130 }

	131

	132 void SpeechRecognizer::HandleOnError(int error_code) {

	133 LOG(WARNING) << "SpeechRecognizer::HandleOnError, code=" << error_code;

	134

	135 // Check if we are still recording before canceling recognition, as

	136 // recording might have been stopped after this error was posted to the queue

	137 // by \|OnError\|.

	138 if (!audio_controller_.get())

	139 return;

	140

	141 CancelRecognition();

	142 delegate_->DidCompleteRecording(render_view_id_);

	143 delegate_->DidCompleteRecognition(render_view_id_);

	144 }

	145

	146 void SpeechRecognizer::OnData(AudioInputController* controller,

	147 const uint8* data, uint32 size) {

	148 if (size == 0) // This could happen when recording stops and is normal.

	149 return;

	150

	151 string* str_data = new string(reinterpret_cast<const char*>(data), size);

	152 ChromeThread::PostTask(ChromeThread::IO, FROM_HERE,

	153 NewRunnableMethod(this,

	154 &SpeechRecognizer::HandleOnData,

	155 str_data));

	156 }

	157

	158 void SpeechRecognizer::HandleOnData(string* data) {

	159 // Check if we are still recording and if not discard this buffer, as

	160 // recording might have been stopped after this buffer was posted to the queue

	161 // by \|OnData\|.

	162 if (!audio_controller_.get()) {

	163 delete data;

	164 return;

	165 }

	166

	167 // TODO(satish): Once we have streaming POST, start sending the data received

	168 // here as POST chunks.

	169 audio_buffers_.push_back(data);

	170 }

	171

	172 void SpeechRecognizer::SetRecognitionResult(bool error, const string16& value) {

	173 delegate_->SetRecognitionResult(render_view_id_, error, value);

	174

	175 // Guard against the delegate freeing us until we finish our job.

	176 scoped_refptr<SpeechRecognizer> me(this);

	177 delegate_->DidCompleteRecognition(render_view_id_);

	178 }

	179

	180 } // namespace speech_input

OLD	NEW

« no previous file with comments | « chrome/browser/speech/speech_recognizer.h ('k') | chrome/browser/speech/speech_recognizer_unittest.cc » ('j') | no next file with comments »