chrome/browser/ui/app_list/speech_recognizer.cc - Issue 676593003: Implement native speech recognition for the launcher.

Side by Side Diff: chrome/browser/ui/app_list/speech_recognizer.cc

Issue 676593003: Implement native speech recognition for the launcher. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Rebase. Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« chrome/browser/ui/app_list/speech_recognizer.h ('K') | « chrome/browser/ui/app_list/speech_recognizer.h ('k') | chrome/browser/ui/app_list/speech_recognizer_browsertest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 // Copyright 2014 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "chrome/browser/ui/app_list/speech_recognizer.h"

	6

	7 #include <algorithm>

	8

	9 #include "base/bind.h"

	10 #include "base/strings/string16.h"

	11 #include "chrome/browser/ui/app_list/speech_recognizer_delegate.h"

	12 #include "content/public/browser/browser_thread.h"

	13 #include "content/public/browser/render_process_host.h"

	14 #include "content/public/browser/speech_recognition_manager.h"

	15 #include "content/public/browser/speech_recognition_session_config.h"

	16 #include "content/public/browser/web_contents.h"

	17 #include "content/public/common/speech_recognition_error.h"

	18

	19 namespace app_list {

	20

	21 // Length of timeout to stop recognition if no result arrives after sound starts.

	22 static const int kNoSpeechTimeoutInSeconds = 5;

	23

	24 // Speech recognizer listener. This is separate from SpeechRecognizer because

	25 // the speech recognition engine must function from the IO thread. Because of

	26 // this, the lifecycle of this class must be decoupled from the lifecycle of

	27 // SpeechRecognizer. To avoid circular references, this class has no reference

	28 // to SpeechRecognizer. Instead, it has a reference to the

	29 // SpeechRecognizerDelegate via a weak pointer that is only ever referenced from

	30 // the UI thread.

	31 class SpeechRecognizer::EventListener

	32 : public base::RefCountedThreadSafe<SpeechRecognizer::EventListener>,

	33 public content::SpeechRecognitionEventListener,

	34 public SpeechRecognizerDelegate {

	35 public:

	36 EventListener(base::WeakPtr<SpeechRecognizerDelegate> delegate,

	37 net::URLRequestContextGetter* url_request_context_getter,

	38 const std::string& locale);

	39

	40 void StartOnIOThread(int render_process_id);

	41 void StopOnIOThread();

	42

	43 private:

	44 friend class base::RefCountedThreadSafe<SpeechRecognizer::EventListener>;

	45 ~EventListener();

	46

	47 void NotifyRecognitionStateChanged(SpeechRecognitionState new_state);

	48

	49 void StartSpeechTimeout();

	50 void StopSpeechTimeout();

	51 void SpeechTimeout();

	52

	53 // Overridden from app_list::SpeechRecognizerDelegate:

	54 // Always called on the UI thread and used to dispatch to delegate_.

	55 void OnSpeechResult(const base::string16& query, bool is_final) override;

	56 void OnSpeechSoundLevelChanged(int16_t level) override;

	57 void OnSpeechRecognitionStateChanged(

	58 SpeechRecognitionState new_state) override;

	59 content::WebContents* GetSpeechContents() override;

	60

	61 // Overidden from content::SpeechRecognitionEventListener:

	62 // These are always called on the IO thread.

	63 void OnRecognitionStart(int session_id) override;

	64 void OnRecognitionEnd(int session_id) override;

	65 void OnRecognitionResults(

	66 int session_id,

	67 const content::SpeechRecognitionResults& results) override;

	68 void OnRecognitionError(

	69 int session_id, const content::SpeechRecognitionError& error) override;

	70 void OnSoundStart(int session_id) override;

	71 void OnSoundEnd(int session_id) override;

	72 void OnAudioLevelsChange(

	73 int session_id, float volume, float noise_volume) override;

	74 void OnEnvironmentEstimationComplete(int session_id) override;

	75 void OnAudioStart(int session_id) override;

	76 void OnAudioEnd(int session_id) override;

	77

	78 // Only access from the UI thread.

	79 base::WeakPtr<SpeechRecognizerDelegate> delegate_;

	80

	81 scoped_refptr<net::URLRequestContextGetter> url_request_context_getter_;

	82 std::string locale_;

	83 base::Timer speech_timeout_;

	84 int session_;

	85

	86 base::WeakPtrFactory<EventListener> weak_factory_;

	87

	88 DISALLOW_COPY_AND_ASSIGN(EventListener);

	89 };

	90

	91 SpeechRecognizer::EventListener::EventListener(

	92 base::WeakPtr<SpeechRecognizerDelegate> delegate,

	93 net::URLRequestContextGetter* url_request_context_getter,

	94 const std::string& locale)

	95 : delegate_(delegate),

	96 url_request_context_getter_(url_request_context_getter),

	97 locale_(locale),

	98 speech_timeout_(false, false),

	99 session_(-1),

	100 weak_factory_(this) {

	101 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);

	102 }

	103

	104 SpeechRecognizer::EventListener::~EventListener() {

	105 DCHECK(!speech_timeout_.IsRunning());

	106 }

	107

	108 void SpeechRecognizer::EventListener::StartOnIOThread(int render_process_id) {

	109 DCHECK_CURRENTLY_ON(content::BrowserThread::IO);

	110 if (session_ != -1)

	111 StopOnIOThread();

	112

	113 content::SpeechRecognitionSessionConfig config;

	114 config.language = locale_;

	115 config.is_legacy_api = false;

	116 config.continuous = true;

	117 config.interim_results = true;

	118 config.max_hypotheses = 1;

	119 config.filter_profanities = true;

	120 config.url_request_context_getter = url_request_context_getter_;

	121 config.event_listener = weak_factory_.GetWeakPtr();

	122 config.initial_context.render_process_id = render_process_id;

	123

	124 auto speech_instance = content::SpeechRecognitionManager::GetInstance();

	125 session_ = speech_instance->CreateSession(config);

	126 speech_instance->StartSession(session_);

	127 }

	128

	129 void SpeechRecognizer::EventListener::StopOnIOThread() {

	130 DCHECK_CURRENTLY_ON(content::BrowserThread::IO);

	131 if (session_ == -1)

	132 return;

	133

	134 // Prevent recursion.

	135 int session = session_;

	136 session_ = -1;

	137 StopSpeechTimeout();

	138 content::SpeechRecognitionManager::GetInstance()->StopAudioCaptureForSession(

	139 session);

	140 }

	141

	142 void SpeechRecognizer::EventListener::NotifyRecognitionStateChanged(

	143 SpeechRecognitionState new_state) {

	144 content::BrowserThread::PostTask(

	145 content::BrowserThread::UI,

	146 FROM_HERE,

	147 base::Bind(&SpeechRecognizerDelegate::OnSpeechRecognitionStateChanged,

	148 this,

	149 new_state));

	150 }

	151

	152 void SpeechRecognizer::EventListener::StartSpeechTimeout() {

	153 DCHECK_CURRENTLY_ON(content::BrowserThread::IO);

	154 speech_timeout_.Start(

	155 FROM_HERE,

	156 base::TimeDelta::FromSeconds(kNoSpeechTimeoutInSeconds),

	157 base::Bind(&SpeechRecognizer::EventListener::SpeechTimeout, this));

	158 }

	159

	160 void SpeechRecognizer::EventListener::StopSpeechTimeout() {

	161 DCHECK_CURRENTLY_ON(content::BrowserThread::IO);

	162 speech_timeout_.Stop();

	163 }

	164

	165 void SpeechRecognizer::EventListener::SpeechTimeout() {

	166 DCHECK_CURRENTLY_ON(content::BrowserThread::IO);

	167 StopOnIOThread();

	168 }

	169

	170 void SpeechRecognizer::EventListener::OnRecognitionStart(int session_id) {

	171 NotifyRecognitionStateChanged(SPEECH_RECOGNITION_RECOGNIZING);

	172 }

	173

	174 void SpeechRecognizer::EventListener::OnRecognitionEnd(int session_id) {

	175 StopOnIOThread();

	176 NotifyRecognitionStateChanged(SPEECH_RECOGNITION_READY);

	177 }

	178

	179 void SpeechRecognizer::EventListener::OnRecognitionResults(

	180 int session_id, const content::SpeechRecognitionResults& results) {

	181 base::string16 result_str;

	182 size_t final_count = 0;

	183 for (const auto& result : results) {

	184 if (!result.is_provisional)

	185 final_count++;

	186 result_str += result.hypotheses[0].utterance;

	187 }

	188 StopSpeechTimeout();

	189 content::BrowserThread::PostTask(

	190 content::BrowserThread::UI,

	191 FROM_HERE,

	192 base::Bind(&SpeechRecognizerDelegate::OnSpeechResult,

	193 this,

	194 result_str,

	195 final_count == results.size()));

	196

	197 // Stop the moment we have a final result.

	198 if (final_count == results.size())

	199 StopOnIOThread();

	200 }

	201

	202 void SpeechRecognizer::EventListener::OnRecognitionError(

	203 int session_id, const content::SpeechRecognitionError& error) {

	204 StopOnIOThread();

	205 if (error.code == content::SPEECH_RECOGNITION_ERROR_NETWORK) {

	206 NotifyRecognitionStateChanged(SPEECH_RECOGNITION_NETWORK_ERROR);

	207 }

	208 NotifyRecognitionStateChanged(SPEECH_RECOGNITION_READY);

	209 }

	210

	211 void SpeechRecognizer::EventListener::OnSoundStart(int session_id) {

	212 StartSpeechTimeout();

	213 NotifyRecognitionStateChanged(SPEECH_RECOGNITION_IN_SPEECH);

	214 }

	215

	216 void SpeechRecognizer::EventListener::OnSoundEnd(int session_id) {

	217 StopOnIOThread();

	218 NotifyRecognitionStateChanged(SPEECH_RECOGNITION_RECOGNIZING);

	219 }

	220

	221 void SpeechRecognizer::EventListener::OnAudioLevelsChange(

	222 int session_id, float volume, float noise_volume) {

	223 DCHECK_LE(0.0, volume);

	224 DCHECK_GE(1.0, volume);

	225 DCHECK_LE(0.0, noise_volume);

	226 DCHECK_GE(1.0, noise_volume);

	227 volume = std::max(0.0f, volume - noise_volume);

	228 // Both \|volume\| and \|noise_volume\| are defined to be in the range [0.0, 1.0].

	229 // See: content/public/browser/speech_recognition_event_listener.h

	230 int16_t sound_level = static_cast<int16_t>(INT16_MAX * volume);

	231 content::BrowserThread::PostTask(

	232 content::BrowserThread::UI,

	233 FROM_HERE,

	234 base::Bind(&SpeechRecognizerDelegate::OnSpeechSoundLevelChanged,

	235 this,

	236 sound_level));

	237 }

	238

	239 void SpeechRecognizer::EventListener::OnEnvironmentEstimationComplete(

	240 int session_id) {

	241 }

	242

	243 void SpeechRecognizer::EventListener::OnAudioStart(int session_id) {

	244 }

	245

	246 void SpeechRecognizer::EventListener::OnAudioEnd(int session_id) {

	247 }

	248

	249 void SpeechRecognizer::EventListener::OnSpeechResult(
	Lei Zhang 2014/11/05 00:54:15: Why not put this in SpeechRecognizer?
	Anand Mistry (off Chromium) 2014/11/05 02:58:33: On 2014/11/05 00:54:15, Lei Zhang wrote: > Why not put this in SpeechRecognizer? I've learned that using WeakPtr<> with callbacks "Just Works", so none of this is necessary. Yay WeakPtr!!!
	250 const base::string16& query, bool is_final) {

	251 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);

	252 if (delegate_)

	253 delegate_->OnSpeechResult(query, is_final);

	254 }

	255

	256 void SpeechRecognizer::EventListener::OnSpeechSoundLevelChanged(int16_t level) {

	257 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);

	258 if (delegate_)

	259 delegate_->OnSpeechSoundLevelChanged(level);

	260 }

	261

	262 void SpeechRecognizer::EventListener::OnSpeechRecognitionStateChanged(

	263 SpeechRecognitionState new_state) {

	264 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);

	265 if (delegate_)

	266 delegate_->OnSpeechRecognitionStateChanged(new_state);

	267 }

	268

	269 content::WebContents* SpeechRecognizer::EventListener::GetSpeechContents() {

	270 NOTREACHED();

	271 return nullptr;

	272 }

	273

	274 SpeechRecognizer::SpeechRecognizer(

	275 SpeechRecognizerDelegate* delegate,

	276 net::URLRequestContextGetter* url_request_context_getter,

	277 const std::string& locale)

	278 : delegate_(delegate),

	279 delegate_weak_factory_(delegate) {

	280 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);

	281

	282 // Must be done after delegate_weak_factory_ has been constructed.

	283 speech_event_listener_ = new EventListener(

	284 delegate_weak_factory_.GetWeakPtr(), url_request_context_getter, locale);

	285 }

	286

	287 SpeechRecognizer::~SpeechRecognizer() {

	288 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);

	289 Stop();

	290 }

	291

	292 void SpeechRecognizer::Start() {

	293 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);

	294 // The speech recognizer checks to see if the request is allowed by looking

	295 // up the renderer process. A renderer containing the app-list is hard-coded

	296 // to be allowed.

	297 content::WebContents* contents = delegate_->GetSpeechContents();

	298 if (!contents)

	299 return;

	300

	301 content::BrowserThread::PostTask(

	302 content::BrowserThread::IO,

	303 FROM_HERE,

	304 base::Bind(&SpeechRecognizer::EventListener::StartOnIOThread,

	305 speech_event_listener_,

	306 contents->GetRenderProcessHost()->GetID()));

	307 }

	308

	309 void SpeechRecognizer::Stop() {

	310 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);

	311 content::BrowserThread::PostTask(

	312 content::BrowserThread::IO,

	313 FROM_HERE,

	314 base::Bind(&SpeechRecognizer::EventListener::StopOnIOThread,

	315 speech_event_listener_));

	316 }

	317

	318 } // namespace app_list

OLD	NEW