Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(751)

Unified Diff: content/browser/speech/speech_recognition_engine.h

Issue 1891543002: Devirtualize SpeechRecognitionEngine (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@kill_one_shot_engine
Patch Set: drop an include Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: content/browser/speech/speech_recognition_engine.h
diff --git a/content/browser/speech/speech_recognition_engine.h b/content/browser/speech/speech_recognition_engine.h
index c25fd19c359c6f9f91bb67e15097d8923315bded..bbe244437c1b2df8682db0f0eeddbcbb4b1a43b8 100644
--- a/content/browser/speech/speech_recognition_engine.h
+++ b/content/browser/speech/speech_recognition_engine.h
@@ -6,37 +6,65 @@
#define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_ENGINE_H_
#include <stdint.h>
-
+#include <memory>
#include <string>
+#include <vector>
+#include "base/macros.h"
+#include "base/memory/ref_counted.h"
+#include "base/threading/non_thread_safe.h"
+#include "content/browser/speech/audio_encoder.h"
+#include "content/browser/speech/chunked_byte_buffer.h"
#include "content/common/content_export.h"
#include "content/public/browser/speech_recognition_session_preamble.h"
+#include "content/public/common/speech_recognition_error.h"
#include "content/public/common/speech_recognition_grammar.h"
#include "content/public/common/speech_recognition_result.h"
+#include "net/url_request/url_fetcher_delegate.h"
+
+namespace net {
+class URLRequestContextGetter;
+}
namespace content {
class AudioChunk;
struct SpeechRecognitionError;
-
-// This interface models the basic contract that a speech recognition engine,
-// either working locally or relying on a remote web-service, must obey.
-// The expected call sequence for exported methods is:
+struct SpeechRecognitionResult;
+
+// A speech recognition engine supporting continuous recognition by means of
+// interaction with the Google streaming speech recognition webservice.
+//
+// This class establishes two HTTPS connections with the webservice for each
+// session, herein called "upstream" and "downstream". Audio chunks are sent on
+// the upstream by means of a chunked HTTP POST upload. Recognition results are
+// retrieved in a full-duplex fashion (i.e. while pushing audio on the upstream)
+// on the downstream by means of a chunked HTTP GET request. Pairing between the
+// two streams is handled through a randomly generated key, unique for each
+// request, which is passed in the &pair= arg to both stream request URLs. In
+// the case of a regular session, the upstream is closed when the audio capture
+// ends (notified through an |AudioChunksEnded| call) and the downstream waits
+// for a corresponding server closure (some late results may still arrive
+// after closing the upstream). Both streams are guaranteed to be closed when
+// the |EndRecognition| call is issued.
+//
+// The expected call sequence is:
// StartRecognition Mandatory at beginning of SR.
// TakeAudioChunk For every audio chunk pushed.
// AudioChunksEnded Finalize the audio stream (omitted in case of errors).
// EndRecognition Mandatory at end of SR (even on errors).
-// No delegate callbacks are allowed before StartRecognition or after
+//
+// No delegate callbacks are performed before StartRecognition or after
// EndRecognition. If a recognition was started, the caller can free the
// SpeechRecognitionEngine only after calling EndRecognition.
-class SpeechRecognitionEngine {
+
+class CONTENT_EXPORT SpeechRecognitionEngine
+ : public net::URLFetcherDelegate,
+ public NON_EXPORTED_BASE(base::NonThreadSafe) {
public:
- // Interface for receiving callbacks from this object.
class Delegate {
public:
- // Called whenever a result is retrieved. It might be issued several times,
- // (e.g., in the case of continuous speech recognition engine
- // implementations).
+ // Called whenever a result is retrieved.
virtual void OnSpeechRecognitionEngineResults(
const SpeechRecognitionResults& results) = 0;
virtual void OnSpeechRecognitionEngineEndOfUtterance() = 0;
@@ -47,7 +75,7 @@ class SpeechRecognitionEngine {
virtual ~Delegate() {}
};
- // Remote engine configuration.
+ // Engine configuration.
struct CONTENT_EXPORT Config {
Config();
~Config();
@@ -67,51 +95,130 @@ class SpeechRecognitionEngine {
scoped_refptr<SpeechRecognitionSessionPreamble> preamble;
};
- virtual ~SpeechRecognitionEngine() {}
+ // set_delegate detached from constructor for lazy dependency injection.
+ void set_delegate(Delegate* delegate) { delegate_ = delegate; }
- // Set/change the recognition engine configuration. It is not allowed to call
- // this function while a recognition is ongoing.
- virtual void SetConfig(const Config& config) = 0;
+ // Duration of each audio packet.
+ static const int kAudioPacketIntervalMs;
- // Called when the speech recognition begins, before any TakeAudioChunk call.
- virtual void StartRecognition() = 0;
+ // IDs passed to URLFetcher::Create(). Used for testing.
+ static const int kUpstreamUrlFetcherIdForTesting;
+ static const int kDownstreamUrlFetcherIdForTesting;
- // End any recognition activity and don't make any further callback.
- // Must be always called to close the corresponding StartRecognition call,
- // even in case of errors.
- // No further TakeAudioChunk/AudioChunksEnded calls are allowed after this.
- virtual void EndRecognition() = 0;
+ explicit SpeechRecognitionEngine(net::URLRequestContextGetter* context);
+ ~SpeechRecognitionEngine() override;
- // Push a chunk of uncompressed audio data, where the chunk length agrees with
- // GetDesiredAudioChunkDurationMs().
- virtual void TakeAudioChunk(const AudioChunk& data) = 0;
+ void SetConfig(const Config& config);
+ void StartRecognition();
+ void EndRecognition();
+ void TakeAudioChunk(const AudioChunk& data);
+ void AudioChunksEnded();
+ bool IsRecognitionPending() const;
+ int GetDesiredAudioChunkDurationMs() const;
- // Notifies the engine that audio capture has completed and no more chunks
- // will be pushed. The engine, however, can still provide further results
- // using the audio chunks collected so far.
- virtual void AudioChunksEnded() = 0;
+ // net::URLFetcherDelegate methods.
+ void OnURLFetchComplete(const net::URLFetcher* source) override;
+ void OnURLFetchDownloadProgress(const net::URLFetcher* source,
+ int64_t current,
+ int64_t total) override;
- // Checks wheter recognition of pushed audio data is pending.
- virtual bool IsRecognitionPending() const = 0;
+ private:
+ Delegate* delegate_;
- // Retrieves the desired duration, in milliseconds, of pushed AudioChunk(s).
- virtual int GetDesiredAudioChunkDurationMs() const = 0;
+ // Response status codes from the speech recognition webservice.
+ static const int kWebserviceStatusNoError;
+ static const int kWebserviceStatusErrorNoMatch;
- // set_delegate detached from constructor for lazy dependency injection.
- void set_delegate(Delegate* delegate) { delegate_ = delegate; }
+ // Frame type for framed POST data. Do NOT change these. They must match
+ // values the server expects.
+ enum FrameType {
+ FRAME_PREAMBLE_AUDIO = 0,
+ FRAME_RECOGNITION_AUDIO = 1
+ };
- protected:
- Delegate* delegate() const { return delegate_; }
+ // Data types for the internal Finite State Machine (FSM).
+ enum FSMState {
+ STATE_IDLE = 0,
+ STATE_BOTH_STREAMS_CONNECTED,
+ STATE_WAITING_DOWNSTREAM_RESULTS,
+ STATE_MAX_VALUE = STATE_WAITING_DOWNSTREAM_RESULTS
+ };
- private:
- Delegate* delegate_;
-};
+ enum FSMEvent {
+ EVENT_END_RECOGNITION = 0,
+ EVENT_START_RECOGNITION,
+ EVENT_AUDIO_CHUNK,
+ EVENT_AUDIO_CHUNKS_ENDED,
+ EVENT_UPSTREAM_ERROR,
+ EVENT_DOWNSTREAM_ERROR,
+ EVENT_DOWNSTREAM_RESPONSE,
+ EVENT_DOWNSTREAM_CLOSED,
+ EVENT_MAX_VALUE = EVENT_DOWNSTREAM_CLOSED
+ };
+
+ struct FSMEventArgs {
+ explicit FSMEventArgs(FSMEvent event_value);
+ ~FSMEventArgs();
+
+ FSMEvent event;
-// These typedefs are to workaround the issue with certain versions of
-// Visual Studio where it gets confused between multiple Delegate
-// classes and gives a C2500 error.
-typedef SpeechRecognitionEngine::Delegate SpeechRecognitionEngineDelegate;
-typedef SpeechRecognitionEngine::Config SpeechRecognitionEngineConfig;
hans 2016/04/13 21:23:06 Let's see if current MSVC can do without these..
+ // In case of EVENT_AUDIO_CHUNK, holds the chunk pushed by |TakeAudioChunk|.
+ scoped_refptr<const AudioChunk> audio_data;
+
+ // In case of EVENT_DOWNSTREAM_RESPONSE, holds the current chunk bytes.
+ std::unique_ptr<std::vector<uint8_t>> response;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(FSMEventArgs);
+ };
+
+ // Invoked by both upstream and downstream URLFetcher callbacks to handle
+ // new chunk data, connection-closed, or error notifications.
+ void DispatchHTTPResponse(const net::URLFetcher* source,
+ bool end_of_response);
+
+ // Entry point for pushing any new external event into the recognizer FSM.
+ void DispatchEvent(const FSMEventArgs& event_args);
+
+ // Defines the behavior of the recognizer FSM, selecting the appropriate
+ // transition according to the current state and event.
+ FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& event_args);
+
+ // The methods below handle transitions of the recognizer FSM.
+ FSMState ConnectBothStreams(const FSMEventArgs& event_args);
+ FSMState TransmitAudioUpstream(const FSMEventArgs& event_args);
+ FSMState ProcessDownstreamResponse(const FSMEventArgs& event_args);
+ FSMState RaiseNoMatchErrorIfGotNoResults(const FSMEventArgs& event_args);
+ FSMState CloseUpstreamAndWaitForResults(const FSMEventArgs& event_args);
+ FSMState CloseDownstream(const FSMEventArgs& event_args);
+ FSMState AbortSilently(const FSMEventArgs& event_args);
+ FSMState AbortWithError(const FSMEventArgs& event_args);
+ FSMState Abort(SpeechRecognitionErrorCode error);
+ FSMState DoNothing(const FSMEventArgs& event_args);
+ FSMState NotFeasible(const FSMEventArgs& event_args);
+
+ std::string GetAcceptedLanguages() const;
+ std::string GenerateRequestKey() const;
+
+ // Upload a single chunk of audio data. Handles both unframed and framed
+ // upload formats, and uses the appropriate one.
+ void UploadAudioChunk(const std::string& data, FrameType type, bool is_final);
+
+ Config config_;
+ std::unique_ptr<net::URLFetcher> upstream_fetcher_;
+ std::unique_ptr<net::URLFetcher> downstream_fetcher_;
+ scoped_refptr<net::URLRequestContextGetter> url_context_;
+ std::unique_ptr<AudioEncoder> encoder_;
+ std::unique_ptr<AudioEncoder> preamble_encoder_;
+ ChunkedByteBuffer chunked_byte_buffer_;
+ size_t previous_response_length_;
+ bool got_last_definitive_result_;
+ bool is_dispatching_event_;
+ bool use_framed_post_data_;
+ FSMState state_;
+
+ DISALLOW_COPY_AND_ASSIGN(SpeechRecognitionEngine);
+};
} // namespace content
« no previous file with comments | « content/browser/speech/speech_recognition_browsertest.cc ('k') | content/browser/speech/speech_recognition_engine.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698