| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #ifndef CONTENT_BROWSER_SPEECH_GOOGLE_STREAMING_REMOTE_ENGINE_H_ | |
| 6 #define CONTENT_BROWSER_SPEECH_GOOGLE_STREAMING_REMOTE_ENGINE_H_ | |
| 7 | |
| 8 #include <stddef.h> | |
| 9 #include <stdint.h> | |
| 10 | |
| 11 #include <memory> | |
| 12 #include <string> | |
| 13 #include <vector> | |
| 14 | |
| 15 #include "base/macros.h" | |
| 16 #include "base/memory/ref_counted.h" | |
| 17 #include "base/threading/non_thread_safe.h" | |
| 18 #include "content/browser/speech/audio_encoder.h" | |
| 19 #include "content/browser/speech/chunked_byte_buffer.h" | |
| 20 #include "content/browser/speech/speech_recognition_engine.h" | |
| 21 #include "content/common/content_export.h" | |
| 22 #include "content/public/common/speech_recognition_error.h" | |
| 23 #include "net/url_request/url_fetcher_delegate.h" | |
| 24 | |
| 25 namespace net { | |
| 26 class URLRequestContextGetter; | |
| 27 } | |
| 28 | |
| 29 namespace content { | |
| 30 | |
| 31 class AudioChunk; | |
| 32 struct SpeechRecognitionError; | |
| 33 struct SpeechRecognitionResult; | |
| 34 | |
| 35 // Implements a SpeechRecognitionEngine supporting continuous recognition by | |
| 36 // means of interaction with Google streaming speech recognition webservice. | |
| 37 // In more detail, for each session this class establishes two HTTP(S) | |
| 38 // connections with the webservice, herein called "upstream" and "downstream". | |
| 39 // Audio chunks are sent on the upstream by means of a chunked HTTP POST upload. | |
| 40 // Recognition results are retrieved in a full-duplex fashion (i.e. while | |
| 41 // pushing audio on the upstream) on the downstream by means of a chunked | |
| 42 // HTTP GET request. Pairing between the two streams is handled through a | |
| 43 // randomly generated key, unique for each request, which is passed in the | |
| 44 // &pair= arg to both stream request URLs. | |
| 45 // In the case of a regular session, the upstream is closed when the audio | |
| 46 // capture ends (notified through a |AudioChunksEnded| call) and the downstream | |
| 47 // waits for a corresponding server closure (some late results may still | |
| 48 // arrive after the upstream is closed). | |
| 49 // Both streams are guaranteed to be closed when an |EndRecognition| call is issued. | |
| 50 class CONTENT_EXPORT GoogleStreamingRemoteEngine | |
| 51 : public NON_EXPORTED_BASE(SpeechRecognitionEngine), | |
| 52 public net::URLFetcherDelegate, | |
| 53 public NON_EXPORTED_BASE(base::NonThreadSafe) { | |
| 54 public: | |
| 55 // Duration of each audio packet. | |
| 56 static const int kAudioPacketIntervalMs; | |
| 57 | |
| 58 // IDs passed to URLFetcher::Create() for the two fetchers. Used for testing. | |
| 59 static const int kUpstreamUrlFetcherIdForTesting; | |
| 60 static const int kDownstreamUrlFetcherIdForTesting; | |
| 61 | |
| 62 explicit GoogleStreamingRemoteEngine(net::URLRequestContextGetter* context); | |
| 63 ~GoogleStreamingRemoteEngine() override; | |
| 64 | |
| 65 // SpeechRecognitionEngine overrides. | |
| 66 void SetConfig(const SpeechRecognitionEngineConfig& config) override; | |
| 67 void StartRecognition() override; | |
| 68 void EndRecognition() override; | |
| 69 void TakeAudioChunk(const AudioChunk& data) override; | |
| 70 void AudioChunksEnded() override; | |
| 71 bool IsRecognitionPending() const override; | |
| 72 int GetDesiredAudioChunkDurationMs() const override; | |
| 73 | |
| 74 // net::URLFetcherDelegate overrides. | |
| 75 void OnURLFetchComplete(const net::URLFetcher* source) override; | |
| 76 void OnURLFetchDownloadProgress(const net::URLFetcher* source, | |
| 77 int64_t current, | |
| 78 int64_t total) override; | |
| 79 | |
| 80 private: | |
| 81 // Response status codes from the speech recognition webservice. | |
| 82 static const int kWebserviceStatusNoError; | |
| 83 static const int kWebserviceStatusErrorNoMatch; | |
| 84 | |
| 85 // Frame types for framed POST data. Do NOT change these values: they must | |
| 86 // match the values the server expects. | |
| 87 enum FrameType { | |
| 88 FRAME_PREAMBLE_AUDIO = 0, | |
| 89 FRAME_RECOGNITION_AUDIO = 1 | |
| 90 }; | |
| 91 | |
| 92 // Data types for the internal Finite State Machine (FSM). | |
| 93 enum FSMState { | |
| 94 STATE_IDLE = 0, | |
| 95 STATE_BOTH_STREAMS_CONNECTED, | |
| 96 STATE_WAITING_DOWNSTREAM_RESULTS, | |
| 97 STATE_MAX_VALUE = STATE_WAITING_DOWNSTREAM_RESULTS | |
| 98 }; | |
| 99 | |
| 100 enum FSMEvent { | |
| 101 EVENT_END_RECOGNITION = 0, | |
| 102 EVENT_START_RECOGNITION, | |
| 103 EVENT_AUDIO_CHUNK, | |
| 104 EVENT_AUDIO_CHUNKS_ENDED, | |
| 105 EVENT_UPSTREAM_ERROR, | |
| 106 EVENT_DOWNSTREAM_ERROR, | |
| 107 EVENT_DOWNSTREAM_RESPONSE, | |
| 108 EVENT_DOWNSTREAM_CLOSED, | |
| 109 EVENT_MAX_VALUE = EVENT_DOWNSTREAM_CLOSED | |
| 110 }; | |
| 111 | |
| 112 struct FSMEventArgs { | |
| 113 explicit FSMEventArgs(FSMEvent event_value); | |
| 114 ~FSMEventArgs(); | |
| 115 | |
| 116 FSMEvent event; | |
| 117 | |
| 118 // In case of EVENT_AUDIO_CHUNK, holds the chunk pushed by |TakeAudioChunk|. | |
| 119 scoped_refptr<const AudioChunk> audio_data; | |
| 120 | |
| 121 // In case of EVENT_DOWNSTREAM_RESPONSE, holds the current chunk bytes. | |
| 122 std::unique_ptr<std::vector<uint8_t>> response; | |
| 123 | |
| 124 private: | |
| 125 DISALLOW_COPY_AND_ASSIGN(FSMEventArgs); | |
| 126 }; | |
| 127 | |
| 128 // Invoked by both upstream and downstream URLFetcher callbacks to handle | |
| 129 // new chunk data, connection-closed, or error notifications. | |
| 130 void DispatchHTTPResponse(const net::URLFetcher* source, | |
| 131 bool end_of_response); | |
| 132 | |
| 133 // Entry point for pushing any new external event into the recognizer FSM. | |
| 134 void DispatchEvent(const FSMEventArgs& event_args); | |
| 135 | |
| 136 // Defines the behavior of the recognizer FSM, selecting the appropriate | |
| 137 // transition according to the current state and event. | |
| 138 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& event_args); | |
| 139 | |
| 140 // The methods below handle transitions of the recognizer FSM. Each returns an FSMState. | |
| 141 FSMState ConnectBothStreams(const FSMEventArgs& event_args); | |
| 142 FSMState TransmitAudioUpstream(const FSMEventArgs& event_args); | |
| 143 FSMState ProcessDownstreamResponse(const FSMEventArgs& event_args); | |
| 144 FSMState RaiseNoMatchErrorIfGotNoResults(const FSMEventArgs& event_args); | |
| 145 FSMState CloseUpstreamAndWaitForResults(const FSMEventArgs& event_args); | |
| 146 FSMState CloseDownstream(const FSMEventArgs& event_args); | |
| 147 FSMState AbortSilently(const FSMEventArgs& event_args); | |
| 148 FSMState AbortWithError(const FSMEventArgs& event_args); | |
| 149 FSMState Abort(SpeechRecognitionErrorCode error); | |
| 150 FSMState DoNothing(const FSMEventArgs& event_args); | |
| 151 FSMState NotFeasible(const FSMEventArgs& event_args); | |
| 152 | |
| 153 std::string GetAcceptedLanguages() const; | |
| 154 std::string GenerateRequestKey() const; | |
| 155 | |
| 156 // Uploads a single chunk of audio data. Handles both unframed and framed | |
| 157 // upload formats, and uses the appropriate one. | |
| 158 void UploadAudioChunk(const std::string& data, FrameType type, bool is_final); | |
| 159 | |
| 160 SpeechRecognitionEngineConfig config_; | |
| 161 std::unique_ptr<net::URLFetcher> upstream_fetcher_; | |
| 162 std::unique_ptr<net::URLFetcher> downstream_fetcher_; | |
| 163 scoped_refptr<net::URLRequestContextGetter> url_context_; | |
| 164 std::unique_ptr<AudioEncoder> encoder_; | |
| 165 std::unique_ptr<AudioEncoder> preamble_encoder_; | |
| 166 ChunkedByteBuffer chunked_byte_buffer_; | |
| 167 size_t previous_response_length_; | |
| 168 bool got_last_definitive_result_; | |
| 169 bool is_dispatching_event_; | |
| 170 bool use_framed_post_data_; | |
| 171 FSMState state_; | |
| 172 | |
| 173 DISALLOW_COPY_AND_ASSIGN(GoogleStreamingRemoteEngine); | |
| 174 }; | |
| 175 | |
| 176 } // namespace content | |
| 177 | |
| 178 #endif // CONTENT_BROWSER_SPEECH_GOOGLE_STREAMING_REMOTE_ENGINE_H_ | |
| OLD | NEW |