Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(18)

Side by Side Diff: content/browser/speech/google_streaming_remote_engine.h

Issue 1891543002: Devirtualize SpeechRecognitionEngine (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@kill_one_shot_engine
Patch Set: drop an include Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef CONTENT_BROWSER_SPEECH_GOOGLE_STREAMING_REMOTE_ENGINE_H_
6 #define CONTENT_BROWSER_SPEECH_GOOGLE_STREAMING_REMOTE_ENGINE_H_
7
8 #include <stddef.h>
9 #include <stdint.h>
10
11 #include <memory>
12 #include <string>
13 #include <vector>
14
15 #include "base/macros.h"
16 #include "base/memory/ref_counted.h"
17 #include "base/threading/non_thread_safe.h"
18 #include "content/browser/speech/audio_encoder.h"
19 #include "content/browser/speech/chunked_byte_buffer.h"
20 #include "content/browser/speech/speech_recognition_engine.h"
21 #include "content/common/content_export.h"
22 #include "content/public/common/speech_recognition_error.h"
23 #include "net/url_request/url_fetcher_delegate.h"
24
25 namespace net {
26 class URLRequestContextGetter;
27 }
28
29 namespace content {
30
31 class AudioChunk;
32 struct SpeechRecognitionError;
33 struct SpeechRecognitionResult;
34
35 // Implements a SpeechRecognitionEngine supporting continuous recognition by
36 // means of interaction with the Google streaming speech recognition webservice.
37 // In more detail, this class establishes two HTTP(S) connections with the
38 // webservice for each session, herein called "upstream" and "downstream".
39 // Audio chunks are sent on the upstream by means of a chunked HTTP POST upload.
40 // Recognition results are retrieved in a full-duplex fashion (i.e. while
41 // pushing audio on the upstream) on the downstream by means of a chunked
42 // HTTP GET request. Pairing between the two streams is handled through a
43 // randomly generated key, unique for each request, which is passed in the
44 // &pair= arg to both stream request URLs.
45 // In the case of a regular session, the upstream is closed when the audio
46 // capture ends (notified through an |AudioChunksEnded| call) and the downstream
47 // waits for a corresponding server closure (some late results may still
48 // arrive after the upstream is closed).
49 // Both streams are guaranteed to be closed when an |EndRecognition| call is issued.
50 class CONTENT_EXPORT GoogleStreamingRemoteEngine
51 : public NON_EXPORTED_BASE(SpeechRecognitionEngine),
52 public net::URLFetcherDelegate,
53 public NON_EXPORTED_BASE(base::NonThreadSafe) {
54 public:
55 // Duration of each audio packet, in milliseconds (per the "Ms" suffix).
56 static const int kAudioPacketIntervalMs;
57
58 // IDs passed to URLFetcher::Create(). Used for testing.
59 static const int kUpstreamUrlFetcherIdForTesting;
60 static const int kDownstreamUrlFetcherIdForTesting;
61
62 explicit GoogleStreamingRemoteEngine(net::URLRequestContextGetter* context);
63 ~GoogleStreamingRemoteEngine() override;
64
65 // SpeechRecognitionEngine overrides.
66 void SetConfig(const SpeechRecognitionEngineConfig& config) override;
67 void StartRecognition() override;
68 void EndRecognition() override;
69 void TakeAudioChunk(const AudioChunk& data) override;
70 void AudioChunksEnded() override;
71 bool IsRecognitionPending() const override;
72 int GetDesiredAudioChunkDurationMs() const override;
73
74 // net::URLFetcherDelegate overrides.
75 void OnURLFetchComplete(const net::URLFetcher* source) override;
76 void OnURLFetchDownloadProgress(const net::URLFetcher* source,
77 int64_t current,
78 int64_t total) override;
79
80 private:
81 // Response status codes from the speech recognition webservice.
82 static const int kWebserviceStatusNoError;
83 static const int kWebserviceStatusErrorNoMatch;
84
85 // Frame type for framed POST data. Do NOT change these values: they must
86 // match the values the server expects.
87 enum FrameType {
88 FRAME_PREAMBLE_AUDIO = 0,
89 FRAME_RECOGNITION_AUDIO = 1
90 };
91
92 // Data types for the internal Finite State Machine (FSM): states first.
93 enum FSMState {
94 STATE_IDLE = 0,
95 STATE_BOTH_STREAMS_CONNECTED,
96 STATE_WAITING_DOWNSTREAM_RESULTS,
97 STATE_MAX_VALUE = STATE_WAITING_DOWNSTREAM_RESULTS  // Alias of the last state.
98 };
99
100 enum FSMEvent {  // Events pushed into the FSM through DispatchEvent().
101 EVENT_END_RECOGNITION = 0,
102 EVENT_START_RECOGNITION,
103 EVENT_AUDIO_CHUNK,
104 EVENT_AUDIO_CHUNKS_ENDED,
105 EVENT_UPSTREAM_ERROR,
106 EVENT_DOWNSTREAM_ERROR,
107 EVENT_DOWNSTREAM_RESPONSE,
108 EVENT_DOWNSTREAM_CLOSED,
109 EVENT_MAX_VALUE = EVENT_DOWNSTREAM_CLOSED  // Alias of the last event.
110 };
111
112 struct FSMEventArgs {  // An FSM event plus its event-specific payload.
113 explicit FSMEventArgs(FSMEvent event_value);
114 ~FSMEventArgs();
115
116 FSMEvent event;
117
118 // In case of EVENT_AUDIO_CHUNK, holds the chunk pushed by |TakeAudioChunk|.
119 scoped_refptr<const AudioChunk> audio_data;
120
121 // In case of EVENT_DOWNSTREAM_RESPONSE, holds the current chunk bytes.
122 std::unique_ptr<std::vector<uint8_t>> response;
123
124 private:
125 DISALLOW_COPY_AND_ASSIGN(FSMEventArgs);
126 };
127
128 // Invoked by both upstream and downstream URLFetcher callbacks to handle
129 // new chunk data, connection-closed or error notifications.
130 void DispatchHTTPResponse(const net::URLFetcher* source,
131 bool end_of_response);
132
133 // Entry point for pushing any new external event into the recognizer FSM.
134 void DispatchEvent(const FSMEventArgs& event_args);
135
136 // Defines the behavior of the recognizer FSM, selecting the appropriate
137 // transition according to the current state and event.
138 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& event_args);
139
140 // The methods below handle transitions of the recognizer FSM; each returns an FSMState (presumably the next state -- confirm in the .cc).
141 FSMState ConnectBothStreams(const FSMEventArgs& event_args);
142 FSMState TransmitAudioUpstream(const FSMEventArgs& event_args);
143 FSMState ProcessDownstreamResponse(const FSMEventArgs& event_args);
144 FSMState RaiseNoMatchErrorIfGotNoResults(const FSMEventArgs& event_args);
145 FSMState CloseUpstreamAndWaitForResults(const FSMEventArgs& event_args);
146 FSMState CloseDownstream(const FSMEventArgs& event_args);
147 FSMState AbortSilently(const FSMEventArgs& event_args);
148 FSMState AbortWithError(const FSMEventArgs& event_args);
149 FSMState Abort(SpeechRecognitionErrorCode error);
150 FSMState DoNothing(const FSMEventArgs& event_args);
151 FSMState NotFeasible(const FSMEventArgs& event_args);
152
153 std::string GetAcceptedLanguages() const;
154 std::string GenerateRequestKey() const;
155
156 // Uploads a single chunk of audio data. Handles both unframed and framed
157 // upload formats, and uses the appropriate one.
158 void UploadAudioChunk(const std::string& data, FrameType type, bool is_final);
159
160 SpeechRecognitionEngineConfig config_;
161 std::unique_ptr<net::URLFetcher> upstream_fetcher_;  // Presumably the chunked POST (audio) connection described in the class comment.
162 std::unique_ptr<net::URLFetcher> downstream_fetcher_;  // Presumably the chunked GET (results) connection described in the class comment.
163 scoped_refptr<net::URLRequestContextGetter> url_context_;
164 std::unique_ptr<AudioEncoder> encoder_;
165 std::unique_ptr<AudioEncoder> preamble_encoder_;
166 ChunkedByteBuffer chunked_byte_buffer_;
167 size_t previous_response_length_;
168 bool got_last_definitive_result_;
169 bool is_dispatching_event_;  // NOTE(review): looks like a re-entrancy flag for DispatchEvent() -- confirm in the .cc.
170 bool use_framed_post_data_;  // NOTE(review): presumably selects the framed vs. unframed format in UploadAudioChunk() -- confirm.
171 FSMState state_;  // Presumably the FSM's current state.
172
173 DISALLOW_COPY_AND_ASSIGN(GoogleStreamingRemoteEngine);
174 };
175
176 } // namespace content
177
178 #endif // CONTENT_BROWSER_SPEECH_GOOGLE_STREAMING_REMOTE_ENGINE_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698