content/browser/speech/google_streaming_remote_engine.h - Issue 1891543002: Devirtualize SpeechRecognitionEngine

Side by Side Diff: content/browser/speech/google_streaming_remote_engine.h

Issue 1891543002: Devirtualize SpeechRecognitionEngine (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@kill_one_shot_engine

Patch Set: drop an include Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #ifndef CONTENT_BROWSER_SPEECH_GOOGLE_STREAMING_REMOTE_ENGINE_H_

6 #define CONTENT_BROWSER_SPEECH_GOOGLE_STREAMING_REMOTE_ENGINE_H_

7

8 #include <stddef.h>

9 #include <stdint.h>

10

11 #include <memory>

12 #include <string>

13 #include <vector>

14

15 #include "base/macros.h"

16 #include "base/memory/ref_counted.h"

17 #include "base/threading/non_thread_safe.h"

18 #include "content/browser/speech/audio_encoder.h"

19 #include "content/browser/speech/chunked_byte_buffer.h"

20 #include "content/browser/speech/speech_recognition_engine.h"

21 #include "content/common/content_export.h"

22 #include "content/public/common/speech_recognition_error.h"

23 #include "net/url_request/url_fetcher_delegate.h"

24

25 namespace net {

26 class URLRequestContextGetter;

27 }

28

29 namespace content {

30

31 class AudioChunk;

32 struct SpeechRecognitionError;

33 struct SpeechRecognitionResult;

34

35 // Implements a SpeechRecognitionEngine supporting continuous recognition by

36 // means of interaction with Google streaming speech recognition webservice.

37 // In more detail, this class establishes two HTTP(S) connections with the

38 // webservice, for each session, herein called "upstream" and "downstream".

39 // Audio chunks are sent on the upstream by means of a chunked HTTP POST upload.

40 // Recognition results are retrieved in a full-duplex fashion (i.e. while

41 // pushing audio on the upstream) on the downstream by means of a chunked

42 // HTTP GET request. Pairing between the two streams is handled through a

43 // randomly generated key, unique for each request, which is passed in the

44 // &pair= arg to both stream request URLs.

45 // In the case of a regular session, the upstream is closed when the audio

46 // capture ends (notified through an |AudioChunksEnded| call) and the downstream

47 // waits for a corresponding server closure (some late results may still

48 // arrive after the upstream is closed).

49 // Both streams are guaranteed to be closed when |EndRecognition| is called.

50 class CONTENT_EXPORT GoogleStreamingRemoteEngine

51 : public NON_EXPORTED_BASE(SpeechRecognitionEngine),

52 public net::URLFetcherDelegate,

53 public NON_EXPORTED_BASE(base::NonThreadSafe) {

54 public:

55 // Duration of each audio packet, in milliseconds.

56 static const int kAudioPacketIntervalMs;

57

58 // IDs passed to URLFetcher::Create(). Used for testing.

59 static const int kUpstreamUrlFetcherIdForTesting;

60 static const int kDownstreamUrlFetcherIdForTesting;

61

62 explicit GoogleStreamingRemoteEngine(net::URLRequestContextGetter* context);

63 ~GoogleStreamingRemoteEngine() override;

64

65 // SpeechRecognitionEngine methods.

66 void SetConfig(const SpeechRecognitionEngineConfig& config) override;

67 void StartRecognition() override;

68 void EndRecognition() override;

69 void TakeAudioChunk(const AudioChunk& data) override;

70 void AudioChunksEnded() override;

71 bool IsRecognitionPending() const override;

72 int GetDesiredAudioChunkDurationMs() const override;

73

74 // net::URLFetcherDelegate methods.

75 void OnURLFetchComplete(const net::URLFetcher* source) override;

76 void OnURLFetchDownloadProgress(const net::URLFetcher* source,

77 int64_t current,

78 int64_t total) override;

79

80 private:

81 // Response status codes from the speech recognition webservice.

82 static const int kWebserviceStatusNoError;

83 static const int kWebserviceStatusErrorNoMatch;

84

85 // Frame type for framed POST data. Do NOT change these. They must match

86 // values the server expects.

87 enum FrameType {

88 FRAME_PREAMBLE_AUDIO = 0,

89 FRAME_RECOGNITION_AUDIO = 1

90 };

91

92 // Data types for the internal Finite State Machine (FSM).

93 enum FSMState {

94 STATE_IDLE = 0,

95 STATE_BOTH_STREAMS_CONNECTED,

96 STATE_WAITING_DOWNSTREAM_RESULTS,

97 STATE_MAX_VALUE = STATE_WAITING_DOWNSTREAM_RESULTS  // Alias of the last state.

98 };

99

100 enum FSMEvent {

101 EVENT_END_RECOGNITION = 0,

102 EVENT_START_RECOGNITION,

103 EVENT_AUDIO_CHUNK,

104 EVENT_AUDIO_CHUNKS_ENDED,

105 EVENT_UPSTREAM_ERROR,

106 EVENT_DOWNSTREAM_ERROR,

107 EVENT_DOWNSTREAM_RESPONSE,

108 EVENT_DOWNSTREAM_CLOSED,

109 EVENT_MAX_VALUE = EVENT_DOWNSTREAM_CLOSED  // Alias of the last event.

110 };

111

112 struct FSMEventArgs {

113 explicit FSMEventArgs(FSMEvent event_value);

114 ~FSMEventArgs();

115

116 FSMEvent event;

117

118 // In case of EVENT_AUDIO_CHUNK, holds the chunk pushed by |TakeAudioChunk|.

119 scoped_refptr<const AudioChunk> audio_data;

120

121 // In case of EVENT_DOWNSTREAM_RESPONSE, holds the current chunk bytes.

122 std::unique_ptr<std::vector<uint8_t>> response;

123

124 private:

125 DISALLOW_COPY_AND_ASSIGN(FSMEventArgs);

126 };

127

128 // Invoked by both upstream and downstream URLFetcher callbacks to handle

129 // new chunk data, connection closed or error notifications.

130 void DispatchHTTPResponse(const net::URLFetcher* source,

131 bool end_of_response);

132

133 // Entry point for pushing any new external event into the recognizer FSM.

134 void DispatchEvent(const FSMEventArgs& event_args);

135

136 // Defines the behavior of the recognizer FSM, selecting the appropriate

137 // transition according to the current state and event.

138 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& event_args);

139

140 // The methods below handle transitions of the recognizer FSM.

141 FSMState ConnectBothStreams(const FSMEventArgs& event_args);

142 FSMState TransmitAudioUpstream(const FSMEventArgs& event_args);

143 FSMState ProcessDownstreamResponse(const FSMEventArgs& event_args);

144 FSMState RaiseNoMatchErrorIfGotNoResults(const FSMEventArgs& event_args);

145 FSMState CloseUpstreamAndWaitForResults(const FSMEventArgs& event_args);

146 FSMState CloseDownstream(const FSMEventArgs& event_args);

147 FSMState AbortSilently(const FSMEventArgs& event_args);

148 FSMState AbortWithError(const FSMEventArgs& event_args);

149 FSMState Abort(SpeechRecognitionErrorCode error);

150 FSMState DoNothing(const FSMEventArgs& event_args);

151 FSMState NotFeasible(const FSMEventArgs& event_args);

152

153 std::string GetAcceptedLanguages() const;

154 std::string GenerateRequestKey() const;

155

156 // Upload a single chunk of audio data. Handles both unframed and framed

157 // upload formats, and uses the appropriate one.

158 void UploadAudioChunk(const std::string& data, FrameType type, bool is_final);

159

160 SpeechRecognitionEngineConfig config_;

161 std::unique_ptr<net::URLFetcher> upstream_fetcher_;

162 std::unique_ptr<net::URLFetcher> downstream_fetcher_;

163 scoped_refptr<net::URLRequestContextGetter> url_context_;

164 std::unique_ptr<AudioEncoder> encoder_;

165 std::unique_ptr<AudioEncoder> preamble_encoder_;

166 ChunkedByteBuffer chunked_byte_buffer_;

167 size_t previous_response_length_;

168 bool got_last_definitive_result_;

169 bool is_dispatching_event_;  // NOTE(review): presumably a re-entrancy guard for DispatchEvent; confirm in the .cc.

170 bool use_framed_post_data_;

171 FSMState state_;  // Current state of the recognizer FSM.

172

173 DISALLOW_COPY_AND_ASSIGN(GoogleStreamingRemoteEngine);

174 };

175

176 } // namespace content

177

178 #endif // CONTENT_BROWSER_SPEECH_GOOGLE_STREAMING_REMOTE_ENGINE_H_

OLD	NEW

« no previous file with comments | « content/browser/BUILD.gn ('k') | content/browser/speech/google_streaming_remote_engine.cc » ('j') | content/browser/speech/speech_recognition_engine.h » ('J')