| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "content/browser/speech/google_streaming_remote_engine.h" | 5 #include "content/browser/speech/speech_recognition_engine.h" |
| 6 | 6 |
| 7 #include <stddef.h> | 7 #include <stddef.h> |
| 8 #include <stdint.h> | 8 #include <stdint.h> |
| 9 | 9 |
| 10 #include <memory> | 10 #include <memory> |
| 11 #include <queue> | 11 #include <queue> |
| 12 | 12 |
| 13 #include "base/big_endian.h" | 13 #include "base/big_endian.h" |
| 14 #include "base/message_loop/message_loop.h" | 14 #include "base/message_loop/message_loop.h" |
| 15 #include "base/numerics/safe_conversions.h" | 15 #include "base/numerics/safe_conversions.h" |
| (...skipping 17 matching lines...) Expand all Loading... |
| 33 | 33 |
| 34 namespace content { | 34 namespace content { |
| 35 | 35 |
| 36 // Frame types for framed POST data. | 36 // Frame types for framed POST data. |
| 37 static const uint32_t kFrameTypePreamble = 0; | 37 static const uint32_t kFrameTypePreamble = 0; |
| 38 static const uint32_t kFrameTypeRecognitionAudio = 1; | 38 static const uint32_t kFrameTypeRecognitionAudio = 1; |
| 39 | 39 |
| 40 // Note: the terms upstream and downstream are from the point-of-view of the | 40 // Note: the terms upstream and downstream are from the point-of-view of the |
| 41 // client (engine_under_test_). | 41 // client (engine_under_test_). |
| 42 | 42 |
| 43 class GoogleStreamingRemoteEngineTest : public SpeechRecognitionEngineDelegate, | 43 class SpeechRecognitionEngineTest |
| 44 public testing::Test { | 44 : public SpeechRecognitionEngine::Delegate, |
| 45 public testing::Test { |
| 45 public: | 46 public: |
| 46 GoogleStreamingRemoteEngineTest() | 47 SpeechRecognitionEngineTest() |
| 47 : last_number_of_upstream_chunks_seen_(0U), | 48 : last_number_of_upstream_chunks_seen_(0U), |
| 48 error_(SPEECH_RECOGNITION_ERROR_NONE), | 49 error_(SPEECH_RECOGNITION_ERROR_NONE), |
| 49 end_of_utterance_counter_(0) { } | 50 end_of_utterance_counter_(0) { } |
| 50 | 51 |
| 51 // Creates a speech recognition request and invokes its URL fetcher delegate | 52 // Creates a speech recognition request and invokes its URL fetcher delegate |
| 52 // with the given test data. | 53 // with the given test data. |
| 53 void CreateAndTestRequest(bool success, const std::string& http_response); | 54 void CreateAndTestRequest(bool success, const std::string& http_response); |
| 54 | 55 |
| 55 // SpeechRecognitionRequestDelegate methods. | 56 // SpeechRecognitionRequestDelegate methods. |
| 56 void OnSpeechRecognitionEngineResults( | 57 void OnSpeechRecognitionEngineResults( |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 88 void InjectDummyAudioChunk(); | 89 void InjectDummyAudioChunk(); |
| 89 size_t UpstreamChunksUploadedFromLastCall(); | 90 size_t UpstreamChunksUploadedFromLastCall(); |
| 90 std::string LastUpstreamChunkUploaded(); | 91 std::string LastUpstreamChunkUploaded(); |
| 91 void ProvideMockProtoResultDownstream( | 92 void ProvideMockProtoResultDownstream( |
| 92 const proto::SpeechRecognitionEvent& result); | 93 const proto::SpeechRecognitionEvent& result); |
| 93 void ProvideMockResultDownstream(const SpeechRecognitionResult& result); | 94 void ProvideMockResultDownstream(const SpeechRecognitionResult& result); |
| 94 void ExpectResultsReceived(const SpeechRecognitionResults& result); | 95 void ExpectResultsReceived(const SpeechRecognitionResults& result); |
| 95 void ExpectFramedChunk(const std::string& chunk, uint32_t type); | 96 void ExpectFramedChunk(const std::string& chunk, uint32_t type); |
| 96 void CloseMockDownstream(DownstreamError error); | 97 void CloseMockDownstream(DownstreamError error); |
| 97 | 98 |
| 98 std::unique_ptr<GoogleStreamingRemoteEngine> engine_under_test_; | 99 std::unique_ptr<SpeechRecognitionEngine> engine_under_test_; |
| 99 TestURLFetcherFactory url_fetcher_factory_; | 100 TestURLFetcherFactory url_fetcher_factory_; |
| 100 size_t last_number_of_upstream_chunks_seen_; | 101 size_t last_number_of_upstream_chunks_seen_; |
| 101 base::MessageLoop message_loop_; | 102 base::MessageLoop message_loop_; |
| 102 std::string response_buffer_; | 103 std::string response_buffer_; |
| 103 SpeechRecognitionErrorCode error_; | 104 SpeechRecognitionErrorCode error_; |
| 104 int end_of_utterance_counter_; | 105 int end_of_utterance_counter_; |
| 105 std::queue<SpeechRecognitionResults> results_; | 106 std::queue<SpeechRecognitionResults> results_; |
| 106 }; | 107 }; |
| 107 | 108 |
| 108 TEST_F(GoogleStreamingRemoteEngineTest, SingleDefinitiveResult) { | 109 TEST_F(SpeechRecognitionEngineTest, SingleDefinitiveResult) { |
| 109 StartMockRecognition(); | 110 StartMockRecognition(); |
| 110 ASSERT_TRUE(GetUpstreamFetcher()); | 111 ASSERT_TRUE(GetUpstreamFetcher()); |
| 111 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall()); | 112 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall()); |
| 112 | 113 |
| 113 // Inject some dummy audio chunks and check a corresponding chunked upload | 114 // Inject some dummy audio chunks and check a corresponding chunked upload |
| 114 // is performed every time on the server. | 115 // is performed every time on the server. |
| 115 for (int i = 0; i < 3; ++i) { | 116 for (int i = 0; i < 3; ++i) { |
| 116 InjectDummyAudioChunk(); | 117 InjectDummyAudioChunk(); |
| 117 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall()); | 118 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall()); |
| 118 } | 119 } |
| (...skipping 19 matching lines...) Expand all Loading... |
| 138 ASSERT_TRUE(engine_under_test_->IsRecognitionPending()); | 139 ASSERT_TRUE(engine_under_test_->IsRecognitionPending()); |
| 139 | 140 |
| 140 // Ensure everything is closed cleanly after the downstream is closed. | 141 // Ensure everything is closed cleanly after the downstream is closed. |
| 141 CloseMockDownstream(DOWNSTREAM_ERROR_NONE); | 142 CloseMockDownstream(DOWNSTREAM_ERROR_NONE); |
| 142 ASSERT_FALSE(engine_under_test_->IsRecognitionPending()); | 143 ASSERT_FALSE(engine_under_test_->IsRecognitionPending()); |
| 143 EndMockRecognition(); | 144 EndMockRecognition(); |
| 144 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_); | 145 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_); |
| 145 ASSERT_EQ(0U, results_.size()); | 146 ASSERT_EQ(0U, results_.size()); |
| 146 } | 147 } |
| 147 | 148 |
| 148 TEST_F(GoogleStreamingRemoteEngineTest, SeveralStreamingResults) { | 149 TEST_F(SpeechRecognitionEngineTest, SeveralStreamingResults) { |
| 149 StartMockRecognition(); | 150 StartMockRecognition(); |
| 150 ASSERT_TRUE(GetUpstreamFetcher()); | 151 ASSERT_TRUE(GetUpstreamFetcher()); |
| 151 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall()); | 152 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall()); |
| 152 | 153 |
| 153 for (int i = 0; i < 4; ++i) { | 154 for (int i = 0; i < 4; ++i) { |
| 154 InjectDummyAudioChunk(); | 155 InjectDummyAudioChunk(); |
| 155 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall()); | 156 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall()); |
| 156 | 157 |
| 157 SpeechRecognitionResults results; | 158 SpeechRecognitionResults results; |
| 158 results.push_back(SpeechRecognitionResult()); | 159 results.push_back(SpeechRecognitionResult()); |
| (...skipping 25 matching lines...) Expand all Loading... |
| 184 ASSERT_TRUE(engine_under_test_->IsRecognitionPending()); | 185 ASSERT_TRUE(engine_under_test_->IsRecognitionPending()); |
| 185 | 186 |
| 186 // Ensure everything is closed cleanly after the downstream is closed. | 187 // Ensure everything is closed cleanly after the downstream is closed. |
| 187 CloseMockDownstream(DOWNSTREAM_ERROR_NONE); | 188 CloseMockDownstream(DOWNSTREAM_ERROR_NONE); |
| 188 ASSERT_FALSE(engine_under_test_->IsRecognitionPending()); | 189 ASSERT_FALSE(engine_under_test_->IsRecognitionPending()); |
| 189 EndMockRecognition(); | 190 EndMockRecognition(); |
| 190 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_); | 191 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_); |
| 191 ASSERT_EQ(0U, results_.size()); | 192 ASSERT_EQ(0U, results_.size()); |
| 192 } | 193 } |
| 193 | 194 |
| 194 TEST_F(GoogleStreamingRemoteEngineTest, NoFinalResultAfterAudioChunksEnded) { | 195 TEST_F(SpeechRecognitionEngineTest, NoFinalResultAfterAudioChunksEnded) { |
| 195 StartMockRecognition(); | 196 StartMockRecognition(); |
| 196 ASSERT_TRUE(GetUpstreamFetcher()); | 197 ASSERT_TRUE(GetUpstreamFetcher()); |
| 197 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall()); | 198 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall()); |
| 198 | 199 |
| 199 // Simulate one pushed audio chunk. | 200 // Simulate one pushed audio chunk. |
| 200 InjectDummyAudioChunk(); | 201 InjectDummyAudioChunk(); |
| 201 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall()); | 202 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall()); |
| 202 | 203 |
| 203 // Simulate the corresponding definitive result. | 204 // Simulate the corresponding definitive result. |
| 204 SpeechRecognitionResults results; | 205 SpeechRecognitionResults results; |
| (...skipping 16 matching lines...) Expand all Loading... |
| 221 SpeechRecognitionResults empty_results; | 222 SpeechRecognitionResults empty_results; |
| 222 ExpectResultsReceived(empty_results); | 223 ExpectResultsReceived(empty_results); |
| 223 | 224 |
| 224 // Ensure everything is closed cleanly after the downstream is closed. | 225 // Ensure everything is closed cleanly after the downstream is closed. |
| 225 ASSERT_FALSE(engine_under_test_->IsRecognitionPending()); | 226 ASSERT_FALSE(engine_under_test_->IsRecognitionPending()); |
| 226 EndMockRecognition(); | 227 EndMockRecognition(); |
| 227 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_); | 228 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_); |
| 228 ASSERT_EQ(0U, results_.size()); | 229 ASSERT_EQ(0U, results_.size()); |
| 229 } | 230 } |
| 230 | 231 |
| 231 TEST_F(GoogleStreamingRemoteEngineTest, NoMatchError) { | 232 TEST_F(SpeechRecognitionEngineTest, NoMatchError) { |
| 232 StartMockRecognition(); | 233 StartMockRecognition(); |
| 233 ASSERT_TRUE(GetUpstreamFetcher()); | 234 ASSERT_TRUE(GetUpstreamFetcher()); |
| 234 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall()); | 235 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall()); |
| 235 | 236 |
| 236 for (int i = 0; i < 3; ++i) | 237 for (int i = 0; i < 3; ++i) |
| 237 InjectDummyAudioChunk(); | 238 InjectDummyAudioChunk(); |
| 238 engine_under_test_->AudioChunksEnded(); | 239 engine_under_test_->AudioChunksEnded(); |
| 239 ASSERT_EQ(4U, UpstreamChunksUploadedFromLastCall()); | 240 ASSERT_EQ(4U, UpstreamChunksUploadedFromLastCall()); |
| 240 ASSERT_TRUE(engine_under_test_->IsRecognitionPending()); | 241 ASSERT_TRUE(engine_under_test_->IsRecognitionPending()); |
| 241 | 242 |
| (...skipping 10 matching lines...) Expand all Loading... |
| 252 | 253 |
| 253 CloseMockDownstream(DOWNSTREAM_ERROR_WEBSERVICE_NO_MATCH); | 254 CloseMockDownstream(DOWNSTREAM_ERROR_WEBSERVICE_NO_MATCH); |
| 254 | 255 |
| 255 // Expect an empty result. | 256 // Expect an empty result. |
| 256 ASSERT_FALSE(engine_under_test_->IsRecognitionPending()); | 257 ASSERT_FALSE(engine_under_test_->IsRecognitionPending()); |
| 257 EndMockRecognition(); | 258 EndMockRecognition(); |
| 258 SpeechRecognitionResults empty_result; | 259 SpeechRecognitionResults empty_result; |
| 259 ExpectResultsReceived(empty_result); | 260 ExpectResultsReceived(empty_result); |
| 260 } | 261 } |
| 261 | 262 |
| 262 TEST_F(GoogleStreamingRemoteEngineTest, HTTPError) { | 263 TEST_F(SpeechRecognitionEngineTest, HTTPError) { |
| 263 StartMockRecognition(); | 264 StartMockRecognition(); |
| 264 ASSERT_TRUE(GetUpstreamFetcher()); | 265 ASSERT_TRUE(GetUpstreamFetcher()); |
| 265 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall()); | 266 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall()); |
| 266 | 267 |
| 267 InjectDummyAudioChunk(); | 268 InjectDummyAudioChunk(); |
| 268 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall()); | 269 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall()); |
| 269 | 270 |
| 270 // Close the downstream with a HTTP 500 error. | 271 // Close the downstream with a HTTP 500 error. |
| 271 CloseMockDownstream(DOWNSTREAM_ERROR_HTTP500); | 272 CloseMockDownstream(DOWNSTREAM_ERROR_HTTP500); |
| 272 | 273 |
| 273 // Expect a SPEECH_RECOGNITION_ERROR_NETWORK error to be raised. | 274 // Expect a SPEECH_RECOGNITION_ERROR_NETWORK error to be raised. |
| 274 ASSERT_FALSE(engine_under_test_->IsRecognitionPending()); | 275 ASSERT_FALSE(engine_under_test_->IsRecognitionPending()); |
| 275 EndMockRecognition(); | 276 EndMockRecognition(); |
| 276 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK, error_); | 277 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK, error_); |
| 277 ASSERT_EQ(0U, results_.size()); | 278 ASSERT_EQ(0U, results_.size()); |
| 278 } | 279 } |
| 279 | 280 |
| 280 TEST_F(GoogleStreamingRemoteEngineTest, NetworkError) { | 281 TEST_F(SpeechRecognitionEngineTest, NetworkError) { |
| 281 StartMockRecognition(); | 282 StartMockRecognition(); |
| 282 ASSERT_TRUE(GetUpstreamFetcher()); | 283 ASSERT_TRUE(GetUpstreamFetcher()); |
| 283 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall()); | 284 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall()); |
| 284 | 285 |
| 285 InjectDummyAudioChunk(); | 286 InjectDummyAudioChunk(); |
| 286 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall()); | 287 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall()); |
| 287 | 288 |
| 288 // Close the downstream fetcher simulating a network failure. | 289 // Close the downstream fetcher simulating a network failure. |
| 289 CloseMockDownstream(DOWNSTREAM_ERROR_NETWORK); | 290 CloseMockDownstream(DOWNSTREAM_ERROR_NETWORK); |
| 290 | 291 |
| 291 // Expect a SPEECH_RECOGNITION_ERROR_NETWORK error to be raised. | 292 // Expect a SPEECH_RECOGNITION_ERROR_NETWORK error to be raised. |
| 292 ASSERT_FALSE(engine_under_test_->IsRecognitionPending()); | 293 ASSERT_FALSE(engine_under_test_->IsRecognitionPending()); |
| 293 EndMockRecognition(); | 294 EndMockRecognition(); |
| 294 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK, error_); | 295 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NETWORK, error_); |
| 295 ASSERT_EQ(0U, results_.size()); | 296 ASSERT_EQ(0U, results_.size()); |
| 296 } | 297 } |
| 297 | 298 |
| 298 TEST_F(GoogleStreamingRemoteEngineTest, Stability) { | 299 TEST_F(SpeechRecognitionEngineTest, Stability) { |
| 299 StartMockRecognition(); | 300 StartMockRecognition(); |
| 300 ASSERT_TRUE(GetUpstreamFetcher()); | 301 ASSERT_TRUE(GetUpstreamFetcher()); |
| 301 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall()); | 302 ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall()); |
| 302 | 303 |
| 303 // Upload a dummy audio chunk. | 304 // Upload a dummy audio chunk. |
| 304 InjectDummyAudioChunk(); | 305 InjectDummyAudioChunk(); |
| 305 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall()); | 306 ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall()); |
| 306 engine_under_test_->AudioChunksEnded(); | 307 engine_under_test_->AudioChunksEnded(); |
| 307 | 308 |
| 308 // Simulate a protobuf message with an intermediate result without confidence, | 309 // Simulate a protobuf message with an intermediate result without confidence, |
| (...skipping 26 matching lines...) Expand all Loading... |
| 335 ASSERT_FALSE(engine_under_test_->IsRecognitionPending()); | 336 ASSERT_FALSE(engine_under_test_->IsRecognitionPending()); |
| 336 EndMockRecognition(); | 337 EndMockRecognition(); |
| 337 | 338 |
| 338 // Since there was no final result, we get an empty "no match" result. | 339 // Since there was no final result, we get an empty "no match" result. |
| 339 SpeechRecognitionResults empty_result; | 340 SpeechRecognitionResults empty_result; |
| 340 ExpectResultsReceived(empty_result); | 341 ExpectResultsReceived(empty_result); |
| 341 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_); | 342 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_); |
| 342 ASSERT_EQ(0U, results_.size()); | 343 ASSERT_EQ(0U, results_.size()); |
| 343 } | 344 } |
| 344 | 345 |
| 345 TEST_F(GoogleStreamingRemoteEngineTest, EndOfUtterance) { | 346 TEST_F(SpeechRecognitionEngineTest, EndOfUtterance) { |
| 346 StartMockRecognition(); | 347 StartMockRecognition(); |
| 347 ASSERT_TRUE(GetUpstreamFetcher()); | 348 ASSERT_TRUE(GetUpstreamFetcher()); |
| 348 | 349 |
| 349 // Simulate a END_OF_UTTERANCE proto event with continuous true. | 350 // Simulate a END_OF_UTTERANCE proto event with continuous true. |
| 350 SpeechRecognitionEngine::Config config; | 351 SpeechRecognitionEngine::Config config; |
| 351 config.continuous = true; | 352 config.continuous = true; |
| 352 engine_under_test_->SetConfig(config); | 353 engine_under_test_->SetConfig(config); |
| 353 proto::SpeechRecognitionEvent proto_event; | 354 proto::SpeechRecognitionEvent proto_event; |
| 354 proto_event.set_endpoint(proto::SpeechRecognitionEvent::END_OF_UTTERANCE); | 355 proto_event.set_endpoint(proto::SpeechRecognitionEvent::END_OF_UTTERANCE); |
| 355 ASSERT_EQ(0, end_of_utterance_counter_); | 356 ASSERT_EQ(0, end_of_utterance_counter_); |
| 356 ProvideMockProtoResultDownstream(proto_event); | 357 ProvideMockProtoResultDownstream(proto_event); |
| 357 ASSERT_EQ(0, end_of_utterance_counter_); | 358 ASSERT_EQ(0, end_of_utterance_counter_); |
| 358 | 359 |
| 359 // Simulate a END_OF_UTTERANCE proto event with continuous false. | 360 // Simulate a END_OF_UTTERANCE proto event with continuous false. |
| 360 config.continuous = false; | 361 config.continuous = false; |
| 361 engine_under_test_->SetConfig(config); | 362 engine_under_test_->SetConfig(config); |
| 362 ProvideMockProtoResultDownstream(proto_event); | 363 ProvideMockProtoResultDownstream(proto_event); |
| 363 ASSERT_EQ(1, end_of_utterance_counter_); | 364 ASSERT_EQ(1, end_of_utterance_counter_); |
| 364 | 365 |
| 365 // Shut down. | 366 // Shut down. |
| 366 CloseMockDownstream(DOWNSTREAM_ERROR_NONE); | 367 CloseMockDownstream(DOWNSTREAM_ERROR_NONE); |
| 367 EndMockRecognition(); | 368 EndMockRecognition(); |
| 368 } | 369 } |
| 369 | 370 |
| 370 TEST_F(GoogleStreamingRemoteEngineTest, SendPreamble) { | 371 TEST_F(SpeechRecognitionEngineTest, SendPreamble) { |
| 371 const size_t kPreambleLength = 100; | 372 const size_t kPreambleLength = 100; |
| 372 scoped_refptr<SpeechRecognitionSessionPreamble> preamble = | 373 scoped_refptr<SpeechRecognitionSessionPreamble> preamble = |
| 373 new SpeechRecognitionSessionPreamble(); | 374 new SpeechRecognitionSessionPreamble(); |
| 374 preamble->sample_rate = 16000; | 375 preamble->sample_rate = 16000; |
| 375 preamble->sample_depth = 2; | 376 preamble->sample_depth = 2; |
| 376 preamble->sample_data.assign(kPreambleLength, 0); | 377 preamble->sample_data.assign(kPreambleLength, 0); |
| 377 SpeechRecognitionEngine::Config config; | 378 SpeechRecognitionEngine::Config config; |
| 378 config.auth_token = "foo"; | 379 config.auth_token = "foo"; |
| 379 config.auth_scope = "bar"; | 380 config.auth_scope = "bar"; |
| 380 config.preamble = preamble; | 381 config.preamble = preamble; |
| (...skipping 29 matching lines...) Expand all Loading... |
| 410 ASSERT_TRUE(engine_under_test_->IsRecognitionPending()); | 411 ASSERT_TRUE(engine_under_test_->IsRecognitionPending()); |
| 411 | 412 |
| 412 // Ensure everything is closed cleanly after the downstream is closed. | 413 // Ensure everything is closed cleanly after the downstream is closed. |
| 413 CloseMockDownstream(DOWNSTREAM_ERROR_NONE); | 414 CloseMockDownstream(DOWNSTREAM_ERROR_NONE); |
| 414 ASSERT_FALSE(engine_under_test_->IsRecognitionPending()); | 415 ASSERT_FALSE(engine_under_test_->IsRecognitionPending()); |
| 415 EndMockRecognition(); | 416 EndMockRecognition(); |
| 416 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_); | 417 ASSERT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_); |
| 417 ASSERT_EQ(0U, results_.size()); | 418 ASSERT_EQ(0U, results_.size()); |
| 418 } | 419 } |
| 419 | 420 |
| 420 void GoogleStreamingRemoteEngineTest::SetUp() { | 421 void SpeechRecognitionEngineTest::SetUp() { |
| 421 engine_under_test_.reset( | 422 engine_under_test_.reset( |
| 422 new GoogleStreamingRemoteEngine(NULL /*URLRequestContextGetter*/)); | 423 new SpeechRecognitionEngine(NULL /*URLRequestContextGetter*/)); |
| 423 engine_under_test_->set_delegate(this); | 424 engine_under_test_->set_delegate(this); |
| 424 } | 425 } |
| 425 | 426 |
| 426 void GoogleStreamingRemoteEngineTest::TearDown() { | 427 void SpeechRecognitionEngineTest::TearDown() { |
| 427 engine_under_test_.reset(); | 428 engine_under_test_.reset(); |
| 428 } | 429 } |
| 429 | 430 |
| 430 TestURLFetcher* GoogleStreamingRemoteEngineTest::GetUpstreamFetcher() { | 431 TestURLFetcher* SpeechRecognitionEngineTest::GetUpstreamFetcher() { |
| 431 return url_fetcher_factory_.GetFetcherByID( | 432 return url_fetcher_factory_.GetFetcherByID( |
| 432 GoogleStreamingRemoteEngine::kUpstreamUrlFetcherIdForTesting); | 433 SpeechRecognitionEngine::kUpstreamUrlFetcherIdForTesting); |
| 433 } | 434 } |
| 434 | 435 |
| 435 TestURLFetcher* GoogleStreamingRemoteEngineTest::GetDownstreamFetcher() { | 436 TestURLFetcher* SpeechRecognitionEngineTest::GetDownstreamFetcher() { |
| 436 return url_fetcher_factory_.GetFetcherByID( | 437 return url_fetcher_factory_.GetFetcherByID( |
| 437 GoogleStreamingRemoteEngine::kDownstreamUrlFetcherIdForTesting); | 438 SpeechRecognitionEngine::kDownstreamUrlFetcherIdForTesting); |
| 438 } | 439 } |
| 439 | 440 |
| 440 // Starts recognition on the engine, ensuring that both stream fetchers are | 441 // Starts recognition on the engine, ensuring that both stream fetchers are |
| 441 // created. | 442 // created. |
| 442 void GoogleStreamingRemoteEngineTest::StartMockRecognition() { | 443 void SpeechRecognitionEngineTest::StartMockRecognition() { |
| 443 DCHECK(engine_under_test_.get()); | 444 DCHECK(engine_under_test_.get()); |
| 444 | 445 |
| 445 ASSERT_FALSE(engine_under_test_->IsRecognitionPending()); | 446 ASSERT_FALSE(engine_under_test_->IsRecognitionPending()); |
| 446 | 447 |
| 447 engine_under_test_->StartRecognition(); | 448 engine_under_test_->StartRecognition(); |
| 448 ASSERT_TRUE(engine_under_test_->IsRecognitionPending()); | 449 ASSERT_TRUE(engine_under_test_->IsRecognitionPending()); |
| 449 | 450 |
| 450 TestURLFetcher* upstream_fetcher = GetUpstreamFetcher(); | 451 TestURLFetcher* upstream_fetcher = GetUpstreamFetcher(); |
| 451 ASSERT_TRUE(upstream_fetcher); | 452 ASSERT_TRUE(upstream_fetcher); |
| 452 upstream_fetcher->set_url(upstream_fetcher->GetOriginalURL()); | 453 upstream_fetcher->set_url(upstream_fetcher->GetOriginalURL()); |
| 453 | 454 |
| 454 TestURLFetcher* downstream_fetcher = GetDownstreamFetcher(); | 455 TestURLFetcher* downstream_fetcher = GetDownstreamFetcher(); |
| 455 ASSERT_TRUE(downstream_fetcher); | 456 ASSERT_TRUE(downstream_fetcher); |
| 456 downstream_fetcher->set_url(downstream_fetcher->GetOriginalURL()); | 457 downstream_fetcher->set_url(downstream_fetcher->GetOriginalURL()); |
| 457 } | 458 } |
| 458 | 459 |
| 459 void GoogleStreamingRemoteEngineTest::EndMockRecognition() { | 460 void SpeechRecognitionEngineTest::EndMockRecognition() { |
| 460 DCHECK(engine_under_test_.get()); | 461 DCHECK(engine_under_test_.get()); |
| 461 engine_under_test_->EndRecognition(); | 462 engine_under_test_->EndRecognition(); |
| 462 ASSERT_FALSE(engine_under_test_->IsRecognitionPending()); | 463 ASSERT_FALSE(engine_under_test_->IsRecognitionPending()); |
| 463 | 464 |
| 464 // TODO(primiano): In order to be very pedantic we should check that both the | 465 // TODO(primiano): In order to be very pedantic we should check that both the |
| 465 // upstream and downstream URL fetchers have been disposed at this time. | 466 // upstream and downstream URL fetchers have been disposed at this time. |
| 466 // Unfortunately it seems that there is no direct way to detect (in tests) | 467 // Unfortunately it seems that there is no direct way to detect (in tests) |
| 467 // if a url_fetcher has been freed or not, since they are not automatically | 468 // if a url_fetcher has been freed or not, since they are not automatically |
| 468 // de-registered from the TestURLFetcherFactory on destruction. | 469 // de-registered from the TestURLFetcherFactory on destruction. |
| 469 } | 470 } |
| 470 | 471 |
| 471 void GoogleStreamingRemoteEngineTest::InjectDummyAudioChunk() { | 472 void SpeechRecognitionEngineTest::InjectDummyAudioChunk() { |
| 472 unsigned char dummy_audio_buffer_data[2] = {'\0', '\0'}; | 473 unsigned char dummy_audio_buffer_data[2] = {'\0', '\0'}; |
| 473 scoped_refptr<AudioChunk> dummy_audio_chunk( | 474 scoped_refptr<AudioChunk> dummy_audio_chunk( |
| 474 new AudioChunk(&dummy_audio_buffer_data[0], | 475 new AudioChunk(&dummy_audio_buffer_data[0], |
| 475 sizeof(dummy_audio_buffer_data), | 476 sizeof(dummy_audio_buffer_data), |
| 476 2 /* bytes per sample */)); | 477 2 /* bytes per sample */)); |
| 477 DCHECK(engine_under_test_.get()); | 478 DCHECK(engine_under_test_.get()); |
| 478 engine_under_test_->TakeAudioChunk(*dummy_audio_chunk.get()); | 479 engine_under_test_->TakeAudioChunk(*dummy_audio_chunk.get()); |
| 479 } | 480 } |
| 480 | 481 |
| 481 size_t GoogleStreamingRemoteEngineTest::UpstreamChunksUploadedFromLastCall() { | 482 size_t SpeechRecognitionEngineTest::UpstreamChunksUploadedFromLastCall() { |
| 482 TestURLFetcher* upstream_fetcher = GetUpstreamFetcher(); | 483 TestURLFetcher* upstream_fetcher = GetUpstreamFetcher(); |
| 483 DCHECK(upstream_fetcher); | 484 DCHECK(upstream_fetcher); |
| 484 const size_t number_of_chunks = upstream_fetcher->upload_chunks().size(); | 485 const size_t number_of_chunks = upstream_fetcher->upload_chunks().size(); |
| 485 DCHECK_GE(number_of_chunks, last_number_of_upstream_chunks_seen_); | 486 DCHECK_GE(number_of_chunks, last_number_of_upstream_chunks_seen_); |
| 486 const size_t new_chunks = number_of_chunks - | 487 const size_t new_chunks = number_of_chunks - |
| 487 last_number_of_upstream_chunks_seen_; | 488 last_number_of_upstream_chunks_seen_; |
| 488 last_number_of_upstream_chunks_seen_ = number_of_chunks; | 489 last_number_of_upstream_chunks_seen_ = number_of_chunks; |
| 489 return new_chunks; | 490 return new_chunks; |
| 490 } | 491 } |
| 491 | 492 |
| 492 std::string GoogleStreamingRemoteEngineTest::LastUpstreamChunkUploaded() { | 493 std::string SpeechRecognitionEngineTest::LastUpstreamChunkUploaded() { |
| 493 TestURLFetcher* upstream_fetcher = GetUpstreamFetcher(); | 494 TestURLFetcher* upstream_fetcher = GetUpstreamFetcher(); |
| 494 DCHECK(upstream_fetcher); | 495 DCHECK(upstream_fetcher); |
| 495 DCHECK(!upstream_fetcher->upload_chunks().empty()); | 496 DCHECK(!upstream_fetcher->upload_chunks().empty()); |
| 496 return upstream_fetcher->upload_chunks().back(); | 497 return upstream_fetcher->upload_chunks().back(); |
| 497 } | 498 } |
| 498 | 499 |
| 499 void GoogleStreamingRemoteEngineTest::ProvideMockProtoResultDownstream( | 500 void SpeechRecognitionEngineTest::ProvideMockProtoResultDownstream( |
| 500 const proto::SpeechRecognitionEvent& result) { | 501 const proto::SpeechRecognitionEvent& result) { |
| 501 TestURLFetcher* downstream_fetcher = GetDownstreamFetcher(); | 502 TestURLFetcher* downstream_fetcher = GetDownstreamFetcher(); |
| 502 | 503 |
| 503 ASSERT_TRUE(downstream_fetcher); | 504 ASSERT_TRUE(downstream_fetcher); |
| 504 downstream_fetcher->set_status(URLRequestStatus(/* default=SUCCESS */)); | 505 downstream_fetcher->set_status(URLRequestStatus(/* default=SUCCESS */)); |
| 505 downstream_fetcher->set_response_code(200); | 506 downstream_fetcher->set_response_code(200); |
| 506 | 507 |
| 507 std::string response_string = SerializeProtobufResponse(result); | 508 std::string response_string = SerializeProtobufResponse(result); |
| 508 response_buffer_.append(response_string); | 509 response_buffer_.append(response_string); |
| 509 downstream_fetcher->SetResponseString(response_buffer_); | 510 downstream_fetcher->SetResponseString(response_buffer_); |
| 510 downstream_fetcher->delegate()->OnURLFetchDownloadProgress( | 511 downstream_fetcher->delegate()->OnURLFetchDownloadProgress( |
| 511 downstream_fetcher, | 512 downstream_fetcher, |
| 512 response_buffer_.size(), | 513 response_buffer_.size(), |
| 513 -1 /* total response length not used */); | 514 -1 /* total response length not used */); |
| 514 } | 515 } |
| 515 | 516 |
| 516 void GoogleStreamingRemoteEngineTest::ProvideMockResultDownstream( | 517 void SpeechRecognitionEngineTest::ProvideMockResultDownstream( |
| 517 const SpeechRecognitionResult& result) { | 518 const SpeechRecognitionResult& result) { |
| 518 proto::SpeechRecognitionEvent proto_event; | 519 proto::SpeechRecognitionEvent proto_event; |
| 519 proto_event.set_status(proto::SpeechRecognitionEvent::STATUS_SUCCESS); | 520 proto_event.set_status(proto::SpeechRecognitionEvent::STATUS_SUCCESS); |
| 520 proto::SpeechRecognitionResult* proto_result = proto_event.add_result(); | 521 proto::SpeechRecognitionResult* proto_result = proto_event.add_result(); |
| 521 proto_result->set_final(!result.is_provisional); | 522 proto_result->set_final(!result.is_provisional); |
| 522 for (size_t i = 0; i < result.hypotheses.size(); ++i) { | 523 for (size_t i = 0; i < result.hypotheses.size(); ++i) { |
| 523 proto::SpeechRecognitionAlternative* proto_alternative = | 524 proto::SpeechRecognitionAlternative* proto_alternative = |
| 524 proto_result->add_alternative(); | 525 proto_result->add_alternative(); |
| 525 const SpeechRecognitionHypothesis& hypothesis = result.hypotheses[i]; | 526 const SpeechRecognitionHypothesis& hypothesis = result.hypotheses[i]; |
| 526 proto_alternative->set_confidence(hypothesis.confidence); | 527 proto_alternative->set_confidence(hypothesis.confidence); |
| 527 proto_alternative->set_transcript(base::UTF16ToUTF8(hypothesis.utterance)); | 528 proto_alternative->set_transcript(base::UTF16ToUTF8(hypothesis.utterance)); |
| 528 } | 529 } |
| 529 ProvideMockProtoResultDownstream(proto_event); | 530 ProvideMockProtoResultDownstream(proto_event); |
| 530 } | 531 } |
| 531 | 532 |
| 532 void GoogleStreamingRemoteEngineTest::CloseMockDownstream( | 533 void SpeechRecognitionEngineTest::CloseMockDownstream( |
| 533 DownstreamError error) { | 534 DownstreamError error) { |
| 534 TestURLFetcher* downstream_fetcher = GetDownstreamFetcher(); | 535 TestURLFetcher* downstream_fetcher = GetDownstreamFetcher(); |
| 535 ASSERT_TRUE(downstream_fetcher); | 536 ASSERT_TRUE(downstream_fetcher); |
| 536 | 537 |
| 537 const net::Error net_error = | 538 const net::Error net_error = |
| 538 (error == DOWNSTREAM_ERROR_NETWORK) ? net::ERR_FAILED : net::OK; | 539 (error == DOWNSTREAM_ERROR_NETWORK) ? net::ERR_FAILED : net::OK; |
| 539 downstream_fetcher->set_status(URLRequestStatus::FromError(net_error)); | 540 downstream_fetcher->set_status(URLRequestStatus::FromError(net_error)); |
| 540 downstream_fetcher->set_response_code( | 541 downstream_fetcher->set_response_code( |
| 541 (error == DOWNSTREAM_ERROR_HTTP500) ? 500 : 200); | 542 (error == DOWNSTREAM_ERROR_HTTP500) ? 500 : 200); |
| 542 | 543 |
| 543 if (error == DOWNSTREAM_ERROR_WEBSERVICE_NO_MATCH) { | 544 if (error == DOWNSTREAM_ERROR_WEBSERVICE_NO_MATCH) { |
| 544 // Send empty response. | 545 // Send empty response. |
| 545 proto::SpeechRecognitionEvent response; | 546 proto::SpeechRecognitionEvent response; |
| 546 response_buffer_.append(SerializeProtobufResponse(response)); | 547 response_buffer_.append(SerializeProtobufResponse(response)); |
| 547 } | 548 } |
| 548 downstream_fetcher->SetResponseString(response_buffer_); | 549 downstream_fetcher->SetResponseString(response_buffer_); |
| 549 downstream_fetcher->delegate()->OnURLFetchComplete(downstream_fetcher); | 550 downstream_fetcher->delegate()->OnURLFetchComplete(downstream_fetcher); |
| 550 } | 551 } |
| 551 | 552 |
| 552 void GoogleStreamingRemoteEngineTest::ExpectResultsReceived( | 553 void SpeechRecognitionEngineTest::ExpectResultsReceived( |
| 553 const SpeechRecognitionResults& results) { | 554 const SpeechRecognitionResults& results) { |
| 554 ASSERT_GE(1U, results_.size()); | 555 ASSERT_GE(1U, results_.size()); |
| 555 ASSERT_TRUE(ResultsAreEqual(results, results_.front())); | 556 ASSERT_TRUE(ResultsAreEqual(results, results_.front())); |
| 556 results_.pop(); | 557 results_.pop(); |
| 557 } | 558 } |
| 558 | 559 |
| 559 bool GoogleStreamingRemoteEngineTest::ResultsAreEqual( | 560 bool SpeechRecognitionEngineTest::ResultsAreEqual( |
| 560 const SpeechRecognitionResults& a, const SpeechRecognitionResults& b) { | 561 const SpeechRecognitionResults& a, const SpeechRecognitionResults& b) { |
| 561 if (a.size() != b.size()) | 562 if (a.size() != b.size()) |
| 562 return false; | 563 return false; |
| 563 | 564 |
| 564 SpeechRecognitionResults::const_iterator it_a = a.begin(); | 565 SpeechRecognitionResults::const_iterator it_a = a.begin(); |
| 565 SpeechRecognitionResults::const_iterator it_b = b.begin(); | 566 SpeechRecognitionResults::const_iterator it_b = b.begin(); |
| 566 for (; it_a != a.end() && it_b != b.end(); ++it_a, ++it_b) { | 567 for (; it_a != a.end() && it_b != b.end(); ++it_a, ++it_b) { |
| 567 if (it_a->is_provisional != it_b->is_provisional || | 568 if (it_a->is_provisional != it_b->is_provisional || |
| 568 it_a->hypotheses.size() != it_b->hypotheses.size()) { | 569 it_a->hypotheses.size() != it_b->hypotheses.size()) { |
| 569 return false; | 570 return false; |
| 570 } | 571 } |
| 571 for (size_t i = 0; i < it_a->hypotheses.size(); ++i) { | 572 for (size_t i = 0; i < it_a->hypotheses.size(); ++i) { |
| 572 const SpeechRecognitionHypothesis& hyp_a = it_a->hypotheses[i]; | 573 const SpeechRecognitionHypothesis& hyp_a = it_a->hypotheses[i]; |
| 573 const SpeechRecognitionHypothesis& hyp_b = it_b->hypotheses[i]; | 574 const SpeechRecognitionHypothesis& hyp_b = it_b->hypotheses[i]; |
| 574 if (hyp_a.utterance != hyp_b.utterance || | 575 if (hyp_a.utterance != hyp_b.utterance || |
| 575 hyp_a.confidence != hyp_b.confidence) { | 576 hyp_a.confidence != hyp_b.confidence) { |
| 576 return false; | 577 return false; |
| 577 } | 578 } |
| 578 } | 579 } |
| 579 } | 580 } |
| 580 | 581 |
| 581 return true; | 582 return true; |
| 582 } | 583 } |
| 583 | 584 |
| 584 void GoogleStreamingRemoteEngineTest::ExpectFramedChunk( | 585 void SpeechRecognitionEngineTest::ExpectFramedChunk( |
| 585 const std::string& chunk, uint32_t type) { | 586 const std::string& chunk, uint32_t type) { |
| 586 uint32_t value; | 587 uint32_t value; |
| 587 base::ReadBigEndian(&chunk[0], &value); | 588 base::ReadBigEndian(&chunk[0], &value); |
| 588 EXPECT_EQ(chunk.size() - 8, value); | 589 EXPECT_EQ(chunk.size() - 8, value); |
| 589 base::ReadBigEndian(&chunk[4], &value); | 590 base::ReadBigEndian(&chunk[4], &value); |
| 590 EXPECT_EQ(type, value); | 591 EXPECT_EQ(type, value); |
| 591 } | 592 } |
| 592 | 593 |
| 593 std::string GoogleStreamingRemoteEngineTest::SerializeProtobufResponse( | 594 std::string SpeechRecognitionEngineTest::SerializeProtobufResponse( |
| 594 const proto::SpeechRecognitionEvent& msg) { | 595 const proto::SpeechRecognitionEvent& msg) { |
| 595 std::string msg_string; | 596 std::string msg_string; |
| 596 msg.SerializeToString(&msg_string); | 597 msg.SerializeToString(&msg_string); |
| 597 | 598 |
| 598 // Prepend 4 byte prefix length indication to the protobuf message as | 599 // Prepend 4 byte prefix length indication to the protobuf message as |
| 599 // envisaged by the google streaming recognition webservice protocol. | 600 // envisaged by the google streaming recognition webservice protocol. |
| 600 uint32_t prefix = HostToNet32(checked_cast<uint32_t>(msg_string.size())); | 601 uint32_t prefix = HostToNet32(checked_cast<uint32_t>(msg_string.size())); |
| 601 msg_string.insert(0, reinterpret_cast<char*>(&prefix), sizeof(prefix)); | 602 msg_string.insert(0, reinterpret_cast<char*>(&prefix), sizeof(prefix)); |
| 602 | 603 |
| 603 return msg_string; | 604 return msg_string; |
| 604 } | 605 } |
| 605 | 606 |
| 606 } // namespace content | 607 } // namespace content |
| OLD | NEW |