Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include <stddef.h> | 5 #include <stddef.h> |
| 6 #include <stdint.h> | 6 #include <stdint.h> |
| 7 | 7 |
| 8 #include <vector> | 8 #include <vector> |
| 9 | 9 |
| | 10 #include "base/sys_byteorder.h" |
| 10 #include "content/browser/browser_thread_impl.h" | 11 #include "content/browser/browser_thread_impl.h" |
| 11 #include "content/browser/speech/google_one_shot_remote_engine.h" | 12 #include "content/browser/speech/google_streaming_remote_engine.h" |
| | 13 #include "content/browser/speech/proto/google_streaming_api.pb.h" |
|
Primiano Tucci (use gerrit)
2016/04/13 17:50:05
Just checking: does this now require a dependency
hans
2016/04/13 17:55:41
The dependency is already there because of content
| |
| 12 #include "content/browser/speech/speech_recognizer_impl.h" | 14 #include "content/browser/speech/speech_recognizer_impl.h" |
| 13 #include "content/public/browser/speech_recognition_event_listener.h" | 15 #include "content/public/browser/speech_recognition_event_listener.h" |
| 14 #include "media/audio/audio_manager_base.h" | 16 #include "media/audio/audio_manager_base.h" |
| 15 #include "media/audio/fake_audio_input_stream.h" | 17 #include "media/audio/fake_audio_input_stream.h" |
| 16 #include "media/audio/fake_audio_output_stream.h" | 18 #include "media/audio/fake_audio_output_stream.h" |
| 17 #include "media/audio/mock_audio_manager.h" | 19 #include "media/audio/mock_audio_manager.h" |
| 18 #include "media/audio/test_audio_input_controller_factory.h" | 20 #include "media/audio/test_audio_input_controller_factory.h" |
| 19 #include "media/base/audio_bus.h" | 21 #include "media/base/audio_bus.h" |
| 20 #include "net/base/net_errors.h" | 22 #include "net/base/net_errors.h" |
| 21 #include "net/url_request/test_url_fetcher_factory.h" | 23 #include "net/url_request/test_url_fetcher_factory.h" |
| (...skipping 19 matching lines...) | |
| 41 recognition_ended_(false), | 43 recognition_ended_(false), |
| 42 result_received_(false), | 44 result_received_(false), |
| 43 audio_started_(false), | 45 audio_started_(false), |
| 44 audio_ended_(false), | 46 audio_ended_(false), |
| 45 sound_started_(false), | 47 sound_started_(false), |
| 46 sound_ended_(false), | 48 sound_ended_(false), |
| 47 error_(SPEECH_RECOGNITION_ERROR_NONE), | 49 error_(SPEECH_RECOGNITION_ERROR_NONE), |
| 48 volume_(-1.0f) { | 50 volume_(-1.0f) { |
| 49 // SpeechRecognizer takes ownership of sr_engine. | 51 // SpeechRecognizer takes ownership of sr_engine. |
| 50 SpeechRecognitionEngine* sr_engine = | 52 SpeechRecognitionEngine* sr_engine = |
| 51 new GoogleOneShotRemoteEngine(NULL /* URLRequestContextGetter */); | 53 new GoogleStreamingRemoteEngine(NULL /* URLRequestContextGetter */); |
| 52 SpeechRecognitionEngineConfig config; | 54 SpeechRecognitionEngineConfig config; |
| 53 config.audio_num_bits_per_sample = | 55 config.audio_num_bits_per_sample = |
| 54 SpeechRecognizerImpl::kNumBitsPerAudioSample; | 56 SpeechRecognizerImpl::kNumBitsPerAudioSample; |
| 55 config.audio_sample_rate = SpeechRecognizerImpl::kAudioSampleRate; | 57 config.audio_sample_rate = SpeechRecognizerImpl::kAudioSampleRate; |
| 56 config.filter_profanities = false; | 58 config.filter_profanities = false; |
| 57 sr_engine->SetConfig(config); | 59 sr_engine->SetConfig(config); |
| 58 | 60 |
| 59 const int kTestingSessionId = 1; | 61 const int kTestingSessionId = 1; |
| 60 recognizer_ = new SpeechRecognizerImpl( | 62 recognizer_ = new SpeechRecognizerImpl( |
| 61 this, kTestingSessionId, false, false, sr_engine); | 63 this, kTestingSessionId, false, false, sr_engine); |
| 62 audio_manager_.reset(new media::MockAudioManager( | 64 audio_manager_.reset(new media::MockAudioManager( |
| 63 base::MessageLoop::current()->task_runner().get())); | 65 base::MessageLoop::current()->task_runner().get())); |
| 64 recognizer_->SetAudioManagerForTesting(audio_manager_.get()); | 66 recognizer_->SetAudioManagerForTesting(audio_manager_.get()); |
| 65 | 67 |
| 66 int audio_packet_length_bytes = | 68 int audio_packet_length_bytes = |
| 67 (SpeechRecognizerImpl::kAudioSampleRate * | 69 (SpeechRecognizerImpl::kAudioSampleRate * |
| 68 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs * | 70 GoogleStreamingRemoteEngine::kAudioPacketIntervalMs * |
| 69 ChannelLayoutToChannelCount(SpeechRecognizerImpl::kChannelLayout) * | 71 ChannelLayoutToChannelCount(SpeechRecognizerImpl::kChannelLayout) * |
| 70 SpeechRecognizerImpl::kNumBitsPerAudioSample) / (8 * 1000); | 72 SpeechRecognizerImpl::kNumBitsPerAudioSample) / (8 * 1000); |
| 71 audio_packet_.resize(audio_packet_length_bytes); | 73 audio_packet_.resize(audio_packet_length_bytes); |
| 72 | 74 |
| 73 const int channels = | 75 const int channels = |
| 74 ChannelLayoutToChannelCount(SpeechRecognizerImpl::kChannelLayout); | 76 ChannelLayoutToChannelCount(SpeechRecognizerImpl::kChannelLayout); |
| 75 bytes_per_sample_ = SpeechRecognizerImpl::kNumBitsPerAudioSample / 8; | 77 bytes_per_sample_ = SpeechRecognizerImpl::kNumBitsPerAudioSample / 8; |
| 76 const int frames = audio_packet_length_bytes / channels / bytes_per_sample_; | 78 const int frames = audio_packet_length_bytes / channels / bytes_per_sample_; |
| 77 audio_bus_ = media::AudioBus::Create(channels, frames); | 79 audio_bus_ = media::AudioBus::Create(channels, frames); |
| 78 audio_bus_->Zero(); | 80 audio_bus_->Zero(); |
| (...skipping 172 matching lines...) | |
| 251 } | 253 } |
| 252 | 254 |
| 253 recognizer_->StopAudioCapture(); | 255 recognizer_->StopAudioCapture(); |
| 254 base::MessageLoop::current()->RunUntilIdle(); | 256 base::MessageLoop::current()->RunUntilIdle(); |
| 255 EXPECT_TRUE(audio_started_); | 257 EXPECT_TRUE(audio_started_); |
| 256 EXPECT_TRUE(audio_ended_); | 258 EXPECT_TRUE(audio_ended_); |
| 257 EXPECT_FALSE(recognition_ended_); | 259 EXPECT_FALSE(recognition_ended_); |
| 258 EXPECT_FALSE(result_received_); | 260 EXPECT_FALSE(result_received_); |
| 259 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_); | 261 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_); |
| 260 | 262 |
| | 263 // Create a response string. |
| | 264 proto::SpeechRecognitionEvent proto_event; |
| | 265 proto_event.set_status(proto::SpeechRecognitionEvent::STATUS_SUCCESS); |
| | 266 proto::SpeechRecognitionResult* proto_result = proto_event.add_result(); |
| | 267 proto_result->set_final(true); |
| | 268 proto::SpeechRecognitionAlternative* proto_alternative = |
| | 269 proto_result->add_alternative(); |
| | 270 proto_alternative->set_confidence(0.5f); |
| | 271 proto_alternative->set_transcript("123"); |
| | 272 std::string msg_string; |
| | 273 proto_event.SerializeToString(&msg_string); |
| | 274 uint32_t prefix = |
| | 275 base::HostToNet32(base::checked_cast<uint32_t>(msg_string.size())); |
| | 276 msg_string.insert(0, reinterpret_cast<char*>(&prefix), sizeof(prefix)); |
| | 277 |
| 261 // Issue the network callback to complete the process. | 278 // Issue the network callback to complete the process. |
| 262 net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0); | 279 net::TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID( |
| | 280 GoogleStreamingRemoteEngine::kDownstreamUrlFetcherIdForTesting); |
| 263 ASSERT_TRUE(fetcher); | 281 ASSERT_TRUE(fetcher); |
| 264 | |
| 265 fetcher->set_url(fetcher->GetOriginalURL()); | 282 fetcher->set_url(fetcher->GetOriginalURL()); |
| 266 fetcher->set_status(net::URLRequestStatus()); | 283 fetcher->set_status(net::URLRequestStatus()); |
| 267 fetcher->set_response_code(200); | 284 fetcher->set_response_code(200); |
| 268 fetcher->SetResponseString( | 285 fetcher->SetResponseString(msg_string); |
| 269 "{\"status\":0,\"hypotheses\":[{\"utterance\":\"123\"}]}"); | |
| 270 fetcher->delegate()->OnURLFetchComplete(fetcher); | 286 fetcher->delegate()->OnURLFetchComplete(fetcher); |
| | 287 |
| 271 base::MessageLoop::current()->RunUntilIdle(); | 288 base::MessageLoop::current()->RunUntilIdle(); |
| 272 EXPECT_TRUE(recognition_ended_); | 289 EXPECT_TRUE(recognition_ended_); |
| 273 EXPECT_TRUE(result_received_); | 290 EXPECT_TRUE(result_received_); |
| 274 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_); | 291 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_); |
| 275 CheckFinalEventsConsistency(); | 292 CheckFinalEventsConsistency(); |
| 276 } | 293 } |
| 277 | 294 |
| 278 TEST_F(SpeechRecognizerImplTest, CancelWithData) { | 295 TEST_F(SpeechRecognizerImplTest, CancelWithData) { |
| 279 // Start recording, give some data and then cancel. | 296 // Start recording, give some data and then cancel. |
| 280 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId); | 297 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId); |
| (...skipping 122 matching lines...) | |
| 403 TEST_F(SpeechRecognizerImplTest, NoSpeechCallbackIssued) { | 420 TEST_F(SpeechRecognizerImplTest, NoSpeechCallbackIssued) { |
| 404 // Start recording and give a lot of packets with audio samples set to zero. | 421 // Start recording and give a lot of packets with audio samples set to zero. |
| 405 // This should trigger the no-speech detector and issue a callback. | 422 // This should trigger the no-speech detector and issue a callback. |
| 406 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId); | 423 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId); |
| 407 base::MessageLoop::current()->RunUntilIdle(); | 424 base::MessageLoop::current()->RunUntilIdle(); |
| 408 TestAudioInputController* controller = | 425 TestAudioInputController* controller = |
| 409 audio_input_controller_factory_.controller(); | 426 audio_input_controller_factory_.controller(); |
| 410 ASSERT_TRUE(controller); | 427 ASSERT_TRUE(controller); |
| 411 | 428 |
| 412 int num_packets = (SpeechRecognizerImpl::kNoSpeechTimeoutMs) / | 429 int num_packets = (SpeechRecognizerImpl::kNoSpeechTimeoutMs) / |
| 413 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs + 1; | 430 GoogleStreamingRemoteEngine::kAudioPacketIntervalMs + 1; |
| 414 // The vector is already filled with zero value samples on create. | 431 // The vector is already filled with zero value samples on create. |
| 415 for (int i = 0; i < num_packets; ++i) { | 432 for (int i = 0; i < num_packets; ++i) { |
| 416 controller->event_handler()->OnData(controller, audio_bus_.get()); | 433 controller->event_handler()->OnData(controller, audio_bus_.get()); |
| 417 } | 434 } |
| 418 base::MessageLoop::current()->RunUntilIdle(); | 435 base::MessageLoop::current()->RunUntilIdle(); |
| 419 EXPECT_TRUE(recognition_started_); | 436 EXPECT_TRUE(recognition_started_); |
| 420 EXPECT_TRUE(audio_started_); | 437 EXPECT_TRUE(audio_started_); |
| 421 EXPECT_FALSE(result_received_); | 438 EXPECT_FALSE(result_received_); |
| 422 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NO_SPEECH, error_); | 439 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NO_SPEECH, error_); |
| 423 CheckFinalEventsConsistency(); | 440 CheckFinalEventsConsistency(); |
| 424 } | 441 } |
| 425 | 442 |
| 426 TEST_F(SpeechRecognizerImplTest, NoSpeechCallbackNotIssued) { | 443 TEST_F(SpeechRecognizerImplTest, NoSpeechCallbackNotIssued) { |
| 427 // Start recording and give a lot of packets with audio samples set to zero | 444 // Start recording and give a lot of packets with audio samples set to zero |
| 428 // and then some more with reasonably loud audio samples. This should be | 445 // and then some more with reasonably loud audio samples. This should be |
| 429 // treated as normal speech input and the no-speech detector should not get | 446 // treated as normal speech input and the no-speech detector should not get |
| 430 // triggered. | 447 // triggered. |
| 431 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId); | 448 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId); |
| 432 base::MessageLoop::current()->RunUntilIdle(); | 449 base::MessageLoop::current()->RunUntilIdle(); |
| 433 TestAudioInputController* controller = | 450 TestAudioInputController* controller = |
| 434 audio_input_controller_factory_.controller(); | 451 audio_input_controller_factory_.controller(); |
| 435 ASSERT_TRUE(controller); | 452 ASSERT_TRUE(controller); |
| 436 controller = audio_input_controller_factory_.controller(); | 453 controller = audio_input_controller_factory_.controller(); |
| 437 ASSERT_TRUE(controller); | 454 ASSERT_TRUE(controller); |
| 438 | 455 |
| 439 int num_packets = (SpeechRecognizerImpl::kNoSpeechTimeoutMs) / | 456 int num_packets = (SpeechRecognizerImpl::kNoSpeechTimeoutMs) / |
| 440 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs; | 457 GoogleStreamingRemoteEngine::kAudioPacketIntervalMs; |
| 441 | 458 |
| 442 // The vector is already filled with zero value samples on create. | 459 // The vector is already filled with zero value samples on create. |
| 443 for (int i = 0; i < num_packets / 2; ++i) { | 460 for (int i = 0; i < num_packets / 2; ++i) { |
| 444 controller->event_handler()->OnData(controller, audio_bus_.get()); | 461 controller->event_handler()->OnData(controller, audio_bus_.get()); |
| 445 } | 462 } |
| 446 | 463 |
| 447 FillPacketWithTestWaveform(); | 464 FillPacketWithTestWaveform(); |
| 448 for (int i = 0; i < num_packets / 2; ++i) { | 465 for (int i = 0; i < num_packets / 2; ++i) { |
| 449 controller->event_handler()->OnData(controller, audio_bus_.get()); | 466 controller->event_handler()->OnData(controller, audio_bus_.get()); |
| 450 } | 467 } |
| (...skipping 16 matching lines...) | |
| 467 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId); | 484 recognizer_->StartRecognition(media::AudioManagerBase::kDefaultDeviceId); |
| 468 base::MessageLoop::current()->RunUntilIdle(); | 485 base::MessageLoop::current()->RunUntilIdle(); |
| 469 TestAudioInputController* controller = | 486 TestAudioInputController* controller = |
| 470 audio_input_controller_factory_.controller(); | 487 audio_input_controller_factory_.controller(); |
| 471 ASSERT_TRUE(controller); | 488 ASSERT_TRUE(controller); |
| 472 controller = audio_input_controller_factory_.controller(); | 489 controller = audio_input_controller_factory_.controller(); |
| 473 ASSERT_TRUE(controller); | 490 ASSERT_TRUE(controller); |
| 474 | 491 |
| 475 // Feed some samples to begin with for the endpointer to do noise estimation. | 492 // Feed some samples to begin with for the endpointer to do noise estimation. |
| 476 int num_packets = SpeechRecognizerImpl::kEndpointerEstimationTimeMs / | 493 int num_packets = SpeechRecognizerImpl::kEndpointerEstimationTimeMs / |
| 477 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs; | 494 GoogleStreamingRemoteEngine::kAudioPacketIntervalMs; |
| 478 FillPacketWithNoise(); | 495 FillPacketWithNoise(); |
| 479 for (int i = 0; i < num_packets; ++i) { | 496 for (int i = 0; i < num_packets; ++i) { |
| 480 controller->event_handler()->OnData(controller, audio_bus_.get()); | 497 controller->event_handler()->OnData(controller, audio_bus_.get()); |
| 481 } | 498 } |
| 482 base::MessageLoop::current()->RunUntilIdle(); | 499 base::MessageLoop::current()->RunUntilIdle(); |
| 483 EXPECT_EQ(-1.0f, volume_); // No audio volume set yet. | 500 EXPECT_EQ(-1.0f, volume_); // No audio volume set yet. |
| 484 | 501 |
| 485 // The vector is already filled with zero value samples on create. | 502 // The vector is already filled with zero value samples on create. |
| 486 controller->event_handler()->OnData(controller, audio_bus_.get()); | 503 controller->event_handler()->OnData(controller, audio_bus_.get()); |
| 487 base::MessageLoop::current()->RunUntilIdle(); | 504 base::MessageLoop::current()->RunUntilIdle(); |
| 488 EXPECT_FLOAT_EQ(0.74939233f, volume_); | 505 EXPECT_FLOAT_EQ(0.74939233f, volume_); |
| 489 | 506 |
| 490 FillPacketWithTestWaveform(); | 507 FillPacketWithTestWaveform(); |
| 491 controller->event_handler()->OnData(controller, audio_bus_.get()); | 508 controller->event_handler()->OnData(controller, audio_bus_.get()); |
| 492 base::MessageLoop::current()->RunUntilIdle(); | 509 base::MessageLoop::current()->RunUntilIdle(); |
| 493 EXPECT_NEAR(0.89926866f, volume_, 0.00001f); | 510 EXPECT_NEAR(0.89926866f, volume_, 0.00001f); |
| 494 EXPECT_FLOAT_EQ(0.75071919f, noise_volume_); | 511 EXPECT_FLOAT_EQ(0.75071919f, noise_volume_); |
| 495 | 512 |
| 496 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_); | 513 EXPECT_EQ(SPEECH_RECOGNITION_ERROR_NONE, error_); |
| 497 EXPECT_FALSE(audio_ended_); | 514 EXPECT_FALSE(audio_ended_); |
| 498 EXPECT_FALSE(recognition_ended_); | 515 EXPECT_FALSE(recognition_ended_); |
| 499 recognizer_->AbortRecognition(); | 516 recognizer_->AbortRecognition(); |
| 500 base::MessageLoop::current()->RunUntilIdle(); | 517 base::MessageLoop::current()->RunUntilIdle(); |
| 501 CheckFinalEventsConsistency(); | 518 CheckFinalEventsConsistency(); |
| 502 } | 519 } |
| 503 | 520 |
| 504 } // namespace content | 521 } // namespace content |
| OLD | NEW |