chrome/browser/speech/speech_recognizer_unittest.cc - Issue 3341020: Speech input: Do environment estimation and detect the no-speech case.

Side by Side Diff: chrome/browser/speech/speech_recognizer_unittest.cc

Issue 3341020: Speech input: Do environment estimation and detect the no-speech case. (Closed)

Patch Set: . Created 10 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

	5 #include "base/scoped_ptr.h"

5 #include "chrome/browser/chrome_thread.h"	6 #include "chrome/browser/chrome_thread.h"

6 #include "chrome/browser/speech/speech_recognizer.h"	7 #include "chrome/browser/speech/speech_recognizer.h"

7 #include "chrome/common/net/test_url_fetcher_factory.h"	8 #include "chrome/common/net/test_url_fetcher_factory.h"

8 #include "media/audio/test_audio_input_controller_factory.h"	9 #include "media/audio/test_audio_input_controller_factory.h"

9 #include "net/url_request/url_request_status.h"	10 #include "net/url_request/url_request_status.h"

10 #include "testing/gtest/include/gtest/gtest.h"	11 #include "testing/gtest/include/gtest/gtest.h"

11	12

12 using media::AudioInputController;	13 using media::AudioInputController;

13 using media::TestAudioInputController;	14 using media::TestAudioInputController;

14 using media::TestAudioInputControllerFactory;	15 using media::TestAudioInputControllerFactory;

15	16

16 namespace {	17 namespace {

17 const int kAudioPacketLengthBytes = 1000;	18 const int kAudioPacketLengthBytes = 1000;

18 }	19 }

19	20

20 namespace speech_input {	21 namespace speech_input {

21	22

22 class SpeechRecognizerTest : public SpeechRecognizerDelegate,	23 class SpeechRecognizerTest : public SpeechRecognizerDelegate,

23 public testing::Test {	24 public testing::Test {

24 public:	25 public:

25 SpeechRecognizerTest()	26 SpeechRecognizerTest()

26 : io_thread_(ChromeThread::IO, &message_loop_),	27 : io_thread_(ChromeThread::IO, &message_loop_),

27 ALLOW_THIS_IN_INITIALIZER_LIST(	28 ALLOW_THIS_IN_INITIALIZER_LIST(

28 recognizer_(new SpeechRecognizer(this, 1))),	29 recognizer_(new SpeechRecognizer(this, 1))),

29 recording_complete_(false),	30 recording_complete_(false),

30 recognition_complete_(false),	31 recognition_complete_(false),

31 result_received_(false),	32 result_received_(false),

32 error_(false) {	33 error_(SpeechRecognizer::RECOGNIZER_NO_ERROR) {

	34 audio_packet_length_bytes_ =

	35 (SpeechRecognizer::kAudioSampleRate *

	36 SpeechRecognizer::kAudioPacketIntervalMs *

	37 SpeechRecognizer::kNumAudioChannels *

	38 SpeechRecognizer::kNumBitsPerAudioSample) / (8 * 1000);

	39 audio_packet_.reset(new uint8[audio_packet_length_bytes_]);

33 }	40 }

34	41

35 void StartTest() {	42 void StartTest() {

36 EXPECT_TRUE(recognizer_->StartRecording());	43 EXPECT_TRUE(recognizer_->StartRecording());

37 }	44 }

38	45

39 // SpeechRecognizer::Delegate methods.	46 // SpeechRecognizer::Delegate methods.

40 virtual void SetRecognitionResult(int caller_id,	47 virtual void SetRecognitionResult(int caller_id,

41 bool error,	48 bool error,

42 const string16& result) {	49 const string16& result) {

43 result_received_ = true;	50 result_received_ = true;

44 }	51 }

45	52

46 virtual void DidCompleteRecording(int caller_id) {	53 virtual void DidCompleteRecording(int caller_id) {

47 recording_complete_ = true;	54 recording_complete_ = true;

48 }	55 }

49	56

50 virtual void DidCompleteRecognition(int caller_id) {	57 virtual void DidCompleteRecognition(int caller_id) {

51 recognition_complete_ = true;	58 recognition_complete_ = true;

52 }	59 }

53	60

54 virtual void OnRecognizerError(int caller_id) {	61 virtual void DidCompleteEnvironmentEstimation(int caller_id) {

55 error_ = true;	62 }

	63

	64 virtual void OnRecognizerError(int caller_id,

	65 SpeechRecognizer::ErrorCode error) {

	66 error_ = error;

56 }	67 }

57	68

58 // testing::Test methods.	69 // testing::Test methods.

59 virtual void SetUp() {	70 virtual void SetUp() {

60 URLFetcher::set_factory(&url_fetcher_factory_);	71 URLFetcher::set_factory(&url_fetcher_factory_);

61 AudioInputController::set_factory(&audio_input_controller_factory_);	72 AudioInputController::set_factory(&audio_input_controller_factory_);

62 }	73 }

63	74

64 virtual void TearDown() {	75 virtual void TearDown() {

65 URLFetcher::set_factory(NULL);	76 URLFetcher::set_factory(NULL);

66 AudioInputController::set_factory(NULL);	77 AudioInputController::set_factory(NULL);

67 }	78 }

68	79

69 protected:	80 protected:

70 MessageLoopForIO message_loop_;	81 MessageLoopForIO message_loop_;

71 ChromeThread io_thread_;	82 ChromeThread io_thread_;

72 scoped_refptr<SpeechRecognizer> recognizer_;	83 scoped_refptr<SpeechRecognizer> recognizer_;

73 bool recording_complete_;	84 bool recording_complete_;

74 bool recognition_complete_;	85 bool recognition_complete_;

75 bool result_received_;	86 bool result_received_;

76 bool error_;	87 SpeechRecognizer::ErrorCode error_;

77 TestURLFetcherFactory url_fetcher_factory_;	88 TestURLFetcherFactory url_fetcher_factory_;

78 TestAudioInputControllerFactory audio_input_controller_factory_;	89 TestAudioInputControllerFactory audio_input_controller_factory_;

	90 scoped_ptr<uint8> audio_packet_;

	91 int audio_packet_length_bytes_;
	joth 2010/09/09 10:51:37 would a vector<uint8> work better than these 2 members? (.size() will give you the length)
79 };	92 };

80	93

81 TEST_F(SpeechRecognizerTest, StopNoData) {	94 TEST_F(SpeechRecognizerTest, StopNoData) {

82 // Check for callbacks when stopping record before any audio gets recorded.	95 // Check for callbacks when stopping record before any audio gets recorded.

83 EXPECT_TRUE(recognizer_->StartRecording());	96 EXPECT_TRUE(recognizer_->StartRecording());

84 recognizer_->CancelRecognition();	97 recognizer_->CancelRecognition();

85 EXPECT_FALSE(recording_complete_);	98 EXPECT_FALSE(recording_complete_);

86 EXPECT_FALSE(recognition_complete_);	99 EXPECT_FALSE(recognition_complete_);

87 EXPECT_FALSE(result_received_);	100 EXPECT_FALSE(result_received_);

88 EXPECT_FALSE(error_);	101 EXPECT_EQ(SpeechRecognizer::RECOGNIZER_NO_ERROR, error_);

89 }	102 }

90	103

91 TEST_F(SpeechRecognizerTest, CancelNoData) {	104 TEST_F(SpeechRecognizerTest, CancelNoData) {

92 // Check for callbacks when canceling recognition before any audio gets	105 // Check for callbacks when canceling recognition before any audio gets

93 // recorded.	106 // recorded.

94 EXPECT_TRUE(recognizer_->StartRecording());	107 EXPECT_TRUE(recognizer_->StartRecording());

95 recognizer_->StopRecording();	108 recognizer_->StopRecording();

96 EXPECT_TRUE(recording_complete_);	109 EXPECT_TRUE(recording_complete_);

97 EXPECT_TRUE(recognition_complete_);	110 EXPECT_TRUE(recognition_complete_);

98 EXPECT_FALSE(result_received_);	111 EXPECT_FALSE(result_received_);

99 EXPECT_FALSE(error_);	112 EXPECT_EQ(SpeechRecognizer::RECOGNIZER_NO_ERROR, error_);

100 }	113 }

101	114

102 TEST_F(SpeechRecognizerTest, StopWithData) {	115 TEST_F(SpeechRecognizerTest, StopWithData) {

103 uint8 data[kAudioPacketLengthBytes] = { 0 };

104

105 // Start recording, give some data and then stop. This should wait for the	116 // Start recording, give some data and then stop. This should wait for the

106 // network callback to arrive before completion.	117 // network callback to arrive before completion.

107 EXPECT_TRUE(recognizer_->StartRecording());	118 EXPECT_TRUE(recognizer_->StartRecording());

108 TestAudioInputController* controller =	119 TestAudioInputController* controller =

109 audio_input_controller_factory_.controller();	120 audio_input_controller_factory_.controller();

110 ASSERT_TRUE(controller);	121 ASSERT_TRUE(controller);

111 controller = audio_input_controller_factory_.controller();	122 controller = audio_input_controller_factory_.controller();

112 ASSERT_TRUE(controller);	123 ASSERT_TRUE(controller);

113 controller->event_handler()->OnData(controller, data, sizeof(data));	124 controller->event_handler()->OnData(controller, audio_packet_.get(),

	125 audio_packet_length_bytes_);

114 MessageLoop::current()->RunAllPending();	126 MessageLoop::current()->RunAllPending();

115 recognizer_->StopRecording();	127 recognizer_->StopRecording();

116 EXPECT_TRUE(recording_complete_);	128 EXPECT_TRUE(recording_complete_);

117 EXPECT_FALSE(recognition_complete_);	129 EXPECT_FALSE(recognition_complete_);

118 EXPECT_FALSE(result_received_);	130 EXPECT_FALSE(result_received_);

119 EXPECT_FALSE(error_);	131 EXPECT_EQ(SpeechRecognizer::RECOGNIZER_NO_ERROR, error_);

120	132

121 // Issue the network callback to complete the process.	133 // Issue the network callback to complete the process.

122 TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);	134 TestURLFetcher* fetcher = url_fetcher_factory_.GetFetcherByID(0);

123 ASSERT_TRUE(fetcher);	135 ASSERT_TRUE(fetcher);

124 URLRequestStatus status;	136 URLRequestStatus status;

125 status.set_status(URLRequestStatus::SUCCESS);	137 status.set_status(URLRequestStatus::SUCCESS);

126 fetcher->delegate()->OnURLFetchComplete(fetcher, fetcher->original_url(),	138 fetcher->delegate()->OnURLFetchComplete(

127 status, 200, ResponseCookies(), "");	139 fetcher, fetcher->original_url(), status, 200, ResponseCookies(),

	140 "{\"hypotheses\":[{\"utterance\":\"123\"}]}");

128 EXPECT_TRUE(recognition_complete_);	141 EXPECT_TRUE(recognition_complete_);

129 EXPECT_TRUE(result_received_);	142 EXPECT_TRUE(result_received_);

130 EXPECT_FALSE(error_);	143 EXPECT_EQ(SpeechRecognizer::RECOGNIZER_NO_ERROR, error_);

131 }	144 }

132	145

133 TEST_F(SpeechRecognizerTest, CancelWithData) {	146 TEST_F(SpeechRecognizerTest, CancelWithData) {

134 uint8 data[kAudioPacketLengthBytes] = { 0 };

135

136 // Start recording, give some data and then cancel. This should not create	147 // Start recording, give some data and then cancel. This should not create

137 // a network request and finish immediately.	148 // a network request and finish immediately.

138 EXPECT_TRUE(recognizer_->StartRecording());	149 EXPECT_TRUE(recognizer_->StartRecording());

139 TestAudioInputController* controller =	150 TestAudioInputController* controller =

140 audio_input_controller_factory_.controller();	151 audio_input_controller_factory_.controller();

141 ASSERT_TRUE(controller);	152 ASSERT_TRUE(controller);

142 controller->event_handler()->OnData(controller, data, sizeof(data));	153 controller->event_handler()->OnData(controller, audio_packet_.get(),

	154 audio_packet_length_bytes_);

143 MessageLoop::current()->RunAllPending();	155 MessageLoop::current()->RunAllPending();

144 recognizer_->CancelRecognition();	156 recognizer_->CancelRecognition();

145 EXPECT_EQ(NULL, url_fetcher_factory_.GetFetcherByID(0));	157 EXPECT_EQ(NULL, url_fetcher_factory_.GetFetcherByID(0));

146 EXPECT_FALSE(recording_complete_);	158 EXPECT_FALSE(recording_complete_);

147 EXPECT_FALSE(recognition_complete_);	159 EXPECT_FALSE(recognition_complete_);

148 EXPECT_FALSE(result_received_);	160 EXPECT_FALSE(result_received_);

149 EXPECT_FALSE(error_);	161 EXPECT_EQ(SpeechRecognizer::RECOGNIZER_NO_ERROR, error_);

150 }	162 }

151	163

152 TEST_F(SpeechRecognizerTest, AudioControllerErrorNoData) {	164 TEST_F(SpeechRecognizerTest, AudioControllerErrorNoData) {

153 // Check if things tear down properly if AudioInputController threw an error.	165 // Check if things tear down properly if AudioInputController threw an error.

154 EXPECT_TRUE(recognizer_->StartRecording());	166 EXPECT_TRUE(recognizer_->StartRecording());

155 TestAudioInputController* controller =	167 TestAudioInputController* controller =

156 audio_input_controller_factory_.controller();	168 audio_input_controller_factory_.controller();

157 ASSERT_TRUE(controller);	169 ASSERT_TRUE(controller);

158 controller->event_handler()->OnError(controller, 0);	170 controller->event_handler()->OnError(controller, 0);

159 MessageLoop::current()->RunAllPending();	171 MessageLoop::current()->RunAllPending();

160 EXPECT_TRUE(recording_complete_);	172 EXPECT_TRUE(recording_complete_);

161 EXPECT_TRUE(recognition_complete_);	173 EXPECT_TRUE(recognition_complete_);

162 EXPECT_FALSE(result_received_);	174 EXPECT_FALSE(result_received_);

163 EXPECT_TRUE(error_);	175 EXPECT_EQ(SpeechRecognizer::RECOGNIZER_ERROR_CAPTURE, error_);

164 }	176 }

165	177

166 TEST_F(SpeechRecognizerTest, AudioControllerErrorWithData) {	178 TEST_F(SpeechRecognizerTest, AudioControllerErrorWithData) {

167 uint8 data[kAudioPacketLengthBytes] = { 0 };

168

169 // Check if things tear down properly if AudioInputController threw an error	179 // Check if things tear down properly if AudioInputController threw an error

170 // after giving some audio data.	180 // after giving some audio data.

171 EXPECT_TRUE(recognizer_->StartRecording());	181 EXPECT_TRUE(recognizer_->StartRecording());

172 TestAudioInputController* controller =	182 TestAudioInputController* controller =

173 audio_input_controller_factory_.controller();	183 audio_input_controller_factory_.controller();

174 ASSERT_TRUE(controller);	184 ASSERT_TRUE(controller);

175 controller->event_handler()->OnData(controller, data, sizeof(data));	185 controller->event_handler()->OnData(controller, audio_packet_.get(),

	186 audio_packet_length_bytes_);

176 controller->event_handler()->OnError(controller, 0);	187 controller->event_handler()->OnError(controller, 0);

177 MessageLoop::current()->RunAllPending();	188 MessageLoop::current()->RunAllPending();

178 EXPECT_EQ(NULL, url_fetcher_factory_.GetFetcherByID(0));	189 EXPECT_EQ(NULL, url_fetcher_factory_.GetFetcherByID(0));

179 EXPECT_TRUE(recording_complete_);	190 EXPECT_TRUE(recording_complete_);

180 EXPECT_TRUE(recognition_complete_);	191 EXPECT_TRUE(recognition_complete_);

181 EXPECT_FALSE(result_received_);	192 EXPECT_FALSE(result_received_);

182 EXPECT_TRUE(error_);	193 EXPECT_EQ(SpeechRecognizer::RECOGNIZER_ERROR_CAPTURE, error_);

	194 }

	195

	196 TEST_F(SpeechRecognizerTest, NoSpeechCallbackIssued) {

	197 // Start recording and give a lot of packets with audio samples set to zero.

	198 // This should trigger the no-speech detector and issue a callback.

	199 EXPECT_TRUE(recognizer_->StartRecording());

	200 TestAudioInputController* controller =

	201 audio_input_controller_factory_.controller();

	202 ASSERT_TRUE(controller);

	203 controller = audio_input_controller_factory_.controller();

	204 ASSERT_TRUE(controller);

	205

	206 int num_packets = (SpeechRecognizer::kNoSpeechTimeoutSec * 1000) /

	207 SpeechRecognizer::kAudioPacketIntervalMs;

	208 for (int i = 0; i < audio_packet_length_bytes_; ++i)

	209 audio_packet_.get()[i] = 0;
	joth 2010/09/09 10:51:37 with vector, audio_packet_.resize(desired_length, 0) will do the job (you can even drop the 0 as this is the default)
	210 for (int i = 0; i < num_packets; ++i) {

	211 controller->event_handler()->OnData(controller, audio_packet_.get(),

	212 audio_packet_length_bytes_);

	213 }

	214 MessageLoop::current()->RunAllPending();

	215 EXPECT_TRUE(recording_complete_);

	216 EXPECT_TRUE(recognition_complete_);

	217 EXPECT_FALSE(result_received_);

	218 EXPECT_EQ(SpeechRecognizer::RECOGNIZER_ERROR_NO_SPEECH, error_);

	219 }

	220

	221 TEST_F(SpeechRecognizerTest, NoSpeechCallbackNotIssued) {

	222 // Start recording and give a lot of packets with audio samples set to zero

	223 // and then some more with reasonably loud audio samples. This should be

	224 // treated as normal speech input and the no-speech detector should not get

	225 // triggered.

	226 EXPECT_TRUE(recognizer_->StartRecording());

	227 TestAudioInputController* controller =

	228 audio_input_controller_factory_.controller();

	229 ASSERT_TRUE(controller);

	230 controller = audio_input_controller_factory_.controller();

	231 ASSERT_TRUE(controller);

	232

	233 int num_packets = (SpeechRecognizer::kNoSpeechTimeoutSec * 1000) /

	234 SpeechRecognizer::kAudioPacketIntervalMs;

	235

	236 for (int i = 0; i < audio_packet_length_bytes_; ++i)

	237 audio_packet_.get()[i] = 0;
	joth 2010/09/09 10:51:37 ditto
	238 for (int i = 0; i < num_packets / 2; ++i) {

	239 controller->event_handler()->OnData(controller, audio_packet_.get(),

	240 audio_packet_length_bytes_);

	241 }

	242 for (int i = 0; i < audio_packet_length_bytes_; ++i)

	243 audio_packet_.get()[i] = static_cast<uint8>(i);
	joth 2010/09/09 10:51:37 would it be more representative to put a sine wave in here or something? http://src.chromium.org/viewvc/chrome/trunk/src/media/audio/simple_sources.cc has an example.
	Satish 2010/09/09 11:30:19 On 2010/09/09 10:51:37, joth wrote: > would it be more representative to put a sine wave in here or something? > http://src.chromium.org/viewvc/chrome/trunk/src/media/audio/simple_sources.cc > has an example. I thought of that earlier but a sine wave isn't exactly representing speech either. So it would be swapping one artificial sequence for another, so I went with this simpler for loop.
	244 for (int i = 0; i < num_packets / 2; ++i) {

	245 controller->event_handler()->OnData(controller, audio_packet_.get(),

	246 audio_packet_length_bytes_);

	247 }

	248

	249 MessageLoop::current()->RunAllPending();

	250 EXPECT_EQ(SpeechRecognizer::RECOGNIZER_NO_ERROR, error_);

	251 EXPECT_FALSE(recording_complete_);

	252 EXPECT_FALSE(recognition_complete_);

	253 recognizer_->CancelRecognition();

183 }	254 }

184	255

185 } // namespace speech_input	256 } // namespace speech_input

OLD	NEW

« chrome/browser/speech/speech_input_manager.cc ('K') | « chrome/browser/speech/speech_recognizer.cc ('k') | no next file » | no next file with comments »