OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/browser/speech/google_streaming_remote_engine.h" | 5 #include "content/browser/speech/google_streaming_remote_engine.h" |
6 | 6 |
7 #include <vector> | 7 #include <vector> |
8 | 8 |
9 #include "base/bind.h" | 9 #include "base/bind.h" |
10 #include "base/command_line.h" | 10 #include "base/command_line.h" |
11 #include "base/rand_util.h" | 11 #include "base/rand_util.h" |
12 #include "base/string_number_conversions.h" | 12 #include "base/string_number_conversions.h" |
13 #include "base/string_util.h" | 13 #include "base/string_util.h" |
14 #include "base/time.h" | 14 #include "base/time.h" |
15 #include "base/utf_string_conversions.h" | 15 #include "base/utf_string_conversions.h" |
16 #include "content/browser/speech/audio_buffer.h" | 16 #include "content/browser/speech/audio_buffer.h" |
17 #include "content/browser/speech/proto/google_streaming_api.pb.h" | 17 #include "content/browser/speech/proto/google_streaming_api.pb.h" |
18 #include "content/public/browser/browser_thread.h" | 18 #include "content/public/browser/browser_thread.h" |
19 #include "content/public/common/content_switches.h" | 19 #include "content/public/common/content_switches.h" |
20 #include "content/public/common/speech_recognition_error.h" | 20 #include "content/public/common/speech_recognition_error.h" |
21 #include "content/public/common/speech_recognition_result.h" | 21 #include "content/public/common/speech_recognition_result.h" |
22 #include "google_apis/google_api_keys.h" | 22 #include "google_apis/google_api_keys.h" |
23 #include "net/base/escape.h" | 23 #include "net/base/escape.h" |
24 #include "net/base/load_flags.h" | 24 #include "net/base/load_flags.h" |
25 #include "net/url_request/url_fetcher.h" | 25 #include "net/url_request/url_fetcher.h" |
26 #include "net/url_request/url_request_context_getter.h" | 26 #include "net/url_request/url_request_context_getter.h" |
27 #include "net/url_request/url_request_context.h" | 27 #include "net/url_request/url_request_context.h" |
28 #include "net/url_request/url_request_status.h" | 28 #include "net/url_request/url_request_status.h" |
29 | 29 |
30 using content::BrowserThread; | |
31 using content::SpeechRecognitionError; | |
32 using content::SpeechRecognitionErrorCode; | |
33 using content::SpeechRecognitionHypothesis; | |
34 using content::SpeechRecognitionResult; | |
35 using net::URLFetcher; | 30 using net::URLFetcher; |
36 | 31 |
| 32 namespace content { |
37 namespace { | 33 namespace { |
38 | 34 |
// File-local constants (declared inside the anonymous namespace). Their uses
// are in parts of the file not shown in this diff.
// The only change between the two columns is kDefaultAudioCodec: the
// "speech::" qualifier is dropped and the definition now fits on one line.
39 const char kWebServiceBaseUrl[] = | 35 const char kWebServiceBaseUrl[] = |
40 "https://www.google.com/speech-api/full-duplex/v1"; | 36 "https://www.google.com/speech-api/full-duplex/v1"; |
41 const char kDownstreamUrl[] = "/down?"; | 37 const char kDownstreamUrl[] = "/down?"; |
42 const char kUpstreamUrl[] = "/up?"; | 38 const char kUpstreamUrl[] = "/up?"; |
43 const int kAudioPacketIntervalMs = 100; | 39 const int kAudioPacketIntervalMs = 100; |
44 const speech::AudioEncoder::Codec kDefaultAudioCodec = | 40 const AudioEncoder::Codec kDefaultAudioCodec = AudioEncoder::CODEC_FLAC; |
45 speech::AudioEncoder::CODEC_FLAC; | |
46 | 41 |
47 // This matches the maximum maxAlternatives value supported by the server. | 42 // This matches the maximum maxAlternatives value supported by the server. |
48 const uint32 kMaxMaxAlternatives = 30; | 43 const uint32 kMaxMaxAlternatives = 30; |
49 | 44 |
50 // TODO(hans): Remove this and other logging when we don't need it anymore. | 45 // TODO(hans): Remove this and other logging when we don't need it anymore. |
// Debug-logging helper: parses |response| as a proto::SpeechRecognitionEvent
// and writes its contents to DVLOG(1).
// - If parsing fails, logs "Parse failed!" and returns.
// - Otherwise logs the event status (when present) and, for every result, its
//   "final" flag and stability (when present), followed by the confidence and
//   transcript of each alternative (when present).
// The two columns differ only in how the proto types are qualified
// (speech::proto:: on the left, proto:: on the right).
51 void DumpResponse(const std::string& response) { | 46 void DumpResponse(const std::string& response) { |
52 DVLOG(1) << "------------"; | 47 DVLOG(1) << "------------"; |
53 speech::proto::SpeechRecognitionEvent event; | 48 proto::SpeechRecognitionEvent event; |
54 if (!event.ParseFromString(response)) { | 49 if (!event.ParseFromString(response)) { |
55 DVLOG(1) << "Parse failed!"; | 50 DVLOG(1) << "Parse failed!"; |
56 return; | 51 return; |
57 } | 52 } |
58 if (event.has_status()) | 53 if (event.has_status()) |
59 DVLOG(1) << "STATUS\t" << event.status(); | 54 DVLOG(1) << "STATUS\t" << event.status(); |
60 for (int i = 0; i < event.result_size(); ++i) { | 55 for (int i = 0; i < event.result_size(); ++i) { |
61 DVLOG(1) << "RESULT #" << i << ":"; | 56 DVLOG(1) << "RESULT #" << i << ":"; |
62 const speech::proto::SpeechRecognitionResult& res = event.result(i); | 57 const proto::SpeechRecognitionResult& res = event.result(i); |
63 if (res.has_final()) | 58 if (res.has_final()) |
64 DVLOG(1) << " FINAL:\t" << res.final(); | 59 DVLOG(1) << " FINAL:\t" << res.final(); |
65 if (res.has_stability()) | 60 if (res.has_stability()) |
66 DVLOG(1) << " STABILITY:\t" << res.stability(); | 61 DVLOG(1) << " STABILITY:\t" << res.stability(); |
67 for (int j = 0; j < res.alternative_size(); ++j) { | 62 for (int j = 0; j < res.alternative_size(); ++j) { |
68 const speech::proto::SpeechRecognitionAlternative& alt = | 63 const proto::SpeechRecognitionAlternative& alt = |
69 res.alternative(j); | 64 res.alternative(j); |
70 if (alt.has_confidence()) | 65 if (alt.has_confidence()) |
71 DVLOG(1) << " CONFIDENCE:\t" << alt.confidence(); | 66 DVLOG(1) << " CONFIDENCE:\t" << alt.confidence(); |
72 if (alt.has_transcript()) | 67 if (alt.has_transcript()) |
73 DVLOG(1) << " TRANSCRIPT:\t" << alt.transcript(); | 68 DVLOG(1) << " TRANSCRIPT:\t" << alt.transcript(); |
74 } | 69 } |
75 } | 70 } |
76 } | 71 } |
77 | 72 |
// Returns the API key string for the speech web service.
// A key supplied through the switches::kSpeechRecognitionWebserviceKey
// command-line switch takes precedence; otherwise the value of
// google_apis::GetAPIKey() is returned. An empty result from the latter is
// only logged via DVLOG(1) and is still returned to the caller.
// This function is identical in both columns of the diff.
78 std::string GetAPIKey() { | 73 std::string GetAPIKey() { |
79 const CommandLine& command_line = *CommandLine::ForCurrentProcess(); | 74 const CommandLine& command_line = *CommandLine::ForCurrentProcess(); |
80 if (command_line.HasSwitch(switches::kSpeechRecognitionWebserviceKey)) { | 75 if (command_line.HasSwitch(switches::kSpeechRecognitionWebserviceKey)) { |
81 DVLOG(1) << "GetAPIKey() used key from command-line."; | 76 DVLOG(1) << "GetAPIKey() used key from command-line."; |
82 return command_line.GetSwitchValueASCII( | 77 return command_line.GetSwitchValueASCII( |
83 switches::kSpeechRecognitionWebserviceKey); | 78 switches::kSpeechRecognitionWebserviceKey); |
84 } | 79 } |
85 | 80 |
86 std::string api_key = google_apis::GetAPIKey(); | 81 std::string api_key = google_apis::GetAPIKey(); |
87 if (api_key.empty()) | 82 if (api_key.empty()) |
88 DVLOG(1) << "GetAPIKey() returned empty string!"; | 83 DVLOG(1) << "GetAPIKey() returned empty string!"; |
89 | 84 |
90 return api_key; | 85 return api_key; |
91 } | 86 } |
92 | 87 |
93 } // namespace | 88 } // namespace |
94 | 89 |
95 namespace speech { | |
96 | |
// Out-of-line definitions of GoogleStreamingRemoteEngine's static integer
// constants. The values are the same in both columns; on the left they sit
// inside "namespace speech", on the right inside "namespace content".
97 const int GoogleStreamingRemoteEngine::kUpstreamUrlFetcherIdForTests = 0; | 90 const int GoogleStreamingRemoteEngine::kUpstreamUrlFetcherIdForTests = 0; |
98 const int GoogleStreamingRemoteEngine::kDownstreamUrlFetcherIdForTests = 1; | 91 const int GoogleStreamingRemoteEngine::kDownstreamUrlFetcherIdForTests = 1; |
99 const int GoogleStreamingRemoteEngine::kWebserviceStatusNoError = 0; | 92 const int GoogleStreamingRemoteEngine::kWebserviceStatusNoError = 0; |
100 const int GoogleStreamingRemoteEngine::kWebserviceStatusErrorNoMatch = 5; | 93 const int GoogleStreamingRemoteEngine::kWebserviceStatusErrorNoMatch = 5; |
101 | 94 |
102 GoogleStreamingRemoteEngine::GoogleStreamingRemoteEngine( | 95 GoogleStreamingRemoteEngine::GoogleStreamingRemoteEngine( |
103 net::URLRequestContextGetter* context) | 96 net::URLRequestContextGetter* context) |
104 : url_context_(context), | 97 : url_context_(context), |
105 encoder_(NULL), | 98 encoder_(NULL), |
106 previous_response_length_(0), | 99 previous_response_length_(0), |
(...skipping 308 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
415 ws_event.status() == proto::SpeechRecognitionEvent::STATUS_SUCCESS)) { | 408 ws_event.status() == proto::SpeechRecognitionEvent::STATUS_SUCCESS)) { |
416 DVLOG(1) << "Received empty response"; | 409 DVLOG(1) << "Received empty response"; |
417 return state_; | 410 return state_; |
418 } | 411 } |
419 | 412 |
420 if (ws_event.has_status()) { | 413 if (ws_event.has_status()) { |
421 switch(ws_event.status()) { | 414 switch(ws_event.status()) { |
422 case proto::SpeechRecognitionEvent::STATUS_SUCCESS: | 415 case proto::SpeechRecognitionEvent::STATUS_SUCCESS: |
423 break; | 416 break; |
424 case proto::SpeechRecognitionEvent::STATUS_NO_SPEECH: | 417 case proto::SpeechRecognitionEvent::STATUS_NO_SPEECH: |
425 return Abort(content::SPEECH_RECOGNITION_ERROR_NO_SPEECH); | 418 return Abort(SPEECH_RECOGNITION_ERROR_NO_SPEECH); |
426 case proto::SpeechRecognitionEvent::STATUS_ABORTED: | 419 case proto::SpeechRecognitionEvent::STATUS_ABORTED: |
427 return Abort(content::SPEECH_RECOGNITION_ERROR_ABORTED); | 420 return Abort(SPEECH_RECOGNITION_ERROR_ABORTED); |
428 case proto::SpeechRecognitionEvent::STATUS_AUDIO_CAPTURE: | 421 case proto::SpeechRecognitionEvent::STATUS_AUDIO_CAPTURE: |
429 return Abort(content::SPEECH_RECOGNITION_ERROR_AUDIO); | 422 return Abort(SPEECH_RECOGNITION_ERROR_AUDIO); |
430 case proto::SpeechRecognitionEvent::STATUS_NETWORK: | 423 case proto::SpeechRecognitionEvent::STATUS_NETWORK: |
431 return Abort(content::SPEECH_RECOGNITION_ERROR_NETWORK); | 424 return Abort(SPEECH_RECOGNITION_ERROR_NETWORK); |
432 case proto::SpeechRecognitionEvent::STATUS_NOT_ALLOWED: | 425 case proto::SpeechRecognitionEvent::STATUS_NOT_ALLOWED: |
433 // TODO(hans): We need a better error code for this. | 426 // TODO(hans): We need a better error code for this. |
434 return Abort(content::SPEECH_RECOGNITION_ERROR_ABORTED); | 427 return Abort(SPEECH_RECOGNITION_ERROR_ABORTED); |
435 case proto::SpeechRecognitionEvent::STATUS_SERVICE_NOT_ALLOWED: | 428 case proto::SpeechRecognitionEvent::STATUS_SERVICE_NOT_ALLOWED: |
436 // TODO(hans): We need a better error code for this. | 429 // TODO(hans): We need a better error code for this. |
437 return Abort(content::SPEECH_RECOGNITION_ERROR_ABORTED); | 430 return Abort(SPEECH_RECOGNITION_ERROR_ABORTED); |
438 case proto::SpeechRecognitionEvent::STATUS_BAD_GRAMMAR: | 431 case proto::SpeechRecognitionEvent::STATUS_BAD_GRAMMAR: |
439 return Abort(content::SPEECH_RECOGNITION_ERROR_BAD_GRAMMAR); | 432 return Abort(SPEECH_RECOGNITION_ERROR_BAD_GRAMMAR); |
440 case proto::SpeechRecognitionEvent::STATUS_LANGUAGE_NOT_SUPPORTED: | 433 case proto::SpeechRecognitionEvent::STATUS_LANGUAGE_NOT_SUPPORTED: |
441 // TODO(hans): We need a better error code for this. | 434 // TODO(hans): We need a better error code for this. |
442 return Abort(content::SPEECH_RECOGNITION_ERROR_ABORTED); | 435 return Abort(SPEECH_RECOGNITION_ERROR_ABORTED); |
443 } | 436 } |
444 } | 437 } |
445 | 438 |
446 for (int i = 0; i < ws_event.result_size(); ++i) { | 439 for (int i = 0; i < ws_event.result_size(); ++i) { |
447 const proto::SpeechRecognitionResult& ws_result = ws_event.result(i); | 440 const proto::SpeechRecognitionResult& ws_result = ws_event.result(i); |
448 SpeechRecognitionResult result; | 441 SpeechRecognitionResult result; |
449 result.is_provisional = !(ws_result.has_final() && ws_result.final()); | 442 result.is_provisional = !(ws_result.has_final() && ws_result.final()); |
450 | 443 |
451 if (!result.is_provisional) | 444 if (!result.is_provisional) |
452 got_last_definitive_result_ = true; | 445 got_last_definitive_result_ = true; |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
517 DCHECK(!upstream_fetcher_.get()); | 510 DCHECK(!upstream_fetcher_.get()); |
518 DCHECK(downstream_fetcher_.get()); | 511 DCHECK(downstream_fetcher_.get()); |
519 | 512 |
520 DVLOG(1) << "Closing downstream."; | 513 DVLOG(1) << "Closing downstream."; |
521 downstream_fetcher_.reset(); | 514 downstream_fetcher_.reset(); |
522 return STATE_IDLE; | 515 return STATE_IDLE; |
523 } | 516 } |
524 | 517 |
// FSM handler that ignores its event arguments and forwards to Abort() with
// SPEECH_RECOGNITION_ERROR_NONE. Abort() only notifies the delegate for codes
// other than NONE, so no error is reported on this path.
525 GoogleStreamingRemoteEngine::FSMState | 518 GoogleStreamingRemoteEngine::FSMState |
526 GoogleStreamingRemoteEngine::AbortSilently(const FSMEventArgs&) { | 519 GoogleStreamingRemoteEngine::AbortSilently(const FSMEventArgs&) { |
527 return Abort(content::SPEECH_RECOGNITION_ERROR_NONE); | 520 return Abort(SPEECH_RECOGNITION_ERROR_NONE); |
528 } | 521 } |
529 | 522 |
// FSM handler that ignores its event arguments and forwards to Abort() with
// SPEECH_RECOGNITION_ERROR_NETWORK; the error code is fixed, not derived from
// the event.
530 GoogleStreamingRemoteEngine::FSMState | 523 GoogleStreamingRemoteEngine::FSMState |
531 GoogleStreamingRemoteEngine::AbortWithError(const FSMEventArgs&) { | 524 GoogleStreamingRemoteEngine::AbortWithError(const FSMEventArgs&) { |
532 return Abort(content::SPEECH_RECOGNITION_ERROR_NETWORK); | 525 return Abort(SPEECH_RECOGNITION_ERROR_NETWORK); |
533 } | 526 } |
534 | 527 |
// Common abort path. Logs |error_code|, reports it to the delegate through
// OnSpeechRecognitionEngineError() unless it is SPEECH_RECOGNITION_ERROR_NONE,
// then resets the downstream fetcher, the upstream fetcher and the encoder,
// and returns STATE_IDLE as the next FSM state.
// The delegate is notified before the fetchers and encoder are reset.
535 GoogleStreamingRemoteEngine::FSMState GoogleStreamingRemoteEngine::Abort( | 528 GoogleStreamingRemoteEngine::FSMState GoogleStreamingRemoteEngine::Abort( |
536 SpeechRecognitionErrorCode error_code) { | 529 SpeechRecognitionErrorCode error_code) { |
537 DVLOG(1) << "Aborting with error " << error_code; | 530 DVLOG(1) << "Aborting with error " << error_code; |
538 | 531 |
539 if (error_code != content::SPEECH_RECOGNITION_ERROR_NONE) { | 532 if (error_code != SPEECH_RECOGNITION_ERROR_NONE) { |
540 delegate()->OnSpeechRecognitionEngineError( | 533 delegate()->OnSpeechRecognitionEngineError( |
541 SpeechRecognitionError(error_code)); | 534 SpeechRecognitionError(error_code)); |
542 } | 535 } |
543 downstream_fetcher_.reset(); | 536 downstream_fetcher_.reset(); |
544 upstream_fetcher_.reset(); | 537 upstream_fetcher_.reset(); |
545 encoder_.reset(); | 538 encoder_.reset(); |
546 return STATE_IDLE; | 539 return STATE_IDLE; |
547 } | 540 } |
548 | 541 |
549 GoogleStreamingRemoteEngine::FSMState | 542 GoogleStreamingRemoteEngine::FSMState |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
589 return base::HexEncode(reinterpret_cast<void*>(&key), sizeof(key)); | 582 return base::HexEncode(reinterpret_cast<void*>(&key), sizeof(key)); |
590 } | 583 } |
591 | 584 |
// Constructs the FSM event arguments, storing |event_value| in the |event|
// member. No other member is initialized explicitly here.
592 GoogleStreamingRemoteEngine::FSMEventArgs::FSMEventArgs(FSMEvent event_value) | 585 GoogleStreamingRemoteEngine::FSMEventArgs::FSMEventArgs(FSMEvent event_value) |
593 : event(event_value) { | 586 : event(event_value) { |
594 } | 587 } |
595 | 588 |
// Out-of-line destructor with an empty body.
596 GoogleStreamingRemoteEngine::FSMEventArgs::~FSMEventArgs() { | 589 GoogleStreamingRemoteEngine::FSMEventArgs::~FSMEventArgs() { |
597 } | 590 } |
598 | 591 |
599 } // namespace speech | 592 } // namespace content |
OLD | NEW |