| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "content/browser/speech/google_streaming_remote_engine.h" | 5 #include "content/browser/speech/google_streaming_remote_engine.h" |
| 6 | 6 |
| 7 #include <vector> | 7 #include <vector> |
| 8 | 8 |
| 9 #include "base/bind.h" | 9 #include "base/bind.h" |
| 10 #include "base/command_line.h" | 10 #include "base/command_line.h" |
| 11 #include "base/rand_util.h" | 11 #include "base/rand_util.h" |
| 12 #include "base/string_number_conversions.h" | 12 #include "base/string_number_conversions.h" |
| 13 #include "base/string_util.h" | 13 #include "base/string_util.h" |
| 14 #include "base/time.h" | 14 #include "base/time.h" |
| 15 #include "base/utf_string_conversions.h" | 15 #include "base/utf_string_conversions.h" |
| 16 #include "content/browser/speech/audio_buffer.h" | 16 #include "content/browser/speech/audio_buffer.h" |
| 17 #include "content/browser/speech/proto/google_streaming_api.pb.h" | 17 #include "content/browser/speech/proto/google_streaming_api.pb.h" |
| 18 #include "content/public/browser/browser_thread.h" | 18 #include "content/public/browser/browser_thread.h" |
| 19 #include "content/public/common/content_switches.h" | 19 #include "content/public/common/content_switches.h" |
| 20 #include "content/public/common/speech_recognition_error.h" | 20 #include "content/public/common/speech_recognition_error.h" |
| 21 #include "content/public/common/speech_recognition_result.h" | 21 #include "content/public/common/speech_recognition_result.h" |
| 22 #include "google_apis/google_api_keys.h" | 22 #include "google_apis/google_api_keys.h" |
| 23 #include "net/base/escape.h" | 23 #include "net/base/escape.h" |
| 24 #include "net/base/load_flags.h" | 24 #include "net/base/load_flags.h" |
| 25 #include "net/url_request/url_fetcher.h" | 25 #include "net/url_request/url_fetcher.h" |
| 26 #include "net/url_request/url_request_context_getter.h" | 26 #include "net/url_request/url_request_context_getter.h" |
| 27 #include "net/url_request/url_request_context.h" | 27 #include "net/url_request/url_request_context.h" |
| 28 #include "net/url_request/url_request_status.h" | 28 #include "net/url_request/url_request_status.h" |
| 29 | 29 |
| 30 using content::BrowserThread; | |
| 31 using content::SpeechRecognitionError; | |
| 32 using content::SpeechRecognitionErrorCode; | |
| 33 using content::SpeechRecognitionHypothesis; | |
| 34 using content::SpeechRecognitionResult; | |
| 35 using net::URLFetcher; | 30 using net::URLFetcher; |
| 36 | 31 |
| 32 namespace content { |
| 37 namespace { | 33 namespace { |
| 38 | 34 |
| 39 const char kWebServiceBaseUrl[] = | 35 const char kWebServiceBaseUrl[] = |
| 40 "https://www.google.com/speech-api/full-duplex/v1"; | 36 "https://www.google.com/speech-api/full-duplex/v1"; |
| 41 const char kDownstreamUrl[] = "/down?"; | 37 const char kDownstreamUrl[] = "/down?"; |
| 42 const char kUpstreamUrl[] = "/up?"; | 38 const char kUpstreamUrl[] = "/up?"; |
| 43 const int kAudioPacketIntervalMs = 100; | 39 const int kAudioPacketIntervalMs = 100; |
| 44 const speech::AudioEncoder::Codec kDefaultAudioCodec = | 40 const AudioEncoder::Codec kDefaultAudioCodec = AudioEncoder::CODEC_FLAC; |
| 45 speech::AudioEncoder::CODEC_FLAC; | |
| 46 | 41 |
| 47 // This matches the maximum maxAlternatives value supported by the server. | 42 // This matches the maximum maxAlternatives value supported by the server. |
| 48 const uint32 kMaxMaxAlternatives = 30; | 43 const uint32 kMaxMaxAlternatives = 30; |
| 49 | 44 |
| 50 // TODO(hans): Remove this and other logging when we don't need it anymore. | 45 // TODO(hans): Remove this and other logging when we don't need it anymore. |
| 51 void DumpResponse(const std::string& response) { | 46 void DumpResponse(const std::string& response) { |
| 52 DVLOG(1) << "------------"; | 47 DVLOG(1) << "------------"; |
| 53 speech::proto::SpeechRecognitionEvent event; | 48 proto::SpeechRecognitionEvent event; |
| 54 if (!event.ParseFromString(response)) { | 49 if (!event.ParseFromString(response)) { |
| 55 DVLOG(1) << "Parse failed!"; | 50 DVLOG(1) << "Parse failed!"; |
| 56 return; | 51 return; |
| 57 } | 52 } |
| 58 if (event.has_status()) | 53 if (event.has_status()) |
| 59 DVLOG(1) << "STATUS\t" << event.status(); | 54 DVLOG(1) << "STATUS\t" << event.status(); |
| 60 for (int i = 0; i < event.result_size(); ++i) { | 55 for (int i = 0; i < event.result_size(); ++i) { |
| 61 DVLOG(1) << "RESULT #" << i << ":"; | 56 DVLOG(1) << "RESULT #" << i << ":"; |
| 62 const speech::proto::SpeechRecognitionResult& res = event.result(i); | 57 const proto::SpeechRecognitionResult& res = event.result(i); |
| 63 if (res.has_final()) | 58 if (res.has_final()) |
| 64 DVLOG(1) << " FINAL:\t" << res.final(); | 59 DVLOG(1) << " FINAL:\t" << res.final(); |
| 65 if (res.has_stability()) | 60 if (res.has_stability()) |
| 66 DVLOG(1) << " STABILITY:\t" << res.stability(); | 61 DVLOG(1) << " STABILITY:\t" << res.stability(); |
| 67 for (int j = 0; j < res.alternative_size(); ++j) { | 62 for (int j = 0; j < res.alternative_size(); ++j) { |
| 68 const speech::proto::SpeechRecognitionAlternative& alt = | 63 const proto::SpeechRecognitionAlternative& alt = |
| 69 res.alternative(j); | 64 res.alternative(j); |
| 70 if (alt.has_confidence()) | 65 if (alt.has_confidence()) |
| 71 DVLOG(1) << " CONFIDENCE:\t" << alt.confidence(); | 66 DVLOG(1) << " CONFIDENCE:\t" << alt.confidence(); |
| 72 if (alt.has_transcript()) | 67 if (alt.has_transcript()) |
| 73 DVLOG(1) << " TRANSCRIPT:\t" << alt.transcript(); | 68 DVLOG(1) << " TRANSCRIPT:\t" << alt.transcript(); |
| 74 } | 69 } |
| 75 } | 70 } |
| 76 } | 71 } |
| 77 | 72 |
| 78 std::string GetAPIKey() { | 73 std::string GetAPIKey() { |
| 79 const CommandLine& command_line = *CommandLine::ForCurrentProcess(); | 74 const CommandLine& command_line = *CommandLine::ForCurrentProcess(); |
| 80 if (command_line.HasSwitch(switches::kSpeechRecognitionWebserviceKey)) { | 75 if (command_line.HasSwitch(switches::kSpeechRecognitionWebserviceKey)) { |
| 81 DVLOG(1) << "GetAPIKey() used key from command-line."; | 76 DVLOG(1) << "GetAPIKey() used key from command-line."; |
| 82 return command_line.GetSwitchValueASCII( | 77 return command_line.GetSwitchValueASCII( |
| 83 switches::kSpeechRecognitionWebserviceKey); | 78 switches::kSpeechRecognitionWebserviceKey); |
| 84 } | 79 } |
| 85 | 80 |
| 86 std::string api_key = google_apis::GetAPIKey(); | 81 std::string api_key = google_apis::GetAPIKey(); |
| 87 if (api_key.empty()) | 82 if (api_key.empty()) |
| 88 DVLOG(1) << "GetAPIKey() returned empty string!"; | 83 DVLOG(1) << "GetAPIKey() returned empty string!"; |
| 89 | 84 |
| 90 return api_key; | 85 return api_key; |
| 91 } | 86 } |
| 92 | 87 |
| 93 } // namespace | 88 } // namespace |
| 94 | 89 |
| 95 namespace speech { | |
| 96 | |
| 97 const int GoogleStreamingRemoteEngine::kUpstreamUrlFetcherIdForTests = 0; | 90 const int GoogleStreamingRemoteEngine::kUpstreamUrlFetcherIdForTests = 0; |
| 98 const int GoogleStreamingRemoteEngine::kDownstreamUrlFetcherIdForTests = 1; | 91 const int GoogleStreamingRemoteEngine::kDownstreamUrlFetcherIdForTests = 1; |
| 99 const int GoogleStreamingRemoteEngine::kWebserviceStatusNoError = 0; | 92 const int GoogleStreamingRemoteEngine::kWebserviceStatusNoError = 0; |
| 100 const int GoogleStreamingRemoteEngine::kWebserviceStatusErrorNoMatch = 5; | 93 const int GoogleStreamingRemoteEngine::kWebserviceStatusErrorNoMatch = 5; |
| 101 | 94 |
| 102 GoogleStreamingRemoteEngine::GoogleStreamingRemoteEngine( | 95 GoogleStreamingRemoteEngine::GoogleStreamingRemoteEngine( |
| 103 net::URLRequestContextGetter* context) | 96 net::URLRequestContextGetter* context) |
| 104 : url_context_(context), | 97 : url_context_(context), |
| 105 encoder_(NULL), | 98 encoder_(NULL), |
| 106 previous_response_length_(0), | 99 previous_response_length_(0), |
| (...skipping 308 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 415 ws_event.status() == proto::SpeechRecognitionEvent::STATUS_SUCCESS)) { | 408 ws_event.status() == proto::SpeechRecognitionEvent::STATUS_SUCCESS)) { |
| 416 DVLOG(1) << "Received empty response"; | 409 DVLOG(1) << "Received empty response"; |
| 417 return state_; | 410 return state_; |
| 418 } | 411 } |
| 419 | 412 |
| 420 if (ws_event.has_status()) { | 413 if (ws_event.has_status()) { |
| 421 switch(ws_event.status()) { | 414 switch(ws_event.status()) { |
| 422 case proto::SpeechRecognitionEvent::STATUS_SUCCESS: | 415 case proto::SpeechRecognitionEvent::STATUS_SUCCESS: |
| 423 break; | 416 break; |
| 424 case proto::SpeechRecognitionEvent::STATUS_NO_SPEECH: | 417 case proto::SpeechRecognitionEvent::STATUS_NO_SPEECH: |
| 425 return Abort(content::SPEECH_RECOGNITION_ERROR_NO_SPEECH); | 418 return Abort(SPEECH_RECOGNITION_ERROR_NO_SPEECH); |
| 426 case proto::SpeechRecognitionEvent::STATUS_ABORTED: | 419 case proto::SpeechRecognitionEvent::STATUS_ABORTED: |
| 427 return Abort(content::SPEECH_RECOGNITION_ERROR_ABORTED); | 420 return Abort(SPEECH_RECOGNITION_ERROR_ABORTED); |
| 428 case proto::SpeechRecognitionEvent::STATUS_AUDIO_CAPTURE: | 421 case proto::SpeechRecognitionEvent::STATUS_AUDIO_CAPTURE: |
| 429 return Abort(content::SPEECH_RECOGNITION_ERROR_AUDIO); | 422 return Abort(SPEECH_RECOGNITION_ERROR_AUDIO); |
| 430 case proto::SpeechRecognitionEvent::STATUS_NETWORK: | 423 case proto::SpeechRecognitionEvent::STATUS_NETWORK: |
| 431 return Abort(content::SPEECH_RECOGNITION_ERROR_NETWORK); | 424 return Abort(SPEECH_RECOGNITION_ERROR_NETWORK); |
| 432 case proto::SpeechRecognitionEvent::STATUS_NOT_ALLOWED: | 425 case proto::SpeechRecognitionEvent::STATUS_NOT_ALLOWED: |
| 433 // TODO(hans): We need a better error code for this. | 426 // TODO(hans): We need a better error code for this. |
| 434 return Abort(content::SPEECH_RECOGNITION_ERROR_ABORTED); | 427 return Abort(SPEECH_RECOGNITION_ERROR_ABORTED); |
| 435 case proto::SpeechRecognitionEvent::STATUS_SERVICE_NOT_ALLOWED: | 428 case proto::SpeechRecognitionEvent::STATUS_SERVICE_NOT_ALLOWED: |
| 436 // TODO(hans): We need a better error code for this. | 429 // TODO(hans): We need a better error code for this. |
| 437 return Abort(content::SPEECH_RECOGNITION_ERROR_ABORTED); | 430 return Abort(SPEECH_RECOGNITION_ERROR_ABORTED); |
| 438 case proto::SpeechRecognitionEvent::STATUS_BAD_GRAMMAR: | 431 case proto::SpeechRecognitionEvent::STATUS_BAD_GRAMMAR: |
| 439 return Abort(content::SPEECH_RECOGNITION_ERROR_BAD_GRAMMAR); | 432 return Abort(SPEECH_RECOGNITION_ERROR_BAD_GRAMMAR); |
| 440 case proto::SpeechRecognitionEvent::STATUS_LANGUAGE_NOT_SUPPORTED: | 433 case proto::SpeechRecognitionEvent::STATUS_LANGUAGE_NOT_SUPPORTED: |
| 441 // TODO(hans): We need a better error code for this. | 434 // TODO(hans): We need a better error code for this. |
| 442 return Abort(content::SPEECH_RECOGNITION_ERROR_ABORTED); | 435 return Abort(SPEECH_RECOGNITION_ERROR_ABORTED); |
| 443 } | 436 } |
| 444 } | 437 } |
| 445 | 438 |
| 446 for (int i = 0; i < ws_event.result_size(); ++i) { | 439 for (int i = 0; i < ws_event.result_size(); ++i) { |
| 447 const proto::SpeechRecognitionResult& ws_result = ws_event.result(i); | 440 const proto::SpeechRecognitionResult& ws_result = ws_event.result(i); |
| 448 SpeechRecognitionResult result; | 441 SpeechRecognitionResult result; |
| 449 result.is_provisional = !(ws_result.has_final() && ws_result.final()); | 442 result.is_provisional = !(ws_result.has_final() && ws_result.final()); |
| 450 | 443 |
| 451 if (!result.is_provisional) | 444 if (!result.is_provisional) |
| 452 got_last_definitive_result_ = true; | 445 got_last_definitive_result_ = true; |
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 517 DCHECK(!upstream_fetcher_.get()); | 510 DCHECK(!upstream_fetcher_.get()); |
| 518 DCHECK(downstream_fetcher_.get()); | 511 DCHECK(downstream_fetcher_.get()); |
| 519 | 512 |
| 520 DVLOG(1) << "Closing downstream."; | 513 DVLOG(1) << "Closing downstream."; |
| 521 downstream_fetcher_.reset(); | 514 downstream_fetcher_.reset(); |
| 522 return STATE_IDLE; | 515 return STATE_IDLE; |
| 523 } | 516 } |
| 524 | 517 |
| 525 GoogleStreamingRemoteEngine::FSMState | 518 GoogleStreamingRemoteEngine::FSMState |
| 526 GoogleStreamingRemoteEngine::AbortSilently(const FSMEventArgs&) { | 519 GoogleStreamingRemoteEngine::AbortSilently(const FSMEventArgs&) { |
| 527 return Abort(content::SPEECH_RECOGNITION_ERROR_NONE); | 520 return Abort(SPEECH_RECOGNITION_ERROR_NONE); |
| 528 } | 521 } |
| 529 | 522 |
| 530 GoogleStreamingRemoteEngine::FSMState | 523 GoogleStreamingRemoteEngine::FSMState |
| 531 GoogleStreamingRemoteEngine::AbortWithError(const FSMEventArgs&) { | 524 GoogleStreamingRemoteEngine::AbortWithError(const FSMEventArgs&) { |
| 532 return Abort(content::SPEECH_RECOGNITION_ERROR_NETWORK); | 525 return Abort(SPEECH_RECOGNITION_ERROR_NETWORK); |
| 533 } | 526 } |
| 534 | 527 |
| 535 GoogleStreamingRemoteEngine::FSMState GoogleStreamingRemoteEngine::Abort( | 528 GoogleStreamingRemoteEngine::FSMState GoogleStreamingRemoteEngine::Abort( |
| 536 SpeechRecognitionErrorCode error_code) { | 529 SpeechRecognitionErrorCode error_code) { |
| 537 DVLOG(1) << "Aborting with error " << error_code; | 530 DVLOG(1) << "Aborting with error " << error_code; |
| 538 | 531 |
| 539 if (error_code != content::SPEECH_RECOGNITION_ERROR_NONE) { | 532 if (error_code != SPEECH_RECOGNITION_ERROR_NONE) { |
| 540 delegate()->OnSpeechRecognitionEngineError( | 533 delegate()->OnSpeechRecognitionEngineError( |
| 541 SpeechRecognitionError(error_code)); | 534 SpeechRecognitionError(error_code)); |
| 542 } | 535 } |
| 543 downstream_fetcher_.reset(); | 536 downstream_fetcher_.reset(); |
| 544 upstream_fetcher_.reset(); | 537 upstream_fetcher_.reset(); |
| 545 encoder_.reset(); | 538 encoder_.reset(); |
| 546 return STATE_IDLE; | 539 return STATE_IDLE; |
| 547 } | 540 } |
| 548 | 541 |
| 549 GoogleStreamingRemoteEngine::FSMState | 542 GoogleStreamingRemoteEngine::FSMState |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 589 return base::HexEncode(reinterpret_cast<void*>(&key), sizeof(key)); | 582 return base::HexEncode(reinterpret_cast<void*>(&key), sizeof(key)); |
| 590 } | 583 } |
| 591 | 584 |
| 592 GoogleStreamingRemoteEngine::FSMEventArgs::FSMEventArgs(FSMEvent event_value) | 585 GoogleStreamingRemoteEngine::FSMEventArgs::FSMEventArgs(FSMEvent event_value) |
| 593 : event(event_value) { | 586 : event(event_value) { |
| 594 } | 587 } |
| 595 | 588 |
| 596 GoogleStreamingRemoteEngine::FSMEventArgs::~FSMEventArgs() { | 589 GoogleStreamingRemoteEngine::FSMEventArgs::~FSMEventArgs() { |
| 597 } | 590 } |
| 598 | 591 |
| 599 } // namespace speech | 592 } // namespace content |
| OLD | NEW |