Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(109)

Unified Diff: ios/chrome/browser/voice/text_to_speech_parser.mm

Issue 2449593002: [ios] Adds support for parsing Text-to-Speech search results. (Closed)
Patch Set: Disabled Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: ios/chrome/browser/voice/text_to_speech_parser.mm
diff --git a/ios/chrome/browser/voice/text_to_speech_parser.mm b/ios/chrome/browser/voice/text_to_speech_parser.mm
new file mode 100644
index 0000000000000000000000000000000000000000..6f22c22acf91769463bdb576e057a4773f36e628
--- /dev/null
+++ b/ios/chrome/browser/voice/text_to_speech_parser.mm
@@ -0,0 +1,109 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#import "ios/chrome/browser/voice/text_to_speech_parser.h"
+
+#include "base/logging.h"
+#import "ios/web/public/web_state/js/crw_js_injection_receiver.h"
+#include "ios/web/public/web_state/web_state.h"
+#import "third_party/google_toolbox_for_mac/src/Foundation/GTMStringEncoding.h"
+
+namespace {
+
+// The start and end tags that delimit the TTS audio data inside the script
+// element returned by |kTTSAudioDataExtractorScriptFormat|.
+NSString* const kTTSStartTag = @"function(){var _a_tts='";
+NSString* const kTTSEndTag = @"'";
+
+// When |kTTSAudioDataExtractorScriptFormat| is evaluated on a Google voice
+// search results page, this script returns the innerHTML of the first script
+// element that contains the start tag, or an empty string if no script element
+// contains it. The format takes one parameter, which is the start tag to
+// search for (|kTTSStartTag|).
+// Note: indexOf() returns -1 when the tag is absent and 0 when the element's
+// content begins with the tag, so the match test must be ">= 0".
+NSString* const kTTSAudioDataExtractorScriptFormat =
+    @"(function(){"
+     " var start_tag = \"%@\";"
+     " var script_elements = document.getElementsByTagName(\"script\");"
+     " for (var i = 0; i < script_elements.length; ++i) {"
+     " var script_html = script_elements[i].innerHTML;"
+     " if (script_html.indexOf(start_tag) >= 0)"
+     " return script_html;"
+     " }"
+     " return \"\";"
+     "})()";
+// Escaped encoding for GWS Voice Search service's trailing '='.
+NSString* const kTrailingEqualEncoding = @"\\x3d";
+// The maximum number of trailing '=' characters in a Voice Search SRP.
+const NSUInteger kMaxTrailingEqualsCount = 2;
+
+}  // namespace
+
+// Extracts the TTS audio data embedded in |page_html|. The data is the text
+// found between the last occurrence of |kTTSStartTag| and the first
+// |kTTSEndTag| that follows it, base64-decoded after un-escaping any trailing
+// "\x3d" sequences back to '='.
+// Returns nil if |page_html| is nil or empty, if either tag cannot be found,
+// or if there is no data between the tags; otherwise returns the result of
+// the base64 decoder.
+NSData* ExtractVoiceSearchAudioDataFromPageHTML(NSString* page_html) {
+  if (!page_html.length)
+    return nil;
+
+  // The data should be near the end of the page, so search backwards.
+  NSRange data_start_tag_range =
+      [page_html rangeOfString:kTTSStartTag options:NSBackwardsSearch];
+  if (data_start_tag_range.location == NSNotFound) {
+    DLOG(ERROR) << "Did not find base tts tag in search output. "
+                << page_html.length;
+    return nil;
+  }
+
+  // The base64-encoded data will be everything between |kTTSStartTag| and
+  // the first |kTTSEndTag| that follows it.
+  NSUInteger start_position = NSMaxRange(data_start_tag_range);
+  NSRange data_range =
+      NSMakeRange(start_position, page_html.length - start_position);
+  NSRange data_end_tag_range =
+      [page_html rangeOfString:kTTSEndTag options:0 range:data_range];
+  // Treat an end tag located immediately after the start tag as a failure
+  // too, since there would be no data to decode.
+  if (data_end_tag_range.location == NSNotFound ||
+      data_end_tag_range.location == start_position) {
+    DLOG(ERROR) << "Could not find encoded data before tts closing tag.";
+    return nil;
+  }
+
+  // Extract the data between the tags. The range was validated above, so the
+  // substring is always a non-empty string and needs no nil check.
+  NSRange audio_data_range =
+      NSMakeRange(start_position, data_end_tag_range.location - start_position);
+  NSString* raw_base64_encoded_audio_string =
+      [page_html substringWithRange:audio_data_range];
+
+  // GWS is escaping the trailing '=' characters to \x3d.
+  // Clean these up before passing the string to the base64 decoder.
+  // Note: there are at most 2 encoded trailing '=' characters, so limit the
+  // string replacement to the last characters of the string.
+  NSUInteger search_range_length =
+      std::min(kMaxTrailingEqualsCount * kTrailingEqualEncoding.length,
+               raw_base64_encoded_audio_string.length);
+  NSRange search_range =
+      NSMakeRange(raw_base64_encoded_audio_string.length - search_range_length,
+                  search_range_length);
+  NSString* base64_encoded_audio_string = [raw_base64_encoded_audio_string
+      stringByReplacingOccurrencesOfString:kTrailingEqualEncoding
+                                withString:@"="
+                                   options:0
+                                     range:search_range];
+
+  GTMStringEncoding* base64 = [GTMStringEncoding rfc4648Base64StringEncoding];
+  return [base64 decode:base64_encoded_audio_string];
+}
+
+// Evaluates the TTS extraction script (|kTTSAudioDataExtractorScriptFormat|
+// formatted with |kTTSStartTag|) through |webState|'s JS injection receiver,
+// then calls |completion| from the script's completion handler with the audio
+// data parsed from the script result. |completion| receives nil when the
+// script does not produce a string or when no audio data can be extracted
+// from it. Both |webState| and |completion| must be non-null.
+void ExtractVoiceSearchAudioDataFromWebState(
+    web::WebState* webState,
+    TextToSpeechCompletion completion) {
+  DCHECK(webState);
+  DCHECK(completion);
+  NSString* tts_extraction_script = [NSString
+      stringWithFormat:kTTSAudioDataExtractorScriptFormat, kTTSStartTag];
+  [webState->GetJSInjectionReceiver()
+      executeJavaScript:tts_extraction_script
+      completionHandler:^(id result, NSError* error) {
+        // |result| is untyped. Only hand it to the parser when it really is
+        // a string; otherwise pass nil, which the parser treats as "no data",
+        // instead of sending NSString messages to an unrelated object.
+        NSString* page_html =
+            [result isKindOfClass:[NSString class]] ? result : nil;
+        if (!page_html)
+          DLOG(ERROR) << "TTS extraction script did not return a string.";
+        completion(ExtractVoiceSearchAudioDataFromPageHTML(page_html));
+      }];
+}
« no previous file with comments | « ios/chrome/browser/voice/text_to_speech_parser.h ('k') | ios/chrome/browser/voice/text_to_speech_parser_unittest.mm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698