Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Side by Side Diff: ios/chrome/browser/voice/text_to_speech_parser.mm

Issue 2449593002: [ios] Adds support for parsing Text-to-Speech search results. (Closed)
Patch Set: Disabled Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #import "ios/chrome/browser/voice/text_to_speech_parser.h"
6
7 #include "base/logging.h"
8 #import "ios/web/public/web_state/js/crw_js_injection_receiver.h"
9 #include "ios/web/public/web_state/web_state.h"
10 #import "third_party/google_toolbox_for_mac/src/Foundation/GTMStringEncoding.h"
11
namespace {

// The start and end tags that delimit the base64-encoded TTS audio data
// inside the script element of a voice search results page.
NSString* const kTTSStartTag = @"function(){var _a_tts='";
NSString* const kTTSEndTag = @"'";

// When |kTTSAudioDataExtractorScriptFormat| is evaluated on a Google voice
// search results page, this script will return the innerHTML of the first
// script element that contains the start tag, or an empty string if no script
// element contains it. The format takes one parameter, which is the start tag
// to look for. The parameter is substituted into a double-quoted JavaScript
// string literal, so it must not itself contain unescaped double quotes or
// backslashes.
//
// Note: String.prototype.indexOf() returns -1 when the substring is absent and
// 0 when the string begins with it, so the match test compares against -1
// rather than 0 to avoid rejecting a script that starts with the tag.
NSString* const kTTSAudioDataExtractorScriptFormat =
    @"(function(){"
     " var start_tag = \"%@\";"
     " var script_elements = document.getElementsByTagName(\"script\");"
     " for (var i = 0; i < script_elements.length; ++i) {"
     " var script_html = script_elements[i].innerHTML;"
     " if (script_html.indexOf(start_tag) !== -1)"
     " return script_html;"
     " }"
     " return \"\";"
     "})()";

// Escaped encoding for GWS Voice Search service's trailing '='.
NSString* const kTrailingEqualEncoding = @"\\x3d";
// The maximum number of trailing '=' characters in a Voice Search SRP.
const NSUInteger kMaxTrailingEqualsCount = 2;

}  // namespace
39
// Extracts and decodes the TTS audio data embedded in |page_html|.
//
// The function searches |page_html| backwards for the last occurrence of
// |kTTSStartTag|, takes the text between it and the next |kTTSEndTag|, turns
// trailing "\x3d" escape sequences back into '=' and passes the result to the
// RFC 4648 base64 decoder.
//
// Returns nil if |page_html| is nil or empty, if the start tag is not found,
// or if there is no data between the start tag and the end tag. Otherwise
// returns whatever the base64 decoder produces for the extracted string.
NSData* ExtractVoiceSearchAudioDataFromPageHTML(NSString* page_html) {
  // Messaging nil yields 0, so this covers both nil and empty input.
  if (!page_html.length)
    return nil;

  // The data should be near the end of the page, so search backwards.
  NSRange data_start_tag_range =
      [page_html rangeOfString:kTTSStartTag options:NSBackwardsSearch];
  if (data_start_tag_range.location == NSNotFound) {
    DLOG(ERROR) << "Did not find base tts tag in search output. "
                << page_html.length;
    return nil;
  }

  // The base64-encoded data will be everything between |kTTSStartTag| and the
  // first |kTTSEndTag| that follows it.
  NSUInteger start_position =
      data_start_tag_range.location + data_start_tag_range.length;
  NSRange data_range =
      NSMakeRange(start_position, page_html.length - start_position);
  NSRange data_end_tag_range =
      [page_html rangeOfString:kTTSEndTag options:0 range:data_range];
  // An end tag located exactly at |start_position| means the payload is empty.
  if (data_end_tag_range.location == NSNotFound ||
      data_end_tag_range.location == start_position) {
    DLOG(ERROR) << "Could not find encoded data before tts closing tag.";
    return nil;
  }

  // Extract the data between the tags. The range was validated above, so
  // -substringWithRange: always yields a non-empty string here.
  NSRange audio_data_range =
      NSMakeRange(start_position, data_end_tag_range.location - start_position);
  NSString* raw_base64_encoded_audio_string =
      [page_html substringWithRange:audio_data_range];

  // GWS is escaping the trailing '=' characters to \x3d.
  // Clean these up before passing the string to the base64 decoder.
  // Note: there are at most 2 encoded trailing '=' characters, so limit the
  // string replacement to the last characters of the string. The length is
  // clamped so the range never starts before the beginning of a short string.
  NSUInteger search_range_length =
      std::min(kMaxTrailingEqualsCount * kTrailingEqualEncoding.length,
               raw_base64_encoded_audio_string.length);
  NSRange search_range =
      NSMakeRange(raw_base64_encoded_audio_string.length - search_range_length,
                  search_range_length);
  NSString* base64_encoded_audio_string = [raw_base64_encoded_audio_string
      stringByReplacingOccurrencesOfString:kTrailingEqualEncoding
                                withString:@"="
                                   options:0
                                     range:search_range];

  GTMStringEncoding* base64 = [GTMStringEncoding rfc4648Base64StringEncoding];
  return [base64 decode:base64_encoded_audio_string];
}
96
// Runs the TTS extraction script in |webState| and calls |completion| with the
// audio data decoded from the script's result, or with nil if no audio data
// could be extracted. Both |webState| and |completion| must be non-null. The
// |error| reported by the JavaScript evaluation is not inspected; a failed
// evaluation is expected to surface as a missing or non-string result.
void ExtractVoiceSearchAudioDataFromWebState(
    web::WebState* webState,
    TextToSpeechCompletion completion) {
  DCHECK(webState);
  DCHECK(completion);
  NSString* tts_extraction_script = [NSString
      stringWithFormat:kTTSAudioDataExtractorScriptFormat, kTTSStartTag];
  [webState->GetJSInjectionReceiver()
      executeJavaScript:tts_extraction_script
      completionHandler:^(id result, NSError* error) {
        // |result| is an untyped JavaScript value. Only forward it to the
        // parser when it really is a string, since the parser sends
        // NSString-only messages to its argument.
        NSString* script_html = nil;
        if ([result isKindOfClass:[NSString class]])
          script_html = result;
        completion(ExtractVoiceSearchAudioDataFromPageHTML(script_html));
      }];
}
OLDNEW
« no previous file with comments | « ios/chrome/browser/voice/text_to_speech_parser.h ('k') | ios/chrome/browser/voice/text_to_speech_parser_unittest.mm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698