OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // This program generates a user profile and history by randomly generating | |
6 // data and feeding it to the history service. | |
7 | |
8 #include "chrome/tools/profiles/thumbnail-inl.h" | |
9 | |
10 #include "base/at_exit.h" | |
11 #include "base/command_line.h" | |
12 #include "base/file_util.h" | |
13 #include "base/files/file_path.h" | |
14 #include "base/i18n/icu_util.h" | |
15 #include "base/logging.h" | |
16 #include "base/message_loop.h" | |
17 #include "base/path_service.h" | |
18 #include "base/process_util.h" | |
19 #include "base/strings/string_number_conversions.h" | |
20 #include "base/time.h" | |
21 #include "base/utf_string_conversions.h" | |
22 #include "chrome/browser/history/history_service.h" | |
23 #include "chrome/browser/history/history_service_factory.h" | |
24 #include "chrome/browser/history/top_sites.h" | |
25 #include "chrome/common/chrome_paths.h" | |
26 #include "chrome/common/thumbnail_score.h" | |
27 #include "chrome/test/base/testing_browser_process.h" | |
28 #include "chrome/test/base/testing_profile.h" | |
29 #include "content/browser/browser_thread_impl.h" | |
30 #include "content/public/browser/browser_thread.h" | |
31 #include "content/public/browser/notification_service.h" | |
32 #include "third_party/skia/include/core/SkBitmap.h" | |
33 #include "ui/base/resource/resource_bundle.h" | |
34 #include "ui/base/ui_base_paths.h" | |
35 #include "ui/gfx/codec/jpeg_codec.h" | |
36 | |
37 #if defined(TOOLKIT_GTK) | |
38 #include <gtk/gtk.h> | |
39 #endif | |
40 | |
41 using base::Time; | |
42 using content::BrowserThread; | |
43 | |
// Optional kinds of extra data the generator can produce, expressed as bit
// flags so they can be OR-ed together. Without any flag only URLs/visits are
// added.
enum Types {
  TOP_SITES = 0x1,  // Selected by --top-sites: attach thumbnails via TopSites.
  FULL_TEXT = 0x2   // Selected by --full-text: attach random page contents.
};
50 | |
51 // RAII for initializing and shutting down the TestBrowserProcess | |
52 class InitBrowserProcess { | |
53 public: | |
54 InitBrowserProcess() { | |
55 DCHECK(!g_browser_process); | |
56 g_browser_process = new TestingBrowserProcess; | |
57 } | |
58 | |
59 ~InitBrowserProcess() { | |
60 DCHECK(g_browser_process); | |
61 delete g_browser_process; | |
62 g_browser_process = NULL; | |
63 } | |
64 }; | |
65 | |
// Word-length distribution, as measured from Darin's profile.
// Entry [n-1] holds P(word has length n), so the table covers lengths 1-19.
const float kWordLengthProbabilities[] = {
  0.069f, 0.132f, 0.199f, 0.137f, 0.088f,  // lengths  1-5
  0.115f, 0.081f, 0.055f, 0.034f, 0.021f,  // lengths  6-10
  0.019f, 0.018f, 0.007f, 0.007f, 0.005f,  // lengths 11-15
  0.004f, 0.003f, 0.003f, 0.003f           // lengths 16-19
};
71 | |
// Returns a pseudo-random float in the closed interval [0,1], derived from
// rand(). Handy for making probabilistic decisions.
float RandomFloat() {
  const float scale = static_cast<float>(RAND_MAX);
  return rand() / scale;
}
77 | |
// Returns a pseudo-random integer in the half-open range [min,max), derived
// from rand().
// If the range is empty (max <= min) there is nothing to choose from, so
// |min| is returned. This also avoids the division by zero that
// `rand() % 0` would otherwise trigger when max == min.
int RandomInt(int min, int max) {
  if (max <= min)
    return min;
  return min + (rand() % (max - min));
}
82 | |
83 // Return a string of |count| lowercase random characters. | |
84 string16 RandomChars(int count) { | |
85 string16 str; | |
86 for (int i = 0; i < count; ++i) | |
87 str += L'a' + rand() % 26; | |
88 return str; | |
89 } | |
90 | |
91 string16 RandomWord() { | |
92 // TODO(evanm): should we instead use the markov chain based | |
93 // version of this that I already wrote? | |
94 | |
95 // Sample a word length from kWordLengthProbabilities. | |
96 float sample = RandomFloat(); | |
97 size_t i; | |
98 for (i = 0; i < arraysize(kWordLengthProbabilities); ++i) { | |
99 sample -= kWordLengthProbabilities[i]; | |
100 if (sample < 0) break; | |
101 } | |
102 const int word_length = i + 1; | |
103 return RandomChars(word_length); | |
104 } | |
105 | |
106 // Return a string of |count| random words. | |
107 string16 RandomWords(int count) { | |
108 string16 str; | |
109 for (int i = 0; i < count; ++i) { | |
110 if (!str.empty()) | |
111 str += L' '; | |
112 str += RandomWord(); | |
113 } | |
114 return str; | |
115 } | |
116 | |
117 // Return a random URL-looking string. | |
118 GURL ConstructRandomURL() { | |
119 return GURL(ASCIIToUTF16("http://") + RandomChars(3) + ASCIIToUTF16(".com/") + | |
120 RandomChars(RandomInt(5, 20))); | |
121 } | |
122 | |
123 // Return a random page title-looking string. | |
124 string16 ConstructRandomTitle() { | |
125 return RandomWords(RandomInt(3, 15)); | |
126 } | |
127 | |
128 // Return a random string that could function as page contents. | |
129 string16 ConstructRandomPage() { | |
130 return RandomWords(RandomInt(10, 4000)); | |
131 } | |
132 | |
133 // Insert a batch of |batch_size| URLs, starting at pageid |page_id|. | |
134 void InsertURLBatch(Profile* profile, | |
135 int page_id, | |
136 int batch_size, | |
137 int types) { | |
138 HistoryService* history_service = | |
139 HistoryServiceFactory::GetForProfile(profile, Profile::EXPLICIT_ACCESS); | |
140 | |
141 // Probability of following a link on the current "page" | |
142 // (vs randomly jumping to a new page). | |
143 const float kFollowLinkProbability = 0.85f; | |
144 // Probability of visiting a page we've visited before. | |
145 const float kRevisitLinkProbability = 0.1f; | |
146 // Probability of a URL being "good enough" to revisit. | |
147 const float kRevisitableURLProbability = 0.05f; | |
148 // Probability of a URL being the end of a redirect chain. | |
149 const float kRedirectProbability = 0.05f; | |
150 | |
151 // A list of URLs that we sometimes revisit. | |
152 std::vector<GURL> revisit_urls; | |
153 | |
154 // Scoping value for page IDs (required by the history service). | |
155 void* id_scope = reinterpret_cast<void*>(1); | |
156 | |
157 scoped_ptr<SkBitmap> google_bitmap( | |
158 gfx::JPEGCodec::Decode(kGoogleThumbnail, sizeof(kGoogleThumbnail))); | |
159 scoped_ptr<SkBitmap> weewar_bitmap( | |
160 gfx::JPEGCodec::Decode(kWeewarThumbnail, sizeof(kWeewarThumbnail))); | |
161 | |
162 printf("Inserting %d URLs...\n", batch_size); | |
163 GURL previous_url; | |
164 content::PageTransition transition = content::PAGE_TRANSITION_TYPED; | |
165 const int end_page_id = page_id + batch_size; | |
166 history::TopSites* top_sites = profile->GetTopSites(); | |
167 for (; page_id < end_page_id; ++page_id) { | |
168 // Randomly decide whether this new URL simulates following a link or | |
169 // whether it's a jump to a new URL. | |
170 if (!previous_url.is_empty() && RandomFloat() < kFollowLinkProbability) { | |
171 transition = content::PAGE_TRANSITION_LINK; | |
172 } else { | |
173 previous_url = GURL(); | |
174 transition = content::PAGE_TRANSITION_TYPED; | |
175 } | |
176 | |
177 // Pick a URL, either newly at random or from our list of previously | |
178 // visited URLs. | |
179 GURL url; | |
180 if (!revisit_urls.empty() && RandomFloat() < kRevisitLinkProbability) { | |
181 // Draw a URL from revisit_urls at random. | |
182 url = revisit_urls[RandomInt(0, static_cast<int>(revisit_urls.size()))]; | |
183 } else { | |
184 url = ConstructRandomURL(); | |
185 } | |
186 | |
187 // Randomly construct a redirect chain. | |
188 history::RedirectList redirects; | |
189 if (RandomFloat() < kRedirectProbability) { | |
190 const int redir_count = RandomInt(1, 4); | |
191 for (int i = 0; i < redir_count; ++i) | |
192 redirects.push_back(ConstructRandomURL()); | |
193 redirects.push_back(url); | |
194 } | |
195 | |
196 // Add all of this information to the history service. | |
197 history_service->AddPage(url, base::Time::Now(), | |
198 id_scope, page_id, | |
199 previous_url, redirects, | |
200 transition, history::SOURCE_BROWSED, true); | |
201 ThumbnailScore score(0.75, false, false); | |
202 history_service->SetPageTitle(url, ConstructRandomTitle()); | |
203 if (types & FULL_TEXT) | |
204 history_service->SetPageContents(url, ConstructRandomPage()); | |
205 if (types & TOP_SITES && top_sites) { | |
206 const SkBitmap& bitmap = (RandomInt(0, 2) == 0) ? *google_bitmap : | |
207 *weewar_bitmap; | |
208 gfx::Image image = gfx::Image::CreateFrom1xBitmap(bitmap); | |
209 top_sites->SetPageThumbnail(url, image, score); | |
210 } | |
211 | |
212 previous_url = url; | |
213 | |
214 if (revisit_urls.empty() || RandomFloat() < kRevisitableURLProbability) | |
215 revisit_urls.push_back(url); | |
216 } | |
217 } | |
218 | |
219 int main(int argc, char* argv[]) { | |
220 CommandLine::Init(argc, argv); | |
221 base::EnableTerminationOnHeapCorruption(); | |
222 base::AtExitManager exit_manager; | |
223 CommandLine* cl = CommandLine::ForCurrentProcess(); | |
224 | |
225 int types = 0; | |
226 if (cl->HasSwitch("top-sites")) | |
227 types |= TOP_SITES; | |
228 if (cl->HasSwitch("full-text")) | |
229 types |= FULL_TEXT; | |
230 | |
231 // We require two arguments: urlcount and profiledir. | |
232 const CommandLine::StringVector& args = cl->GetArgs(); | |
233 if (args.size() < 2) { | |
234 printf("usage: %s [--top-sites] [--full-text] <urlcount> " | |
235 "<profiledir>\n", argv[0]); | |
236 printf("\n --top-sites Generate thumbnails\n"); | |
237 printf("\n --full-text Generate full text index\n"); | |
238 return -1; | |
239 } | |
240 | |
241 int url_count = 0; | |
242 base::StringToInt(args[0], &url_count); | |
243 base::FilePath dst_dir(args[1]); | |
244 if (!dst_dir.IsAbsolute()) { | |
245 base::FilePath current_dir; | |
246 file_util::GetCurrentDirectory(¤t_dir); | |
247 dst_dir = current_dir.Append(dst_dir); | |
248 } | |
249 if (!file_util::CreateDirectory(dst_dir)) { | |
250 PLOG(ERROR) << "Unable to create directory " << dst_dir.value().c_str(); | |
251 } | |
252 | |
253 icu_util::Initialize(); | |
254 // Copied from base/test/test_suite.cc. | |
255 #if defined(TOOLKIT_GTK) | |
256 gtk_init_check(&argc, &argv); | |
257 #endif | |
258 | |
259 InitBrowserProcess initialize_browser_process; | |
260 chrome::RegisterPathProvider(); | |
261 ui::RegisterPathProvider(); | |
262 MessageLoopForUI message_loop; | |
263 content::BrowserThreadImpl ui_thread(BrowserThread::UI, &message_loop); | |
264 content::BrowserThreadImpl db_thread(BrowserThread::DB, &message_loop); | |
265 ResourceBundle::InitSharedInstanceWithLocale("en-US", NULL); | |
266 TestingProfile profile; | |
267 profile.CreateHistoryService(false, false); | |
268 if (types & TOP_SITES) { | |
269 profile.CreateTopSites(); | |
270 profile.BlockUntilTopSitesLoaded(); | |
271 } | |
272 | |
273 srand(static_cast<unsigned int>(Time::Now().ToInternalValue())); | |
274 | |
275 // The maximum number of URLs to insert into history in one batch. | |
276 const int kBatchSize = 2000; | |
277 int page_id = 0; | |
278 while (page_id < url_count) { | |
279 const int batch_size = std::min(kBatchSize, url_count - page_id); | |
280 InsertURLBatch(&profile, page_id, batch_size, types); | |
281 // Run all pending messages to give TopSites a chance to catch up. | |
282 message_loop.RunUntilIdle(); | |
283 page_id += batch_size; | |
284 } | |
285 | |
286 printf("Writing to disk\n"); | |
287 | |
288 profile.DestroyTopSites(); | |
289 profile.DestroyHistoryService(); | |
290 | |
291 message_loop.RunUntilIdle(); | |
292 | |
293 file_util::FileEnumerator file_iterator(profile.GetPath(), false, | |
294 file_util::FileEnumerator::FILES); | |
295 base::FilePath path = file_iterator.Next(); | |
296 while (!path.empty()) { | |
297 base::FilePath dst_file = dst_dir.Append(path.BaseName()); | |
298 file_util::Delete(dst_file, false); | |
299 printf("Copying file %" PRFilePath " to " | |
300 "%" PRFilePath "\n", path.value().c_str(), | |
301 dst_file.value().c_str()); | |
302 if (!file_util::CopyFile(path, dst_file)) { | |
303 PLOG(ERROR) << "Copying file failed"; | |
304 return -1; | |
305 } | |
306 path = file_iterator.Next(); | |
307 } | |
308 | |
309 return 0; | |
310 } | |
OLD | NEW |