Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3157)

Side by Side Diff: chrome/tools/profiles/generate_profile.cc

Issue 14585015: Recommit the generate profile patch (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Load the browser dll manually on Windows. Created 7 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « chrome/test/ui/ui_test.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // This program generates a user profile and history by randomly generating
6 // data and feeding it to the history service.
7
8 #include "chrome/tools/profiles/thumbnail-inl.h"
9
10 #include "base/at_exit.h"
11 #include "base/command_line.h"
12 #include "base/file_util.h"
13 #include "base/files/file_path.h"
14 #include "base/i18n/icu_util.h"
15 #include "base/logging.h"
16 #include "base/message_loop.h"
17 #include "base/path_service.h"
18 #include "base/process_util.h"
19 #include "base/strings/string_number_conversions.h"
20 #include "base/time.h"
21 #include "base/utf_string_conversions.h"
22 #include "chrome/browser/history/history_service.h"
23 #include "chrome/browser/history/history_service_factory.h"
24 #include "chrome/browser/history/top_sites.h"
25 #include "chrome/common/chrome_paths.h"
26 #include "chrome/common/thumbnail_score.h"
27 #include "chrome/test/base/testing_browser_process.h"
28 #include "chrome/test/base/testing_profile.h"
29 #include "content/browser/browser_thread_impl.h"
30 #include "content/public/browser/browser_thread.h"
31 #include "content/public/browser/notification_service.h"
32 #include "third_party/skia/include/core/SkBitmap.h"
33 #include "ui/base/resource/resource_bundle.h"
34 #include "ui/base/ui_base_paths.h"
35 #include "ui/gfx/codec/jpeg_codec.h"
36
37 #if defined(TOOLKIT_GTK)
38 #include <gtk/gtk.h>
39 #endif
40
41 using base::Time;
42 using content::BrowserThread;
43
// Optional extra kinds of data the generator can produce, combined as a
// bitmask. Without any of these flags only URLs and visits are added.
enum Types {
  TOP_SITES = 0x1,
  FULL_TEXT = 0x2
};
50
51 // RAII for initializing and shutting down the TestBrowserProcess
52 class InitBrowserProcess {
53 public:
54 InitBrowserProcess() {
55 DCHECK(!g_browser_process);
56 g_browser_process = new TestingBrowserProcess;
57 }
58
59 ~InitBrowserProcess() {
60 DCHECK(g_browser_process);
61 delete g_browser_process;
62 g_browser_process = NULL;
63 }
64 };
65
// Distribution of word lengths, as measured from Darin's profile.
// Entry [n-1] holds P(word of length n).
const float kWordLengthProbabilities[] = {
  0.069f, 0.132f, 0.199f, 0.137f, 0.088f,
  0.115f, 0.081f, 0.055f, 0.034f, 0.021f,
  0.019f, 0.018f, 0.007f, 0.007f, 0.005f,
  0.004f, 0.003f, 0.003f, 0.003f
};
71
// Returns a pseudo-random float in the closed interval [0, 1], derived from
// rand(). Handy for making probabilistic decisions.
float RandomFloat() {
  const float scale = static_cast<float>(RAND_MAX);
  return rand() / scale;
}
77
// Returns a pseudo-random integer in the half-open range [min, max), drawn
// with rand().
// If the range is empty (max <= min) there is nothing to draw from, so |min|
// is returned rather than evaluating a modulo by zero or by a negative span.
int RandomInt(int min, int max) {
  const int span = max - min;
  if (span <= 0)
    return min;
  return min + (rand() % span);
}
82
83 // Return a string of |count| lowercase random characters.
84 string16 RandomChars(int count) {
85 string16 str;
86 for (int i = 0; i < count; ++i)
87 str += L'a' + rand() % 26;
88 return str;
89 }
90
91 string16 RandomWord() {
92 // TODO(evanm): should we instead use the markov chain based
93 // version of this that I already wrote?
94
95 // Sample a word length from kWordLengthProbabilities.
96 float sample = RandomFloat();
97 size_t i;
98 for (i = 0; i < arraysize(kWordLengthProbabilities); ++i) {
99 sample -= kWordLengthProbabilities[i];
100 if (sample < 0) break;
101 }
102 const int word_length = i + 1;
103 return RandomChars(word_length);
104 }
105
106 // Return a string of |count| random words.
107 string16 RandomWords(int count) {
108 string16 str;
109 for (int i = 0; i < count; ++i) {
110 if (!str.empty())
111 str += L' ';
112 str += RandomWord();
113 }
114 return str;
115 }
116
117 // Return a random URL-looking string.
118 GURL ConstructRandomURL() {
119 return GURL(ASCIIToUTF16("http://") + RandomChars(3) + ASCIIToUTF16(".com/") +
120 RandomChars(RandomInt(5, 20)));
121 }
122
123 // Return a random page title-looking string.
124 string16 ConstructRandomTitle() {
125 return RandomWords(RandomInt(3, 15));
126 }
127
128 // Return a random string that could function as page contents.
129 string16 ConstructRandomPage() {
130 return RandomWords(RandomInt(10, 4000));
131 }
132
133 // Insert a batch of |batch_size| URLs, starting at pageid |page_id|.
134 void InsertURLBatch(Profile* profile,
135 int page_id,
136 int batch_size,
137 int types) {
138 HistoryService* history_service =
139 HistoryServiceFactory::GetForProfile(profile, Profile::EXPLICIT_ACCESS);
140
141 // Probability of following a link on the current "page"
142 // (vs randomly jumping to a new page).
143 const float kFollowLinkProbability = 0.85f;
144 // Probability of visiting a page we've visited before.
145 const float kRevisitLinkProbability = 0.1f;
146 // Probability of a URL being "good enough" to revisit.
147 const float kRevisitableURLProbability = 0.05f;
148 // Probability of a URL being the end of a redirect chain.
149 const float kRedirectProbability = 0.05f;
150
151 // A list of URLs that we sometimes revisit.
152 std::vector<GURL> revisit_urls;
153
154 // Scoping value for page IDs (required by the history service).
155 void* id_scope = reinterpret_cast<void*>(1);
156
157 scoped_ptr<SkBitmap> google_bitmap(
158 gfx::JPEGCodec::Decode(kGoogleThumbnail, sizeof(kGoogleThumbnail)));
159 scoped_ptr<SkBitmap> weewar_bitmap(
160 gfx::JPEGCodec::Decode(kWeewarThumbnail, sizeof(kWeewarThumbnail)));
161
162 printf("Inserting %d URLs...\n", batch_size);
163 GURL previous_url;
164 content::PageTransition transition = content::PAGE_TRANSITION_TYPED;
165 const int end_page_id = page_id + batch_size;
166 history::TopSites* top_sites = profile->GetTopSites();
167 for (; page_id < end_page_id; ++page_id) {
168 // Randomly decide whether this new URL simulates following a link or
169 // whether it's a jump to a new URL.
170 if (!previous_url.is_empty() && RandomFloat() < kFollowLinkProbability) {
171 transition = content::PAGE_TRANSITION_LINK;
172 } else {
173 previous_url = GURL();
174 transition = content::PAGE_TRANSITION_TYPED;
175 }
176
177 // Pick a URL, either newly at random or from our list of previously
178 // visited URLs.
179 GURL url;
180 if (!revisit_urls.empty() && RandomFloat() < kRevisitLinkProbability) {
181 // Draw a URL from revisit_urls at random.
182 url = revisit_urls[RandomInt(0, static_cast<int>(revisit_urls.size()))];
183 } else {
184 url = ConstructRandomURL();
185 }
186
187 // Randomly construct a redirect chain.
188 history::RedirectList redirects;
189 if (RandomFloat() < kRedirectProbability) {
190 const int redir_count = RandomInt(1, 4);
191 for (int i = 0; i < redir_count; ++i)
192 redirects.push_back(ConstructRandomURL());
193 redirects.push_back(url);
194 }
195
196 // Add all of this information to the history service.
197 history_service->AddPage(url, base::Time::Now(),
198 id_scope, page_id,
199 previous_url, redirects,
200 transition, history::SOURCE_BROWSED, true);
201 ThumbnailScore score(0.75, false, false);
202 history_service->SetPageTitle(url, ConstructRandomTitle());
203 if (types & FULL_TEXT)
204 history_service->SetPageContents(url, ConstructRandomPage());
205 if (types & TOP_SITES && top_sites) {
206 const SkBitmap& bitmap = (RandomInt(0, 2) == 0) ? *google_bitmap :
207 *weewar_bitmap;
208 gfx::Image image = gfx::Image::CreateFrom1xBitmap(bitmap);
209 top_sites->SetPageThumbnail(url, image, score);
210 }
211
212 previous_url = url;
213
214 if (revisit_urls.empty() || RandomFloat() < kRevisitableURLProbability)
215 revisit_urls.push_back(url);
216 }
217 }
218
219 int main(int argc, char* argv[]) {
220 CommandLine::Init(argc, argv);
221 base::EnableTerminationOnHeapCorruption();
222 base::AtExitManager exit_manager;
223 CommandLine* cl = CommandLine::ForCurrentProcess();
224
225 int types = 0;
226 if (cl->HasSwitch("top-sites"))
227 types |= TOP_SITES;
228 if (cl->HasSwitch("full-text"))
229 types |= FULL_TEXT;
230
231 // We require two arguments: urlcount and profiledir.
232 const CommandLine::StringVector& args = cl->GetArgs();
233 if (args.size() < 2) {
234 printf("usage: %s [--top-sites] [--full-text] <urlcount> "
235 "<profiledir>\n", argv[0]);
236 printf("\n --top-sites Generate thumbnails\n");
237 printf("\n --full-text Generate full text index\n");
238 return -1;
239 }
240
241 int url_count = 0;
242 base::StringToInt(args[0], &url_count);
243 base::FilePath dst_dir(args[1]);
244 if (!dst_dir.IsAbsolute()) {
245 base::FilePath current_dir;
246 file_util::GetCurrentDirectory(&current_dir);
247 dst_dir = current_dir.Append(dst_dir);
248 }
249 if (!file_util::CreateDirectory(dst_dir)) {
250 PLOG(ERROR) << "Unable to create directory " << dst_dir.value().c_str();
251 }
252
253 icu_util::Initialize();
254 // Copied from base/test/test_suite.cc.
255 #if defined(TOOLKIT_GTK)
256 gtk_init_check(&argc, &argv);
257 #endif
258
259 InitBrowserProcess initialize_browser_process;
260 chrome::RegisterPathProvider();
261 ui::RegisterPathProvider();
262 MessageLoopForUI message_loop;
263 content::BrowserThreadImpl ui_thread(BrowserThread::UI, &message_loop);
264 content::BrowserThreadImpl db_thread(BrowserThread::DB, &message_loop);
265 ResourceBundle::InitSharedInstanceWithLocale("en-US", NULL);
266 TestingProfile profile;
267 profile.CreateHistoryService(false, false);
268 if (types & TOP_SITES) {
269 profile.CreateTopSites();
270 profile.BlockUntilTopSitesLoaded();
271 }
272
273 srand(static_cast<unsigned int>(Time::Now().ToInternalValue()));
274
275 // The maximum number of URLs to insert into history in one batch.
276 const int kBatchSize = 2000;
277 int page_id = 0;
278 while (page_id < url_count) {
279 const int batch_size = std::min(kBatchSize, url_count - page_id);
280 InsertURLBatch(&profile, page_id, batch_size, types);
281 // Run all pending messages to give TopSites a chance to catch up.
282 message_loop.RunUntilIdle();
283 page_id += batch_size;
284 }
285
286 printf("Writing to disk\n");
287
288 profile.DestroyTopSites();
289 profile.DestroyHistoryService();
290
291 message_loop.RunUntilIdle();
292
293 file_util::FileEnumerator file_iterator(profile.GetPath(), false,
294 file_util::FileEnumerator::FILES);
295 base::FilePath path = file_iterator.Next();
296 while (!path.empty()) {
297 base::FilePath dst_file = dst_dir.Append(path.BaseName());
298 file_util::Delete(dst_file, false);
299 printf("Copying file %" PRFilePath " to "
300 "%" PRFilePath "\n", path.value().c_str(),
301 dst_file.value().c_str());
302 if (!file_util::CopyFile(path, dst_file)) {
303 PLOG(ERROR) << "Copying file failed";
304 return -1;
305 }
306 path = file_iterator.Next();
307 }
308
309 return 0;
310 }
OLDNEW
« no previous file with comments | « chrome/test/ui/ui_test.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698