Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(192)

Side by Side Diff: utils/apidoc/mdn/crawl.js

Issue 1361163002: remove docgen remnants from repo, update CHANGELOG (Closed) Base URL: https://github.com/dart-lang/sdk.git@master
Patch Set: remove unused code Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « utils/apidoc/mdn/README.txt ('k') | utils/apidoc/mdn/data/dartIdl.json » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // TODO(jacobr): convert this file to Dart once Dart supports all of the
2 // nodejs functionality used here. For example, search for all occurrences of
3 // "http." and "fs."
4 var http = require('http');
5 var fs = require('fs');
6
// Create the directory that crawl results are written into. The only failure
// that is safe to ignore is the directory already existing; anything else
// (missing parent directory, permissions, ...) would make every later write
// fail, so it is surfaced immediately.
try {
  fs.mkdirSync('output/crawl');
} catch (e) {
  if (!e || e.code !== 'EEXIST') {
    throw e;
  }
}

// Parsed contents of data/domTypes.json; iterated by index as a list of type
// names by the crawl loop.
var domTypes = JSON.parse(fs.readFileSync('data/domTypes.json', 'utf8'));

// Maps each type name to the list of {index, link, title} entries that were
// scheduled for scraping; serialized to output/crawl/cache.json at the end.
var cacheData = {};
16
/**
 * Fetches Google's cached copy of an MDN page and writes the response body to
 * output/crawl/<filename>.html.
 *
 * The request is asynchronous: this function returns as soon as the request
 * has been issued, and the file is written once the response finishes.
 *
 * @param {string} filename Base name (without extension) of the output file.
 * @param {string} link Page URL; must start with
 *     'https://developer.mozilla.org/'.
 * @throws {Error} Synchronously, if link does not start with the MDN prefix.
 *     A request failure is also raised as an Error, but from inside the
 *     request's 'error' handler, so callers cannot catch it.
 */
function scrape(filename, link) {
  console.log(link);
  var prefix = 'https://developer.mozilla.org/';
  if (link.indexOf(prefix) !== 0) {
    throw new Error("Unexpected url: " + link);
  }
  // We crawl content from googleusercontent.com so we don't have to worry about
  // crawler politeness like we would have to if scraping developer.mozilla.org
  // directly.
  var options = {
    host: 'webcache.googleusercontent.com',
    path: "/search?q=cache:" + link,
    port: 80,
    method: 'GET'
  };

  var req = http.request(options, function(res) {
    res.setEncoding('utf8');
    var data = '';
    var written = false;

    res.on('data', function(d) {
      data += d;
    });

    // Both 'end' and 'close' are listened for so the result is saved whichever
    // one the response emits; the flag keeps the file from being written (and
    // logged) twice when both fire.
    var onClose = function() {
      if (written) {
        return;
      }
      written = true;
      console.log("Writing crawl result for " + link);
      fs.writeFileSync("output/crawl/" + filename + ".html", data, 'utf8');
    };
    res.on('close', onClose);
    res.on('end', onClose);
  });

  // Attach the error handler before the request is flushed.
  req.on('error', function(e) {
    throw new Error("Error " + e + " scraping " + link);
  });
  req.end();
}
56
// Hardcoded MDN pages for the few types where the search engine gets the
// wrong answer. Keys are type names as they appear in domTypes.
var linkOverrides = {
  'Screen': 'https://developer.mozilla.org/en/DOM/window.screen',
  'Text': 'https://developer.mozilla.org/en/DOM/Text',
  'Touch': 'https://developer.mozilla.org/en/DOM/Touch',
  'TouchEvent': 'https://developer.mozilla.org/en/DOM/TouchEvent',
  'webkitTouchEvent': 'https://developer.mozilla.org/en/DOM/TouchEvent',
  'WebkitTouchEvent': 'https://developer.mozilla.org/en/DOM/TouchEvent',
  'WebKitTouchEvent': 'https://developer.mozilla.org/en/DOM/TouchEvent',
  'HTMLSpanElement': 'https://developer.mozilla.org/en/HTML/Element/span',
  'HTMLPreElement': 'https://developer.mozilla.org/en/HTML/Element/pre',
  'HTMLFrameElement': 'https://developer.mozilla.org/en/HTML/Element/frame',
  'HTMLFrameSetElement':
      'https://developer.mozilla.org/en/HTML/Element/frameset',
  'Geolocation': 'https://developer.mozilla.org/en/nsIDOMGeolocation',
  'Notification': 'https://developer.mozilla.org/en/DOM/notification',
  'IDBDatabase': 'https://developer.mozilla.org/en/IndexedDB/IDBDatabase'
};

var mdnPrefix = 'https://developer.mozilla.org/';
var notFoundPrefix = 'https://developer.mozilla.org/Article_not_found?uri=';

// For every DOM type, read its saved search results, record which links are
// going to be crawled in cacheData, and kick off a scrape for each link.
for (var i = 0; i < domTypes.length; i++) {
  var type = domTypes[i];

  // Json containing the search results for the current type.
  var searchResults =
      JSON.parse(fs.readFileSync("output/search/" + type + ".json", 'utf8'));
  if (!('items' in searchResults)) {
    console.warn("No search results for " + type);
    continue;
  }
  var items = searchResults['items'];

  var entry = [];
  cacheData[type] = entry;

  // hasOwnProperty keeps type names such as "constructor" from matching
  // properties inherited from Object.prototype.
  if (Object.prototype.hasOwnProperty.call(linkOverrides, type)) {
    var overrideLink = linkOverrides[type];
    entry.push({index: 0, link: overrideLink, title: type});
    scrape(type + 0, overrideLink);
    continue;
  }

  for (var j = 0; j < items.length; j++) {
    var item = items[j];
    // Be optimistic and replace article not found links with links to where
    // the article should be.
    var link = item['link'];
    if (link.indexOf(notFoundPrefix) === 0) {
      link = mdnPrefix + link.substr(notFoundPrefix.length);
    }

    entry.push({index: j, link: link, title: item['title']});
    scrape(type + j, link);
  }
}

// cacheData is fully populated synchronously by the loop above, so it can be
// written out without waiting for the (asynchronous) scrapes to finish.
fs.writeFileSync('output/crawl/cache.json',
                 JSON.stringify(cacheData, null, ' '), 'utf8');
OLDNEW
« no previous file with comments | « utils/apidoc/mdn/README.txt ('k') | utils/apidoc/mdn/data/dartIdl.json » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698