utils/apidoc/mdn/crawl.js - Issue 1361163002: remove docgen remnants from repo, update CHANGELOG - Code Review

Chromium Code Reviews

chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out

(192)

My Issues | Starred Open | Closed | All

Side by Side Diff: utils/apidoc/mdn/crawl.js

Issue 1361163002: remove docgen remnants from repo, update CHANGELOG (Closed) Base URL: https://github.com/dart-lang/sdk.git@master

Patch Set: remove unused code Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « utils/apidoc/mdn/README.txt ('k') | utils/apidoc/mdn/data/dartIdl.json » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
	(Empty)
1 // TODO(jacobr): convert this file to Dart once Dart supports all of the

2 // nodejs functionality used here. For example, search for all occurrences of

3 // "http." and "fs."

4 var http = require('http');

5 var fs = require('fs');

6

// Make sure the directory that receives the crawled pages exists.
try {
  fs.mkdirSync('output/crawl');
} catch (e) {
  // It doesn't matter if the directory already exists. Any other failure
  // (missing parent directory, permissions, ...) would make every later write
  // fail, so surface it right away instead of silently swallowing it.
  if (e.code !== 'EEXIST') {
    throw e;
  }
}

// Array of the type names to crawl documentation for.
var domTypes = JSON.parse(fs.readFileSync('data/domTypes.json', 'utf8'));

// Maps each type name to the list of {index, link, title} entries that were
// scheduled for crawling; serialized to output/crawl/cache.json at the end of
// the script.
var cacheData = {};

16

/**
 * Downloads the Google web-cache copy of an MDN page and saves it as
 * output/crawl/<filename>.html.
 *
 * The request is asynchronous: this function returns as soon as the request
 * has been issued, and the file is written once the response has finished.
 *
 * @param {string} filename Base name (without extension) of the output file.
 * @param {string} link Absolute url of the page to fetch; must start with
 *     https://developer.mozilla.org/.
 * @throws {Error} If link does not start with the developer.mozilla.org
 *     prefix.
 */
function scrape(filename, link) {
  console.log(link);
  var prefix = 'https://developer.mozilla.org/';
  if (link.indexOf(prefix) !== 0) {
    throw new Error("Unexpected url: " + link);
  }
  // We crawl content from googleusercontent.com so we don't have to worry about
  // crawler politeness like we would have to if scraping developer.mozilla.org
  // directly.
  var options = {
    host: 'webcache.googleusercontent.com',
    path: "/search?q=cache:" + link,
    port: 80,
    method: 'GET'
  };

  var req = http.request(options, function(res) {
    res.setEncoding('utf8');
    var data = '';
    var written = false;

    res.on('data', function(d) {
      data += d;
    });
    // A response normally emits both 'end' and 'close'. Listening to both
    // still saves whatever was received if the connection is dropped early,
    // while the flag keeps the file from being written (and logged) twice.
    var onClose = function() {
      if (written) {
        return;
      }
      written = true;
      console.log("Writing crawl result for " + link);
      fs.writeFileSync("output/crawl/" + filename + ".html", data, 'utf8');
    };
    res.on('close', onClose);
    res.on('end', onClose);
  });

  // Attach the handler before the request is sent. The error is thrown from
  // an event handler, so the caller cannot catch it: a failed request aborts
  // the whole crawl.
  req.on('error', function(e) {
    throw new Error("Error " + e + " scraping " + link);
  });
  req.end();
}

56

// Hardcode the correct matching url for a few types where the search engine
// gets the wrong answer. A type listed here is crawled from this single url
// and its search results are ignored.
var hardcodedLinks = {
  'Screen': 'https://developer.mozilla.org/en/DOM/window.screen',
  'Text': 'https://developer.mozilla.org/en/DOM/Text',
  'Touch': 'https://developer.mozilla.org/en/DOM/Touch',
  'TouchEvent': 'https://developer.mozilla.org/en/DOM/TouchEvent',
  'webkitTouchEvent': 'https://developer.mozilla.org/en/DOM/TouchEvent',
  'WebkitTouchEvent': 'https://developer.mozilla.org/en/DOM/TouchEvent',
  'WebKitTouchEvent': 'https://developer.mozilla.org/en/DOM/TouchEvent',
  'HTMLSpanElement': 'https://developer.mozilla.org/en/HTML/Element/span',
  'HTMLPreElement': 'https://developer.mozilla.org/en/HTML/Element/pre',
  'HTMLFrameElement': 'https://developer.mozilla.org/en/HTML/Element/frame',
  'HTMLFrameSetElement': 'https://developer.mozilla.org/en/HTML/Element/frameset',
  'Geolocation': 'https://developer.mozilla.org/en/nsIDOMGeolocation',
  'Notification': 'https://developer.mozilla.org/en/DOM/notification',
  'IDBDatabase': 'https://developer.mozilla.org/en/IndexedDB/IDBDatabase'
};

var prefix = 'https://developer.mozilla.org/';
var notFoundPrefix = 'https://developer.mozilla.org/Article_not_found?uri=';

// For every type: read its search results, record the links to crawl in
// cacheData and kick off an asynchronous scrape for each of them.
for (var i = 0; i < domTypes.length; i++) {
  var type = domTypes[i];

  // Json containing the search results for the current type.
  var json = JSON.parse(
      fs.readFileSync("output/search/" + type + ".json", 'utf8'));
  if (!('items' in json)) {
    console.warn("No search results for " + type);
    continue;
  }
  var items = json['items'];

  var entry = [];
  cacheData[type] = entry;

  // hasOwnProperty keeps type names such as 'constructor' from matching
  // properties inherited from Object.prototype.
  if (Object.prototype.hasOwnProperty.call(hardcodedLinks, type)) {
    var hardcodedLink = hardcodedLinks[type];
    entry.push({index: 0, link: hardcodedLink, title: type});
    scrape(type + 0, hardcodedLink);
    continue;
  }

  for (var j = 0; j < items.length; j++) {
    var item = items[j];
    // Be optimistic and replace article not found links with links to where the
    // article should be.
    var link = item['link'];
    if (link.indexOf(notFoundPrefix) === 0) {
      link = prefix + link.substring(notFoundPrefix.length);
    }

    entry.push({index: j, link: link, title: item['title']});
    scrape(type + j, link);
  }
}

// Written synchronously right after the requests were issued: the index only
// lists what was scheduled, the page files themselves arrive asynchronously.
fs.writeFileSync('output/crawl/cache.json',
    JSON.stringify(cacheData, null, ' '), 'utf8');

OLD	NEW

« no previous file with comments | « utils/apidoc/mdn/README.txt ('k') | utils/apidoc/mdn/data/dartIdl.json » ('j') | no next file with comments »

Powered by Google App Engine

This is Rietveld 408576698