| OLD | NEW |
| 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 1 const fs = require('fs'); | 4 const fs = require('fs'); |
| 2 | 5 |
| 3 /* | 6 /* |
| 4 How to use: | 7 How to use: |
| 5 1) Get a dump of the data in CSV format and name it 3pas.csv in the same directory as this script. | 8 1) Get a dump of the data in CSV format and name it 3pas.csv in the same directory as this script. |
| 6 2) Header fields in the CSV will be used as keys when destructuring into JSON objects [i.e.: top row data should not have spaces or special chars] | 9 2) Header fields in the CSV will be used as keys when destructuring into JSON objects [i.e.: top row data should not have spaces or special chars] |
| 7 3) The two important column names are: 'name_legal_product' and 'domain'. | 10 3) The two important column names are: 'name_legal_product' and 'domain'. |
| 8 4) There may not be a header named 'prefix'. | 11 4) There may not be a header named 'prefix'. |
| 9 5) 'name_legal_product' will have its data cleaned up a bit, so be prepared for it to change. | 12 5) 'name_legal_product' will have its data cleaned up a bit, so be prepared for it to change. |
| 10 6) This script tries to de-duplicate any data, so be prepared for many entries to go away if it finds a shorter one. | 13 6) This script tries to de-duplicate any data, so be prepared for many entries to go away if it finds a shorter one. |
| 11 7) This script will output a JavaScript file in the product_registry's data format. | 14 7) This script will output a JavaScript file in the product_registry's data format. |
| 12 */ | 15 */ |
| 13 | 16 |
| 14 /* | 17 /* |
| 15 * Configurable variables. You may need to tweak these to be compatible with | 18 * Configurable variables. You may need to tweak these to be compatible with |
| 16 * the server-side, but the defaults work in most cases. | 19 * the server-side, but the defaults work in most cases. |
| 17 */ | 20 */ |
| 18 const hexcase = 0; /* hex output format. 0 - lowercase; 1 - uppercase */ | 21 const hexcase = 0; /* hex output format. 0 - lowercase; 1 - uppercase */ |
| 19 const b64pad = '='; /* base-64 pad character. "=" for strict RFC compliance */ | 22 const b64pad = '='; /* base-64 pad character. "=" for strict RFC compliance */ |
| 20 const chrsz = 8; /* bits per input character. 8 - ASCII; 16 - Unicode */ | 23 const chrsz = 8; /* bits per input character. 8 - ASCII; 16 - Unicode */ |
| 21 | 24 |
| 25 const typeClassifications = new Map([ |
| 26 ['cdn_provider', 'CDN'], ['cdn_commercial_owner', 'CDN'], ['cdn_creative_agenc
y', 'CDN'], ['ad_blocking', 'Ad'], |
| 27 ['ad_exchange', 'Ad'], ['ad_server_ad_network', 'Ad'], ['ad_server_advertiser'
, 'Ad'], ['demand_side_platform', 'Ad'], |
| 28 ['vast_provider', 'Ad'], ['data_management_platform', 'Tracking'], ['research_
analytics', 'Tracking'], |
| 29 ['research_verification', 'Tracking'], ['research_brand_lift', 'Tracking'] |
| 30 ]); |
| 31 |
| 22 var data = fs.readFileSync('3pas.csv', 'utf8'); | 32 var data = fs.readFileSync('3pas.csv', 'utf8'); |
| 23 var headerLine = data.split('\n', 1)[0]; | 33 var headerLine = data.split('\n', 1)[0]; |
| 24 data = data.substr(headerLine.length); | 34 data = data.substr(headerLine.length); |
| 25 var headerLineOrigLength = headerLine.length; | 35 var headerLineOrigLength = headerLine.length; |
| 26 | 36 |
| 27 var columnNames = Array.from(csvUnmarshaller(headerLine)).map(v => v[0]); | 37 var columnNames = Array.from(csvUnmarshaller(headerLine)).map(v => v[0]); |
| 28 var lineObjs = []; | 38 var lineObjs = []; |
| 29 | 39 |
| 30 var marshaller = csvUnmarshaller(data, 2); | 40 var marshaller = csvUnmarshaller(data, 2); |
| 31 var lineObj = {}; | 41 var lineObj = {}; |
| (...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 107 if (!prefixMap) { | 117 if (!prefixMap) { |
| 108 prefixMap = new Map(); | 118 prefixMap = new Map(); |
| 109 mapOfSubdomains.set(lineObj.domain, prefixMap); | 119 mapOfSubdomains.set(lineObj.domain, prefixMap); |
| 110 } | 120 } |
| 111 if (prefixMap.has(prefix)) | 121 if (prefixMap.has(prefix)) |
| 112 console.log('Problem with: ', domain, lineObj.domain); | 122 console.log('Problem with: ', domain, lineObj.domain); |
| 113 prefixMap.set(prefix, lineObj); | 123 prefixMap.set(prefix, lineObj); |
| 114 } | 124 } |
| 115 | 125 |
| 116 var outputProducts = []; | 126 var outputProducts = []; |
| 127 var outputTypes = []; |
| 117 var outputObj = new Map(); | 128 var outputObj = new Map(); |
| 118 for (var [baseDomain, subdomains] of map) { | 129 for (var [baseDomain, subdomains] of map) { |
| 119 for (var prefixes of subdomains.values()) { | 130 for (var prefixes of subdomains.values()) { |
| 120 SKIP_ENTRY: for (var lineObj of prefixes.values()) { | 131 SKIP_ENTRY: for (var lineObj of prefixes.values()) { |
| 121 var prefix = lineObj.prefix; | 132 var prefix = lineObj.prefix; |
| 122 var wildLineObj = prefixes.get('*'); | 133 var wildLineObj = prefixes.get('*'); |
| 123 if (wildLineObj && prefix !== '*') { | 134 if (wildLineObj && prefix !== '*') { |
| 124 if (wildLineObj.name_legal_product === lineObj.name_legal_product) { | 135 if (wildLineObj.name_legal_product === lineObj.name_legal_product) { |
| 125 // Skip entry, since wild card is there and already in table. | 136 // Skip entry, since wild card is there and already in table. |
| 126 continue SKIP_ENTRY; | 137 continue SKIP_ENTRY; |
| (...skipping 20 matching lines...) Expand all Loading... |
| 147 continue SKIP_ENTRY; | 158 continue SKIP_ENTRY; |
| 148 } | 159 } |
| 149 } | 160 } |
| 150 previousDomainPart = domainParts.shift(); | 161 previousDomainPart = domainParts.shift(); |
| 151 } | 162 } |
| 152 var outputPart = outputObj.get(fullSubdomain); | 163 var outputPart = outputObj.get(fullSubdomain); |
| 153 if (!outputPart) { | 164 if (!outputPart) { |
| 154 outputPart = {hash: hex_sha1(fullSubdomain).substr(0, 16), prefixes: {}}
; | 165 outputPart = {hash: hex_sha1(fullSubdomain).substr(0, 16), prefixes: {}}
; |
| 155 outputObj.set(fullSubdomain, outputPart); | 166 outputObj.set(fullSubdomain, outputPart); |
| 156 } | 167 } |
| 157 outputPart.prefixes[lineObj.prefix] = registerOutputProduct(lineObj.name_l
egal_product); | 168 outputPart.prefixes[lineObj.prefix] = registerOutputProduct(lineObj.name_l
egal_product, lineObj.type_vendor); |
| 158 } | 169 } |
| 159 } | 170 } |
| 160 } | 171 } |
| 161 | 172 |
| 162 console.log( | 173 console.log( |
| 163 '// Copyright 2017 The Chromium Authors. All rights reserved.\n' + | 174 '// Copyright 2017 The Chromium Authors. All rights reserved.\n' + |
| 164 '// Use of this source code is governed by a BSD-style license that can be\n
' + | 175 '// Use of this source code is governed by a BSD-style license that can be\n
' + |
| 165 '// found in the LICENSE file.\n' + | 176 '// found in the LICENSE file.\n' + |
| 166 '// clang-format off\n' + | 177 '// clang-format off\n' + |
| 167 '/* eslint-disable */\n' + | 178 '/* eslint-disable */\n' + |
| 168 'ProductRegistry.register(['); | 179 'ProductRegistry.register(['); |
| 180 if (outputTypes.length) { |
| 181 var data = JSON.stringify(outputTypes).replace(/","/g, '",\n "'); |
| 182 console.log(' ' + data.substring(1, data.length - 1)); |
| 183 } |
| 184 console.log('],'); |
| 185 console.log('['); |
| 169 var data = JSON.stringify(outputProducts).replace(/","/g, '",\n "'); | 186 var data = JSON.stringify(outputProducts).replace(/","/g, '",\n "'); |
| 170 console.log(' ' + data.substring(1, data.length - 1)); | 187 console.log(' ' + data.substring(1, data.length - 1)); |
| 171 console.log('],'); | 188 console.log('],'); |
| 172 console.log('['); | 189 console.log('['); |
| 173 var outputObjArray = Array.from(outputObj.values()); | 190 var outputObjArray = Array.from(outputObj.values()); |
| 174 for (var i = 0; i < outputObjArray.length; i++) { | 191 for (var i = 0; i < outputObjArray.length; i++) { |
| 175 var obj = outputObjArray[i]; | 192 var obj = outputObjArray[i]; |
| 176 var lineEnding = (i === outputObjArray.length - 1) ? '' : ','; | 193 var lineEnding = (i === outputObjArray.length - 1) ? '' : ','; |
| 177 var comments = []; | 194 var comments = []; |
| 178 for (var prefix in obj.prefixes) | 195 for (var prefix in obj.prefixes) { |
| 179 comments.push('[' + outputProducts[obj.prefixes[prefix]] + ']'); | 196 var typeName = outputTypes[obj.prefixes[prefix].type]; |
| 197 if (!typeName) |
| 198 typeName = ''; |
| 199 else |
| 200 typeName = ':' + typeName; |
| 201 comments.push('[' + outputProducts[obj.prefixes[prefix].product] + typeName
+ ']'); |
| 202 } |
| 180 console.log(' ' + JSON.stringify(obj) + lineEnding + ' // ' + comments.join('
')); | 203 console.log(' ' + JSON.stringify(obj) + lineEnding + ' // ' + comments.join('
')); |
| 181 } | 204 } |
| 182 console.log(']);'); | 205 console.log(']);'); |
| 183 | 206 |
| 184 | 207 |
| 185 // items.forEach(lineObj => console.log(lineObj.name_legal_product.padStart(50),
lineObj.domain.padStart(30))); | 208 // items.forEach(lineObj => console.log(lineObj.name_legal_product.padStart(50),
lineObj.domain.padStart(30))); |
| 186 // console.log("With *: ", items.filter(v => v.domain.indexOf('*') !== -1).lengt
h); | 209 // console.log("With *: ", items.filter(v => v.domain.indexOf('*') !== -1).lengt
h); |
| 187 // console.log("Total: ", items.length); | 210 // console.log("Total: ", items.length); |
| 188 | 211 |
| 189 | 212 |
| 190 | 213 |
| 191 // Linear but meh. | 214 // Linear but meh. |
| 192 function registerOutputProduct(name) { | 215 function registerOutputProduct(name, type) { |
| 193 var index = outputProducts.indexOf(name); | 216 var index = outputProducts.indexOf(name); |
| 217 var typeIndex = registerOutputType(type); |
| 218 var outObj = {product: index}; |
| 194 if (index === -1) { | 219 if (index === -1) { |
| 195 outputProducts.push(name); | 220 outputProducts.push(name); |
| 196 return outputProducts.length - 1; | 221 outObj.product = outputProducts.length - 1; |
| 222 } |
| 223 if (typeIndex !== -1) |
| 224 outObj.type = typeIndex; |
| 225 return outObj; |
| 226 } |
| 227 |
| 228 function registerOutputType(type) { |
| 229 var name = typeClassifications.get(type); |
| 230 if (!name) |
| 231 return -1; |
| 232 var index = outputTypes.indexOf(name); |
| 233 if (index === -1) { |
| 234 outputTypes.push(name); |
| 235 return outputTypes.length - 1; |
| 197 } | 236 } |
| 198 return index; | 237 return index; |
| 199 } | 238 } |
| 200 | 239 |
| 201 function* csvUnmarshaller(data, lineOffset) { | 240 function* csvUnmarshaller(data, lineOffset) { |
| 202 var origLen = data.length; | 241 var origLen = data.length; |
| 203 var colLength = 0; | 242 var colLength = 0; |
| 204 var lineNo = lineOffset || 1; | 243 var lineNo = lineOffset || 1; |
| 205 while (data.length) { | 244 while (data.length) { |
| 206 var colData; | 245 var colData; |
| (...skipping 213 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 420 ((binarray[i + 2 >> 2] >> 8 * (3 - (i + 2) % 4)) & 0xFF); | 459 ((binarray[i + 2 >> 2] >> 8 * (3 - (i + 2) % 4)) & 0xFF); |
| 421 for (var j = 0; j < 4; j++) { | 460 for (var j = 0; j < 4; j++) { |
| 422 if (i * 8 + j * 6 > binarray.length * 32) | 461 if (i * 8 + j * 6 > binarray.length * 32) |
| 423 str += b64pad; | 462 str += b64pad; |
| 424 else | 463 else |
| 425 str += tab.charAt((triplet >> 6 * (3 - j)) & 0x3F); | 464 str += tab.charAt((triplet >> 6 * (3 - j)) & 0x3F); |
| 426 } | 465 } |
| 427 } | 466 } |
| 428 return str; | 467 return str; |
| 429 } | 468 } |
| OLD | NEW |