2018-10-12 11:41:11 +01:00
|
|
|
const fs = require('fs');
|
|
|
|
const sqlite3 = require('sqlite3').verbose();
|
2020-01-30 09:28:01 +00:00
|
|
|
const axios = require('axios');
|
2018-10-12 11:41:11 +01:00
|
|
|
|
|
|
|
// URL of the WhoTracks.me tracker database SQL dump that runScript() downloads.
const INPUT_SQL_URL = 'https://raw.githubusercontent.com/cliqz-oss/whotracks.me/master/whotracksme/data/assets/trackerdb.sql';
|
2018-10-14 21:24:11 +01:00
|
|
|
// Path of the JSON file that runScript() writes the extracted tracker data to.
const OUTPUT_PATH = 'whotracksme.json';
|
2018-10-12 11:41:11 +01:00
|
|
|
|
2020-01-30 09:28:01 +00:00
|
|
|
/**
 * Prepares one statement of the downloaded dump for execution.
 *
 * The statement is trimmed and, when it contains `CREATE TABLE`, every
 * `UNIQUE` keyword is removed from it.
 * NOTE(review): presumably this keeps the dump loadable when it contains
 * duplicate values - confirm against the upstream schema.
 *
 * @param {string} sql - A single SQL statement from the dump.
 * @returns {string} The statement to execute (may be empty).
 */
function transformToSqlite(sql) {
    const statement = sql.trim();
    return statement.includes('CREATE TABLE')
        ? statement.replace(/UNIQUE/g, '')
        : statement;
}

/**
 * Invokes `onRow` for every row returned by `sql`.
 *
 * Query errors are logged and otherwise ignored, so a failing query leaves
 * its part of the output empty instead of aborting the whole export.
 *
 * @param {object} db - The sqlite3 database to query.
 * @param {string} sql - The SELECT statement to run.
 * @param {function(object): void} onRow - Called once per result row.
 */
function eachRow(db, sql, onRow) {
    db.each(sql, (err, row) => {
        if (err) {
            console.error(err);
            return;
        }
        onRow(row);
    });
}

/**
 * Downloads the tracker database SQL dump from INPUT_SQL_URL, loads it into an
 * in-memory SQLite database, and writes the categories, trackers and tracker
 * domains it contains to OUTPUT_PATH as pretty-printed JSON.
 *
 * @returns {Promise<void>} Resolves once the JSON file has been written.
 *     Rejects when the download fails, the response is not text, the database
 *     cannot be closed, or the file cannot be written.
 */
async function runScript() {
    console.log('Downloading ' + INPUT_SQL_URL);
    const response = await axios.get(INPUT_SQL_URL);
    const trackersDbSql = response.data;
    if (typeof trackersDbSql !== 'string') {
        throw new TypeError('Expected SQL text from ' + INPUT_SQL_URL + ' but got ' + typeof trackersDbSql);
    }

    const whotracksme = {
        timeUpdated: new Date().toISOString(),
        categories: {},
        trackers: {},
        trackerDomains: {},
    };

    console.log('Initializing the in-memory trackers database');
    const db = new sqlite3.Database(':memory:');

    db.serialize(() => {
        // Statements are separated by a semicolon at the end of a line.
        for (const rawStatement of trackersDbSql.split(/;\s*$/gm)) {
            const sql = transformToSqlite(rawStatement);
            if (sql === '') {
                // The split leaves an empty fragment after the last statement.
                continue;
            }

            db.run(sql, (err) => {
                // Loading stays best-effort, but a rejected statement is reported
                // instead of being dropped without a trace.
                if (err) {
                    console.warn('Skipping SQL statement rejected by SQLite: ' + err.message);
                }
            });
        }

        eachRow(db, 'SELECT * FROM categories', (row) => {
            whotracksme.categories[row.id] = row.name;
        });

        // Companies are read before trackers so that a tracker without its own
        // website can fall back to the website of its company.
        const companies = {};
        eachRow(db, 'SELECT * FROM companies', (row) => {
            companies[row.id] = {
                id: row.id,
                name: row.name,
                website_url: row.website_url,
            };
        });

        eachRow(db, 'SELECT * FROM trackers', (row) => {
            const company = companies[row.company_id];
            let url = row.website_url;
            if (!url && company) {
                url = company.website_url;
            }

            whotracksme.trackers[row.id] = {
                name: row.name,
                categoryId: row.category_id,
                url,
            };
        });

        eachRow(db, 'SELECT * FROM tracker_domains', (row) => {
            whotracksme.trackerDomains[row.domain] = row.tracker;
        });
    });

    // The close callback fires only after the queued statements have run, so
    // awaiting it guarantees that `whotracksme` is fully populated.
    await new Promise((resolve, reject) => {
        db.close((err) => {
            if (err) {
                reject(err);
                return;
            }
            resolve();
        });
    });

    fs.writeFileSync(OUTPUT_PATH, JSON.stringify(whotracksme, null, 4));
    console.log('Trackers json file has been updated: ' + OUTPUT_PATH);
}
|
2018-10-12 11:41:11 +01:00
|
|
|
|
2020-01-30 09:28:01 +00:00
|
|
|
// Entry point: run the export, and on failure log the error and mark the
// process as failed instead of leaving the rejection unhandled.
runScript().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});
|