diff --git a/scripts/querylog/.gitignore b/scripts/querylog/.gitignore
new file mode 100644
index 00000000..fadd09d3
--- /dev/null
+++ b/scripts/querylog/.gitignore
@@ -0,0 +1,2 @@
+node_modules
+test/anonquerylog.json
\ No newline at end of file
diff --git a/scripts/querylog/README.md b/scripts/querylog/README.md
new file mode 100644
index 00000000..219d9581
--- /dev/null
+++ b/scripts/querylog/README.md
@@ -0,0 +1,10 @@
+# Helper tools to work with the Query log
+
+### Usage
+
+- `npm install` - Install the dependencies
+- `npm run anonymize <source> <dest>` - Reads querylog from the `<source>` and writes anonymized version to `<dest>`
+
+### Examples
+
+- `npm run anonymize test/querylog.json test/anonquerylog.json` - anonymizes the `test/querylog.json`.
\ No newline at end of file
diff --git a/scripts/querylog/anonymize.js b/scripts/querylog/anonymize.js
new file mode 100644
index 00000000..3aed2877
--- /dev/null
+++ b/scripts/querylog/anonymize.js
@@ -0,0 +1,222 @@
+const fs = require('fs');
+const path = require('path');
+const readline = require('readline');
+const dnsPacket = require('dns-packet');
+
+/**
+ * Decodes a base64 string into its ASCII text representation.
+ *
+ * @param {string} data - base64-encoded input.
+ * @returns {string} decoded text.
+ */
+const decodeBase64 = (data) => {
+    // Buffer.from replaces the deprecated (and unsafe) `new Buffer()` constructor
+    const buff = Buffer.from(data, 'base64');
+    return buff.toString('ascii');
+}
+
+/**
+ * Reads the file line by line and awaits the callback for every line in order.
+ *
+ * @param {string} source - path to the file to read.
+ * @param {function(string): Promise<void>} callback - invoked with each line.
+ * @returns {Promise<void>} resolves once every line has been processed.
+ */
+const processLineByLine = async (source, callback) => {
+    const fileStream = fs.createReadStream(source);
+
+    const rl = readline.createInterface({
+        input: fileStream,
+        // Treat "\r\n" as a single line break
+        crlfDelay: Infinity
+    });
+
+    for await (const line of rl) {
+        await callback(line);
+    }
+}
+
+/**
+ * Anonymizes a domain name (or a filtering rule) by replacing every ASCII
+ * letter with 'a'. Digits, dots and other characters are kept.
+ *
+ * @param {string} domain - value to anonymize.
+ * @returns {string} anonymized value of the same length.
+ */
+const anonDomain = (domain) => {
+    // Case-insensitive: DNS names may arrive in mixed case
+    return domain.replace(/[a-z]/gi, 'a');
+}
+
+/**
+ * Anonymizes an IP address by replacing every digit with '1'. Hex digits
+ * are covered as well, otherwise IPv6 groups like "25c8" would leak letters.
+ *
+ * @param {string} ip - textual IPv4 or IPv6 address.
+ * @returns {string} anonymized address; separators are kept.
+ */
+const anonIP = (ip) => {
+    return ip.replace(/[0-9a-f]/gi, '1');
+}
+
+/**
+ * Anonymizes a single resource record in place: its name and, when the data
+ * is an address or a plain string, its data.
+ *
+ * @param {object} record - record from a packet decoded by dns-packet.
+ */
+const anonRecord = (record) => {
+    record.name = anonDomain(record.name);
+
+    if (record.type === 'A' || record.type === 'AAAA') {
+        record.data = anonIP(record.data);
+    } else if (typeof record.data === 'string') {
+        record.data = anonDomain(record.data);
+    }
+    // NOTE: non-string data (buffers, structured objects) is left untouched
+}
+
+/**
+ * Anonymizes a base64-encoded DNS message.
+ *
+ * @param {string} answer - base64-encoded DNS packet.
+ * @returns {string} base64-encoded packet with names and addresses anonymized.
+ * @throws if the packet cannot be decoded or re-encoded.
+ */
+const anonAnswer = (answer) => {
+    const answerData = Buffer.from(answer, 'base64');
+    const packet = dnsPacket.decode(answerData, 0);
+
+    (packet.questions || []).forEach((q) => {
+        q.name = anonDomain(q.name);
+    });
+    // Authority and additional sections carry domain names too
+    ['answers', 'authorities', 'additionals'].forEach((section) => {
+        (packet[section] || []).forEach(anonRecord);
+    });
+
+    const anonData = dnsPacket.encode(packet);
+    return anonData.toString('base64');
+}
+
+/**
+ * Anonymizes one query log line (a JSON object serialized on a single line).
+ *
+ * @param {string} line - raw query log line.
+ * @returns {string|null} anonymized JSON, or null when the line is blank or
+ * cannot be processed.
+ */
+const anonLine = (line) => {
+    if (!line || !line.trim()) {
+        return null;
+    }
+
+    try {
+        const logItem = JSON.parse(line);
+
+        // Client address and question host; skip the field if it is absent
+        if (typeof logItem['IP'] === 'string') {
+            logItem['IP'] = anonIP(logItem['IP']);
+        }
+        if (typeof logItem['QH'] === 'string') {
+            logItem['QH'] = anonDomain(logItem['QH']);
+        }
+        // Anonymize "Answer" and "OrigAnswer" fields
+        if (logItem['Answer']) {
+            logItem['Answer'] = anonAnswer(logItem['Answer']);
+        }
+        if (logItem['OrigAnswer']) {
+            logItem['OrigAnswer'] = anonAnswer(logItem['OrigAnswer']);
+        }
+
+        // If Result is set, anonymize the "Rule" field
+        if (logItem['Result'] && logItem['Result']['Rule']) {
+            logItem['Result']['Rule'] = anonDomain(logItem['Result']['Rule']);
+        }
+
+        return JSON.stringify(logItem);
+    } catch (ex) {
+        console.error(`Failed to parse ${line}: ${ex} ${ex.stack}`);
+        return null;
+    }
+}
+
+/**
+ * Anonymizes every line of the source query log and writes it to dest.
+ *
+ * @param {string} source - path to the original query log.
+ * @param {string} dest - path of the anonymized log (overwritten).
+ * @returns {Promise<void>} resolves after the output is completely written.
+ * @throws if the destination cannot be written.
+ */
+const anon = async (source, dest) => {
+    const out = fs.createWriteStream(dest, {
+        flags: 'w',
+    });
+
+    // Rejects on the first write-side error (e.g. destination is not writable)
+    const outFailed = new Promise((resolve, reject) => {
+        out.once('error', reject);
+    });
+
+    const copy = async () => {
+        await processLineByLine(source, async (line) => {
+            const newLine = anonLine(line);
+            if (!newLine) {
+                return;
+            }
+            // Respect backpressure so large logs are not buffered in memory
+            if (!out.write(`${newLine}\n`)) {
+                await new Promise((resolve) => out.once('drain', resolve));
+            }
+        });
+        // Flush and close the file before reporting completion
+        await new Promise((resolve) => out.end(resolve));
+    };
+
+    try {
+        await Promise.race([copy(), outFailed]);
+    } catch (ex) {
+        out.destroy();
+        throw ex;
+    }
+}
+
+/**
+ * CLI entry point: `node anonymize.js <source> <dest>`.
+ *
+ * @returns {Promise<void>}
+ * @throws if arguments are missing, source does not exist, or writing fails.
+ */
+const main = async () => {
+    console.log('Start query log anonymization');
+
+    const source = process.argv[2];
+    const dest = process.argv[3];
+
+    if (!source || !dest) {
+        throw new Error('Usage: npm run anonymize <source> <dest>');
+    }
+
+    console.log(`Source: ${source}`);
+    console.log(`Destination: ${dest}`);
+
+    if (!fs.existsSync(source)) {
+        throw new Error(`${source} not found`);
+    }
+    // Opening dest truncates it, which would wipe the input before it is read
+    if (path.resolve(source) === path.resolve(dest)) {
+        throw new Error('Source and destination must be different files');
+    }
+
+    await anon(source, dest);
+
+    console.log('Finished query log anonymization');
+}
+
+main().catch((ex) => {
+    console.error(ex);
+    // Report the failure to the caller instead of exiting with success
+    process.exitCode = 1;
+});
+
diff --git a/scripts/querylog/package-lock.json b/scripts/querylog/package-lock.json
new file mode 100644
index 00000000..0cf08773
--- /dev/null
+++ b/scripts/querylog/package-lock.json
@@ -0,0 +1,21 @@
+{
+  "name": "querylog",
+
"version": "0.1.0", + "lockfileVersion": 1, + "requires": true, + "dependencies": { + "dns-packet": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/dns-packet/-/dns-packet-5.2.1.tgz", + "integrity": "sha512-JHj2yJeKOqlxzeuYpN1d56GfhzivAxavNwHj9co3qptECel27B1rLY5PifJAvubsInX5pGLDjAHuCfCUc2Zv/w==", + "requires": { + "ip": "^1.1.5" + } + }, + "ip": { + "version": "1.1.5", + "resolved": "https://registry.npmjs.org/ip/-/ip-1.1.5.tgz", + "integrity": "sha1-vd7XARQpCCjAoDnnLvJfWq7ENUo=" + } + } +} diff --git a/scripts/querylog/package.json b/scripts/querylog/package.json new file mode 100644 index 00000000..3f550ca1 --- /dev/null +++ b/scripts/querylog/package.json @@ -0,0 +1,10 @@ +{ + "name": "querylog", + "version": "0.1.0", + "scripts": { + "anonymize": "node anonymize.js" + }, + "dependencies": { + "dns-packet": "^5.2.1" + } +} diff --git a/scripts/querylog/test/querylog.json b/scripts/querylog/test/querylog.json new file mode 100644 index 00000000..a8fa71f4 --- /dev/null +++ b/scripts/querylog/test/querylog.json @@ -0,0 +1,5 @@ +{"IP":"192.168.0.0","T":"2020-08-31T16:43:37.724457416+03:00","QH":"mtalk.google.com","QT":"A","QC":"IN","CP":"","Answer":"rm+BgAABAAIAAAAABW10YWxrBmdvb2dsZQNjb20AAAEAAcAMAAUAAQAAnwUAEQxtb2JpbGUtZ3RhbGsBbMASwC4AAQABAAAAWQAEjvobvA==","Result":{},"Elapsed":48051030,"Upstream":"tls://dns-unfiltered.adguard.com:853"} +{"IP":"127.0.0.1","T":"2020-09-09T13:56:35.532956+03:00","QH":"example.org","QT":"AAAA","QC":"IN","CP":"","Answer":"mrOBgAABAAEAAAAAB2V4YW1wbGUDb3JnAAAcAAHADAAcAAEAAKjAABAmBigAAiAAAQJIGJMlyBlG","Result":{},"Elapsed":132164793,"Upstream":"https://dns10.quad9.net:443/dns-query"} +{"IP":"127.0.0.1","T":"2020-09-09T13:56:54.255453+03:00","QH":"ad.doubleclick.net","QT":"A","QC":"IN","CP":"","Answer":"wqmBgAABAAIAAAAAAmFkC2RvdWJsZWNsaWNrA25ldAAAAQABwAwABQABAACTawAJBGRhcnQBbMAPwDAAAQABAAAA5gAErNkQhg==","Result":{},"Elapsed":48131793,"Upstream":"https://dns10.quad9.net:443/dns-query"} 
+{"IP":"127.0.0.1","T":"2020-09-09T13:57:07.495948+03:00","QH":"ad.doubleclick.net","QT":"A","QC":"IN","CP":"","Answer":"JP2BhQABAAAAAAAAAmFkC2RvdWJsZWNsaWNrA25ldAAAAQAB","Result":{"IsFiltered":true,"Reason":3,"Rule":"||ad.doubleclick.net^","FilterID":1},"Elapsed":369806} +{"IP":"192.168.0.15","T":"2020-01-17T17:39:40.306375885+03:00","QH":"push.apple.com","QT":"TXT","QC":"IN","Answer":"8AWBgAABAAEAAAABBHB1c2gFYXBwbGUDY29tAAAQAAHADAAQAAEAABOsAAkIY291bnQ9NTAAACkFrAAAAAAAQAAMADwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=","Result":{},"Elapsed":30271893,"Upstream":"https://cloudflare-dns.com:443/dns-query"} \ No newline at end of file