import fs from "fs";
import assert from "assert";
import UAParser from "ua-parser-js";
import readline from "readline";

/**
 * Sequential parser for a single access-log line.
 *
 * The patterns used by `parse()` expect a line laid out as:
 *
 *   ip:port:conn_id:conn_count [time] "url" code bytes "refer" "ua"
 *
 * Call `start(line)` to load a line, then `parse()` to consume it.
 */
class Parser {
  constructor() {
    // The full line most recently handed to start().
    this.text = "";
    // The not-yet-consumed tail of that line.
    this.next = "";
  }

  /**
   * Consume `reg` from the front of the unparsed remainder.
   *
   * @param {RegExp} reg - pattern that must match at the very start of `this.next`.
   * @returns {string|string[]} the array of capture groups when the pattern
   *   has any, otherwise the whole matched text as a string.
   * @throws {Error} when the pattern does not match at position 0.
   */
  match(reg) {
    const found = this.next.match(reg);

    // String.prototype.match returns the leftmost match, so a non-zero index
    // means nothing matches at the front. Slicing by the match length in that
    // case would drop the wrong characters and misalign every later field.
    if (found === null || found.index !== 0) {
      throw new Error(`Expected ${reg} at: ${this.next}`);
    }

    const element = found.length > 1 ? found.slice(1) : found[0];
    this.next = this.next.slice(found[0].length);
    return element;
  }

  /**
   * Load a new line to be parsed.
   *
   * @param {string} line - raw log line.
   */
  start(line) {
    this.text = line;
    this.next = line;
  }

  /** Consume one or more spaces; throws (via match) when none are present. */
  ws() {
    this.match(/ +/);
  }

  /**
   * Parse the line loaded by `start()`.
   *
   * @returns {{ip: string, conn_id: number, conn_count: number, time: Date,
   *   url: string, code: number, size: number, refer: (string|undefined),
   *   ua: object}} the parsed fields. `code` and `size` are NaN when the log
   *   holds "-"; `refer` is undefined when the log holds "-".
   * @throws {Error} "Parsing Error: <unparsed remainder>" when any field
   *   fails to match; the underlying error is attached as `cause`.
   */
  parse() {
    try {
      // The port is matched but not part of the result.
      const [ip, , conn_id, conn_count] =
        this.match(/([0-9.]+):([0-9]+):([0-9]+):([0-9]+)/);
      this.ws();

      // Lazy quantifier: stop at the first "]" so a bracket later in the line
      // (URL, referrer, user agent) is not swallowed into the timestamp.
      const [time] = this.match(/\[(.*?)\]/);
      this.ws();

      const [url] = this.match(/"(.+?)"/);
      this.ws();

      // No capture groups here, so match() returns the matched text itself.
      const code = this.match(/-|[0-9]+/);
      this.ws();

      const bytes = this.match(/-|[0-9]+/);
      this.ws();

      const [refer] = this.match(/"(.+?)"/);
      this.ws();

      const [ua] = this.match(/"(.+?)"/);

      return {
        ip,
        conn_id: Number.parseInt(conn_id, 10),
        conn_count: Number.parseInt(conn_count, 10),
        // NOTE(review): relies on Date parsing the log's timestamp format;
        // yields an Invalid Date if the engine does not recognise it — confirm.
        time: new Date(time),
        url,
        code: Number.parseInt(code, 10),
        size: Number.parseInt(bytes, 10),
        refer: refer === "-" ? undefined : refer,
        ua: UAParser(ua),
      };
    } catch (error) {
      throw new Error(`Parsing Error: ${ this.next }`, { cause: error });
    }
  }
}

const logPath = process.argv[2];
if (logPath === undefined) {
  console.error(`Usage: node ${process.argv[1]} <access-log-file>`);
  process.exit(1);
}

const read_stream = fs.createReadStream(logPath);

const rl = readline.createInterface({
  input: read_stream,
  crlfDelay: Infinity,
});

const parser = new Parser();

// ip -> ordered list of [time, url, refer] for that address.
const chains = new Map();

// URLs excluded from the chains: static assets (by extension), auth checks,
// API calls carrying a query string, and URLs ending in "-".
// The extension dots are escaped so that only a literal ".js", ".css", ...
// suffix matches, not any URL that merely ends in those letters.
const skip = /(authcheck\/?|-|\/api\/.*\?.*|\.(svg|webmanifest|js|css|png|txt|woff|jpg|mp4|torrent|ico|html|map))$/;

for await (const line of rl) {
  parser.start(line);

  try {
    const data = parser.parse();

    // NOTE(review): ua-parser-js appears to always return an `os` object, even
    // for unrecognised agents, so `data.ua.os` is probably always truthy here.
    // Confirm whether `data.ua.os.name` was intended.
    if (data.ua.os && data.code === 200 && !skip.test(data.url)) {
      const chain = chains.get(data.ip) ?? [];
      chain.push([data.time, data.url, data.refer]);
      chains.set(data.ip, chain);
    }
  } catch (err) {
    // Blank lines are expected to fail parsing; stay quiet about those.
    if (line !== "") console.error(err);
  }
}

// Rendered chain text -> number of addresses that produced exactly that chain.
const uniques = new Map();

for (const chain of chains.values()) {
  // Referrer of the first recorded request for this address.
  const first = chain[0][2];

  const urls = chain.map(([, url, ref]) => {
    // Annotate a step with its referrer only when the referrer is set, differs
    // from the entry referrer, and does not mention the site's own domain.
    const annotate =
      ref && ref !== first && !ref.includes("learnjsthehardway.com");
    return annotate ? `${url}(${ref})` : url;
  });

  // Collapse runs of identical consecutive entries.
  const collapsed = urls.filter(
    (entry, index) => index === 0 || entry !== urls[index - 1],
  );

  let full = collapsed.join(" ");
  if (first) {
    full = `[${first}] ${full}`;
  }

  uniques.set(full, (uniques.get(full) ?? 0) + 1);
}

// Most frequent chains first.
const sorted = [...uniques.entries()];
sorted.sort((a, b) => b[1] - a[1]);

for (const [url, count] of sorted) {
  console.log(count, url);
}