|
|
|
@ -9,13 +9,20 @@ class Parser { |
|
|
|
|
this.next = ""; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
match(reg) { |
|
|
|
|
match(reg, consume=true) { |
|
|
|
|
const n = this.next.match(reg); |
|
|
|
|
|
|
|
|
|
const element = n.length > 1 ? n.slice(1) : n[0]; |
|
|
|
|
if(n === null) { |
|
|
|
|
return undefined; |
|
|
|
|
} else { |
|
|
|
|
const element = n.length > 1 ? n.slice(1) : n[0]; |
|
|
|
|
|
|
|
|
|
if(consume) { |
|
|
|
|
this.next = this.next.slice(n[0].length); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
this.next = this.next.slice(n[0].length); |
|
|
|
|
return element; |
|
|
|
|
return element; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
start(line) { |
|
|
|
@ -23,122 +30,195 @@ class Parser { |
|
|
|
|
this.next = line; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
ws() { |
|
|
|
|
this.match(/ +/); |
|
|
|
|
ws(consume=true) { |
|
|
|
|
return this.match(/ +/, consume); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
parse() { |
|
|
|
|
try { |
|
|
|
|
const [ip, port, conn_id, conn_count] = this.match(/([0-9.]+):([0-9]+):([0-9]+):([0-9]+)/); |
|
|
|
|
parse_new_log(ip) { |
|
|
|
|
const [port, conn_id, conn_count] = this.match(/^([0-9]+):([0-9]+):([0-9]+)/); |
|
|
|
|
|
|
|
|
|
this.ws(); |
|
|
|
|
this.ws(); |
|
|
|
|
|
|
|
|
|
const [ time ] = this.match(/\[(.*)\]/); |
|
|
|
|
const [ time ] = this.match(/\[(.*)\]/); |
|
|
|
|
|
|
|
|
|
this.ws(); |
|
|
|
|
this.ws(); |
|
|
|
|
|
|
|
|
|
const [ url ] = this.match(/"(.+?)"/); |
|
|
|
|
const [ full_url ] = this.match(/"(.+?)"/); |
|
|
|
|
|
|
|
|
|
this.ws(); |
|
|
|
|
const [url, params] = full_url.split("?"); |
|
|
|
|
|
|
|
|
|
const code = this.match(/\-|[0-9]+/); |
|
|
|
|
this.ws(); |
|
|
|
|
|
|
|
|
|
this.ws(); |
|
|
|
|
const code = this.match(/\-|[0-9]+/); |
|
|
|
|
|
|
|
|
|
const bytes = this.match(/\-|[0-9]+/); |
|
|
|
|
this.ws(); |
|
|
|
|
|
|
|
|
|
this.ws(); |
|
|
|
|
const bytes = this.match(/\-|[0-9]+/); |
|
|
|
|
|
|
|
|
|
const [refer] = this.match(/"(.+?)"/); |
|
|
|
|
this.ws(); |
|
|
|
|
|
|
|
|
|
this.ws(); |
|
|
|
|
const [refer] = this.match(/"(.+?)"/); |
|
|
|
|
|
|
|
|
|
const [ua] = this.match(/"(.+?)"/); |
|
|
|
|
this.ws(); |
|
|
|
|
|
|
|
|
|
return { |
|
|
|
|
ip, |
|
|
|
|
conn_id: parseInt(conn_id), |
|
|
|
|
conn_count: parseInt(conn_count), |
|
|
|
|
time: new Date(time), |
|
|
|
|
url, |
|
|
|
|
code: parseInt(code), |
|
|
|
|
size: parseInt(bytes), |
|
|
|
|
refer: refer === '-' ? undefined : refer, |
|
|
|
|
ua: UAParser(ua) |
|
|
|
|
}; |
|
|
|
|
} catch(error) { |
|
|
|
|
throw new Error(`Parsing Error: ${ this.next }`); |
|
|
|
|
} |
|
|
|
|
const [ua] = this.match(/"(.+?)"/); |
|
|
|
|
|
|
|
|
|
return { |
|
|
|
|
ip, |
|
|
|
|
conn_id: parseInt(conn_id), |
|
|
|
|
conn_count: parseInt(conn_count), |
|
|
|
|
time: new Date(time), |
|
|
|
|
url, params, |
|
|
|
|
code: parseInt(code), |
|
|
|
|
size: parseInt(bytes), |
|
|
|
|
refer: refer === '-' ? undefined : refer, |
|
|
|
|
ua: UAParser(ua) |
|
|
|
|
}; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
const read_stream = fs.createReadStream(process.argv[2]); |
|
|
|
|
parse_old_log(ip) { |
|
|
|
|
this.match(/- -/); |
|
|
|
|
|
|
|
|
|
this.ws(); |
|
|
|
|
|
|
|
|
|
// FORMAT: 29/Mar/2022:22:40:52 +0200
|
|
|
|
|
const [ day, month, year, hour, minute, seconds, tz_offset ] = this.match(/\[([0-9]+)\/([A-Za-z]+)\/([0-9]+):([0-9]+):([0-9]+):([0-9]+) (.+?)\]/); |
|
|
|
|
|
|
|
|
|
this.ws(); |
|
|
|
|
|
|
|
|
|
const rl = readline.createInterface({ |
|
|
|
|
input: read_stream, |
|
|
|
|
crlfDelay: Infinity |
|
|
|
|
}); |
|
|
|
|
const [ method, full_url, http_version ] = this.match(/"([A-Z]+) (.+) HTTP\/([0-9].[0-9])"/); |
|
|
|
|
|
|
|
|
|
const parser = new Parser(); |
|
|
|
|
const chains = {}; |
|
|
|
|
const [ url, params ] = full_url.split("?"); |
|
|
|
|
|
|
|
|
|
const skip = /(authcheck\/?|.*.svg|.*.webmanifest|.*.js|.*.css|.*.png|.*.txt|.*.woff|.*.jpg|.*.mp4|.*.torrent|\-|.*.ico|\/api\/.*\?.*|.*.html|.*.map)$/ |
|
|
|
|
this.ws(); |
|
|
|
|
|
|
|
|
|
for await (let line of rl) { |
|
|
|
|
parser.start(line); |
|
|
|
|
const code = this.match(/\-|[0-9]+/); |
|
|
|
|
|
|
|
|
|
try { |
|
|
|
|
const data = parser.parse(); |
|
|
|
|
this.ws(); |
|
|
|
|
|
|
|
|
|
if(data.ua.os && data.code === 200 && !data.url.match(skip)) { |
|
|
|
|
let chain = data.ip in chains ? chains[data.ip] : []; |
|
|
|
|
const bytes = this.match(/\-|[0-9]+/); |
|
|
|
|
|
|
|
|
|
chain.push([data.time, data.url, data.refer]); |
|
|
|
|
this.ws(); |
|
|
|
|
|
|
|
|
|
chains[data.ip] = chain; |
|
|
|
|
const [refer] = this.match(/"(.+?)"/); |
|
|
|
|
|
|
|
|
|
this.ws(); |
|
|
|
|
|
|
|
|
|
const [ua] = this.match(/"(.+?)"/); |
|
|
|
|
|
|
|
|
|
// this is another IP address sometimes in another log format that I'll ignore
|
|
|
|
|
const unknown = this.match(/".+?"$/); |
|
|
|
|
|
|
|
|
|
return { |
|
|
|
|
ip, |
|
|
|
|
method, |
|
|
|
|
http_version, |
|
|
|
|
time: new Date(`${day} ${month} ${year} ${hour}:${minute}:${seconds} ${tz_offset}`), |
|
|
|
|
url, params, |
|
|
|
|
code: parseInt(code), |
|
|
|
|
size: parseInt(bytes), |
|
|
|
|
refer: refer === '-' ? undefined : refer, |
|
|
|
|
ua: UAParser(ua) |
|
|
|
|
}; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
parse() { |
|
|
|
|
const ip = this.match(/^[0-9\.]+/); |
|
|
|
|
const test = this.match(/(:| )/); |
|
|
|
|
|
|
|
|
|
// BUG: uhh for some reason it needs == here? === says : doesn't equal :
|
|
|
|
|
if(test == ":") { |
|
|
|
|
return this.parse_new_log(ip); |
|
|
|
|
} else if(test == " ") { |
|
|
|
|
return this.parse_old_log(ip); |
|
|
|
|
} else { |
|
|
|
|
// console.log(`PARSE ERROR, expected : or ' ' but got ${test}`);
|
|
|
|
|
return {}; |
|
|
|
|
} |
|
|
|
|
} catch(err) { |
|
|
|
|
if(line !== "") console.error(err); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/**
 * Read a log file line by line and group the parsed requests by client IP.
 *
 * @param {string} file_name - path of the log file to read.
 * @returns {Promise<[object, object]>} `[by_ip, stats]`: `by_ip` maps each
 *   IP to the array of its parsed request records (in file order), and
 *   `stats` holds the counters below.
 */
const parse_logs = async (file_name) => {
  const read_stream = fs.createReadStream(file_name);

  const rl = readline.createInterface({
    input: read_stream,
    crlfDelay: Infinity
  });

  const parser = new Parser();

  // NOTE(review): only `lines` and `errors` are updated in this function;
  // the other counters are returned as 0.
  const stats = {
    lines: 0,
    chains: 0,
    excluded: 0,
    errors: 0,
    roots: 0,
    firsts: 0
  };

  // NOTE(review): this pattern is unanchored, so e.g. the `\-` alternative
  // matches any URL containing a hyphen — confirm that is intended.
  const skip = /(authcheck|.*\.svg|.*\.webmanifest|.*\.js|.*\.css|.*php|socket\.io|\.env|.*\.png|.*\.txt|.*\.woff|.*\.jpg|.*\.mp4|.*\.torrent|\-|.*\.ico|\/api\/.*\?.*|.*\.html|.*\.map|.*.php)/

  const by_ip = {};

  for await (const line of rl) {
    try {
      stats.lines += 1;
      parser.start(line);

      const data = parser.parse();

      // skip lines that have content we don't care about
      // (an unparseable line yields no `url`, which throws here and is
      // counted as an error below)
      if(data.url.match(skip)) continue;

      // store or update the chain in the by_ip chain
      const ip_chain = by_ip[data.ip] || [];
      ip_chain.push(data);
      by_ip[data.ip] = ip_chain;
    } catch(error) {
      // Best effort: a bad line is counted, not fatal.
      stats.errors += 1;
    }
  }

  return [by_ip, stats];
};
|
|
|
|
|
|
|
|
|
/**
 * Collapse a list of requests into the distinct URLs visited, in order
 * of first appearance.
 *
 * @param {Array<{url: string}>} requests - parsed request records.
 * @returns {Iterator<string>} iterator over the unique URLs.
 */
const chain_to_set = (requests) => {
  // A Set keeps insertion order and silently drops repeat URLs.
  const path = new Set(requests.map((r) => r.url));

  return path.values();
};
|
|
|
|
|
|
|
|
|
/**
 * Count how many IPs followed each distinct chain of URLs and rank the
 * chains from most to least common.
 *
 * A chain's key is the space-joined list of unique URLs the IP requested,
 * prefixed with `[referrer]` when the IP's first request carried one.
 *
 * @param {object} by_ip - map of IP -> array of parsed request records.
 * @returns {Array<[string, number]>} `[chain, count]` pairs, highest
 *   count first.
 */
const sort_request_chains = (by_ip) => {
  const counts = new Map();

  for(const requests of Object.values(by_ip)) {
    const chain = chain_to_set(requests);

    // Only prepend the referrer when there is one; joining an empty
    // placeholder would leave a stray leading space in the key.
    const refer = requests[0]?.refer;
    const parts = refer ? [`[${refer}]`, ...chain] : [...chain];
    const url_set = parts.join(" ");

    counts.set(url_set, (counts.get(url_set) ?? 0) + 1);
  }

  return [...counts.entries()].sort((a, b) => b[1] - a[1]);
};
|
|
|
|
|
|
|
|
|
// Parse the log file named on the command line, then print every request
// chain with the number of IPs that followed it (most common first),
// followed by the parsing counters.
const [by_ip, stats] = await parse_logs(process.argv[2]);
const chains_sorted = sort_request_chains(by_ip);

for(const [url, count] of chains_sorted) {
  console.log(count, url);
}

console.log(stats);
|
|
|
|