Bringing in a custom parser, written quickly, for the logs produced by my own log servers. It organizes the log entries into chains of requests from each IP address to show how visitors move through the site.
parent
4d3175a862
commit
503007149f
@ -0,0 +1,144 @@ |
||||
import fs from "fs"; |
||||
import assert from "assert"; |
||||
import UAParser from "ua-parser-js"; |
||||
import readline from "readline"; |
||||
|
||||
// Cursor-style parser for one line of a custom access-log format:
//   ip:port:conn_id:conn_count [time] "url" code bytes "refer" "ua"
// Call start() with a line, then parse() to consume it field by field.
class Parser {
  constructor() {
    this.text = ""; // the full original line, kept for reference
    this.next = ""; // the not-yet-consumed remainder of the line
  }

  // Match `reg` at the head of the remainder and consume it.
  // Returns the array of capture groups when the regex has groups,
  // otherwise the whole matched string.
  // Throws when the regex does not match at the current position.
  match(reg) {
    const n = this.next.match(reg);

    // The original sliced n[0].length characters off the front even when
    // the match occurred later in the string (the regexes are unanchored),
    // silently corrupting the remainder; it also crashed with a bare
    // TypeError on a null match. Require the match to start at offset 0.
    if (n === null || n.index !== 0) {
      throw new Error(`expected ${reg} at: ${this.next}`);
    }

    const element = n.length > 1 ? n.slice(1) : n[0];

    this.next = this.next.slice(n[0].length);
    return element;
  }

  // Begin parsing a new line.
  start(line) {
    this.text = line;
    this.next = line;
  }

  // Consume one or more spaces between fields.
  ws() {
    this.match(/ +/);
  }

  // Parse the loaded line into a structured record.
  // Throws `Parsing Error: <remainder>` when any field fails to match.
  parse() {
    try {
      const [ip, port, conn_id, conn_count] = this.match(/([0-9.]+):([0-9]+):([0-9]+):([0-9]+)/);

      this.ws();

      const [ time ] = this.match(/\[(.*)\]/);

      this.ws();

      const [ url ] = this.match(/"(.+?)"/);

      this.ws();

      // status and size may be "-" for aborted requests; parseInt then
      // yields NaN, which downstream filtering (code === 200) rejects.
      const code = this.match(/\-|[0-9]+/);

      this.ws();

      const bytes = this.match(/\-|[0-9]+/);

      this.ws();

      const [refer] = this.match(/"(.+?)"/);

      this.ws();

      const [ua] = this.match(/"(.+?)"/);

      return {
        ip,
        conn_id: parseInt(conn_id, 10),
        conn_count: parseInt(conn_count, 10),
        // NOTE(review): relies on Date parsing a non-ISO timestamp
        // string — confirm the log's time format is one Date() accepts.
        time: new Date(time),
        url,
        code: parseInt(code, 10),
        size: parseInt(bytes, 10),
        refer: refer === '-' ? undefined : refer,
        ua: UAParser(ua)
      };
    } catch(error) {
      // Preserve the underlying failure for debugging via `cause`.
      throw new Error(`Parsing Error: ${ this.next }`, { cause: error });
    }
  }
}
||||
|
||||
// CLI entry point: the path of the log file to analyze is the first
// argument. Fail fast with a usage message instead of a cryptic stream
// error when it is missing.
if (!process.argv[2]) {
  console.error(`usage: node ${process.argv[1]} <logfile>`);
  process.exit(1);
}

const read_stream = fs.createReadStream(process.argv[2]);

// Read the log one line at a time; crlfDelay: Infinity treats \r\n as a
// single line break.
const rl = readline.createInterface({
  input: read_stream,
  crlfDelay: Infinity
});

const parser = new Parser();

// Map of client IP -> ordered list of [time, url, refer] page views.
const chains = {};
||||
|
||||
// URLs excluded from chains: static assets, API queries, and "-".
// The original left the "." before each extension unescaped, so any
// character matched it (e.g. "/post-svg" or "/xico" were skipped too);
// the dots are now literal.
const skip = /(authcheck\/?|.*\.svg|.*\.webmanifest|.*\.js|.*\.css|.*\.png|.*\.txt|.*\.woff|.*\.jpg|.*\.mp4|.*\.torrent|\-|.*\.ico|\/api\/.*\?.*|.*\.html|.*\.map)$/;
||||
|
||||
// Stream the log line by line, parse each hit, and append page views to
// the per-IP chain. Parse failures on non-empty lines are reported but
// do not stop processing.
for await (const line of rl) {
  parser.start(line);

  try {
    const data = parser.parse();

    // Keep only successful page loads from clients with a recognized
    // OS, excluding asset/noise URLs matched by `skip`.
    // NOTE(review): ua-parser-js may return a (possibly empty) `os`
    // object for every agent — confirm this actually filters bots.
    if (!data.ua.os || data.code !== 200 || data.url.match(skip)) {
      continue;
    }

    const chain = chains[data.ip] ?? [];
    chain.push([data.time, data.url, data.refer]);
    chains[data.ip] = chain;
  } catch (err) {
    // Blank lines are expected to fail parsing; stay quiet for those.
    if (line !== "") console.error(err);
  }
}
||||
|
||||
// Collapse each per-IP chain into a single readable path string, then
// count how many visitors produced the identical path.
const uniques = {};

for (const ip in chains) {
  const chain = chains[ip];

  // Referrer of the very first hit: where this visitor came from.
  const entry_refer = chain[0][2];

  // Render each hit as "url" or "url(refer)" when the refer is
  // interesting: present, different from the entry referrer, and not an
  // internal link. NOTE: the site's own hostname is hard-coded here.
  const steps = chain.map(([, url, refer]) => {
    const interesting = refer && refer !== entry_refer &&
      !refer.includes("learnjsthehardway.com");
    return interesting ? `${url}(${refer})` : url;
  });

  // Drop consecutive repeats (reloads), then join into one line.
  const parts = [];
  for (const step of steps) {
    if (parts[parts.length - 1] !== step) {
      parts.push(step);
    }
  }

  let full = parts.join(" ");

  // Prefix the entry referrer (when any) so paths group by origin.
  if (entry_refer) {
    full = `[${entry_refer}] ${full}`;
  }

  uniques[full] = full in uniques ? uniques[full] + 1 : 1;
}
||||
|
||||
// Print every distinct navigation path, most common first.
const sorted = Object.entries(uniques).sort(([, a], [, b]) => b - a);

for (const [path, count] of sorted) {
  console.log(count, path);
}
File diff suppressed because it is too large
Load Diff
Loading…
Reference in new issue