Bringing in a custom parser for my own log servers that I wrote quickly to study the content in my logs. This will organize the logs into chains of requests from each IP address to show how users move through the website.

master
Zed A. Shaw 2 years ago
parent 4d3175a862
commit 503007149f
  1. 144
      02-filter-a-log-file/chains_parser.js
  2. 3023
      02-filter-a-log-file/package-lock.json
  3. 3
      02-filter-a-log-file/package.json

@ -0,0 +1,144 @@
import fs from "fs";
import assert from "assert";
import UAParser from "ua-parser-js";
import readline from "readline";
/**
 * Incremental regex parser for one line of the custom access-log format:
 *   `ip:port:conn_id:conn_count [time] "url" code bytes "refer" "ua"`
 * Call start(line) then parse(); each match() consumes from the front of
 * the remaining text.
 */
class Parser {
  constructor() {
    this.text = ""; // the full line currently being parsed
    this.next = ""; // the not-yet-consumed remainder of the line
  }

  /**
   * Consume `reg` from the remaining input.
   * @param {RegExp} reg - pattern expected next in the line.
   * @returns {string|string[]} capture groups when the pattern has them,
   *   otherwise the whole matched text.
   * @throws {Error} when the pattern does not match the remaining input.
   */
  match(reg) {
    const n = this.next.match(reg);
    if (n === null) {
      // FIX: previously fell through to an opaque TypeError on n.length.
      throw new Error(`expected ${reg} at: ${this.next}`);
    }
    const element = n.length > 1 ? n.slice(1) : n[0];
    this.next = this.next.slice(n[0].length);
    return element;
  }

  /** Begin parsing a new log line. */
  start(line) {
    this.text = line;
    this.next = line;
  }

  /** Skip one or more spaces between fields. */
  ws() {
    this.match(/ +/);
  }

  /**
   * Parse the whole line into a record.
   * @returns {{ip: string, conn_id: number, conn_count: number, time: Date,
   *   url: string, code: number, size: number, refer: (string|undefined),
   *   ua: object}} parsed log entry; `refer` is undefined when logged as "-".
   * @throws {Error} "Parsing Error: ..." when any field fails to match.
   */
  parse() {
    try {
      const [ip, port, conn_id, conn_count] = this.match(/([0-9.]+):([0-9]+):([0-9]+):([0-9]+)/);
      this.ws();
      const [time] = this.match(/\[(.*)\]/);
      this.ws();
      const [url] = this.match(/"(.+?)"/);
      this.ws();
      const code = this.match(/\-|[0-9]+/);
      this.ws();
      const bytes = this.match(/\-|[0-9]+/);
      this.ws();
      const [refer] = this.match(/"(.+?)"/);
      this.ws();
      const [ua] = this.match(/"(.+?)"/);
      return {
        ip,
        conn_id: parseInt(conn_id, 10),
        conn_count: parseInt(conn_count, 10),
        // NOTE(review): relies on Date parsing the log's timestamp string;
        // only ISO-8601 parsing is standardized — confirm the log format.
        time: new Date(time),
        url,
        code: parseInt(code, 10),
        size: parseInt(bytes, 10),
        refer: refer === '-' ? undefined : refer,
        ua: UAParser(ua)
      };
    } catch(error) {
      throw new Error(`Parsing Error: ${ this.next }`);
    }
  }
}
// Stream the log file named on the command line, one line at a time.
// NOTE(review): assumes process.argv[2] is a readable file path — a missing
// argument makes createReadStream throw. TODO: add a usage message.
const read_stream = fs.createReadStream(process.argv[2]);
const rl = readline.createInterface({
input: read_stream,
// Treat \r\n as a single line break so Windows-style logs parse cleanly.
crlfDelay: Infinity
});
// Shared parser instance reused for every line.
const parser = new Parser();
// ip -> array of [time, url, refer] hits, in the order they appeared.
const chains = {};
// URLs to exclude from chains: static assets, API queries, and "-" lines.
// FIX: the extension dots were unescaped (`.*.js` matched any URL ending in
// any-character + "js", e.g. "/anyjs"); each is now a literal `\.`.
const skip = /(authcheck\/?|.*\.svg|.*\.webmanifest|.*\.js|.*\.css|.*\.png|.*\.txt|.*\.woff|.*\.jpg|.*\.mp4|.*\.torrent|-|.*\.ico|\/api\/.*\?.*|.*\.html|.*\.map)$/;
// Parse every log line; keep successful (HTTP 200) non-asset hits grouped
// per client IP as [time, url, refer] entries, preserving arrival order.
for await (const line of rl) {
  parser.start(line);
  try {
    const hit = parser.parse();
    const isPageView = hit.ua.os && hit.code === 200 && !hit.url.match(skip);
    if (isPageView) {
      const chain = chains[hit.ip] ?? [];
      chain.push([hit.time, hit.url, hit.refer]);
      chains[hit.ip] = chain;
    }
  } catch (err) {
    // Blank lines are expected noise; report anything else and keep going.
    if (line !== "") console.error(err);
  }
}
// Collapse each per-IP chain into one readable path string and count how
// many visitors produced each identical path.
const uniques = {};
for (const ip in chains) {
  const chain = chains[ip];
  // The refer on the very first hit is where this visitor arrived from.
  const entry_refer = chain[0][2];
  // Render each hit as "url" or "url(refer)" when the refer is external
  // and differs from the entry point.
  const urls = chain.map(([, url, ref]) => {
    const external = ref && ref !== entry_refer && !ref.includes("learnjsthehardway.com");
    return external ? `${url}(${ref})` : url;
  });
  // Drop consecutive duplicate URLs (reloads), then join into one line.
  const deduped = urls.filter((u, i) => i === 0 || u !== urls[i - 1]);
  let full = deduped.join(" ");
  if (entry_refer) {
    full = `[${entry_refer}] ${full}`;
  }
  uniques[full] = full in uniques ? uniques[full] + 1 : 1;
}
// Report the visitor paths, most common first: "<count> <path>".
const sorted = Object.entries(uniques).sort((a, b) => b[1] - a[1]);
for (const [path, count] of sorted) {
  console.log(count, path);
}

File diff suppressed because it is too large Load Diff

@ -10,6 +10,7 @@
"author": "",
"license": "BSD",
"dependencies": {
"ava": "^4.3.1"
"ava": "^4.3.1",
"ua-parser-js": "^1.0.2"
}
}

Loading…
Cancel
Save