@@ -3,6 +3,23 @@ import assert from "assert";
import UAParser from "ua-parser-js";
import readline from "readline";
import { program } from "commander";
import glob from "fast-glob";
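
// Command line options: --input takes a glob of log files; --domain is dropped when it appears as a referrer.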
program
  .option("--min <Number>", "The lowest count to print. Stop at this.", 1)
  .option("--errors", "Show the errors so you can fix them.", false)
  .option("--format <string>", "Output format, text or json. Ignores min for raw output.", "json")
  .option("--outfile <string>", "Save to file rather than stdout.")
  .requiredOption("--domain <String>", "Domain for the log. Gets removed as a refer.")
  .requiredOption("--input <String>", "Input file glob.")
  .description("Processes different web server logs to determine request chain frequency.")
  .version(0.1);

program.parse();
const OPTS = program.opts();
OPTS.min = parseInt(OPTS.min);

class Parser {

  constructor() {
@@ -139,7 +156,7 @@ class Parser {
  }
}

const parse_logs = async (file_name, errors) => {
const parse_log_file = async (results, stats, file_name, errors) => {
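  // Read the log file line by line, appending each parsed request to its IP's chain in results and tallying counters in stats.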
  const read_stream = fs.createReadStream(file_name);

  const rl = readline.createInterface({
@@ -149,19 +166,8 @@ const parse_logs = async (file_name, errors) => {

  const parser = new Parser();

  const stats = {
    lines: 0,
    chains: 0,
    excluded: 0,
    errors: 0,
    roots: 0,
    firsts: 0
  };
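
  // Static assets, scanner probes, and query-string API calls are skipped so they never enter a chain.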
  const skip = /(authcheck|.*\.svg|.*\.webmanifest|.*\.js|.*\.css|.*php|socket\.io|\.env|.*\.png|.*\.txt|.*\.woff|.*\.jpg|.*\.mp4|.*\.torrent|\-|.*\.ico|\/api\/.*\?.*|.*\.html|.*\.map|.*.php)/

  const by_ip = {};

  for await (let line of rl) {
    try {
      stats.lines += 1;
@@ -173,40 +179,43 @@
      if(data.url.match(skip)) continue;

      // store or update the chain in the by_ip chain
      const ip_chain = by_ip[data.ip] || [];
      const ip_chain = results[data.ip] || [];

      ip_chain.push(data);

      by_ip[data.ip] = ip_chain;
      results[data.ip] = ip_chain;
    } catch(error) {
      if(errors) console.error(error);

      stats.errors += 1;
    }
  }

  return [by_ip, stats];
}

const chain_to_set = (requests) => {
  const path = new Set();
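
// parse_logs_glob expands the --input glob and runs parse_log_file over every match, sharing one results object and one stats object across the files.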
const parse_logs_glob = async (file_glob, errors) => {
  const file_list = glob.sync(file_glob);
  const results = {};
  const stats = {
    lines: 0,
    chains: 0,
    excluded: 0,
    errors: 0,
    roots: 0,
    firsts: 0
  };

  for(let r of requests) {
    path.add(r.url);
  for(let file_name of file_list) {
    await parse_log_file(results, stats, file_name, errors);
  }

  return path.values();
  return [results, stats];
}

const chain_to_list = (requests) => {
  const path = [];
  let seen;
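
// chain_to_set collapses a request chain to the unique URLs visited, in first-seen order.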
const chain_to_set = (requests) => {
  const path = new Set();

  for(let r of requests) {
    if(r.url != seen) {
      path.push(r.url);
      seen = r.url;
    }
    path.add(r.url);
  }

  return path.values();
@@ -221,11 +230,11 @@ const construct_url_set = (domain, ref, full_chain) => {
  }
}

const construct_request_chains = (by_ip, domain, as_set) => {
const construct_request_chains = (by_ip, domain) => {
  let ip_chains = {};

  for(let [ip, requests] of Object.entries(by_ip)) {
    const chain = as_set ? chain_to_set(requests) : chain_to_list(requests);
    const chain = chain_to_set(requests);

    // record the initial refer to track entry to the site
    const ref = requests[0].refer;
@@ -294,33 +303,14 @@ const write_results = async (stats, chains, format, outfile) => {
  fs.closeSync(fd);
}

program
  .option("--no-set", "Use a Set instead of a list for chains.")
  .option("--min <Number>", "The lowest count to print. Stop at this.", 1)
  .option("--errors", "Show the erorrs so you can fix them.", false)
  .option("--format <string>", "Output format, text or json. Ignores min for raw output.", "json")
  .option("--outfile <string>", "Save to file rather than stdout.")
  .requiredOption("--domain <String>", "Domain for the log. Gets removed as a refer.")
  .requiredOption("--input <String>", "Input file.")
  .description("Processes different web server logs to determine request chain frequency.")
  .version(0.1);

program.parse();
const OPTS = program.opts();
OPTS.min = parseInt(OPTS.min);

assert(!isNaN(OPTS.min), `min must be a number, you have ${OPTS.min}`);
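
// Parse every matching log, build the per-IP request chains, then write to --outfile or print to stdout.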
try {
  const [by_ip, stats] = await parse_logs(OPTS.input, OPTS.errors);
  const chains = construct_request_chains(by_ip, OPTS.domain, OPTS.set);
  const [by_ip, stats] = await parse_logs_glob(OPTS.input, OPTS.errors);
  const chains = construct_request_chains(by_ip, OPTS.domain);

  if(OPTS.outfile) {
  if(OPTS.outfile) {
    write_results(stats, chains, OPTS.format, OPTS.outfile);
  } else {
  } else {
    output_results(stats, chains, OPTS.format, OPTS.outfile);
  }
} catch(error) {
  console.error(error.message);
  process.exit(1);
}