@@ -4,13 +4,13 @@ import UAParser from "ua-parser-js";
 import readline from "readline";
 import { program } from "commander";
 import glob from "fast-glob";
 import format from "date-fns/format/index.js";
 
 program
   .option("--min <Number>", "The lowest count to print. Stop at this.", 1)
-  .option("--errors", "Show the erorrs so you can fix them.", false)
-  .option("--format <string>", "Output format, text or json. Ignores min for raw output.", "json")
-  .option("--outfile <string>", "Save to file rather than stdout.")
+  .option("--errors", "Show the errors so you can fix them.", false)
+  .option("--format <String>", "Output format, text or json. Ignores min for raw output.")
+  .option("--outfile <String>", "Save to file rather than stdout.")
   .requiredOption("--domain <String>", "Domain for the log. Gets removed as a referer.")
   .requiredOption("--input <String>", "Input file glob.")
   .description("Processes different web server logs to determine request chain frequency.")
@@ -19,6 +19,7 @@ program
 program.parse();
 
 const OPTS = program.opts();
+OPTS.min = parseInt(OPTS.min);
 assert(!isNaN(OPTS.min), `min must be a number, you have ${OPTS.min}`);
 
 class Parser {
@@ -178,12 +179,20 @@ const parse_log_file = async (results, stats, file_name, errors) => {
       // skip lines that have content we don't care about
       if(data.url.match(skip)) continue;
 
-      // store or update the chain in the by_ip chain
-      const ip_chain = results[data.ip] || [];
+      // convert the date to just the day
+      const date_key = format(data.time, "yyyy-MM-dd");
+
+      // get the date entry or a new one
+      const date_entry = results[date_key] || {};
+
+      // store or update the chain in the by_ip chain
+      const ip_chain = date_entry[data.ip] || [];
       ip_chain.push(data);
-      results[data.ip] = ip_chain;
-
+      date_entry[data.ip] = ip_chain;
+
+      // and update this date entry
+      results[date_key] = date_entry;
     } catch(error) {
       if(errors) console.error(error);
     }
   }
@@ -268,49 +277,44 @@ const sort_request_chains = (chains, min) => {
   return converted.sort((a, b) => b[0] - a[0]);
 }
 
-const output_results = async (stats, chains, format, outfile) => {
-  if(format === "json") {
-    const data = {stats, chains, date: new Date()};
-    console.log(data);
-  } else {
-    const chains_sorted = sort_request_chains(chains, OPTS.min);
+const output_report = async (stats, data, domain, min) => {
+  for(let key in data) {
+    console.log(`--- ${key} ---`);
+    const chains = construct_request_chains(data[key], domain);
+    const chains_sorted = sort_request_chains(chains, min);
 
     for(let [count, url] of chains_sorted) {
       console.log(count, url);
     }
-
-    console.log(stats);
-  }
+  }
+  console.log(stats);
 }
 
-const write_results = async (stats, chains, format, outfile) => {
-  assert(outfile, "Output file required.");
+const output_json = async (stats, data, domain, min, outfile) => {
+  const result = {};
 
-  // unlike unix APIs this uses exceptions rather than return values for errors
-  const fd = fs.openSync(outfile, "w+");
+  for(let key in data) {
+    const chains = construct_request_chains(data[key], domain);
+    const chains_sorted = sort_request_chains(chains, min);
+    result[key] = chains_sorted;
+  }
 
-  if(format === "json") {
-    const data = {stats, chains, date: new Date()};
+  if(outfile) {
+    const fd = fs.openSync(outfile, "w+");
+    const data = {stats, result, generated_on: new Date()};
     const bytes = fs.writeSync(fd, Buffer.from(JSON.stringify(data, null, 4)), 0);
+    fs.closeSync(fd);
   } else {
-    const chains_sorted = sort_request_chains(chains, OPTS.min);
-    for(let [count, url] of chains_sorted) {
-      const bytes = fs.writeSync(fd, Buffer.from(`${count} ${url}\n`));
-    }
+    console.log({stats, result});
   }
-
-  fs.closeSync(fd);
 }
 
 assert(!isNaN(OPTS.min), `min must be a number, you have ${OPTS.min}`);
 
-const [by_ip, stats] = await parse_logs_glob(OPTS.input, OPTS.errors);
-const chains = construct_request_chains(by_ip, OPTS.domain);
+const [by_date, stats] = await parse_logs_glob(OPTS.input, OPTS.errors);
 
-if(OPTS.outfile) {
-  write_results(stats, chains, OPTS.format, OPTS.outfile);
+if(OPTS.format === "json") {
+  output_json(stats, by_date, OPTS.domain, OPTS.min, OPTS.outfile);
 } else {
-  output_results(stats, chains, OPTS.format, OPTS.outfile);
+  output_report(stats, by_date, OPTS.domain, OPTS.min);
 }
-
-console.log(stats);
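
For reference, a minimal sketch of the data shape this commit moves to (not part of the diff; the sample values are hypothetical, and the nesting is inferred from the parse_log_file change above): results is now keyed by day, then by client IP, with each IP holding its parsed log lines.

// Hypothetical sample of the new `results` structure built by parse_log_file:
// one entry per yyyy-MM-dd day, each holding per-IP arrays of parsed lines.
const results = {
  "2024-01-05": {
    "203.0.113.7": [
      {ip: "203.0.113.7", time: new Date("2024-01-05T10:00:00Z"), url: "/index.html"},
      {ip: "203.0.113.7", time: new Date("2024-01-05T10:00:02Z"), url: "/about.html"},
    ],
  },
};

// output_report walks one day at a time, printing each day under its own
// "--- yyyy-MM-dd ---" banner; output_json stores the sorted chains per day.
for(let key in results) console.log(`--- ${key} ---`, results[key]);

A run of the new version might look like this (logalyzer.js is a stand-in name for the script being diffed):

node logalyzer.js --input "logs/*.log" --domain example.com --format text --min 2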