Now it will parse any group of files and organize the extracted data by the dates found in the files.

master
Zed A. Shaw 2 years ago
parent b2c521b521
commit 9e122f732e
1 changed file: 02-filter-a-log-file/chains_parser.js (74 changes)

@@ -4,13 +4,13 @@ import UAParser from "ua-parser-js";
 import readline from "readline";
 import { program } from "commander";
 import glob from "fast-glob";
+import format from "date-fns/format/index.js";
 
 program
   .option("--min <Number>", "The lowest count to print. Stop at this.", 1)
-  .option("--errors", "Show the erorrs so you can fix them.", false)
-  .option("--format <string>", "Output format, text or json. Ignores min for raw output.", "json")
-  .option("--outfile <string>", "Save to file rather than stdout.")
+  .option("--errors", "Show the errors so you can fix them.", false)
+  .option("--format <String>", "Output format, text or json. Ignores min for raw output.")
+  .option("--outfile <String>", "Save to file rather than stdout.")
   .requiredOption("--domain <String>", "Domain for the log. Gets removed as a refer.")
   .requiredOption("--input <String>", "Input file glob.")
   .description("Processes different web server logs to determine request chain frequency.")
@@ -19,6 +19,7 @@ program
 
 program.parse();
 const OPTS = program.opts();
 OPTS.min = parseInt(OPTS.min);
+assert(!isNaN(OPTS.min), `min must be a number, you have ${OPTS.min}`);
 
 class Parser {
@@ -178,12 +179,20 @@ const parse_log_file = async (results, stats, file_name, errors) => {
       // skip lines that have content we don't care about
      if(data.url.match(skip)) continue;
 
-      // store or update the chain in the by_ip chain
-      const ip_chain = results[data.ip] || [];
+      // convert the date to a day-only key
+      const date_key = format(data.time, "yyyy-MM-dd");
+
+      // get the date entry or a new one
+      const date_entry = results[date_key] || {};
+
+      // store or update the chain in the by_ip chain
+      const ip_chain = date_entry[data.ip] || [];
       ip_chain.push(data);
-      results[data.ip] = ip_chain;
+      date_entry[data.ip] = ip_chain;
+
+      // and update this date entry
+      results[date_key] = date_entry;
     } catch(error) {
       if(errors) console.error(error);
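
The hunk above is the core of the change: results is now keyed first by day ("yyyy-MM-dd") and then by client IP, with each IP mapping to its request chain for that day. Here is a minimal standalone sketch of that grouping, using made-up sample entries with the ip, url, and time fields the parser produces:

import format from "date-fns/format/index.js";

// hypothetical parsed log entries; the real ones come from the log parser
const sample_entries = [
  { ip: "10.0.0.1", url: "/index.html", time: new Date("2023-01-05T10:00:00") },
  { ip: "10.0.0.1", url: "/about.html", time: new Date("2023-01-05T10:01:00") },
  { ip: "10.0.0.2", url: "/index.html", time: new Date("2023-01-06T09:00:00") }
];

const results = {};

for(let data of sample_entries) {
  const date_key = format(data.time, "yyyy-MM-dd");  // day-only bucket
  const date_entry = results[date_key] || {};        // all chains seen on that day
  const ip_chain = date_entry[data.ip] || [];        // one chain per IP per day
  ip_chain.push(data);
  date_entry[data.ip] = ip_chain;
  results[date_key] = date_entry;
}

// results is now shaped like:
// { "2023-01-05": { "10.0.0.1": [ /* 2 entries */ ] },
//   "2023-01-06": { "10.0.0.2": [ /* 1 entry */ ] } }
console.log(results);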
@@ -268,49 +277,44 @@ const sort_request_chains = (chains, min) => {
   return converted.sort((a, b) => b[0] - a[0]);
 }
 
-const output_results = async (stats, chains, format, outfile) => {
-  if(format === "json") {
-    const data = {stats, chains, date: new Date()};
-    console.log(data);
-  } else {
-    const chains_sorted = sort_request_chains(chains, OPTS.min);
+const output_report = async (stats, data, domain, min) => {
+  for(let key in data) {
+    console.log(`--- ${key} ---`);
+    const chains = construct_request_chains(data[key], domain);
+    const chains_sorted = sort_request_chains(chains, min);
 
     for(let [count, url] of chains_sorted) {
       console.log(count, url);
     }
-
-    console.log(stats);
   }
 }
 
-const write_results = async (stats, chains, format, outfile) => {
-  assert(outfile, "Output file required.");
-  // unlike unix APIs this uses exceptions rather than return values for errors
-  const fd = fs.openSync(outfile, "w+");
-
-  if(format === "json") {
-    const data = {stats, chains, date: new Date()};
+const output_json = async (stats, data, domain, min, outfile) => {
+  const result = {};
+
+  for(let key in data) {
+    const chains = construct_request_chains(data[key], domain);
+    const chains_sorted = sort_request_chains(chains, min);
+    result[key] = chains_sorted;
+  }
+
+  if(outfile) {
+    const fd = fs.openSync(outfile, "w+");
+    const data = {stats, result, generated_on: new Date()};
     const bytes = fs.writeSync(fd, Buffer.from(JSON.stringify(data, null, 4)), 0);
+    fs.closeSync(fd);
   } else {
-    const chains_sorted = sort_request_chains(chains, OPTS.min);
-
-    for(let [count, url] of chains_sorted) {
-      const bytes = fs.writeSync(fd, Buffer.from(`${count} ${url}\n`));
-    }
+    console.log({stats, result});
   }
-
-  fs.closeSync(fd);
 }
 
-assert(!isNaN(OPTS.min), `min must be a number, you have ${OPTS.min}`);
-const [by_ip, stats] = await parse_logs_glob(OPTS.input, OPTS.errors);
-const chains = construct_request_chains(by_ip, OPTS.domain);
-
-if(OPTS.outfile) {
-  write_results(stats, chains, OPTS.format, OPTS.outfile);
+const [by_date, stats] = await parse_logs_glob(OPTS.input, OPTS.errors);
+
+if(OPTS.format === "json") {
+  output_json(stats, by_date, OPTS.domain, OPTS.min, OPTS.outfile);
 } else {
-  output_results(stats, chains, OPTS.format, OPTS.outfile);
+  output_report(stats, by_date, OPTS.domain, OPTS.min);
 }
+
+console.log(stats);
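
To make the new output concrete, here is a hypothetical sketch of the JSON report that output_json writes when --outfile is given; the dates, counts, and chain strings are invented, and the stats fields are placeholders for whatever parse_logs_glob actually collects:

// hypothetical example of the report shape; every value below is invented
const example_report = {
  stats: { files: 3, lines: 1200, errors: 2 },  // placeholder fields only
  result: {
    // one entry per day, holding [count, chain] pairs sorted by count
    "2023-01-05": [
      [12, "/index.html"],
      [3, "/index.html /about.html"]  // chain formatting is illustrative
    ],
    "2023-01-06": [
      [7, "/blog/"]
    ]
  },
  generated_on: new Date()
};

console.log(JSON.stringify(example_report, null, 4));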
