Now it will parse any group of files and extract data by dates in the files.

master
Zed A. Shaw 2 years ago
parent b2c521b521
commit 9e122f732e
  1. 74
      02-filter-a-log-file/chains_parser.js

@ -4,13 +4,13 @@ import UAParser from "ua-parser-js";
import readline from "readline";
import { program } from "commander";
import glob from "fast-glob";
import format from "date-fns/format/index.js";
program
.option("--min <Number>", "The lowest count to print. Stop at this.", 1)
.option("--errors", "Show the erorrs so you can fix them.", false)
.option("--format <string>", "Output format, text or json. Ignores min for raw output.", "json")
.option("--outfile <string>", "Save to file rather than stdout.")
.option("--errors", "Show the errors so you can fix them.", false)
.option("--format <String>", "Output format, text or json. Ignores min for raw output.")
.option("--outfile <String>", "Save to file rather than stdout.")
.requiredOption("--domain <String>", "Domain for the log. Gets removed as a refer.")
.requiredOption("--input <String>", "Input file glob.")
.description("Processes different web server logs to determine request chain frequency.")
@ -19,6 +19,7 @@ program
program.parse();
const OPTS = program.opts();
OPTS.min = parseInt(OPTS.min);
assert(!isNaN(OPTS.min), `min must be a number, you have ${OPTS.min}`);
class Parser {
@ -178,12 +179,20 @@ const parse_log_file = async (results, stats, file_name, errors) => {
// skip lines that have content we don't care about
if(data.url.match(skip)) continue;
// store or update the chain in the by_ip chain
const ip_chain = results[data.ip] || [];
// truncate the date to day granularity (yyyy-MM-dd) so it can be used as a key
const date_key = format(data.time, "yyyy-MM-dd");
// get the date entry or a new one
const date_entry = results[date_key] || {};
// store or update the chain in the by_ip chain
const ip_chain = date_entry[data.ip] || [];
ip_chain.push(data);
date_entry[data.ip] = ip_chain;
// and update this date entry
results[date_key] = date_entry;
results[data.ip] = ip_chain;
} catch(error) {
if(errors) console.error(error);
@ -268,49 +277,44 @@ const sort_request_chains = (chains, min) => {
return converted.sort((a, b) => b[0] - a[0]);
}
const output_results = async (stats, chains, format, outfile) => {
if(format === "json") {
const data = {stats, chains, date: new Date()};
console.log(data);
} else {
const chains_sorted = sort_request_chains(chains, OPTS.min);
const output_report = async (stats, data, domain, min) => {
for(let key in data) {
console.log(`--- ${key} ---`);
const chains = construct_request_chains(data[key], domain);
const chains_sorted = sort_request_chains(chains, min);
for(let [count, url] of chains_sorted) {
console.log(count, url);
}
console.log(stats);
}
}
const write_results = async (stats, chains, format, outfile) => {
assert(outfile, "Output file required.");
const output_json = async (stats, data, domain, min, outfile) => {
const result = {};
// unlike unix APIs this uses exceptions rather than return values for errors
const fd = fs.openSync(outfile, "w+");
for(let key in data) {
const chains = construct_request_chains(data[key], domain);
const chains_sorted = sort_request_chains(chains, min);
result[key] = chains_sorted;
}
if(format === "json") {
const data = {stats, chains, date: new Date()};
if(outfile) {
const fd = fs.openSync(outfile, "w+");
const data = {stats, result, generated_on: new Date()};
const bytes = fs.writeSync(fd, Buffer.from(JSON.stringify(data, null, 4)), 0);
fs.closeSync(fd);
} else {
const chains_sorted = sort_request_chains(chains, OPTS.min);
for(let [count, url] of chains_sorted) {
const bytes = fs.writeSync(fd, Buffer.from(`${count} ${url}\n`));
}
console.log({stats, result});
}
fs.closeSync(fd);
}
assert(!isNaN(OPTS.min), `min must be a number, you have ${OPTS.min}`);
const [by_ip, stats] = await parse_logs_glob(OPTS.input, OPTS.errors);
const chains = construct_request_chains(by_ip, OPTS.domain);
const [by_date, stats] = await parse_logs_glob(OPTS.input, OPTS.errors);
if(OPTS.outfile) {
write_results(stats, chains, OPTS.format, OPTS.outfile);
if(OPTS.format === "json") {
output_json(stats, by_date, OPTS.domain, OPTS.min, OPTS.outfile);
} else {
output_results(stats, chains, OPTS.format, OPTS.outfile);
output_report(stats, by_date, OPTS.domain, OPTS.min);
}
console.log(stats);

Loading…
Cancel
Save