diff --git a/02-filter-a-log-file/chains_parser.js b/02-filter-a-log-file/chains_parser.js index 102ff85..48710d9 100644 --- a/02-filter-a-log-file/chains_parser.js +++ b/02-filter-a-log-file/chains_parser.js @@ -4,13 +4,13 @@ import UAParser from "ua-parser-js"; import readline from "readline"; import { program } from "commander"; import glob from "fast-glob"; - +import format from "date-fns/format/index.js"; program .option("--min ", "The lowest count to print. Stop at this.", 1) - .option("--errors", "Show the erorrs so you can fix them.", false) - .option("--format ", "Output format, text or json. Ignores min for raw output.", "json") - .option("--outfile ", "Save to file rather than stdout.") + .option("--errors", "Show the errors so you can fix them.", false) + .option("--format ", "Output format, text or json. Ignores min for raw output.") + .option("--outfile ", "Save to file rather than stdout.") .requiredOption("--domain ", "Domain for the log. Gets removed as a refer.") .requiredOption("--input ", "Input file glob.") .description("Processes different web server logs to determine request chain frequency.") @@ -19,6 +19,7 @@ program program.parse(); const OPTS = program.opts(); OPTS.min = parseInt(OPTS.min); +assert(!isNaN(OPTS.min), `min must be a number, you have ${OPTS.min}`); class Parser { @@ -178,12 +179,20 @@ const parse_log_file = async (results, stats, file_name, errors) => { // skip lines that have content we don't care about if(data.url.match(skip)) continue; - // store or update the chain in the by_ip chain - const ip_chain = results[data.ip] || []; + // convert the date to a only day length + const date_key = format(data.time, "yyyy-MM-dd"); + + // get the date entry or a new one + const date_entry = results[date_key] || {}; + // store or update the chain in the by_ip chain + const ip_chain = date_entry[data.ip] || []; ip_chain.push(data); + date_entry[data.ip] = ip_chain; + + // and update this date entry + results[date_key] = date_entry; - results[data.ip] = ip_chain; } catch(error) { if(errors) console.error(error); @@ -268,49 +277,44 @@ const sort_request_chains = (chains, min) => { return converted.sort((a, b) => b[0] - a[0]); } -const output_results = async (stats, chains, format, outfile) => { - if(format === "json") { - const data = {stats, chains, date: new Date()}; - console.log(data); - } else { - const chains_sorted = sort_request_chains(chains, OPTS.min); +const output_report = async (stats, data, domain, min) => { + for(let key in data) { + console.log(`--- ${key} ---`); + const chains = construct_request_chains(data[key], domain); + const chains_sorted = sort_request_chains(chains, min); for(let [count, url] of chains_sorted) { console.log(count, url); } - - console.log(stats); } } -const write_results = async (stats, chains, format, outfile) => { - assert(outfile, "Output file required."); +const output_json = async (stats, data, domain, min, outfile) => { + const result = {}; - // unlike unix APIs this uses exceptions rather than return values for errors - const fd = fs.openSync(outfile, "w+"); + for(let key in data) { + const chains = construct_request_chains(data[key], domain); + const chains_sorted = sort_request_chains(chains, min); + result[key] = chains_sorted; + } - if(format === "json") { - const data = {stats, chains, date: new Date()}; + if(outfile) { + const fd = fs.openSync(outfile, "w+"); + const data = {stats, result, generated_on: new Date()}; const bytes = fs.writeSync(fd, Buffer.from(JSON.stringify(data, null, 4)), 0); + fs.closeSync(fd); } else { - const chains_sorted = sort_request_chains(chains, OPTS.min); - - for(let [count, url] of chains_sorted) { - const bytes = fs.writeSync(fd, Buffer.from(`${count} ${url}\n`)); - } + console.log({stats, result}); } - - fs.closeSync(fd); } -assert(!isNaN(OPTS.min), `min must be a number, you have ${OPTS.min}`); - -const [by_ip, stats] = await parse_logs_glob(OPTS.input, OPTS.errors); -const chains = construct_request_chains(by_ip, OPTS.domain); +const [by_date, stats] = await parse_logs_glob(OPTS.input, OPTS.errors); -if(OPTS.outfile) { - write_results(stats, chains, OPTS.format, OPTS.outfile); +if(OPTS.format === "json") { + output_json(stats, by_date, OPTS.domain, OPTS.min, OPTS.outfile); } else { - output_results(stats, chains, OPTS.format, OPTS.outfile); + output_report(stats, by_date, OPTS.domain, OPTS.min); } + +console.log(stats);