// Post-change side of the diff to 02-filter-a-log-file/chains_parser.js
// (b1d94c2..ce8dae5), laid out as regular source. The diff has three hunks;
// whatever sits between them in the real file is not part of this excerpt.

// ---- hunk 1 (starts around line 212) ---------------------------------------
// The hunk opens with the tail of chain_to_list, whose beginning is outside
// this excerpt, so it is kept here as a comment only:
//     return path.values();
//   }

/**
 * Builds the string that identifies (and is used to count) one request chain.
 *
 * A refer that does not contain `domain` is prepended as a `[ref]` tag. A
 * missing refer, or one that contains `domain`, is left out of the string.
 *
 * @param {string} domain - The site's own domain (the --domain option).
 * @param {string} [ref] - Refer of the first request of the chain.
 * @param {Array} full_chain - Entries of the chain; joined with single spaces.
 * @returns {string} The joined chain, optionally prefixed with `[ref]`.
 */
const construct_url_set = (domain, ref, full_chain) => {
  const is_external = Boolean(ref) && !ref.includes(domain);

  // Every entry of the chain is kept. Dropping the first one would merge
  // chains that start on different URLs into one counter, and the key would
  // no longer match the full chain stored next to the count.
  const parts = is_external ? [`[${ref}]`, ...full_chain] : full_chain;

  return parts.join(" ");
};

/**
 * Counts how often each request chain occurs across all IPs.
 *
 * @param {Object} by_ip - Maps an IP to its requests. Only `requests[0].refer`
 *   is read directly here; the rest is handed to chain_to_set/chain_to_list.
 * @param {string} domain - The site's own domain, passed to construct_url_set.
 * @param {boolean} as_set - Build each chain with chain_to_set when true,
 *   with chain_to_list otherwise.
 * @returns {Object} Plain object keyed by the chain string; each value is
 *   `{ count, comes_from, full_chain }`, where `comes_from` is the raw refer
 *   of the first request and `full_chain` belongs to the first IP that
 *   produced the key.
 */
const construct_request_chains = (by_ip, domain, as_set) => {
  // Keys come from log data. A Map avoids the `in` operator on a plain
  // object, which also matches inherited names such as "constructor".
  const tallies = new Map();

  for (const requests of Object.values(by_ip)) {
    const chain = as_set ? chain_to_set(requests) : chain_to_list(requests);
    // record the initial refer to track entry to the site
    const ref = requests[0].refer;
    const full_chain = [...chain];
    const url_set = construct_url_set(domain, ref, full_chain);

    const seen = tallies.get(url_set);
    if (seen) {
      seen.count += 1;
    } else {
      tallies.set(url_set, { count: 1, comes_from: ref, full_chain });
    }
  }

  // Callers iterate the result with Object.entries, so hand back a plain object.
  return Object.fromEntries(tallies);
};

/**
 * Converts the chain counts into `[count, chain]` pairs, highest count first.
 *
 * The chain text is the key of `chains`, so the printed line is exactly the
 * string that was counted (same-domain refers are not shown).
 *
 * @param {Object} chains - Result of construct_request_chains.
 * @returns {Array} Array of `[count, url]` pairs sorted by descending count.
 */
const sort_request_chains = (chains) =>
  Object.entries(chains)
    .map(([url, stats]) => [stats.count, url])
    .sort((a, b) => b[0] - a[0]);

/**
 * Prints every chain whose count is at least `min`, one per line.
 *
 * @param {Array} chains_sorted - `[count, url]` pairs from sort_request_chains.
 * @param {number} min - Lowest count that is still printed.
 */
const output_results = (chains_sorted, min) => {
  for (const [count, url] of chains_sorted) {
    if (count >= min) {
      console.log(count, url);
    }
  }
};

// ---- hunk 2 (starts around line 271) ---------------------------------------
// NOTE(review): in the excerpt the value-taking flags ended in a trailing
// space with no placeholder (e.g. "--min "), which would declare them as
// boolean switches. The <...> placeholders were presumably lost in
// extraction; the names below are a reconstruction - confirm against the
// real file. Calls that may sit between `program` and the first option shown
// are outside this excerpt.
program
  .option("--no-set", "Use a Set instead of a list for chains.")
  .option("--min <count>", "The lowest count to print. Stop at this.", 1)
  .option("--errors", "Show the erorrs so you can fix them.", false)
  .option("--format <format>", "Output format, text or json. Ignores min for raw output.", "json")
  .requiredOption("--domain <domain>", "Domain for the log. Gets removed as a refer.")
  .requiredOption("--input <file>", "Input file.")
  .description("Processes different web server logs to determine request chain frequency.")
  .version(0.1);

// ---- hunk 3 (starts around line 284) ---------------------------------------
// OPTS, assert and parse_logs are defined outside this excerpt.

// Keep the raw value so the failure message shows what was typed, not "NaN".
const requested_min = OPTS.min;
OPTS.min = parseInt(requested_min, 10);
assert(!Number.isNaN(OPTS.min), `min must be a number, you have ${requested_min}`);

const [by_ip, stats] = await parse_logs(OPTS.input, OPTS.errors);
const chains = construct_request_chains(by_ip, OPTS.domain, OPTS.set);

if (OPTS.format === "json") {
  // console.log(object) prints Node's inspect format, which is not valid
  // JSON; serialize explicitly so the output can be parsed by other tools.
  console.log(JSON.stringify(chains, null, 2));
} else {
  // The sorted list is only needed for the text report.
  output_results(sort_request_chains(chains), OPTS.min);
}