From 057c286cd3884b4eacb4a9b282c5dcfdccda0b98 Mon Sep 17 00:00:00 2001
From: "Zed A. Shaw"
Date: Tue, 27 Jun 2023 06:09:19 -0400
Subject: [PATCH] This is a simple import tool from a django JSON dump using
 their dump tool.

---
 commands/dumpup.js | 173 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 173 insertions(+)
 create mode 100644 commands/dumpup.js

diff --git a/commands/dumpup.js b/commands/dumpup.js
new file mode 100644
index 0000000..1eba585
--- /dev/null
+++ b/commands/dumpup.js
@@ -0,0 +1,173 @@
+// you may not need all of these but they come up a lot
+import fs from "fs";
+import assert from "assert";
+import logging from '../lib/logging.js';
+import { mkdir, write } from "../lib/builderator.js";
+import glob from "fast-glob";
+import { Database } from "duckdb-async";
+import { UserPaymentProduct, User, Product, Payment } from "../lib/models.js";
+import slugify from "slugify";
+
+const log = logging.create(import.meta.url);
+
+export const description = "Loads the data found in a django JSON dump into the database.";
+
+// your command uses the npm package commander's options format
+export const options = [
+  ["--mode-split", "Split a django dump into multiple files per table with pk=id"],
+  ["--mode-analyze", "Run in analysis mode, which loads the tables into duckdb."],
+  ["--mode-import", "Use the split files in --split-dir to import"],
+  ["--input <file>", "The django JSON dump file to split (required for --mode-split)."],
+]
+
+// put required options in the required variable
+export const required = [
+  ["--split-dir <dir>", "Directory where the split .json files go", "data_dump"],
+  ["--duckdb <file>", "The duckdb to create.", "dump.duckdb"],
+]
+
+// handy function for checking things are good and aborting
+const check = (test, fail_message) => {
+  if(!test) {
+    log.error(fail_message);
+    process.exit(1);
+  }
+}
+
+export const mode_split = async (opts) => {
+  // it's easier to debug options with console
+  const raw_data = JSON.parse(fs.readFileSync(opts.input));
+
+  const tables = {};
+
+  for(let row of raw_data) {
+    assert(row.fields.id === undefined, `Bad ID ${JSON.stringify(row)}`);
+    row.fields.id = row.pk; // why did django do this? so weird
+
+    if(tables[row.model] === undefined) {
+      // first one
+      tables[row.model] = [row.fields];
+    } else {
+      tables[row.model].push(row.fields);
+    }
+  }
+
+  console.log(opts);
+  mkdir(opts.splitDir);
+
+  for(let [name, data] of Object.entries(tables)) {
+    const out_file = `${opts.splitDir}/${name}.json`;
+    console.log("WRITE", out_file);
+    write(out_file, JSON.stringify(data, null, 4));
+  }
+
+  // due to how async/await works it's just easier to manually exit with exit codes
+  process.exit(0);
+}
+
+export const mode_analyze = async (opts) => {
+  console.log("ANALYZE", opts);
+  const db = new Database(opts.duckdb);
+
+  // glob the files in the splitDir
+  const split_dir = await glob(`${opts.splitDir}/*.json`);
+
+  for(let file_name of split_dir) {
+    // file names look like data_dump/app.model.json; the middle piece is the table name
+    const [junk, table_name, ext] = file_name.split('.');
+
+    console.log(">> TABLE", table_name, "CREATE FROM", file_name);
+    await db.exec(`CREATE TABLE "${table_name}" AS SELECT * FROM "${file_name}"`);
+    // can't use a ? placeholder for the table/file name here, so it's interpolated directly
+  }
+
+  await db.close();
+  process.exit(0);
+}
+
+export const mode_import = async (opts) => {
+  console.log("IMPORT", opts);
+
+  const db = new Database(opts.duckdb);
+  // go through each product stored in the duckdb
+
+  const products = await db.all("SELECT * FROM PRODUCT");
+
+  for(let product of products) {
+    const my_product = await Product.insert({
+      created_at: product.created_on,
+      title: product.title,
+      description: product.description,
+      price: product.base_price,
+      currency: "USD",
+      currency_symbol: "$",
+      active: product.active,
+      slug: slugify(product.title, {lower: true, strict: true}),
+      short_blurb: product.description,
+      docs_url: product.private_location,
+      poster: product.poster,
+      category: "Python",
+      created_by: "Zed A. Shaw",
+      preorder: 1
+    });
+
+    const purchases = await db.all(`SELECT * FROM PURCHASE WHERE product=${product.id} AND state='PAID'`);
+
+    console.log("PRODUCT", my_product.title, "PURCHASES", purchases.length);
+
+    for(let purchase of purchases) {
+      const cust_q = await db.all(`SELECT * FROM CUSTOMER WHERE id=${purchase.customer}`);
+      const customer = cust_q[0];
+      const fake_password = User.random_hex(10);
+
+      let user = await User.register({
+        created_at: customer.created_on,
+        initials: "",
+        full_name: customer.full_name,
+        password: fake_password,
+        password_repeat: fake_password,
+        email: customer.email,
+        unsubscribe: !customer.promotable,
+        unsubscribed_on: null,
+      });
+
+      if(!user) {
+        console.log("USER EXISTS LOADING", customer.email);
+        user = await User.first({email: customer.email});
+      } else {
+        console.log("ADDED", user.email, user.id, "PASS", fake_password);
+      }
+
+      // create payment and then connect with upp
+      const payment = await Payment.insert({
+        created_at: purchase.created_on,
+        system: purchase.purchase_system.toLowerCase(),
+        status: "complete",
+        internal_id: Payment.gen_internal_id(),
+        sys_created_on: purchase.ended_on ? purchase.ended_on : purchase.created_on,
+        sys_primary_id: purchase.service_data || "",
+        sys_secondary_id: purchase.fsm_state,
+        user_id: purchase.customer,
+        status_reason: "imported",
+      });
+
+      const upp = await UserPaymentProduct.finish_purchase(user, payment, my_product);
+    }
+  }
+
+  await db.close();
+  process.exit(0);
+}
+
+export const main = async (opts) => {
+  if(opts.modeSplit) {
+    check(opts.input !== undefined, "--input required for --mode-split");
+    await mode_split(opts);
+  } else if(opts.modeAnalyze) {
+    await mode_analyze(opts);
+  } else if(opts.modeImport) {
+    await mode_import(opts);
+  } else {
+    console.error("USAGE: need one of --mode-split, --mode-analyze, or --mode-import. BACKUP THE DB FIRST.");
+    process.exit(1);
+  }
+}
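
A note on the input format: --mode-split expects the JSON that Django's "manage.py dumpdata" command writes (for example "python manage.py dumpdata > dump.json"), which is an array of records that each carry model, pk, and fields. That is why mode_split copies pk into fields.id and groups rows by model. A minimal sketch of one such record; only the model/pk/fields shape is the real dumpdata layout, the names inside fields are made up for illustration:

// hypothetical record from a django dumpdata file
const example_row = {
  model: "shop.product",   // app_label.model_name, becomes the split file data_dump/shop.product.json
  pk: 42,                  // primary key, copied into fields.id by mode_split
  fields: {                // illustrative field names, not the real schema
    title: "Learn Python the Hard Way",
    base_price: "29.99",
    active: true
  }
};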
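
One design note on mode_import: the WHERE clauses build their SQL by string interpolation, which is fine for a one-off import of your own trusted dump. If the values ever come from somewhere less trusted, duckdb-async appears to accept positional parameters through all(), mirroring the underlying duckdb Node bindings; a rough sketch under that assumption:

// sketch only: assumes duckdb-async forwards extra all() arguments as ? parameters
const purchases = await db.all(
  "SELECT * FROM PURCHASE WHERE product=? AND state='PAID'",
  product.id
);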