bandolier2/lib/ormish.js

/*
 The ORMish object-relational-mapping-ish library for knex. It's just enough
 ORM added to knex to be useful, and anything else you need can be done with
 knex.  If there's any situation where you can't do something in ORMish it's
 expected that you will access the `knex` variable in thie module and simply use
 knex directly.  Here's how you'd do that:

 ```javascript
 import { knex } from './lib/ormish.js';

 // do your own update using a model's table name
 knex(somemodel.table_name).where(where).update(what);
 ```

 This makes it easy to use ORMish, but still break out when you need to create
 custom queries or speed things up.  The only advice here is to put any special
 operations in a `class Model` subclass as a member function.
 */
import config from '../knexfile.cjs';
import knexConfig from 'knex';
import assert from 'assert';
import { attachPaginate } from 'knex-paginate';

/*
 A preconfigured knex.js driver using the config.development configuration
 by default.

 ___TODO___: Need to make this configurable, even though I just use one config right now since I run sqlite3 all the time.
 */
export const knex = knexConfig(config.development);

/*
 run the PERF_TRICKS to configure sqlite3 when thing start, really need to make this
 a configuration and only do it with sqlite3, but for now just get this done
 */
if(config.development.client === "sqlite3") {
  const PERF_TRICKS = [
    "pragma journal_mode = WAL", // use a WAL journal to not block writers/readers
    "pragma synchronous = normal", // set concurrency to normal now that we have WAL
    // "pragma temp_store = memory", // use RAM to make the temporary indices
    // "pragma mmap_size = 30000000000", // use 30GB of mmap to store DB, not useful in multi-process settings
    // "pragma page_size = 32768", // improve performance only if you store large stuff
    "pragma vacuum",  // give it a vacuum, this could impact startup time significantly
    "pragma optimize", // optimize it, but should be done periodically
  ];

  for(let sql of PERF_TRICKS) {
    await knex.raw(sql);
  }
}

attachPaginate();

/*
 Filled in by `load_schema` to give access to the database scheme in the admin
 tool and generally through the API.
 */
export const SCHEMA = {};

const load_schema = async () => {
  const raw = await knex("sqlite_schema").where({"type": "table"}).select(['type', 'name']);
  for(let table of raw) {
    table._columns = await knex(table.name).columnInfo();
    SCHEMA[table.name] = table;
  }
}

await load_schema();

/*
 In some cases (like the generic admin) you need to get validation
 rules but you don't have a specific class to work with.  This function
 is called by Model.validation and you can call it directly to get rules
 for a database table.


 ___BUG___: validator doesn't do proper date formatting yet, so it won't support date types.

 1. `name string` - the table name.
 2. `rules Object` - default rules with empty "" for the rules you want filled in
 3. `all boolean` - set this to true if you want everything
 4. `no_id boolean` - defaults to true, set false if you also want the id
 5. `return Object` - the resulting rules to use with Validator
 */
export const validation = (name, rules, all=false, no_id=true) => {
  assert(rules, "rules parameter is required and will be modified");
  const schema = SCHEMA[name];
  assert(schema, `There is no schema for table named ${name}. Did you forget to migrate:latest?`);

  for(let [key, opts] of Object.entries(schema._columns)) {
    // most of the time you don't want the id
    if(no_id && key === "id") continue;

    if(all || rules[key] === "") {
      let required = opts.nullable || opts.defaultValue ? "nullable|" : "required|";

      switch(opts.type) {
        case "varchar": // fallthrough
        case "text":
          // some databases have an email type but we'll also look for ones named email
          if(key === "email") {
            rules[key] = required + "email";
          } else {
            rules[key] = required + "string";
          }

          if(opts.maxLength) {
            rules[key] += `|min:0|max:${opts.maxLength}`;
          }
          break;
        case "boolean":
          rules[key] = required + "boolean";
          break;
        case "integer":
        case "float":
          rules[key] = required + "numeric";
          break;
        case "date":
          // rules[key] = required + "date";
          break;
        case "email":
          // not all databases have this
          rules[key] = required + "email";
          break;
        default:
          rules[key] = required;
      }
    }
  }

  return rules;
}

/*
 The base class for all models found in `lib/models.js`.  You use this by extending it with:

 ```javascript
 class User extends Model.from_table('user') {
 }
 ```

 This will create a `User` class that is automatically configured using the SCHEMA create from the `user` table in your database.  You won't need to define the attributes on this class as it will be correctly populated from the database.

 The database is therefore the "source of truth" for all of the models.  You can then add functions to extend what this class does.
 */
export class Model {

  /*
   Allows you to build a new object of this Model with the given `attr`
   already set, but really you should use the `Model.from(attr)` method instead.
   This does _no_ object sanitization with `Model.clean(attr)` method, and if
   it doesn't match the underlying database it will throw an exception.

   - `attr Object` - the attributes for this model
   */
  constructor(attr) {
    assert(attr, "Must give attributes.");
    Object.assign(this, attr);
  }

  /*
   How to actually create a new instance of this model. This
   will do two things:

   1. Correctly use the schema for the subclass model.
   2. Sanitize the input to remove anything that shouldn't be in the database.

   The `also_remove` parameter is a list of additional keys to also scrub from the object.

   - `attr Object` -- The attributes this should start with.
   - `also_remove Array` -- list of additional attributes to remove.
   */
  static from(attr, also_remove=undefined) {
    return new this(this.clean(attr, also_remove));
  }

  /*
   Returns an object representing the schema for this Model. Remember that this
   will reflect what's in the database schema, which is formatted however
   `knex.js` formats your database Schema.  Might not be portable between
   databases and only tested with SQlite3.

   _This is an attribute accessor, so just do `obj.schema` rather than call it like a function._

   - `return Object` - The schema for this model.
   */
  get schema() {
    return this.constructor.schema;
  }

  /*
   Uses the `this.schema` scrub out any attributes that are not valid for the
   schema.  This is effectively a whitelist for the allowed attributes based on
   the database schema.  You can use the `also_remove` parameter to list
   additional attributes to remove, which you should do to sanitize incoming
   objects for things like password fields.

   - `attr Object` - The attributes to clean.
   - `also_remove Array` - Additional attributes to remove.
   */
  static clean(attr, also_remove=undefined) {
    assert(attr, "Must give attributes to clean.");

    let clean_entries = Object.entries(attr)
      .filter(([k,v]) => k in this.schema);

    if(also_remove) also_remove.forEach(k => delete clean_entries[k]);

    return Object.fromEntries(clean_entries);
  }

  /*
   Returns the name of this model's current table. Used mostly internally but
   useful otherwise.  This is an attribute accessor (get) so you can just do:

   ```javascript
   const name = obj.table_name;
   ```

   Instead of calling it like a function.  The main reason to use this is in
   tools like the admin table browser found in `admin/pages/Table.svelte` and
   `api/admin/table.js` so check those files out to see what's going on.
  */
  get table_name() {
    return this.constructor.table_name;
  }

  /*
    Returns an object of basic rules meant for `lib/api.js:validate`
    based on what's in the database.  It's meant to be an easy to
    pass in starter which you can augment.  It expects a set of rules
    with keys you want configured. Any key that's set to an empty string ""
    will be filled in with a minimum rule to match the database schema.

    It's designed to be called once at the top of an api/ handler to get
    a basic set of rules.  You could also run it to print out the rules then
    simply write the rules directly where you need them.  It actually just
    calls the `validation` module function with `validation(this.table_name, rules)`.

    - `param rules {Object}` - rules specifier
   */
  static validation(rules) {
    return validation(this.table_name, rules);
  }

  /*
   Delete this object from the database.  We use the word "destroy" because
   `delete` is reserved. It uses this object's `.id` to determine which object
   to delete and uses this `knex` code:

   ```javascript
   await knex(obj.table_name).where({id: obj.id}).del();
   ```

   As with all of ORMish it doesn't handle any relations or references when it does
   the delete so if you have constraints it will fail.  Use `knex` directly in that
   case.

   + `obj Model` -- the object to destroy, just have id
   */
  static async destroy(obj) {
    assert(this.table_name !== undefined, "You must set class variable table_name.");
    assert(obj.id !== undefined, "No id in object to destroy.");

    await knex(this.table_name).
      where({id: obj.id}).
      del();
  }

  /*
   Does very basic 1-to-1 (1:1) relation, for use inside a custom function that returns
   additional records.  1:1 mappings don't show up too often, as I think most modern database
   designs would rather combine them into one giant table, but it does happen when you need to
   add to a database without changing a "sacred" table.

   This is really just a call to `model.first(where, columns)` and mostly acts as a kind of
   documentation.

   + `model Model` - The other model to query.
   + `where Object` - The knex style selection criteria passed to `Model.first`.
   + `columns` - Passed to `Model.first` to restrict the columns returned.
   + ___return___ `Model` subclass found, or `undefined` if nothing.
   */
  async has_one(model, where, columns) {
    assert(where.id !== undefined, `where must at least have id for has_one ${model.table_name} you have ${JSON.stringify(where)}`);
    return await model.first(where, columns);
  }

  /*
   Maps this model to another with a 1-to-many (1:M) relational mapping. It queries
   the other `model` based on the given { where }, which is usually an `id` in the other
   table.

   For example, if I have a `user` table and a `payment` table, I can have a 1 `user` -> M `payment`
   like this:

   ```javascript
    export class User extends Model.from_table('user') {
      async payments() {
        return await this.has_many(Payment, { user_id: this.id });
      }
    }
    ```

    In this situation my `User` model is querying the `Payment` model for any `payment`
    records that have `user_id=this.id`.  That means if the `user.id` is 1, then it will
    find any `payment` records with `user_id=1`.
   */
  async has_many(model, where, columns) {
    return await model.all(where, columns);
  }

  /*
    Implements a simple many-to-many (M:M) using an intermediary table which
    maps two table's IDs using two columns.  For example, if you have a `User`
    and `Payment` model, and you decide that `Payment` can have multiple users
    then you'd do this:

    ```javascript
    await paid1.many_to_many(User, "payment_user");
    ```

    This is translated into `knex` as:

    ```javascript
    await knex("user").where("id", "in",
      knex("payment_user") // subquery in payment_user
        .select(`user_id as id`)
        .where(`payment_id`, "=", this.id)
    );
    ```

    The inverse operation would be:

    ```javascript
    await user1.many_to_many(Payment, "payment_user");
    ```

    Which is translated into `knex` as:

    ```javascript
    await knex("payment").where("id", "in",
      knex("payment_user") // subquery in payment_user
        .select(`payment_id as id`)
        .where(`user_id`, "=", this.id)
    );
    ```

    ### Performance

    This will fail if you have a massive many-to-many since it uses a subquery to get
    a set of IDs, but in many cases it actually might outperform a more direct complicated
    query.  You should use this, then resort to raw `knex` code to craft a better one as
    needed.

    ### Cleaning

    Everything returned is first ran though `.clean()` to and turned into the
    target model so you can use it directly.  This ensures that if a database
    driver infects the returned data with garbage it will be cleaned and you
    get pure models.  In the above example you'd get a `User` or `Payment`
    with only what's in the schema.

    ### Attributed Relations

    If your relation table has extra attributes--a super useful trick--then this
    will not pick them up.  In our example above, if you have another field
    `payment_user.transaction_date` in addition to `user_id` and `payment_id` then
    you won't get the `transaction_date`.  In that case--you guessed it--use `knex`
    directly to query for those.
   */
  async many_to_many(model, through_table) {
    // SECURITY: doing string interpolation which might allow injecting SQL
    let query = knex(model.table_name).where("id", "in",
      knex(through_table).select(`${model.table_name}_id as id`).where(`${this.table_name}_id`, "=", this.id));

    let rows = await query;

    let results = [];

    for(let r of rows) {
      results.push(new model(await model.clean(r)));
    }

    return results;
  }

  /*
    Counts the number of records matching the `where` specification.
    It uses a direct SQL query using the `knex` operation `count`, but
    `knex` returns a "database specific" result.  This will try to
    extract the count result, but will warn you when it can't do that.

    + `where Object` - knex where specification.
    + `columns Array` - columns to return
    + ___return___ `number`
   */
  static async count(where, columns) {
    // the knex count api returns a DB specific result, so we need
    // to specify what we want, which is count:
    const spec = { count: columns || ['id']};
    let res = await knex(this.table_name).where(where).count(spec);
    if(res.length == 1) {
      // single result, just give the count
      return res[0].count;
    } else {
      console.warn("Your call to count in", this.table_name, "using where=", where, "columns: ", columns, "returned a weird result:", res);
      return res; // weird result let them deal with it
    }
  }

  /*
    Your generic insert for this model's table.  The `attr` is checked against the
    database schema by `knex`, so you have to remove anything that doesn't belong.
    Use the `Model.clean` function to do that easily.  Otherwise it works exactly
    like in knex with:

    ```javascript
    await knex(this.table_name).insert(attr);
    ```

    This function expects a return value that has one result with the `id` of
    the inserted row, which might be database specific. These days if a database
    doesn't support this it's a trash database that shouldn't be used.

    + `attr Object` - The attributes to store.
    + ___return___ `number` - returns the id of the inserted object
  */
  static async insert(attr) {
    assert(this.table_name !== undefined, "You must set class variable table_name.");
    assert(attr, `You must give some attr to insert into ${this.table_name}`);

    let res = await knex(this.table_name).insert(attr).returning(['id']);
    assert(res, `Failed to get an id from the insert for ${this.table_name}`);

    attr.id = res[0].id;

    return new this(attr);
  }

  /*
    Implements an upsert (insert but update on conflict) for Postgres, MySQL, and SQLite3 only.

    ___TODO___: allow specifying returns for databases that support it

    + attr `Object` - The attributes to insert or update.
    + conflict_key `string` - The key that can cause a conflict then update.
    + merge `boolean` - Defaults to true and will change the record. false will ignore and not update on conflict.
    + ___return___ `number` - id or undefined if ignored, no update
   */
  static async upsert(attr, conflict_key, merge=true) {
    assert(conflict_key !== undefined, `You forgot to set the conflict_key on upsert to table ${this.table_name}`);
    let result = undefined;

    if(merge) {
      result = await knex(this.table_name)
        .insert(attr)
        .returning(['id'])
        .onConflict(conflict_key)
        .merge();
    } else {
      result = await knex(this.table_name).insert(attr).returning(['id']).onConflict(conflict_key).ignore();
    }

    // BUG: is returning undefined for ignored and no result right?
    if(result == undefined || result.length == 0) {
      return undefined;
    } else {
      attr.id = result[0].id;
      return attr;
    }
  }

  /*
   Performs an UPDATE operation on the object `what` found by `where`.  This
   translates into the following `knex.js` code:

   ```javascript
   return knex(this.table_name).where(where).update(what);
   ```

   So it's mostly just syntactic sugar over `knex.js` (like everything in ORMish). If you
   need a more complex `update` then just use `knex()` directly.

   + `where Object` -- The knex where options for a query.
   + `what Model` -- The table/Model to use as the update.
   + ___return___ `number` -- _WARNING_: I believe this returns the count updated but that might be database specific.
   */
  static async update(where, what) {
    assert(where, "You must give a where options.");
    return knex(this.table_name).where(where).update(what);
  }

  /*
    Returns one record from the table based on the `knex` where specification,
    and can limit the columns (attributes) that are returned. This is a `static`
    method so you use it like this:

    ```javascript
    const person = User.first({id: 1});
    ```

    The `where` can be anything that `knex` understands as query as well.

    + `where Object` -- The `knex` where specification.
    + `columns Array` -- List of columns to include in the returned object.
    + ___return___ `Object`
   */
  static async first(where, columns) {
    assert(where, "You must give a where options.");
    let attr = undefined;

    if(columns) {
      attr = await knex(this.table_name).column(columns).first().where(where);
    } else {
      attr = await knex(this.table_name).first().where(where);
    }

    return attr !== undefined ? new this(attr) : attr;
  }

  /*
     Delete a given record--or batch of records--based on the `where` specification.
     It really just calls this `knex` operation:

     ```javascript
     return knex(this.table_name).where(where).del();
     ```

     + `where Object` -- The where specification, doesn't have to be an Object.
     + ___return___ `number` -- Number deleted, but that might be database specific.
   */
  static async delete(where) {
    assert(where, "You must give a where options.");
    return knex(this.table_name).where(where).del();
  }

  /*
    Returns all records matching the `where` specification, and also
    reduces the returned columns based on the `columns` list. This is
    the most common operation in `knex`, and when you do both `columns`
    and `where` it's just doing this:

    ```javascript
      results = await knex(this.table_name).column(columns).where(where).select();
    ```

    As usual, if this isn't efficient or complex enough for you then you can
    just do it directly in `knex`.

    + `where Object` -- The usual `knex` where specification, and can be anything `knex` likes.
    + `columns Array` -- The list of columns to return.
    + ___return___ `Array` of `Model` subclass.
   */
  static async all(where, columns) {
    assert(where, "You must give a where options.");
    let results = [];

    if(columns) {
      results = await knex(this.table_name).column(columns).where(where).select();
    } else {
      results = await knex(this.table_name).where(where);
    }

    let final = results.map(r => new this(r));
    return final;
  }

  /*
    This is how you create your own models based on `Model`.  It's a neat
    trick as well, which allows you to specify a table to use to populate
    a new class.  First, you use it like this:

    ```javascript
    class User extends Model.from_table('user') {
    }
    ```

    How this works:

    + `from_table` crafts an empty class with `let m = class extends Model {}`.
    + Since JavaScript is a scripting language you can modify this class, and return it.
    + `from_table` then adds a `table_name` and the `schema` to this class.
    + After that it returns the new empy class, which you then extend and now you have a class pre-configured with the schema and table_name already set.

    Obviously this only works with ES6 style classes.  After this setup you just add your own
    methods, use `super` like normal, and everything else.  The functions in `Model` will all
    work because `schema` and `table_name` are set.

    + `table_name` -- the name of the base table in the database to use. Must be in `SCHEMA`.
   */
  static from_table(table_name) {
    let m = class extends Model { };
    m.table_name = table_name;

    assert(SCHEMA, "schema is not loaded!");
    assert(SCHEMA[table_name], `table named ${table_name} not in SCHEMA: ${ Object.keys(SCHEMA) }`);

    m.schema = SCHEMA[table_name]._columns;
    return m;
  }

  /*
    Determines if at least one record exists for the `where` specification.
    This is doing a select for only an `id` column with a limit of 1, and if
    it gets a result then it returns true.  Probably not the most efficient
    but it is portable. Here's what `knex` it's doing:

    ```javascript
    let res = await knex(this.table_name).select('id').where(where).limit(1).first();
    ```

    + `where Object` -- the query specification for `knex`.
    + ___return___ `boolean` -- Whether it exists or not.
   */
  static async exists(where) {
    let res = await knex(this.table_name).select('id').where(where).limit(1).first();
    return res ? res.id : false;
  }
}

export default { knex, SCHEMA, Model, validation };