Neat Paraskavedekatriaphobia's Meaning
    Share your code. npm Orgs help your team discover, share, and reuse code. Create a free org »

    scrubbypublic

    Scrubby

    A simple CSV scrubber.

    Example Usage

    Command line

    We read from STDIN and write to STDOUT

    ./lead-activities.js < in/leadActivities.csv > out/lead_task.csv
    

    lead-activities.js

    Here is a JavaScript file that uses scrubby.

    #!/usr/bin/env node
    
    'use strict';
    
    const scrub = require('scrubby');
    const moment = require('moment');
    const oppLookup = new scrub.Lookup();
    const users = require('./in/userMap');
    
    function doDate(s) {
      if (s) {
        let d = moment(s, 'M/D/YYYY');
        if (d.isValid()) {
          return d.format('YYYY-MM-DD');
        }
      }
      return '';
    }
    
    oppLookup.build('./in/leadMap.csv', 'oldId', 'newId', (error, leadMap) => {
    
      const header = [
        'WhoId',
        'Subject',
        'OwnerId',
        'TaskSubtype',
        'ActivityDate',
        'Priority',
        'Status',
        'Description'
      ];
    
      const w = new scrub.Writer(header);
    
      w.transform = (oldRow) => {
        if (!oldRow['Lead ID']) {
            // We don't want these rows. So they will be removed from the output.
            return null;
        }
        const newRow = {};
        newRow.WhoId = leadMap[oldRow['Lead ID']];
        newRow.Subject = oldRow.Subject;
        newRow.OwnerId = users[oldRow.Assigned];
        newRow.TaskSubtype = oldRow['Task Subtype'];
        newRow.ActivityDate = doDate(oldRow.Date);
        newRow.Priority = oldRow.Priority;
        newRow.Status = oldRow.Status;
        newRow.Description = oldRow['Full Comments'];
        return newRow;
      };
    
      (new scrub.Reader(w)).read();
    
    });
    

    API DOCS

    // This is the guy that reads your CSV file and turns each row into JSON
    // to make your life easy. We always read from STDIN.
    class Reader {
    
      // I need at least one writer to do anything. You can pass me a single
      // writer or an array of writers. Multiple writers let you output a bunch
      // of files from one input file.
      constructor(writers) {
      }
    
      // Call my read method and away we go!
      read() {
      }
    }
    
    // So I take the row from the reader that has nicely been turned into JSON
    // and transform it into an output row (or maybe I remove a row, or maybe
    // I aggregate rows). Like gumby, I am very flexible.
    class Writer {
    
      // I need a header which is just an array of your output CSV file column
      // names.
      // file is optional. If you don't pass me a file name/path, I just write to
      // STDOUT.
      constructor(header, file) {
      }
    
      // For the most simple case where you just need to transform the input row
      // to a single output row (and possibly remove some rows). Just
      // implement this method.
      // If you return null, we will remove the row from the output.
      transform(inRow) {
      }
    
      // If you have more complex requirements than you can accomplish with
      // just overriding transform, you can override this method and do
      // whatever you want. Whatever you stuff into outRows gets output on flush.
      onFlush() {
      }
    
      // Template method. Implement this if you have custom logic that aggregates
      // multiple input rows. You will also need to implement onFlush as well.
      shouldFlush(row) {
      }
    
      // I get called by my buddy the Reader. You don't need to worry about me
      // I know what I am doing.
      // If final is true we have processed all the rows and are calling flush
      // for the last time.
      flush(final) {
      }
    }
    
    // Build a key value lookup from a CSV file.
    //
    // Loads the whole thing in memory.
    //
    // myfile.csv
    //
    // Name,Rank,Serial
    // Bill,Private,1234
    // Bob,General,7890
    //
    // keyColName is required. This is the column that will be the key.
    //
    // valColName is optional.
    //
    // If you don't pass a valColName, you will receive the entire CSV row
    // as a JSON as the value.
    //
    // Example: { Bill: { Name: 'Bill', Rank: 'Private', Serial: '1234' } }
    //
    // If you pass valColName, it can be either a single column name or
    // an array of column names.
    //
    // If you pass a single valColName you will receive just a key value.
    //
    // Example
    //
    // build('myfile.csv', 'Name', 'Rank', cb);
    //
    // { Bill: 'Private' }
    //
    // You can also pass an array of column names for valColName.
    //
    // Example
    //
    // build('myfile.csv', 'Name', ['Rank','Serial'], cb);
    //
    // { Bill: { Rank: 'Private', Serial: '1234' } }
    class Lookup {
    
      // I will call your cb with the map you need.
      build(file, keyColName, valColName, cb) {
      }
    
      // Or you can call my get method. Whichever you prefer.
      get(key) {
      }
    }
    

    Multiple Lookups with Async

    If you have to do a bunch of lookups, it's probably nicer to use async series.

    #!/usr/bin/env node
    
    'use strict';
    
    const scrub = require('scrubby');
    const moment = require('moment');
    const series = require('async').series;
    const oppLookup = new scrub.Lookup();
    const dadLookup = new scrub.Lookup();
    const bookLookup = new scrub.Lookup();
    const users = require('./in/userMap');
    
    function doDate(s) {
      if (s) {
        let d = moment(s, 'M/D/YYYY');
        if (d.isValid()) {
          return d.format('YYYY-MM-DD');
        }
      }
      return '';
    }
    
    const header = [
      'WhatId',
      'DadName',
      'OwnerId',
      'TaskSubtype',
      'ActivityDate',
      'BookTitle',
      'Status',
      'Description'
    ];
    
    const w = new scrub.Writer(header);
    
    w.transform = (oldRow) => {
      const newRow = {};
      newRow.WhatId = oppLookup.get(oldRow['Organization ID']);
      newRow.DadName = dadLookup.get(oldRow['Dad ID']);
      newRow.OwnerId = users[oldRow.Assigned];
      newRow.TaskSubtype = oldRow['Task Subtype'];
      newRow.ActivityDate = doDate(oldRow.Date);
      newRow.BookTitle = bookLookup.get(oldRow.ISBN);
      newRow.Status = oldRow.Status;
      newRow.Description = oldRow['Full Comments'];
      return newRow;
    };
    
    series(
      [
        oppLookup.build.bind(oppLookup, './in/accountMap.csv', 'oldId', 'newId'),
        dadLookup.build.bind(dadLookup, './in/dadMap.csv', 'id', 'name'),
        bookLookup.build.bind(bookLookup, './in/bookMap.csv', 'ISBN', 'title'),
        () => { (new scrub.Reader(w)).read(); }
      ]
    );
    

    Keywords

    none

    install

    npm i scrubby

    Downloadsweekly downloads

    4

    version

    0.0.4

    license

    none

    repository

    githubgithub

    last publish

    collaborators

    • avatar