spiderbee
TypeScript icon, indicating that this package has built-in type declarations

1.0.0-alpha.21 • Public • Published

Spiderbee

Crawl websites with JSON configuration

Installation

npm install --save puppeteer spiderbee

How to use

const { Spiderbee } = require('spiderbee')
// this example requires lodash
const _ = require('lodash')

const result = {}

/**
 * Launches Spiderbee, runs a crawl and collects every emitted value into the
 * module-level `result` object.
 *
 * Declared `async` because the body awaits `Spiderbee.launch` and
 * `spiderbee.execute`; `await` is a syntax error inside a non-async function.
 *
 * @returns {Promise<void>} Settles once `spiderbee.execute` has settled.
 */
async function run() {
  const spiderbee = await Spiderbee.launch({})

  await spiderbee.execute({ /* configuration */ }, function (spider) {
    spider.on('data', function ({ path, value }) {
      // Drop the first two characters of the emitted path before using it as
      // a lodash path (presumably a root prefix such as "$." - confirm).
      _.set(result, path.substring(2), value)
    })

    spider.on('end', function () {
      console.log(result)
    })
  })
}

run()

Configuration

{
  "url": "https://www.google.com",
  "actions": [ /* actions */ ]
}

Actions

Text action:

{
  "type": "text",
  "selector": /* html selector */,
  "resultKey": /* output json result key */,
  "multiple": /* search multiple tags */
}

Url Action:

{
  "type": "url",
  "resultKey": /* output json result key */
}

Links action:

{
  "type": "links",
  "selector": /* html selector */,
  "resultKey": /* output json result key */,
  "multiple": /* search multiple tags */,
  "regex": /* regex filter for urls */,
  "navigate": {
    "actions": [ /* actions to execute navigating each url */ ]
  }
}

Loop action:

{
  "type": "loop",
  "resultKey": /* output json result key */,
  "times": /* number of times to execute */,
  "actions": [ /* actions to execute */ ]
}

Each action:

{
  "type": "each",
  "selector": /* html selector */,
  "resultKey": /* output json result key */,
  "actions": [ /* actions to execute */ ],
  "infinite": /* use this with infinite scroll */
}

Mouse Move action:

{
  "type": "mouse_move",
  "selector": /* html selector */
}

or

{
  "type": "mouse_move",
  "movement": {
    "x": /* x-axis movement */,
    "y": /* y-axis movement */
  }
}

Mouse Down action:

{
  "type": "mouse_down"
}

Mouse Up action:

{
  "type": "mouse_up"
}

Click action:

{
  "type": "click",
  "selector": /* html selector */
}

Write action:

{
  "type": "write",
  "selector": /* html selector */,
  "value": /* value to write */
}

Wait action:

{
  "type": "wait",
  "millis": /* milliseconds to wait */
}

Package Sidebar

Install

npm i spiderbee

Weekly Downloads

1

Version

1.0.0-alpha.21

License

MIT

Unpacked Size

181 kB

Total Files

55

Last publish

Collaborators

  • diddy_o
  • j4m3sb0mb
  • roberto.cangiamila
  • ceprini
  • ffiore81