Spiderbee
Crawl websites with JSON configuration
Installation
npm install --save puppeteer spiderbee
How to use
const { Spiderbee } = require('spiderbee')
// this example requires lodash
const _ = require('lodash')
const result = {}
/**
 * Launches Spiderbee, executes the crawl configuration and prints the
 * collected result.
 *
 * Declared `async` because the body awaits `Spiderbee.launch` and
 * `spiderbee.execute`; `await` inside a non-async function is a syntax error.
 *
 * @returns {Promise<void>} settles once `spiderbee.execute` has been awaited
 */
async function run() {
  const spiderbee = await Spiderbee.launch({})
  await spiderbee.execute({ /* configuration */ }, function (spider) {
    // Each 'data' event carries a path and a value; the first two characters
    // of the path are dropped before it is used as the lodash key.
    spider.on('data', function ({ path, value }) {
      _.set(result, path.substring(2), value)
    })
    // Print the accumulated object once the spider emits 'end'.
    spider.on('end', function () {
      console.log(result)
    })
  })
}
// Start the crawl by calling the `run` function declared above.
run()
Configuration
{
"url": "https://www.google.com",
"actions": [ /* actions */ ]
}
Actions
Text action:
{
"type": "text",
"selector": /* html selector */,
"resultKey": /* output json result key */,
"multiple": /* search multiple tags */
}
URL action:
{
"type": "url",
"resultKey": /* output json result key */
}
Links action:
{
"type": "links",
"selector": /* html selector */,
"resultKey": /* output json result key */,
"multiple": /* search multiple tags */,
"regex": /* regex filter for urls */,
"navigate": {
"actions": [ /* actions to execute navigating each url */ ]
}
}
Loop action:
{
"type": "loop",
"resultKey": /* output json result key */,
"times": /* number of times to execute */,
"actions": [ /* actions to execute */ ]
}
Each action:
{
"type": "each",
"selector": /* html selector */,
"resultKey": /* output json result key */,
"actions": [ /* actions to execute */ ],
"infinite": /* use this with infinite scroll */
}
Mouse Move action:
{
"type": "mouse_move",
"selector": /* html selector */
}
or, to move by a relative offset:
{
"type": "mouse_move",
"movement": {
"x": /* x-axis movement */,
"y": /* y-axis movement */
}
}
Mouse Down action:
{
"type": "mouse_down"
}
Mouse Up action:
{
"type": "mouse_up"
}
Click action:
{
"type": "click",
"selector": /* html selector */
}
Write action:
{
"type": "write",
"selector": /* html selector */,
"value": /* value to write */
}
Wait action:
{
"type": "wait",
"millis": /* milliseconds to wait */
}