Boloto
A faster, easier HTTP crawler for Node.js
v2 is a complete rewrite and is entirely different from v1
Features
- Server-side DOM
- Fork sub-tasks
- Rate limiter
- Task watcher
Install
Usage
const boloto = require('boloto')
/**
 * Example crawl: request a page, parse it with `res.html()` and map every
 * `<a>` element to its `href` attribute.
 *
 * @returns {Promise<*>} the value produced by `$('a').map(...)`.
 */
async function start() {
  const page = await boloto('http://xxx.ooo/')
  // A second request is issued as in the original example; its result is not used below.
  // NOTE(review): the second string argument is presumably a referer URL — confirm.
  const res2 = await boloto('http://example.com/', 'http://xxx.ooo/')
  const $ = await page.html()
  const anchors = $('a')
  // A regular function (not an arrow) is required because the callback reads `this`.
  const hrefOf = function() {
    return $(this).attr('href')
  }
  return anchors.map(hrefOf)
}
Save file
// Request the file, then write the response to a local path with res.save().
const res = await boloto('http://xxx.ooo/picture.jpg')
await res.save('./test.jpg')
Response info
// Accessors available on a response object.
// NOTE(review): the per-line notes below are inferred from the accessor names — confirm against the implementation.
let res = await boloto('http://xxx.ooo/')
res.headers.get('content-type') // look up a single header by name
res.headers.raw() // presumably all headers in raw form
res.cookie() // presumably the cookies attached to the response
res.status // presumably the HTTP status code
await res.text() // awaited: body as text (presumed)
await res.html() // awaited: yields the `$` selector function used in the Usage example
await res.buffer() // awaited: body as a buffer (presumed)
await res.json() // awaited: body parsed as JSON (presumed)
await res.body // the body itself is awaited here as well
Request with proxy
// Build an agent from a proxy URL, then pass it through the `agent` request option.
const agent = boloto.proxy('http://127.0.0.1:1080')
await boloto('url', { agent })
Request options
// Every request option shown in one call.
// NOTE(review): option meanings and units below are assumptions drawn from the option names — confirm.
await boloto('url', {
method: 'GET',
headers: {
'User-Agent': 'Boloto/2'
},
compress: true,
body: null,
redirect: 3, // presumably the maximum number of redirects to follow
timeout: 10000, // presumably milliseconds
agent: boloto.proxy('https://127.0.0.1:1080'), // NOTE(review): the proxy section uses an http:// URL for the same host — confirm which scheme is intended
limit: 3, // rate-limiter setting (see Features); exact semantics to confirm
delay: 1000, // rate-limiter setting; presumably milliseconds
cookie: { session: '1' },
referer: 'http://from.url/'
})
Task queue
// Run several URLs through the task queue and handle each response as it arrives.
// NOTE(review): `limit` and `delay` are presumably the rate-limiter settings
// (concurrency and milliseconds between requests) — confirm.
boloto.queue(['url1', 'url2'], {
  limit: 3,
  delay: 3000
}).on('data', async function(res) {
  // The handler must be async: `await` inside a plain function is a SyntaxError.
  console.log(res.url)
  await res.html()
}).on('end', function() {
  console.log('finished')
})
Watch
// Watch a URL with a numeric schedule argument; the handler receives the
// response and a `stop` callback, which is invoked here after the first result.
// NOTE(review): 2000 is presumably an interval in milliseconds — confirm.
boloto.watch('url', 2000, options).on('data', (res, stop) => {
  console.log(res.url)
  stop()
})
// Same watcher, scheduled with a string instead of a number.
// NOTE(review): the string looks like a six-field cron expression — confirm.
boloto.watch('url', '0 */10 * * * *', options).on('data', (res, stop) => {
  console.log(res.url)
  stop()
})
Save all files
// Save every URL in the list under the given directory path.
// `urls` and `options` are placeholders that this snippet does not define.
await boloto.saveAll([...urls], '/path/to/save', options)