hcr

1.4.1 • Public • Published

hcr

Hcr helps you grab data from web pages. It can crawl an entire site recursively, and it supports limiting requests, adding custom headers, and converting HTML into an object shaped as you wish.

Dependencies

Getting Started

There is an example config that you can modify and use. The callback argument that you pass to the constructor is the default callback for all functions.

Installation

hcr is available on npm. To install it, type:

$ npm install hcr

Usage

// Example: fetching pages with getPage().
// Builds an options object, creates a Crawler from it together with a
// callback, then calls getPage() with an array of two sites.
// NOTE(review): `Crawler` and `callback` are not defined in this snippet;
// presumably `Crawler` is what the hcr package exports — confirm.
var opts = {
	// NOTE(review): presumably request-limit settings (a budget of 100 that is
	// refreshed every 60 * 1000 = 60000, likely milliseconds) — confirm.
	reservoir: 100,
  	reservoirRefreshInterval: 60 * 1000 
};

// The callback passed to the constructor is the default callback for all
// functions of the crawler.
var crawler = new Crawler(opts, callback);

crawler.getPage(['site1.com', 'site2.com']);
// Example: converting pages to objects with toObject().
// Creates a Crawler, describes the wanted output fields in `object`, and
// passes that description to toObject() together with two sites.
// NOTE(review): `Crawler` and `callback` are not defined in this snippet.
var opts = {
	// NOTE(review): presumably request-limit settings — confirm against hcr.
	reservoir: 100,
  	reservoirRefreshInterval: 60 * 1000 
};

// The callback passed to the constructor is the default callback for all
// functions of the crawler.
var crawler = new Crawler(opts, callback);

// One entry per output field; each entry names a selector, a function name
// and, optionally, arguments.
// NOTE(review): presumably `func`/`args` are invoked on the element(s)
// matched by `selector` (e.g. text(), attr('src')) — confirm.
var object = {
	'Name': {
		selector: '#name',
		func: 'text'
	},
	'Image': {
		selector: '#image',
		func: 'attr',
		args: ['src']
	}
};

crawler.toObject(['site1.com', 'site2.com'], object);
// Example: recursiveToObject() with `func`-style field descriptors.
// Creates a Crawler, describes the wanted output fields in `object`, and
// passes that description to recursiveToObject() together with two sites.
// NOTE(review): `Crawler` and `callback` are not defined in this snippet.
// NOTE(review): presumably this crawls each site recursively and converts
// every visited page — confirm against hcr.
var opts = {
	// NOTE(review): presumably request-limit settings — confirm against hcr.
	reservoir: 100,
  	reservoirRefreshInterval: 60 * 1000 
};

// The callback passed to the constructor is the default callback for all
// functions of the crawler.
var crawler = new Crawler(opts, callback);

// One entry per output field; each entry names a selector, a function name
// and, optionally, arguments.
// NOTE(review): presumably `func`/`args` are invoked on the element(s)
// matched by `selector` — confirm.
var object = {
	'Name': {
		selector: '#name',
		func: 'text'
	},
	'Image': {
		selector: '#image',
		func: 'attr',
		args: ['src']
	}
};

crawler.recursiveToObject(['site1.com', 'site2.com'], object);
// Example: recursiveToObject() with a `prop`-style field descriptor.
// Same call shape as the `func`-style usage, but the field is described by
// a property name (`prop`) instead of a function name.
// NOTE(review): `Crawler` and `callback` are not defined in this snippet.
var opts = {
	// NOTE(review): presumably request-limit settings — confirm against hcr.
	reservoir: 100,
  	reservoirRefreshInterval: 60 * 1000 
};

// The callback passed to the constructor is the default callback for all
// functions of the crawler.
var crawler = new Crawler(opts, callback);

// NOTE(review): presumably `prop` is read as a property of the element
// matched by `selector` (here its textContent) — confirm.
var object = {
	'Name': {
		selector: '#span',
		prop: 'textContent'
	}
};

crawler.recursiveToObject(['site1.com', 'site2.com'], object);
// Example: recursiveRegexToObject().
// Creates a Crawler, then calls recursiveRegexToObject() with two sites, a
// regular expression and a field description.
// NOTE(review): `Crawler` and `callback` are not defined in this snippet.
var opts = {
	// NOTE(review): presumably request-limit settings — confirm against hcr.
	reservoir: 100,
  	reservoirRefreshInterval: 60 * 1000 
};

// The callback passed to the constructor is the default callback for all
// functions of the crawler.
var crawler = new Crawler(opts, callback);

// Matches any single uppercase ASCII letter (global flag set).
// NOTE(review): what the crawler tests this against (URLs? page content?)
// is not shown here — confirm against hcr.
var regex = /[A-Z]/g;

// NOTE(review): presumably `prop` is read as a property of the element
// matched by `selector` — confirm.
var object = {
	'Name': {
		selector: '#span',
		prop: 'textContent'
	}
};

crawler.recursiveRegexToObject(['site1.com', 'site2.com'], regex, object);
// Example: being notified when crawling has finished.
// Creates a Crawler and registers `doneCallback` for its 'completed' event.
// NOTE(review): `Crawler` and `callback` are not defined in this snippet.
var opts = {
	// NOTE(review): presumably request-limit settings — confirm against hcr.
	reservoir: 100,
  	reservoirRefreshInterval: 60 * 1000 
};

// The callback passed to the constructor is the default callback for all
// functions of the crawler.
var crawler = new Crawler(opts, callback);
// Handler for the 'completed' event; takes no arguments in this example.
var doneCallback = function() {
	// crawling done
};

crawler.on('completed', doneCallback);

Package Sidebar

Install

npm i hcr

Weekly Downloads

6

Version

1.4.1

License

ISC

Unpacked Size

14.8 kB

Total Files

7

Last publish

Collaborators

  • jrergon