octopus

A fast & easy web scraping framework

node-octopus

An octopus that can get his hands all over the web.

This package is a product of Syria, 2013.

npm install octopus
var octopus = require('octopus'),
    $ = octopus.$,
    htmlparser = require('htmlparser2');
/**
 * Returns the concatenated text found inside a parsed DOM node.
 * Adapted from the `domutils` package.
 *
 * @param {Object} elem - A node from the parsed document.
 * @returns {string} The node's own data for a text node, the joined text of
 *     all children for a tag node, and an empty string for anything else.
 */
function getText(elem) {
    var isTagNode = htmlparser.DomUtils.isTag(elem);

    if (!isTagNode) {
        // Only text nodes carry text themselves; every other non-tag node is ignored.
        return elem.type === htmlparser.ElementType.Text ? elem.data : "";
    }

    // Tag nodes: gather the text of each child recursively, in document order.
    var childTexts = elem.children.map(function (child) {
        return getText(child);
    });
    return childTexts.join("");
}
 
// Example: print the text and first link of every item in the "#mp-itn"
// list on the English Wikipedia main page.
var wikiOctopus = new octopus.Octopus();

// Register the page the crawler should fetch.
wikiOctopus.addURL('https://en.wikipedia.org/wiki/Main_Page');

// The pattern /^/ matches any string, so this handler is not restricted to
// particular URLs.
// NOTE(review): the handler is assumed to be called as (href, dom); the body
// needs `dom`, which the previous single `hrefdom` parameter never defined.
// Confirm the argument order against Octopus#handle.
wikiOctopus.handle(/^/, function(href, dom) {
    $(dom, '#mp-itn li').forEach(function(elem) {
        // Log the item's text followed by the href of its first <a> element.
        console.log('', getText(elem), $(elem, 'a')[0].attribs.href);
    });
});

wikiOctopus.start();
To do:

  • Document current features and usage.

© 2013 Hasan Arous. All rights reserved.

Mozilla Public License Version 2.0