bquery

0.4.0 • Public • Published

bquery

A quick, simple and elegant way to fetch a web document and structure it.

Installation

Latest release:

$ npm install bquery
var bquery = require("bquery");
bquery.query({
  "url": "https://github.com/",
  "selector": "ul.header-nav.left>li",
  "extract": {
    "title":{},
    "url": {
      "selector": "a",
      "extract": "href"
    }
  }
}).then(function(docs){
  console.log(docs);
  //=> {"results":[{"result":[{"title":"Explore","url":"https://github.com/explore"},{"title":"Features","url":"https://github.com/features"},{"title":"Enterprise","url":"https://enterprise.github.com/"},{"title":"Blog","url":"https://github.com/blog"}]}]}
})

Options

bquery can automatically recognize the web document's charset, but in special circumstances you can also set the document's charset explicitly.

var bquery = require("bquery");
bquery.query({
  "url": "https://github.com/",
  "selector": "ul.header-nav.left>li",
  "charset": "utf-8",
  "extract": {
    "title":{},
    "url": {
      "selector": "a",
      "extract": "href"
    }
  }
}).then(function(docs){
  console.log(docs);
})

You can also set the timeout period for the request.

bquery.query({
  "url": "https://github.com/",
  "selector": "ul.header-nav.left>li>a",
  "timeout": 3000
});

Sometimes you need to modify the page content (such as CSS, JavaScript or other content) before you fetch the document content. You can use the "preSelect" option for this.

bquery.query({
  "url": "https://github.com/",
  "selector": "ul.header-nav.left>li",
  "preSelect": function($){   //=> $ is a cheerio object; you can perform any operation that cheerio supports
    $("ul.header-nav.left>li").each(function(i, elem){
      if($("a", elem).text() == "Explore"){
        $(elem).remove()
      }
    });
  },
  "extract": {
    "title":{},
    "url": {
      "selector": "a",
      "extract": "href"
    }
  }
}).then(function(docs){
  console.log(docs); 
  //=>[
  //=>  { title: 'Features', url: 'https://github.com/features' },
  //=>  { title: 'Enterprise', url: 'https://enterprise.github.com/' },
  //=>  { title: 'Blog', url: 'https://github.com/blog' } 
  //=>]
})

You can also use a callback to modify the selected attribute:

{
  "url": "https://github.com/",
  "selector": "ul.header-nav.left>li",
  "extract": {
    "title":{
      "extract": "text",
      "callback": function(txt){
        return "foo_" + txt;
      }
    },
    "url": {
      "selector": "a",
      "extract": "href"
    }
  }
}

Package Sidebar

Install

npm i bquery

Weekly Downloads

4

Version

0.4.0

License

BSD

Last publish

Collaborators

  • rickjose