bquery

A quick, simple, and elegant way to fetch a web document and structure its content.

Installation

Latest release:

$ npm install bquery

var bquery = require("bquery");
bquery.query({
  "url": "https://github.com/",
  "selector": "ul.header-nav.left>li",
  "extract": {
    "title":{},
    "url": {
      "selector": "a",
      "extract": "href"
    }
  }
}).then(function(docs){
  console.log(docs);
  //=> {"results":[{"result":[{"title":"Explore","url":"https://github.com/explore"},{"title":"Features","url":"https://github.com/features"},{"title":"Enterprise","url":"https://enterprise.github.com/"},{"title":"Blog","url":"https://github.com/blog"}]}]}
})

Options

bquery automatically detects the web document's charset, but in special circumstances you can also set the document's charset explicitly.

var bquery = require("bquery");
bquery.query({
  "url": "https://github.com/",
  "selector": "ul.header-nav.left>li",
  "charset": "utf-8",
  "extract": {
    "title":{},
    "url": {
      "selector": "a",
      "extract": "href"
    }
  }
}).then(function(docs){
  console.log(docs);
})

You can also set the timeout period for the request.

bquery.query({
  "url": "https://github.com/",
  "selector": "ul.header-nav.left>li>a",
  "timeout": 3000
});

Sometimes you need to modify the page content, like CSS, JavaScript, or other content, before you fetch the document content. You can use the "preSelect" option for this.

bquery.query({
  "url": "https://github.com/",
  "selector": "ul.header-nav.left>li",
  "preSelect": function($){   //=> $ is a cheerio object, you can do any operate wich base on cheerio
    $("ul.header-nav.left>li").each(function(i, elem){
      if($("a", elem).text() == "Explore"){
        $(elem).remove()
      }
    });
  },
  "extract": {
    "title":{},
    "url": {
      "selector": "a",
      "extract": "href"
    }
  }
}).then(function(docs){
  console.log(docs); 
  //=>[
  //=>  { title: 'Features', url: 'https://github.com/features' },
  //=>  { title: 'Enterprise', url: 'https://enterprise.github.com/' },
  //=>  { title: 'Blog', url: 'https://github.com/blog' } 
  //=>]
})

You can also use a "callback" to modify the selected attribute.

{
  "url": "https://github.com/",
  "selector": "ul.header-nav.left>li",
  "extract": {
    "title":{
      "extract": "text",
      "callback": function(txt){
        return "foo_" + txt;
      }
    },
    "url": {
      "selector": "a",
      "extract": "href"
    }
  }
}

bquery

bquery

Installation

Options

Readme

Keywords

Package Sidebar

Install

Weekly Downloads

Version

License

Last publish

Collaborators

bquery

bquery

Installation

Options

Readme

Keywords

Package Sidebar

Install

DownloadsWeekly Downloads

Version

License

Last publish

Collaborators

Weekly Downloads