A crawler for HTML pages or JSON APIs.
npm install kw-crawler
const { HTMLCrawler } = require("kw-crawler");

// Example: read the first README heading from a GitHub repository page.
(async () => {
  // Required locally so this snippet runs on its own.
  const assert = require("assert");

  const url = "https://github.com/shana0440/crawler";
  const crawler = new HTMLCrawler(url);

  // The rule's `name` is the key the extracted value is read back under below.
  crawler.setRule({
    name: "title",
    selector: "#readme > article > h1:nth-child(1)",
    // NOTE(review): the callback argument exposes `.text()`, presumably a
    // cheerio/jQuery-style selection for the matched element — confirm.
    callback: selector => selector.text()
  });

  const results = await crawler.getResults();

  // strictEqual compares the extracted value with the expected heading;
  // assert(value, message) would only check that the value is truthy.
  assert.strictEqual(results.title, "Crawler");
})().catch(error => {
  // Surface request/assertion failures instead of leaving the promise floating.
  console.error(error);
  process.exitCode = 1;
});
const { JSONCrawler } = require("kw-crawler");

// Example: query the GitHub repository search API and keep a few fields per hit.
(async () => {
  const url = "https://api.github.com/search/repositories?q=shana0440/crawler";
  const crawler = new JSONCrawler(url);

  // The GitHub REST API rejects requests that do not send a User-Agent header.
  crawler.setHeader({
    name: "User-Agent",
    value: "request"
  });

  // NOTE(review): `selector` appears to pick the top-level "items" property of
  // the JSON response and hand it to `callback` — confirm against the library.
  crawler.setRule({
    name: "items",
    selector: "items",
    callback: items =>
      items.map(({ name, full_name, html_url }) => ({
        name,
        full_name,
        html_url
      }))
  });

  const results = await crawler.getResults();
  console.log(results);
  /**
   * Expected output shape:
   * {
   *   "items": [{
   *     "name": "...",
   *     "full_name": "...",
   *     "html_url": "..."
   *   }]
   * }
   */
})().catch(error => {
  // Surface network/parsing failures instead of leaving the promise floating.
  console.error(error);
  process.exitCode = 1;
});