html-article-extractor
A web page content extractor for news websites.
installation
npm install html-article-extractor
usage
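var htmlArticleExtractor = require("html-article-extractor");
// Assumes `dom` is a jsdom instance, e.g. var JSDOM = require("jsdom").JSDOM;
var dom = new JSDOM("...");
var body = dom.window.document.body;
var result = htmlArticleExtractor(body);
console.log(result);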
The snippet above outputs:
{
html: '<div>contents</div>',
text: 'contents'
}
example
git clone https://github.com/jungyoun/html-article-extractor
cd html-article-extractor
npm install
node example/crawler.js