Scrape a NYT article from nytimes.com
npm install nyt-reader --save
var NYTReader = require('nyt-reader');
var nytreader = new NYTReader();

// The same example article is fetched by both usage styles below.
var articleUrl = 'http://atwar.blogs.nytimes.com/2015/10/23/the-specter-of-addiction-and-suicide-among-veterans';

// Promise style: call read() with only the URL and chain .then() on the result.
nytreader.read(articleUrl).then(function (article) {
  // Do something with the article
});

// Callback style: pass a function as the second argument; it receives the article.
nytreader.read(articleUrl, function (article) {
  // Do something with the article
});
/**
 * Shape of the article object handed back by the reader.
 * Every field is shown with an empty placeholder value.
 */
var Article = {
  // Title of the article (what appears in the page's h1).
  title: '',
  // Datetime of the article's last update, with timezone.
  datetime: '',
  // Body text of the article, in two formats.
  body: {
    // HTML elements removed; paragraphs separated by two newlines.
    clean: '',
    // Markdown version of the body text.
    markdown: ''
  },
  // Image URLs found in the body; each entry carries a `full` size.
  images: [{ full: '' }],
  // URL of the NYT article.
  source: ''
};
title The title of the article, i.e. what appears in the h1 on the page.
datetime The datetime, with timezone, of the last update of the article. Format: YY-mm-dd H:i:s GMT. The datetime will always be GMT+0000.
body The body of the article. Comes in two formats: clean and markdown. The clean format removes all HTML elements and separates paragraphs by two newlines. The markdown format is a Markdown version of the body text.
images An array of image URLs found in the body. Each image comes in the size full.
source The url of the nyt article.