ngram-natural-language-generator
Takes in text/file(s)/stream(s) and generates random sentences that sound like they could have been in the original text, using a bigram generator. Surprisingly, it works on most languages and writing styles.
You can experiment with your own texts here: http://cesine.github.io/ngram-natural-language-generator/samples
Usage
Commandline
$ npm install ngram-natural-language-generator --save
$ ./index.js samples/jabberwocky.txt
Browser
$ bower install ngram-natural-language-generator --save
There is an example of browser usage in samples/index.html.
< textarea id = " ngram-nlg-text " > < / textarea >
< textarea id = " ngram-nlg-result " > < / textarea >
< script >
window . NLG = window . exports = window . exports || { } ;
</ script >
< script src = " bower_components/ngram-natural-language-generator/lib/tokenizer.js " > < / script >
< script src = " bower_components/ngram-natural-language-generator/lib/nlg.js " > < / script >
< script src = " bower_components/ngram-natural-language-generator/lib/ngrams.js " > < / script >
< script src = " bower_components/ngram-natural-language-generator/lib/ngram-nlg.js " > < / script >
< script src = " bower_components/ngram-natural-language-generator/lib/drag-and-drop-file-upload.js " > < / script >
< script >
NLG . currentOptions = {
text : ' '
} ;
NLG . currentOptions . text = document . getElementById ( ' ngram-nlg-text ' ) . value ;
NLG . build ( NLG . currentOptions , function ( err , result ) {
if ( err ) return console . warn ( err ) ;
document . getElementById ( ' ngram-nlg-result ' ) . value = NLG . generate ( NLG . currentOptions . model ) ;
} ) ;
</ script >
Node
From file:
var generator = require ( ' ngram-natural-language-generator ' ) . generator ;
generator ( {
filename : ' samples/jabberwocky.txt ' ,
model : {
maxLength : 100 ,
minLength : 50
}
} , function ( err , sentence ) {
console . log ( sentence ) ;
} ) ;
From text:
var generator = require ( ' ngram-natural-language-generator ' ) . generator ;
generator ( {
text : ' Colorless green ideas sleep furiously. ' ,
model : {
maxLength : 100 ,
minLength : 50
}
} , function ( err , sentence ) {
console . log ( sentence ) ;
} ) ;
From web url:
var generator = require ( ' ngram-natural-language-generator ' ) . generator ;
var http = require ( ' http ' ) ;
http . get ( ' http://www.jabberwocky.com/carroll/jabber/jabberwocky.html ' , function ( res ) {
generator ( {
stream : res
} , function ( err , sentence ) {
console . log ( sentence ) ;
} ) ;
} ) ;
From tokens:
If you're working with a language that doesn't tokenize on whitespace or Unicode punctuation, you can supply the tokens yourself.
var generator = require ( ' ngram-natural-language-generator ' ) . generator ;
generator ( {
tokens : [ ' その ' , ' 酩酊 ' , ' 状態を ' , ' 愛する ' , ' ことに ' , ' よって ' ] ,
model : {
maxLength : 100 ,
minLength : 50
}
} , function ( err , sentence ) {
console . log ( sentence ) ;
} ) ;