a frequency analysis of the terms in a text
it uses nlp_compromise for proper tokenization & normalization
```js
t.ngram({min_count: 1, max_size: 5})
```
* `min_count: 1` // throws away seldom-repeated grams
* `max_size: 5` // maximum gram size. prevents the result from becoming gigantic
```js
var nlp = require('nlp_compromise');
var nlpNgram = require('nlp-ngram'); // the ngram plugin (package name assumed from the variable)
nlp.plugin(nlpNgram);                // register the plugin with nlp_compromise (call reconstructed)

var t = nlp.text('she said she swims');
t.ngram();
/*
[ [ { word: 'she', count: 2, size: 1 },
    { word: 'said', count: 1, size: 1 },
    { word: 'swims', count: 1, size: 1 } ],
  [ { word: 'she said', count: 1, size: 2 },
    { word: 'said she', count: 1, size: 2 },
    { word: 'she swims', count: 1, size: 2 } ],
  [ { word: 'she said she', count: 1, size: 3 },
    { word: 'said she swims', count: 1, size: 3 } ],
  [ { word: 'she said she swims', count: 1, size: 4 } ] ]
*/
```
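the nested result is plain data, so it can be flattened and sorted with ordinary javascript. a minimal sketch, continuing with `t` from the example above and assuming the options behave as described:

```js
// sketch only: cap grams at two words, then rank every gram by how often it occurs
var grams = t.ngram({min_count: 1, max_size: 2});

// flatten the size-grouped arrays into one list and sort by count, descending
var ranked = [].concat.apply([], grams)
  .sort(function(a, b) { return b.count - a.count; });

console.log(ranked[0]);
// -> { word: 'she', count: 2, size: 1 } for the 'she said she swims' example
```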
it also takes advantage of the proper tokenization & cleverness of nlp_compromise, i.e. "Tony Hawk" is one token, not two:
```js
var t = nlp.text(`Tony Hawk played Tony Hawk's pro skater`);
t.ngram();
/*
[ [ { word: 'tony hawk', count: 2, size: 1 },
    { word: 'played', count: 1, size: 1 },
    { word: 'pro', count: 1, size: 1 },
    { word: 'skater', count: 1, size: 1 } ],
  [ { word: 'tony hawk played', count: 1, size: 2 },
    { word: 'played tony hawk', count: 1, size: 2 },
    { word: 'tony hawk pro', count: 1, size: 2 },
    { word: 'pro skater', count: 1, size: 2 } ],
  [ { word: 'tony hawk played tony hawk', count: 1, size: 3 },
    { word: 'played tony hawk pro', count: 1, size: 3 },
    { word: 'tony hawk pro skater', count: 1, size: 3 } ],
  [ { word: 'tony hawk played tony hawk pro', count: 1, size: 4 },
    { word: 'played tony hawk pro skater', count: 1, size: 4 } ] ]
*/
```
the `min_count` option filters out grams that don't repeat:

```js
var t = nlp.text(`Tony Hawk played Tony Hawk's pro skater`);
t.ngram({min_count: 2}); // only keep grams that appear at least twice (option value inferred from the output)
// [ [ { word: 'tony hawk', count: 2, size: 1 } ], [], [], [] ]
```
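because the result is grouped by gram size, picking the most frequent gram of each length is straightforward. a minimal sketch using the output shape shown above (the `topPerSize` helper is hypothetical, not part of the library):

```js
// hypothetical helper: pick the highest-count gram from each non-empty size group
function topPerSize(grams) {
  return grams
    .filter(function(group) { return group.length > 0; })
    .map(function(group) {
      return group.reduce(function(best, g) {
        return g.count > best.count ? g : best;
      });
    });
}

var t = nlp.text(`Tony Hawk played Tony Hawk's pro skater`);
console.log(topPerSize(t.ngram()));
// -> one { word, count, size } object per gram length,
//    starting with { word: 'tony hawk', count: 2, size: 1 }
```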