norvig-frequencies-stemmed

0.0.123 • Public • Published

norvig-frequencies-stemmed

dataset of frequencies of stemmed words based on norvig dataset. contains data for top 40,000 words [after which the words are very uncommon/strange...]

words were stemmed with stemmer and their frequencies combined from the unstemmed versions in the norvig dataset.

Installation

npm i norvig-frequencies-stemmed

Usage

var nvs = require('norvig-frequencies-stemmed')

//nvs[word] = frequency [0...1]

console.log(Object.entries(nvs));

Result:

[
  [ 'the', 0.0393389 ],    [ 'of', 0.0223634 ],     [ 'and', 0.0221026 ],
  [ 'to', 0.0206424 ],     [ 'a', 0.0154434 ],      [ 'in', 0.0144294 ],
  [ 'for', 0.0100889 ],    [ 'is', 0.0080034 ],     [ 'on', 0.0081438 ],
  [ 'that', 0.0057982 ],   [ 'by', 0.0056964 ],     [ 'thi', 0.0054923 ],
  [ 'with', 0.0054126 ],   [ 'i', 0.0052483 ],      [ 'you', 0.0050949 ],
  [ 'it', 0.0056792 ],     [ 'not', 0.0044793 ],    [ 'or', 0.0044155 ],
  [ 'be', 0.0045027 ],     [ 'ar', 0.0041427 ],     [ 'from', 0.0038692 ],
  [ 'at', 0.0038795 ],     [ 'as', 0.0038238 ],     [ 'your', 0.0035437 ],
  [ 'all', 0.0034482 ],    [ 'have', 0.0028471 ],   [ 'new', 0.0039227 ],
  [ 'more', 0.0026275 ],   [ 'an', 0.0025873 ],     [ 'wa', 0.0025816 ],
  [ 'we', 0.0023689 ],     [ 'will', 0.0023474 ],   [ 'home', 0.002286 ],
  [ 'can', 0.0021247 ],    [ 'us', 0.0046865 ],     [ 'about', 0.002086 ],
  [ 'if', 0.0019335 ],     [ 'page', 0.0022421 ],   [ 'my', 0.0018023 ],
  [ 'ha', 0.0018158 ],     [ 'search', 0.0018935 ], [ 'free', 0.0017275 ],
  [ 'but', 0.0017009 ],    [ 'our', 0.0017071 ],    [ 'other', 0.0018533 ],
  [ 'do', 0.0017726 ],     [ 'no', 0.0016009 ],     [ 'inform', 0.0017304 ],
  [ 'time', 0.0019318 ],   [ 'thei', 0.0015023 ],   [ 'site', 0.0017826 ],
  [ 'he', 0.0014359 ],     [ 'up', 0.0014659 ],     [ 'mai', 0.0014223 ],
  [ 'what', 0.0013923 ],   [ 'which', 0.0013783 ],  [ 'their', 0.0013347 ],
  [ 'out', 0.0012738 ],    [ 'ani', 0.0012115 ],    [ 'there', 0.0011956 ],
  [ 'see', 0.0012209 ],    [ 'onli', 0.0011258 ],   [ 'so', 0.0011297 ],
  [ 'hi', 0.0012374 ],     [ 'when', 0.0011064 ],   [ 'contact', 0.0011794 ],
  [ 'here', 0.0010896 ],   [ 'busi', 0.0011964 ],   [ 'who', 0.0010741 ],
  [ 'web', 0.0010612 ],    [ 'also', 0.0010488 ],   [ 'now', 0.00104 ],
  [ 'help', 0.0012461 ],   [ 'get', 0.0012724 ],    [ 'pm', 0.0010315 ],
  [ 'view', 0.0012651 ],   [ 'onlin', 0.0010231 ],  [ 'c', 0.0010145 ],
  [ 'e', 0.0010089 ],      [ 'first', 0.0009841 ],  [ 'am', 0.000992 ],
  [ 'been', 0.0009784 ],   [ 'would', 0.0009737 ],  [ 'how', 0.000978 ],
  [ 'were', 0.0009704 ],   [ 'me', 0.0009663 ],     [ 's', 0.0009609 ],
  [ 'servic', 0.0018543 ], [ 'some', 0.0009334 ],   [ 'these', 0.0009229 ],
  [ 'click', 0.0009629 ],  [ 'like', 0.0010202 ],   [ 'x', 0.0008648 ],
  [ 'than', 0.0008546 ],   [ 'find', 0.0009801 ],   [ 'price', 0.001368 ],
  [ 'date', 0.0010108 ],   [ 'back', 0.0008607 ],   [ 'top', 0.0008504 ],
  [ 'peopl', 0.0008333 ],
  ... 39900 more items
]

See Also

stonks

Dependencies (0)

    Dev Dependencies (0)

      Package Sidebar

      Install

      npm i norvig-frequencies-stemmed

      Weekly Downloads

      1

      Version

      0.0.123

      License

      MIT

      Unpacked Size

      700 kB

      Total Files

      5

      Last publish

      Collaborators

      • stonkpunk