minhashjs

1.0.1 • Public • Published

Welcome to minhashjs

Version Documentation Maintenance License: ISC

MinHash and LSH in JavaScript (Datasketch implementation)

Install

npm i minhashjs

MinHash Usage

const MH = require('./MinHash.js');

first_sentence = ['minhash', 'is', 'a',  'data', 'structure', 'for',
    'estimating', 'the', 'similarity', 'between']

second_sentence = ['minhash', 'is', 'a',  'data', 'structure', 'for',
    'estimating', 'the', 'similarity', 'between']

// First, instantiate the MinHash objects
m1 = new MH.MinHash()
m2 = new MH.MinHash()

// Then, update each MinHash
first_sentence.forEach((d, i) => m1.update(d));
second_sentence.forEach((d, i) => m2.update(d));

// You can now estimate the Jaccard similarity between these two MinHashes
m1.jaccard(m2)

LSH Usage

const F = require('./ForestLSH.js');
const MH = require("./MinHash");

var data1 = ['minhash', 'is', 'a', 'probabilistic', 'data', 'structure', 'for',
    'estimating', 'the', 'similarity', 'between', 'datasets']
var data2 = ['minhash', 'dog', 'data', 'structure', 'for',
    'fork', 'twice', 'similarity', 'food', 'money']
var data3 = ['minhash', 'is', 'probability', 'data', 'structure', 'for',
    'estimating', 'the', 'similarity', 'between', 'documents']

// Create MinHash objects
var m1 = new MH.MinHash(num_perm=128)
var m2 = new MH.MinHash(num_perm=128)
var m3 = new MH.MinHash(num_perm=128)

// Update the hashes
data1.forEach((d, i) => m1.update(d));
data2.forEach((d, i) => m2.update(d));
data3.forEach((d, i) => m3.update(d));


// Create a MinHash LSH Forest with the same num_perm parameter
var forest = new F.MinHashLSHForest(num_perm=128)

// Add m2 and m3 into the index
forest.add("m2", m2)
forest.add("m3", m3)

// IMPORTANT: you must call index(), otherwise the keys won't be searchable
forest.index()


// Using m1 as the query, retrieve the top X keys that have the highest Jaccard similarity
var X = 2
let r = forest.query(m1, X)

Run tests

npm run test

Run speed tests

npm run speed

🤝 Contributing

Contributions, issues and feature requests are welcome!
Feel free to check the issues page.

📝 License

Copyright © 2022 F4llis.
This project is ISC licensed.

Readme

Keywords

Package Sidebar

Install

npm i minhashjs

Weekly Downloads

6

Version

1.0.1

License

ISC

Unpacked Size

6.41 MB

Total Files

17

Last publish

Collaborators

  • clement.train