node package manager

word-overlap

word-overlap

NPM version Build Status Coverage Status Code Climate Dependency Status

Check the number of words overlapping between 2 phrases or sentences

Useful for checking whether 2 titles / sentences / phrases refer to the same context, e.g. 2 event names.

Install

  1. with npm

    npm install word-overlap
  2. with browserify
    1. in file main.js

      // in main.js 
      var overlap = require('word-overlap');
       
      var sentence1 = 'The Hitchhikings Meetup in Betelgeuse by Ford Prefect';
      var sentence2 = 'The hitchhikings meetups by the hitchhikers';
       
      var reply = overlap(sentence1, sentence2, {
        ignoreCase: true,
        minWordLength: 2,
        ignoreCommonWords: true
      });
       
      console.log(reply);
    2. in file index.html

      <script src="build.js"></script>
    3. make the file build.js

      browserify main.js -o build.js --exclude WNdb --exclude lapack

Usage

var overlap = require('word-overlap');
 
var sentence1 = 'The Hitchhikings Meetup in Betelgeuse by Ford Prefect';
var sentence2 = 'The hitchhikings meetups by the hitchhikers';

simple case

overlap(sentence1, sentence2);
// [ 'The', 'by' ] 

option: ignore case

overlap(sentence1, sentence2, {
   ignoreCase: true
});
// [ 'the', 'hitchhikings', 'by' ] 

option: min word length

overlap(sentence1, sentence2, {
  ignoreCase: true,
  minWordLength: 2
});
// [ 'the', 'hitchhikings', 'by' ] 

option: ignore default common words

Common words by default include: a, an, the, this, that, there, it, in, on, for, not, your, you, at, to, is, us, out, by, I

overlap(sentence1, sentence2, {
  ignoreCase: true,
  minWordLength: 2,
  ignoreCommonWords: true
});
// [ 'hitchhikings' ] 

option: ignore number

Ignore numbers such as: 5e3, 0xff, -1.1, 0, 1, 1.1, 10, 10.10, 100, '-1.1', etc.

sentence1 = 'Welcome to 2015';
sentence2 = '2015 Meetup for the year';
console.log(overlap(sentence1, sentence2, {
  ignoreNumber: true
}));
// [ ] 

option: add your common words to ignore

overlap(sentence1, sentence2, {
  ignoreCase: true,
  minWordLength: 2,
  ignoreCommonWords: true,
  common: [ 'hitchhikings' ]
});
// [ ] 

option: depluralize words

overlap(sentence1, sentence2, {
  ignoreCase: true,
  minWordLength: 2,
  ignoreCommonWords: true,
  depluralize: true
});
// [ 'hitchhiking', 'meetup' ] 

option: depluralize words with plurals to ignore

overlap(sentence1, sentence2, {
  ignoreCase: true,
  minWordLength: 2,
  ignoreCommonWords: true,
  depluralize: true,
  ignorePlurals: [ 'hitchhikings' ]
});
// [ 'hitchhikings', 'meetup' ] 

option: stemming

var sentence1 = 'A programming course in SmallTalk';
var sentence2 = 'Have you programmed in SmallTalk?';
 
overlap(sentence1, sentence2, {
  stemming: true,
  ignoreCommonWords: true
});
// [ 'program', 'smalltalk' ] 

Try out the examples in file example.js with the command node example.js

Contribute

Please see CONTRIBUTING.md for details.

Versioning

This repository follows the Semantic Versioning guidelines:

  1. For patches, run grunt bump
  2. For a minor release, run grunt bump:minor
  3. For a major release, run grunt bump:major

License

(C) Sayanee Basu 2014, released under an MIT license