node package manager
Painless code sharing. npm Orgs help your team discover, share, and reuse code. Create a free org ยป

descent

descent

Recursive descent parser generator for nodejs.

Installation

 $ npm install -g descent

Usage

   Usage: descent [options]

   Options:

     -h, --help      output help information
     -C, --no-color  disable colored outpu
     -a, --ast       output ast
     -V, --version   output descent version

  Examples:

     $ descent < parser.g --ast
     $ descent < parser.g --ast --no-color
     $ descent < parser.g > parser.js

About

Descent produces a backtracking recursive descent parser, compiling to highly readable, structure JavaScript much like you would code by hand. For example the simple rule defined below includes lexical analysis within the production rule itself:

switch = 'on' | 'off';

producing the following JavaScript, in which the parser speculatively parses each alternative, restoring the thunkpos / inputpos each time, then finally returning a positive or negative value indicating success.

 function _foo(){
   var s = state();
   if ((string("on")
     || (restore(s)
      && string("off")))) return 1;
   restore(s);
 }

Example

The following grammar parses a name followed by a switch value, aka "on" or "off" which we then assign to $ our magic variable that is pushed to the parser's value stack. These values are then accessible when we define a rule with <name>:<rule-name>. This simple grammar below expands to roughly 150 lines of JavaScript, providing a much more manageable solution.

 program
   : stmt+
   ;

 stmt
   : n:name - s:switch eol  { console.log('%s is now %s', n, s ? 'on' : 'off') }
   ;

 name
   : < [a-zA-Z]+ > { $ = text }
   ;

 switch
   : 'on'    { $ = true }
   | 'off'   { $ = false }
   ;

 eol: '\n' | ';';

 -: [ \t]*;

This parser will recognize the following input:

myswitch on
myswitch off

outputting:

myswitch is now on
myswitch is now off

to use the grammar first parse it with descent(1) to construct the javascript:

$ descent < switch.g > out.js

then fire up the node REPL and give it a try:

$ node
> var parse = require('./out')
> parse('foo on;')
foo is now on
> parse('foo off;')
foo is now off

Below is example of speculative parsing power:

program
  : res:tobi { $ = res }
  ;

tobi
  : 'mr' - 'tobi' - 'the' - 'ferret' - 'is' - 'cool'           { $ = 'cool' }
  | 'mr' - 'tobi' - 'the' - 'ferret' - 'is' - 'super' - 'cool' { $ = 'super' }
  | 'mr' - 'tobi' - 'the' - 'ferret' - 'is' - 'not' - 'cool'   { $ = 'lame' }
  ;

-: [ \t]*;

Descent speculatively parses the alternatives until one matches, effectively ordering the rules, however providing a powerful parsing mechanism. Currently Descent does not employ memoization (though it may in the future), as the memoization overhead roughly decreasing performance by ~%45.

Parser

As mentioned descent produces a recursive descent parser, to illustrate this suppose we have the following small grammar:

 stmt: bool ';';

 bool
   : 'true'  { $ = true }
   | 'yes'   { $ = true }
   | 'false' { $ = false }
   | 'no'    { $ = false }
   ;

The following functions are generated, which closely resemble the input.

 function _stmt(){
   var s = state();
   if ((_bool()
       && char(";"))) return 1;
   restore(s);
 }

 function _bool(){
   var s = state();
   if (((string("true")
         && thunk(function(){
       $ = true
     }))
     || (restore(s)
      && ((string("yes")
       && thunk(function(){
       $ = true
     }))
     || (restore(s)
      && ((string("false")
       && thunk(function(){
       $ = false
     }))
     || (restore(s)
      && (string("no")
       && thunk(function(){
       $ = false
     }))))))))) return 1;
   restore(s);
 }

The entire module generated along with the parser boilerplate:

module.exports = function parse(input) {
  var orig = input
    , thunks = []
    , vstack = []
    , rightmostPos = 0
    , pos = 0
    , begin = 0
    , end = 0
    , thunkpos
    , mark
    , text
    , tmp
    , ts
    , $;

  return _stmt(), done();

  function done() {
    var len = thunks.length
      , thunk;
    for (var i = 0; i < len; ++i) {
      thunk = thunks[i];
      $ = undefined;
      thunk(thunk.begin, thunk.end);
      if (undefined !== $) vstack.push($);
    }
    if (input.length) error();
    return $;
  }

  function inspect(str) {
    return str
      .replace(/\\/g, '\\\\')
      .replace(/\r/g, '\\r')
      .replace(/\n/g, '\\n')
      .replace(/"/g, '\\"');
  }

  function state() {
    rightmostPos = pos;
    return {
        thunkpos: thunks.length
      , input: input
      , pos: pos
    };
  }

  function restore(state) {
    thunks.length = state.thunkpos;
    input = state.input;
    pos = state.pos;
    return 1;
  }

  function thunk(fn) {
    fn.begin = begin;
    fn.end = end;
    thunks.push(fn);
    return 1;
  }

  function anyChar() {
    if (input.length) {
      ++pos;
      input = input.substr(1);
      return 1;
    }
  }

  function char(c) {
    if (c == input[0]) {
      ++pos;
      input = input.substr(1);
      return 1;
    }
  }

  function string(str) {
    if (0 == input.indexOf(str)) {
      var len = str.length;
      pos += len;
      input = input.substr(len);
      return 1;
    }
  }

  function pattern(regexp) {
    var captures;
    if (captures = regexp.exec(input)) {
      var len = captures[0].length;
      pos += len;
      input = input.substr(len);
      return 1;
    }
  }

  function errorPosition() {
    var lineno = 1;
    for (var i = 0; i < pos; ++i) {
      switch (orig[i]) {
        case '\n': ++lineno; break;
      }
    }
    return { line: lineno };
  }

  function error() {
    pos = rightmostPos;
    var prefix = 'parse error on line ' + errorPosition().line + ' near '
      , prefixlen = 'Error: '.length + prefix.length + 1
      , context = 10
      , start = Math.max(pos - context, 0)
      , len = orig.length
      , end = Math.min(pos + context, len)
      , mark = pos - start
      , tilde = Array(end - pos).join('~')
      , ellipsis = end == len ? '' : '...'
      , _context = orig.slice(start, end) + ellipsis
      , context = inspect(_context)
      , head = _context.slice(0, mark)
      , diff = inspect(head).length - head.length
      , mark = Array(prefixlen + 1 + diff + mark).join(' ') + '^' + tilde;
    throw new Error(prefix + '"' + context + '"' + '\n' + mark);
  }

  function _stmt(){
    var s = state();
    if ((_bool()
        && char(";"))) return 1;
    restore(s);
  }

  function _bool(){
    var s = state();
    if (((string("true")
          && thunk(function(){
        $ = true
      }))
      || (restore(s)
       && ((string("yes")
        && thunk(function(){
        $ = true
      }))
      || (restore(s)
       && ((string("false")
        && thunk(function(){
        $ = false
      }))
      || (restore(s)
       && (string("no")
        && thunk(function(){
        $ = false
      }))))))))) return 1;
    restore(s);
  }

}

License

(The MIT License)

Copyright (c) 2011 TJ Holowaychuk <tj@learnboost.com>

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the 'Software'), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.