can-parse

0.0.3 • Public • Published

can-parse

Build Status

A simple context-free parser generator

Usage

Its usage is similar to Jison's, but it is designed to be tiny (<1k min+gzip) and fast, at the cost of being less expressive than Jison.

To create a parser, first create a grammar with lexical tokens and an expression tree as follows:

// Example grammar for a small HTML-like template language with `{...}` "magic" blocks.
//   lex:  token name -> regular expression; every pattern is anchored with `^`.
//   tree: expression name -> list of alternative rules; each rule is a sequence
//         of token names (from `lex`) and/or expression names (from `tree`).
const grammar = {
    lex: {
        // NOTE(review): key order is kept exactly as published in case the
        // lexer is order-sensitive — confirm against the can-parse source.

        // Punctuation tokens, named after the literal text they match.
        "{": /^\{/,
        "}": /^\}/,
        "<": /^</,
        "/>": /^\/>/,
        "</": /^<\//,
        ">": /^>/,
        SPACE: /^\s/,
        "'": /^'/,
        "\"": /^"/,
        "=": /^=/,

        // ALPHA_NUMERIC is declared here but not referenced by any rule in `tree`.
        ALPHA_NUMERIC: /^[A-Za-z0-9]+/,
        // A lowercase letter followed by letters, digits, "-", ":" or "_".
        TAGNAME: /^[a-z][-:_A-Za-z0-9]*/,
        // Everything up to (not including) the next "}"; may match the empty string.
        NOT_END_MAGIC: /^([^\}]*)/,

        // Runs of text that stop before the characters excluded by each class.
        NOT_SPACE: /^[^\s\{\}<]+/,
        NOT_SPACE_RIGHT_CARROT: /^[^\s>=\{]+/,
        NOT_MAGIC_OR_DOUBLE: /^[^"\{]+/,
        NOT_MAGIC_OR_SINGLE: /^[^'\{]+/
    },
    tree: {
        // One item (tag, closing tag, or text/magic), optionally followed by more.
        EXPRESSION: [
            ["TAG"],
            ["CLOSING"],
            ["MAGIC_OR_TEXT"],
            ["TAG", "EXPRESSION"],
            ["CLOSING", "EXPRESSION"],
            ["MAGIC_OR_TEXT", "EXPRESSION"]
        ],

        // Opening or self-closing tag, with optional whitespace and attributes.
        TAG: [
            ["<", "TAGNAME", ">"],
            ["<", "TAGNAME", "/>"],
            ["<", "TAGNAME", "SPACE", ">"],
            ["<", "TAGNAME", "SPACE", "/>"],
            ["<", "TAGNAME", "SPACE", "ATTRS", ">"],
            ["<", "TAGNAME", "SPACE", "ATTRS", "/>"]
        ],
        CLOSING: [
            ["</", "TAGNAME", ">"]
        ],

        // One or more attributes and/or magic blocks.
        ATTRS: [
            ["ATTR"],
            ["ATTR", "SPACE", "ATTRS"],
            ["MAGIC"],
            ["MAGIC", "ATTRS"]
        ],
        // name=value pairs in their quoted, unquoted and magic forms, or a bare name.
        ATTR: [
            ["QUOTE", "=", "QUOTE"],
            ["NOT_SPACE_RIGHT_CARROT", "=", "QUOTE"],
            ["NOT_SPACE_RIGHT_CARROT", "=", "NOT_SPACE_RIGHT_CARROT"],
            ["NOT_SPACE_RIGHT_CARROT", "=", "MAGIC"],
            ["NOT_SPACE_RIGHT_CARROT"]
        ],

        // Single- or double-quoted content, which may mix text and magic blocks.
        QUOTE: [
            ["'", "SINGLE_QUOTE_MAGIC_OR_TEXT", "'"],
            ["\"", "DOUBLE_QUOTE_MAGIC_OR_TEXT", "\""]
        ],
        SINGLE_QUOTE_MAGIC_OR_TEXT: [
            ["NOT_MAGIC_OR_SINGLE"],
            ["NOT_MAGIC_OR_SINGLE", "SINGLE_QUOTE_MAGIC_OR_TEXT"],
            ["MAGIC"],
            ["MAGIC", "SINGLE_QUOTE_MAGIC_OR_TEXT"]
        ],
        DOUBLE_QUOTE_MAGIC_OR_TEXT: [
            ["NOT_MAGIC_OR_DOUBLE"],
            ["NOT_MAGIC_OR_DOUBLE", "DOUBLE_QUOTE_MAGIC_OR_TEXT"],
            ["MAGIC"],
            ["MAGIC", "DOUBLE_QUOTE_MAGIC_OR_TEXT"]
        ],

        // Content outside of tags: any mix of text runs and magic blocks.
        MAGIC_OR_TEXT: [
            ["TEXT"],
            ["TEXT", "MAGIC_OR_TEXT"],
            ["MAGIC"],
            ["MAGIC", "MAGIC_OR_TEXT"]
        ],
        // A "{ ... }" block.
        MAGIC: [
            ["{", "NOT_END_MAGIC", "}"]
        ],
        // One or more whitespace characters and/or non-space runs.
        TEXT: [
            ["SPACE"],
            ["SPACE", "TEXT"],
            ["NOT_SPACE"],
            ["NOT_SPACE", "TEXT"]
        ]
    }
};

EXPRESSION is a reserved keyword: it is the starting point of the expression tree.

Once you have built your grammar, create a parser like this:

import parse from "can-parse";
 
// Build a parser from the grammar; the result is a function that is called
// with the text to parse and a per-token callback.
const parser = parse( grammar );

Then parse something:

// Parse a self-closing element. The values shown in the comments below are
// what the callback receives for the first token, "<" (index 0).
parser( "<my-element bar='car'/>", function( token, expressions ) {
    // The matched token: the `lex` entry that matched, the matched text, and its index.
    token; //-> { lex: "<", match: "<", index: 0 }
    // Expressions that ended with this token.
    expressions.end; // -> []
    // Expressions that started with this token.
    // NOTE(review): `ruleIndexes` appears to list the indexes of the rules in
    // grammar.tree[expression] that can begin here — e.g. EXPRESSION rules 0 and 3
    // are the ones starting with TAG, and all six TAG rules start with "<". Confirm.
    expressions.start;
 
    // -> [
    //   {
    //    "expression": "EXPRESSION",
    //    "ruleIndexes": [ 0, 3 ]
    //   },
    //   {
    //    "expression": "TAG",
    //    "ruleIndexes": [ 0, 1, 2, 3, 4, 5 ]
    //   }
    // ]
} );

The parser function takes:

  • The string to be parsed
  • A callback that is called for each lexical token as it is matched. It receives the token (which lexical rule matched, the text that was matched, and the index where it was matched) and the expressions that started and ended with that token.

Readme

Keywords

Package Sidebar

Install

npm i can-parse

Homepage

canjs.com

Weekly Downloads

3

Version

0.0.3

License

none

Unpacked Size

34.1 kB

Total Files

12

Last publish

Collaborators

  • christopherjbaker
  • justinbmeyer