split-tools

1.2.3 • Public • Published

split-tools

Advanced text splitter. Turn a text string into a array (optional items as tokens)

NPM version Dependency Status License Downloads

WARNING: The package is partially inoperable (it does not know how to parse balanced brackets or other terminators with a nesting depth of more than one element and in some other cases). The corrected version of the package is numbered 2.0.0.

Installation

npm install split-tools --save

Usage

const Splitter = require('split-tools');
 
const splitter = new Splitter(options?);
  • options
    • matchers аn associative array in which the key is an arbitrary token ID, the value is a string or a regular expression to match
    • parsers аn associative array in which the key is an arbitrary token ID, the value is a converter function that takes values:
      • text part of the text matched with RegExp specified for identical ID
      • group1, group2, ..., groupN bracket groups specified in RegExp
      • other parameters such as offset and source string (see string.replace). By virtue of the algorithm used to separate lines, they do not carry reliable values.

It is important!!!

The order of adding matchers does matter. Matchers will be processed in the same order in which they were added.

Samples

simple (array out)

const Splitter = require('split-tools');
 
const text = `
111
222 // aaaaaaaaaaaaaa
222 // aaaaaaaaaaaaaa
333 333333333333333 /*ccccc*/ 33333333333333333333 333333 33333333333 
444 ## bbbbbbbbbbbbbb
555
666 /* cccccccccccccc */
777
/* cccccccccccccc */
888
// aaaaaaaaaaaaaa
999
`;
 
 
const splitter = new Splitter({
    matchers: {
        comment1: /\/\/\s*(.*)/g,
        comment2: /\#\#\s*(.*)/g,
        multicomment: /\/\*\s*([\s\S]*?)\s*\*\//g
    }
});
 
splitter.addParser("unmatched",(match)=>{
    return {
        type: "text",
        data: match
    };
});
 
splitter.addParser("comment1",(match,text)=>{
    return {
        type: "comment1",
        data: text
    };
});
 
splitter.addParser("comment2",(match,text)=>{
    return {
        type: "comment2",
        data: text
    };
});
 
splitter.addParser("multicomment",(match,text)=>{
    return {
        type: "multicomment",
        data: text
    };
});
 
const list = splitter.process(text);
 
console.log(list);

result:

[ 
  { type: 'text', data: '\n111\n222 ' },
  { type: 'comment1', data: 'aaaaaaaaaaaaaa' },
  { type: 'text', data: '\n222 ' },
  { type: 'comment1', data: 'aaaaaaaaaaaaaa' },
  { type: 'text', data: '\n333 333333333333333 ' },
  { type: 'multicomment', data: 'ccccc' },
  { type: 'text', data: ' 33333333333333333333 333333 33333333333 \n444 ' },
  { type: 'comment2', data: 'bbbbbbbbbbbbbb' },
  { type: 'text', data: '\n555\n666 ' },
  { type: 'multicomment', data: 'cccccccccccccc' },
  { type: 'text', data: '\n777\n' },
  { type: 'multicomment', data: 'cccccccccccccc' },
  { type: 'text', data: '\n888\n' },
  { type: 'comment1', data: 'aaaaaaaaaaaaaa' },
  { type: 'text', data: '\n999\n' }
]

advanced sample 1 (tree out)

const Splitter = require("./split-tools/index.js");
 
var text = "x > 5 && (y == 7.5)";
 
const splitter = new Splitter({
    matchers: {
      bracket: /\(\s*(.*)\s*\)/g,
        operator: /\s*(\>|\<|\=\=|\!\=|\&\&|\|\||\+|\-|\*|\/|\%)\s*/g,
        variable: /\s*([a-z]\w*)\s*/gi,
        value: /\s*(\d[\d\.]*)\s*/gi
    }
});
 
 
const   tree = {
    "brackets": ["bracket", "operator", "variable", "value"]
};
 
function subSplit(id, text) {
    console.log("subSplit:", id, "[", text, "]");
    if(tree[id] && tree[id].length)
        return splitter.process(text, tree[id]);
    return text;
}
 
 
splitter.addParser("bracket",(match,body)=>{
    return {
        type: "bracket",
        body: subSplit("brackets", body)
    };
});
 
splitter.addParser("operator",(match,body)=>{
    return {
        type: "operator",
        body: body
    };
});
 
splitter.addParser("variable",(match,body)=>{
    return {
        type: "variable",
        body: body
    };
});
 
splitter.addParser("value",(match,body)=>{
    return {
        type: "value",
        body: +body
    };
});
 
 
const list = splitter.process(text, tree.tags);
 
console.log("\n\nresult:\n",JSON.stringify(list, null, '  '));

result:

 [
  { "type": "variable", "body": "x" },
  { "type": "operator", "body": ">" },
  { "type": "value", "body": 5 },
  { "type": "operator", "body": "&&" },
  { "type": "bracket", "body": [
      { "type": "variable", "body": "y" },
      { "type": "operator", "body": "==" },
      { "type": "value", "body": 7.5 }
    ]
  }
]
 

advanced sample 2 (tree out)

const Splitter = require('split-tools');
const text = `
<html param1="1111" param2=2222 param3="333 333 333" >
    <head>
        <script>
            var script = 1;
        </script>
        <style>
            body {color: #000;}
        </style>
    </head>
    <body>
        <image src="test.png"/>
    </body>
</html>
`;
 
 
 
 
const splitter = new Splitter({
    matchers: {
        tag1: /\<(\w+)\s*([\s\S]*?)\>([\s\S]*)\<\/\1\>/g,
        tag2: /\<(\w+)\s*([\s\S]*?)\/\>/g,
        space: /(^\s+|\s+$)/g,
        param: /(\w+)=((["'])(.+?)\3|()(\S+))/g
    }
});
 
 
const  tree = {
    "tags": ["tag1","tag2","space"],
    "params": ["param","space"]
};
 
function subSplit(id, text) {
    console.log("subSplit:", id, "[", text, "]");
    if(tree[id] && tree[id].length)
        return splitter.process(text, tree[id]);
    return text;
}
 
splitter.addParser("space",(match)=>{
    return;
});
 
splitter.addParser("tag1",(match,tag,params,body)=>{
    return {
        type: "tag",
        tag: tag,
        params: subSplit("params", params),
        body: subSplit("tags", body)
    };
});
 
splitter.addParser("tag2",(match,tag,params)=>{
    return {
        type: "tag",
        tag: tag,
        params: subSplit("params", params)
    };
});
 
 
splitter.addParser("param",(match,name,tmp,isStr,value)=>{
    console.log("param:",match,name,tmp,isStr,value);
    return {
        type: "param",
        name: name,
        value: isStr?value:tmp
    };
});
 
 
const list = splitter.process(text, tree.tags);
 
console.log("\n\nresult:\n",JSON.stringify(list, null, '  '));

result:

[
  {
    "type": "tag",
    "tag": "html",
    "params": [
      {
        "type": "param",
        "name": "param1",
        "value": "1111"
      },
      {
        "type": "param",
        "name": "param2",
        "value": "2222"
      },
      {
        "type": "param",
        "name": "param3",
        "value": "333 333 333"
      }
    ],
    "body": [
      {
        "type": "tag",
        "tag": "head",
        "params": [],
        "body": [
          {
            "type": "tag",
            "tag": "script",
            "params": [],
            "body": [
              "var script = 1;"
            ]
          },
          {
            "type": "tag",
            "tag": "style",
            "params": [],
            "body": [
              "body {color: #000;}"
            ]
          }
        ]
      },
      {
        "type": "tag",
        "tag": "body",
        "params": [],
        "body": [
          {
            "type": "tag",
            "tag": "image",
            "params": [
              {
                "type": "param",
                "name": "src",
                "value": "test.png"
              }
            ]
          }
        ]
      }
    ]
  }
]

Participation in development

https://github.com/lastuniverse/split-tools/issues

License

MIT

NPM

Package Sidebar

Install

npm i split-tools

Weekly Downloads

0

Version

1.2.3

License

MIT

Unpacked Size

12.7 kB

Total Files

4

Last publish

Collaborators

  • lastuniverse