html-to-anything

1.0.3 • Public • Published

html-to-anything

Dead-simple rule/callback-based HTML traverser that can output unicorns.

Example usage

const hta = require('./html-to-anything')
 
const rules = new hta.RuleSet()
 
rules.addRule('<p>?',
    e => console.log(`enter ${e.data}`), // enter element e
    e => console.log(`exit ${e.data}`) // exit element e
)
 
const html = '<p><a href="/derp">link</a> unicorns </p>'
const dom = hta.parseDom(html)
 
rules.runAllWithRules(dom,
    (err, element) => console.error({err, element})
)

NOTE: the '?' in '<p>?' is a placeholder for HTML text. You can replace it with any text - it is not matched against anything as of now. Regular expressions are a strong contender for the next addition. Adding them would be a breaking change, so we'd naturally bump the major version number - no worries.

Example output

{ err: '(root) misses rule for (p)',
  element:
   { raw: 'p',
     data: 'p',
     type: 'tag',
     name: 'p',
     children: [ [Object], [Object] ] } }
{ err: '(root.p) misses rule for (a)',
  element:
   { raw: 'a href="/derp"',
     data: 'a href="/derp"',
     type: 'tag',
     name: 'a',
     attribs: { href: '/derp' },
     children: [ [Object] ] } }
enter link
exit link
enter  unicorns
exit  unicorns

API

function parseDom (source)
function query (dom, source)
function match (element, source)
function textOf (dom, src)

RuleSet

function runAllWithRules (dom, errorCallback)
function addRule (htmlSrcToMatch, onEnter, onExit, ignoreInit)
function addRules (htmlSourcesToMatch, onEnter, onExit, ignoreInit)
function addIgnoreRule (htmlSrcToMatch)
function addIgnoreRules (htmlSourcesToMatch)

xml-writer utilities

The xml-writer package is not listed as a dependency of html-to-anything, so you will have to install it manually before using these helpers.

function addTrivialXmlRule (htmlSrcToMatch, xmlTagName, xmlWriter)
function addTrivialInlineXmlRule (htmlSrcToMatch, xmlTagName, xmlWriter)

Package Sidebar

Install

npm i html-to-anything

Weekly Downloads

1

Version

1.0.3

License

MIT

Last publish

Collaborators

  • martingronlund