epoch

Epoch

Content.js

Convert:

  • xls (excel) to csv
  • ppt to pdf
  • pdf to text
  • image to text
  • csv to json
  • text to json
  • html to json

In the end, all data should be converted to JSON, except code, which is kept unprocessed.

# convert html to json
content https://twitter.com/ twitter.json
content -i https://twitter.com/ -o twitter.json
{
  "title": "Content.js",
  "description": "Convert content to JSON",
  "tags": [],
  "input": {
    "encoding": "utf-8",
    "size": 1024,
    "type": "text/html",
    "data": "# Content.js\n\n> Convert content to JSON..."
  },
  "output": [
    {
      "type": "header"
    },
    {
      "type": "leftSidebar"
    },
    {
      "type": "content",
      "children": [
        {
          "type": "ul",
          "children": [
            {
              "type": "li",
              "text": "xls (excel) to csv"
            }
          ]
        }
      ]
    }
  ]
}
content = require('content.js')
 
content.parse 'https://github.com/viatropos/content', (error, result) ->
  result.title
  result.tags
  result.input
  result.output
  # cheerio instance, alias for `result.output.find`
  result.find('ul').each ->
 
  # render a standardized html 
  result.toHTML()
  # alias for `result.output`
  result.toJSON()