@essetwide/html-scrapper

1.0.4 • Public • Published

html-scrapper

An HTML-to-JSON scraper based on headless browser navigation.

Usage

import { Navigation } from '@essetwide/html-scrapper';

const scrapper = new Navigation({
    model: {
        addressee: {
            name: "#DestRem tr:nth-child(1) > td:nth-child(1) > span",
            cpf: "#DestRem tr:nth-child(2) > td:nth-child(1) > span"
        },
        payee: {
            name: "#Emitente tr:nth-child(1) > td:nth-child(2) > span",
            socialName: "#Emitente tr:nth-child(1) > td:nth-child(1) > span",
            cnpj: "#Emitente tr:nth-child(2) > td:nth-child(1) > span",
            phone: "#Emitente tr:nth-child(4) > td:nth-child(2) > span",
            address: {
                street: "#Emitente tr:nth-child(2) > td:nth-child(2) > span",
                neighborhood: "#Emitente tr:nth-child(3) > td:nth-child(1) > span",
                city: "#Emitente tr:nth-child(4) > td:nth-child(1) > span",
                state: "#Emitente tr:nth-child(5) > td:nth-child(1) > span",
                cep: "#Emitente tr:nth-child(3) > td:nth-child(2) > span"
            }
        },
        products: {
            _scope: "#Prod .toggle.box",
            _merge: {
                _scope: "#Prod .toggable.box"
            },
            code: "td > table:first-child tr:nth-child(1) > td:nth-child(1) > span",
            ncm: "td > table:first-child tr:nth-child(1) > td:nth-child(2) > span",
            cest: "td > table:first-child tr:nth-child(1) > td:nth-child(3) > span",
            cfop: "td > table:first-child tr:nth-child(2) > td:nth-child(2) > span",
            discount: "td > table:first-child tr:nth-child(3) > td:nth-child(1) > span",
            comercialEAN: "td > table:nth-of-type(2) > tbody > tr:nth-of-type(2) > td:nth-child(1) > span",
            price: "td > table:nth-of-type(2) > tbody > tr:nth-of-type(4) > td:nth-child(1) > span"
        },
        payment: {
            value: "#Cobranca tr:nth-child(2) > td:nth-child(2) span",
            mode: "#Cobranca tr:nth-child(2) > td:nth-child(1) span",
            brand: "#Cobranca tr:nth-child(2) > td:nth-child(5) span"
        }
    },
    beforeOpen: function(jsdomWindow) {
        // Prepare or customize jsdomWindow here.
    }
});

scrapper.parse();

/@essetwide/html-scrapper/

    Package Sidebar

    Install

    npm i @essetwide/html-scrapper

    Weekly Downloads

    6

    Version

    1.0.4

    License

    Apache-2.0

    Unpacked Size

    8.35 kB

    Total Files

    5

    Last publish

    Collaborators

    • menosprezzi
    • oliveira-filipe