Scraping with d3

It's not going to replace the beloved cheerio library, but I've made the shell of a scraper for node using d3 instead of jQuery:

// Load each dependency and publish it as a global of the same name, so the
// rest of the script can refer to d3, fs, jsdom and request directly.
// `global` is used instead of the `GLOBAL` alias: that alias was deprecated
// in Node 6 and later removed, which makes `GLOBAL[lib] = ...` a ReferenceError.
"d3, fs, jsdom, request".split(", ").forEach(function(lib) {
        global[lib] = require(lib);
});

// Entry point: the first command-line argument is either a URL or the path
// of a local file. Fetch or read it, then hand the markup to read().
var target = process.argv[2];

if (!target) {
    // Without this guard a missing argument fails with an unhelpful TypeError.
    console.error("Usage: node " + process.argv[1] + " urlOrFilename");
    process.exitCode = 1;
} else if (/^https?:\/\//i.test(target)) {
    // Only a leading http:// or https:// scheme counts as a URL, so local
    // paths that merely contain "http" are still read from disk.
    request(target, function (err, response, html) {
        if (err) throw new Error("Error reading " + target + ": " + (err.message || err));
        read(html);
    });//request
} else {
    // Ask for text explicitly so read() receives a string rather than a Buffer.
    fs.readFile(target, "utf8", function (err, html) {
        if (err) throw new Error("Error reading " + target + ": " + (err.message || err));
        read(html);
    });//readFile
}

//TODO: still need a small language for specifying what to grab and how to put it into JSON
/**
 * Demo extraction step: logs a few values pulled from the parsed document.
 *
 * @param wd3 - selection object exposing select(); read() passes in
 *              d3.select(window.document).
 */
function grab(wd3) {
    // Selections go through the document-bound selection handed in as wd3.
    var tableId = wd3.select("table").attr("id");
    console.log(tableId);

    var firstCell = wd3.select("table tbody").select("td");
    var cellText = firstCell.text();
    console.log(cellText);

    // Non-selection helpers come from the global d3 module itself.
    var memberNames = d3.keys(wd3);
    console.log(memberNames);
}

//make the html available
/**
 * Parse markup with jsdom and pass a d3 selection of the resulting document
 * to grab().
 *
 * @param html - markup handed to jsdom.env as its `html` option.
 * @throws {Error} from inside the jsdom callback when jsdom reports an error.
 */
function read(html) {
    jsdom.env({
        html : html,
        features : {QuerySelector:true},
        done : function(error, window) {
            // Surface the jsdom failure itself; otherwise it shows up as an
            // unrelated TypeError when window.document is dereferenced below.
            if (error) {
                throw new Error("Error parsing html: " + (error.message || error));
            }
            grab(d3.select(window.document));
        } //done
    }); //jsdom.env
}

/**
 * Created by elise on 12/14/15.
 */

Because of how I need to use it, I've set it up to read either local files or pages served over HTTP.

The grab function is able to use plain old d3 selectors to parse the document. Once there is a syntax for declaring what should be scraped and how it should be stored as JSON, grab can be fleshed out and we're golden.

Usage: node thiscode urlOrFilename

»