Advertisement
Guest User

Untitled

a guest
Jul 5th, 2013
451
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/env node
  2. /*
  3. Automatically grade files for the presence of specified HTML tags/attributes.
  4. Uses commander.js and cheerio. Teaches command line application development
  5. and basic DOM parsing.
  6.  
  7. References:
  8.  
  9.  + cheerio
  10.    - https://github.com/MatthewMueller/cheerio
  11.    - http://encosia.com/cheerio-faster-windows-friendly-alternative-jsdom/
  12.    - http://maxogden.com/scraping-with-node.html
  13.  
  14.  + commander.js
  15.    - https://github.com/visionmedia/commander.js
  16.    - http://tjholowaychuk.com/post/9103188408/commander-js-nodejs-command-line-interfaces-made-easy
  17.  
  18.  + JSON
  19.    - http://en.wikipedia.org/wiki/JSON
  20.    - https://developer.mozilla.org/en-US/docs/JSON
  21.    - https://developer.mozilla.org/en-US/docs/JSON#JSON_in_Firefox_2
  22. */
  23.  
  24. var fs = require('fs');
  25. var program = require('commander');
  26. var cheerio = require('cheerio');
  27. var HTMLFILE_DEFAULT = "index.html";
  28. var CHECKSFILE_DEFAULT = "checks.json";
  29. var rest = require('restler');
  30.  
  /**
   * Verify that a path exists on disk.
   *
   * The argument is converted with toString(). When nothing exists at that
   * path, a message is printed with console.log and process.exit(1) is called.
   *
   * @param {*} infile - Path to check (any value with a toString method).
   * @returns {string} The path, as a string.
   */
  var assertFileExists = function(infile) {
      var pathStr = infile.toString();
      var found = fs.existsSync(pathStr);
      if(found) {
          return pathStr;
      }
      console.log("%s does not exist. Exiting.", pathStr);
      process.exit(1); // http://nodejs.org/api/process.html#process_process_exit_code
      return pathStr;
  };
  40.  
  /**
   * Check that the given URL can be fetched and return it as a string.
   *
   * The request made through rest.get is asynchronous, so the URL string is
   * returned immediately; the outcome of the request is handled later in the
   * 'complete' callback. A failed request is retried after 5 seconds, at most
   * MAX_RETRIES times. If it still fails, a message is printed and the process
   * exits with code 1, the same way assertFileExists treats a missing file.
   * (Previously a failing URL was retried forever and never reported as fatal.)
   *
   * @param {*} url - URL to check (any value with a toString method).
   * @returns {string} The URL, as a string.
   */
  var assertURLExists = function(url) {
      var MAX_RETRIES = 3;
      var RETRY_DELAY_MS = 5000;
      var urlstr = url.toString();
      var retriesLeft = MAX_RETRIES;

      // A plain function (not bound) is required: retry() is called on `this`.
      rest.get(urlstr).on('complete', function(result) {
          if (!(result instanceof Error)) {
              return; // request succeeded, nothing more to do
          }
          console.log('Error: ' + result.message);
          if (retriesLeft > 0) {
              retriesLeft -= 1;
              this.retry(RETRY_DELAY_MS); // try again after 5 sec
              return;
          }
          // Out of retries: give up instead of looping indefinitely.
          console.log("%s could not be fetched. Exiting.", urlstr);
          process.exit(1);
      });
      return urlstr;
  };
  52.  
  /**
   * Read a file synchronously and pass its contents to cheerio.load.
   *
   * @param {string} htmlfile - Path of the HTML file to read.
   * @returns {*} Whatever cheerio.load returns for the file contents.
   */
  var cheerioHtmlFile = function(htmlfile) {
      var rawHtml = fs.readFileSync(htmlfile);
      var loaded = cheerio.load(rawHtml);
      return loaded;
  };
  56.  
  /**
   * Read a file synchronously and parse its contents as JSON.
   *
   * @param {string} checksfile - Path of the JSON file to read.
   * @returns {*} The parsed JSON value.
   */
  var loadChecks = function(checksfile) {
      var rawJson = fs.readFileSync(checksfile);
      var parsed = JSON.parse(rawJson);
      return parsed;
  };
  60.  
  /**
   * Grade an HTML file against the selectors listed in a checks file.
   *
   * The HTML is loaded with cheerioHtmlFile and the checks with loadChecks.
   * Each check is used as a selector; it counts as present when the query
   * result has a length greater than zero.
   *
   * @param {string} htmlfile - Path of the HTML file to grade.
   * @param {string} checksfile - Path of a JSON file holding an array of selectors.
   * @returns {Object} Map from each selector to a boolean, with keys inserted
   *     in sorted order.
   * @throws {TypeError} If the checks file does not contain a JSON array.
   */
  var checkHtmlFile = function(htmlfile, checksfile) {
      // Keep the cheerio handle local: assigning an undeclared `$` leaked a
      // global and throws a ReferenceError in strict-mode code.
      var $ = cheerioHtmlFile(htmlfile);
      var checks = loadChecks(checksfile);
      if (!Array.isArray(checks)) {
          throw new TypeError("checks file must contain a JSON array of selectors");
      }
      var out = {};
      // Sort a copy and walk the values directly; for...in on an array visits
      // keys and would also pick up any enumerable inherited properties.
      checks.slice().sort().forEach(function(selector) {
          out[selector] = $(selector).length > 0;
      });
      return out;
  };
  71.  
  /**
   * Grade already-fetched HTML content against the selectors listed in a
   * checks file.
   *
   * The content is passed to cheerio.load and the checks are read with
   * loadChecks. Each check is used as a selector; it counts as present when
   * the query result has a length greater than zero.
   *
   * @param {*} urlContents - HTML content to grade (handed to cheerio.load as is).
   * @param {string} checksfile - Path of a JSON file holding an array of selectors.
   * @returns {Object} Map from each selector to a boolean, with keys inserted
   *     in sorted order.
   * @throws {TypeError} If the checks file does not contain a JSON array.
   */
  var checkURL = function(urlContents, checksfile) {
      // Keep the cheerio handle local: assigning an undeclared `$` leaked a
      // global and throws a ReferenceError in strict-mode code.
      var $ = cheerio.load(urlContents);
      var checks = loadChecks(checksfile);
      if (!Array.isArray(checks)) {
          throw new TypeError("checks file must contain a JSON array of selectors");
      }
      var out = {};
      // Sort a copy and walk the values directly; for...in on an array visits
      // keys and would also pick up any enumerable inherited properties.
      checks.slice().sort().forEach(function(selector) {
          out[selector] = $(selector).length > 0;
      });
      return out;
  };
  83.  
  84.  
  /**
   * Return a copy of a function, bound to a fresh empty object as `this`.
   * Workaround for a commander.js issue; see http://stackoverflow.com/a/6772648
   *
   * @param {Function} fn - Function to copy.
   * @returns {Function} A new bound function that forwards to fn.
   */
  var clone = function(fn) {
      var freshThis = {};
      var copy = fn.bind(freshThis);
      return copy;
  };
  90.  
  // Command-line entry point. When this file is run directly, parse the
  // options and print the check results as JSON; when it is required from
  // another module, only export checkHtmlFile.
  if(require.main === module) {
      program
          .option('-c, --checks <check_file>', 'Path to checks.json', clone(assertFileExists), CHECKSFILE_DEFAULT)
          .option('-f, --file <html_file>', 'Path to index.html', clone(assertFileExists), HTMLFILE_DEFAULT)
          .option('-u, --url <url_file>', 'Path to url of a file', clone(assertURLExists))
          .parse(process.argv);

      // Print a results object as JSON indented by 4 spaces.
      var printResults = function(checkJson) {
          var outJson = JSON.stringify(checkJson, null, 4);
          console.log(outJson);
      };

      // No URL given: grade the local HTML file.
      if(!program.url) {
          printResults(checkHtmlFile(program.file, program.checks));
      }

      // URL given: fetch it, then grade the downloaded content.
      else {
          rest.get(program.url).on('complete', function(result) {
              // On failure the handler receives an Error instead of the page
              // contents; report it rather than grading the Error object.
              if (result instanceof Error) {
                  console.log('Error: ' + result.message);
                  process.exit(1);
                  return;
              }
              printResults(checkURL(result, program.checks));
          });
      }
  }

  else {
      exports.checkHtmlFile = checkHtmlFile;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement