Advertisement
Guest User

Node.js with JSDOM (YouTube scraping)

a guest
Dec 23rd, 2012
730
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1.  
  /**
   * Module dependencies.
   */

  var express = require('express')
    , jsdom = require('jsdom')
    , request = require('request')
    , url = require('url')
    , routes = require('./routes')
    , user = require('./routes/user')
    , http = require('http')
    , path = require('path');

  // Create the application once and export that same instance.
  // Previously `module.exports` was bound to a first instance created with
  // express.createServer(), and `app` was then re-declared with express(), so
  // the exported object was never the one that received the configuration and
  // routes below.
  var app = module.exports = express();
  17.  
  // Settings and middleware applied in every environment.
  app.configure(function configureAllEnvironments() {
    var settings = {
      'port': process.env.PORT || 3000,
      'views': __dirname + '/views',
      'view engine': 'jade'
    };

    Object.keys(settings).forEach(function (name) {
      app.set(name, settings[name]);
    });

    // Registration order is kept as-is: the router is mounted before the
    // static file server for the `public` directory.
    var publicDir = path.join(__dirname, 'public');

    app.use(express.favicon());
    app.use(express.logger('dev'));
    app.use(express.bodyParser());
    app.use(express.methodOverride());
    app.use(app.router);
    app.use(express.static(publicDir));
  });

  // Development only: additionally install Express's error handler middleware.
  app.configure('development', function configureDevelopment() {
    var errorHandler = express.errorHandler();

    app.use(errorHandler);
  });
  33.  
  // GET routes served by handlers from the `routes` and `user` modules,
  // registered in the order listed.
  [
    ['/', routes.index],
    ['/users', user.list]
  ].forEach(function (route) {
    app.get(route[0], route[1]);
  });
  /**
   * Builds the plain-object description of one video shelf element.
   *
   * @param {Function} $ jQuery instance attached to the scraped document.
   * @param {Object} element Element matched by `.lohp-category-shelf-item`.
   * @returns {Object|null} `{ href, title, time, thumbnail, urlObj }`, or null
   *   when the element has no direct child anchor carrying an href.
   */
  function buildVideoItem($, element) {
    var $item = $(element)
      , $anchor = $item.children('a')
      , href = $anchor.attr('href')
      , $img = $anchor.find('span.yt-thumb-clip-inner img');

    // url.parse() needs a string, so an entry without a link is skipped
    // instead of being allowed to throw.
    if (!href) {
      return null;
    }

    return {
      href: href,
      title: $item.find('.lohp-video-link').text().trim(),
      time: $anchor.find('.video-time').text(),
      // Per the original author's note, some thumbnails carry their real URL
      // in data-thumb; use it when present, otherwise fall back to src.
      thumbnail: $img.attr('data-thumb') || $img.attr('src'),
      // Parse the link together with its query string.
      urlObj: url.parse(href, true)
    };
  }

  /**
   * GET /nodetube
   *
   * Fetches http://youtube.com, loads the returned HTML into jsdom with jQuery
   * attached, collects every `.lohp-category-shelf-item` found under
   * `div #page-container`, and renders the `list` view with the results.
   * Responds with 502 when the page cannot be fetched and with 500 when the
   * document (or jQuery) cannot be loaded.
   */
  app.get('/nodetube', function (req, res) {
    request({
      uri: 'http://youtube.com'
    }, function (err, response, body) {
      // Either condition alone is a failure. `response` is checked before it
      // is dereferenced because it may be missing when `err` is set.
      if (err || !response || response.statusCode !== 200) {
        console.log('Request error.');
        return res.status(502).send('Unable to fetch the YouTube page.');
      }

      // Hand the fetched HTML to jsdom and ask it to attach jQuery.
      jsdom.env({
        html: body,
        scripts: ['http://code.jquery.com/jquery-1.6.min.js']
      }, function (domErr, window) {
        // Without a window or jQuery nothing below can work.
        if (!window || !window.jQuery) {
          console.log('jsdom error.', domErr);
          return res.status(500).send('Unable to parse the YouTube page.');
        }

        var $ = window.jQuery
          , items = [];

        $('div #page-container')
          .find('.lohp-category-shelf-item')
          .each(function (i, element) {
            var item = buildVideoItem($, element);

            // push() keeps the array dense when entries are skipped.
            if (item) {
              items.push(item);
            }
          });

        // The items hold only strings and parsed URL objects, so the window
        // is no longer needed once extraction is done.
        if (typeof window.close === 'function') {
          window.close();
        }

        res.render('list', {
          title: 'NodeTube',
          items: items
        });
      });
    });
  });
  87.  
  // GET /watch/:id — renders the `video` view, handing it the id captured
  // from the URL as `vid`.
  app.get('/watch/:id', function watchVideo(req, res) {
    var locals = {
      title: 'Watch',
      vid: req.params.id
    };

    res.render('video', locals);
  });
  95.  
  // Serve the app over HTTP on the configured port and log once listening.
  http.createServer(app).listen(app.get('port'), function onListening() {
    var port = app.get('port');

    console.log("Express server listening on port " + port);
  });
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement