Advertisement
Guest User

Untitled

a guest
Oct 30th, 2014
181
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. //modules//////////////////////
  2.  
  3. var request = require('request');
  4. var cheerio = require('cheerio');
  5. //////////////////////////////////
  6.  
  7.  
  8.  
  9. //constants//////////////////////
  10.  
  // Only responses carrying this status code are treated as successfully fetched.
  var HTTP_OK = 200;
  // User-Agent string sent with every outgoing request (see REQUEST_HEADERS).
  var IPHONE_USER_AGENT = 'Mozilla/5.0 (iPhone; CPU iPhone OS 7_0_6 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11B651 Safari/9537.53';
  //////////////////////////////////


  //REQUEST//////////////////////

  var REQUEST_HOST = 'm.yad2.co.il';
  var REQUEST_PATH = '/Cars/Private.php';//better name??
  var REQUEST_PROTOCOL = 'http';
  // REQUEST_PATH already begins with '/', so it is appended directly; adding
  // another separator would produce 'http://host//Cars/Private.php'.
  var REQUEST_URL = REQUEST_PROTOCOL + '://' + REQUEST_HOST + REQUEST_PATH;
  // Headers attached to every request built by getRequestOptions.
  var REQUEST_HEADERS = {'User-Agent': IPHONE_USER_AGENT, 'Content-Type':'text/html; charset=utf-8'};
  23.  
  /**
   * Assemble the options object passed to `request` for a GET of `url`.
   *
   * @param {string} url - Address to fetch; it is run through ensureFullURL first.
   * @returns {{url: string, method: string, headers: Object}} Options that reuse
   *   the module-wide REQUEST_HEADERS object (shared, not copied).
   */
  function getRequestOptions(url){
      var options = {};
      options.url = ensureFullURL(url);
      options.method = 'GET';
      options.headers = REQUEST_HEADERS;
      return options;
  }
  31. //////////////////////////////////
  32.  
  33.  
  34. //functions//////////////////////
  35.  
  36.  
  /**
   * `request` callback for one ad page.
   *
   * On success the extracted details are appended to the shared `Ads` array.
   * On failure the expected total (`process.totalNumberOfAds`) is lowered by
   * one, because that page will never be added. Either way, once `Ads` holds
   * as many entries as are still expected, the collected ads are serialised
   * to JSON and written to `process.res`.
   *
   * NOTE(review): the response object and the expected total live on the
   * global `process` object, so overlapping incoming HTTP requests would
   * share them - confirm only one scan is expected at a time.
   *
   * @param {*} error - Error reported by `request`, if any.
   * @param {Object} response - Response object supplied by `request`.
   * @param {string} body - Response body (the ad page HTML).
   * @returns {(boolean|undefined)} `false` when the page could not be fetched,
   *   otherwise `undefined`.
   */
  function requesthandler_AdPage(error,response,body){

      var document = getDocument(error,response,body);
      var failed = !document;

      if (failed) {
          // This page will never reach Ads; without lowering the target the
          // completion check below could never succeed and the client would hang.
          process.totalNumberOfAds -= 1;
      } else {
          Ads.push({details:getAdPageDetails(document)});
      }

      if (Ads.length === process.totalNumberOfAds)//naive check if we're done
          process.res.end(JSON.stringify(Ads));

      if (failed)
          return false;
  }
  49.  
  /**
   * `request` callback for the search results page.
   *
   * When the page was fetched, its HTML is handed to
   * getSearchResultsPageDetails. When it was not, no ad requests will ever be
   * issued, so the pending HTTP response (if one is stored on `process.res`)
   * is finished with an empty JSON array instead of being left open.
   *
   * @param {*} error - Error reported by `request`, if any.
   * @param {Object} response - Response object supplied by `request`.
   * @param {string} body - Response body (the search results HTML).
   * @returns {(boolean|undefined)} `false` when the page could not be fetched,
   *   otherwise `undefined`.
   */
  function requesthandler_SearchResultsPage(error,response,body){

      var document = getDocument(error,response,body);
      if (!document) {
          // Nothing downstream will call end() on this path, so do it here.
          if (process.res)
              process.res.end(JSON.stringify([]));
          return false;
      }

      getSearchResultsPageDetails(document);
  }
  59.  
  60.  
  /**
   * Collect the key/value rows found in an ad page.
   *
   * Every element matching `.clearfix.key-value` contributes one entry, built
   * from the text of its `.key` and `.value` descendants after stripTags.
   * Rows where either text ends up empty are skipped.
   *
   * @param {string} document - HTML of the ad page.
   * @returns {Array<{key: string, value: string}>} Entries in document order.
   */
  function getAdPageDetails(document){
      var $ = cheerio.load(document);
      var rows = $('.clearfix.key-value');
      var details = [];

      rows.each(function(){
          var row = $(this);
          var key = stripTags(row.find('.key').text());
          var value = stripTags(row.find('.value').text());

          // Skip incomplete rows; returning undefined lets iteration continue.
          if (key==null || key=='' || value==null || value=='')
              return;

          details.push({key:key,value:value});
      });

      return details;
  }
  79.  
  /**
   * Validate the outcome of a `request` call and return the body on success.
   *
   * A request counts as failed when an error was reported, when no response
   * object was supplied, or when the status code is not HTTP_OK. Failures are
   * logged together with their cause.
   *
   * @param {*} error - Error reported by `request`, if any.
   * @param {Object} [response] - Response object; only `statusCode` is read.
   * @param {string} body - Response body.
   * @returns {(string|boolean)} `body` on success, `false` on failure. An empty
   *   body is returned as-is, which callers testing `!document` treat as a
   *   failure too.
   */
  function getDocument (error,response,body){

      // `response` is checked before use so a missing response is reported as
      // a failure instead of raising a TypeError inside the callback.
      if (error || !response || response.statusCode !== HTTP_OK) {
          console.log('error', error || (response ? 'unexpected status ' + response.statusCode : 'no response'));
          return false;
      }
      return body;
  }
  88.  
  89.  
  90.  
  /**
   * Find the ad links in a search results page and request each ad page.
   *
   * The number of links whose href contains `Info.php` is stored in
   * `process.totalNumberOfAds`; requesthandler_AdPage compares against it to
   * decide when all ads have arrived. One request is issued per link.
   *
   * When no link matches, no ad handler will ever run, so the pending HTTP
   * response (if one is stored on `process.res`) is finished right away with
   * an empty JSON array.
   *
   * @param {string} html - HTML of the search results page.
   * @returns {undefined}
   */
  function getSearchResultsPageDetails(html){

      var $ = cheerio.load(html);

      var $adLinks = $("a[href*='Info.php']");
      process.totalNumberOfAds = $adLinks.length;

      if ($adLinks.length === 0) {
          // Nothing to wait for: answer now instead of leaving the client hanging.
          if (process.res)
              process.res.end(JSON.stringify([]));
          return;
      }

      $adLinks.each(function(){

          var url = ensureFullURL($(this).attr('href'));
          request(getRequestOptions(url),requesthandler_AdPage);
      });

  }
  106. //////////////////////////////////
  107.  
  108.  
  109.  
  110. //helper functions//////////////////////
  111.  
  /**
   * Tell whether `url` already starts with an `http://` or `https://` scheme.
   *
   * The scheme is matched case-insensitively, and non-string input yields
   * `false` instead of throwing.
   *
   * @param {*} url - Candidate URL.
   * @returns {boolean} `true` when `url` is a string beginning with an http(s) scheme.
   */
  function isFullURL(url){//naive
      return typeof url === 'string' && /^https?:\/\//i.test(url);
  }
  115.  
  116.  
  /**
   * Turn a link found on the site into an absolute URL.
   *
   * Links that isFullURL accepts are returned unchanged. Anything else is
   * resolved against REQUEST_URL the way a browser resolves an href:
   * `/path` replaces the whole path, `file.php` replaces the last path
   * segment, and `//host/path` keeps only the base's scheme.
   *
   * @param {string} url - Absolute or relative link.
   * @returns {string} Absolute URL, or `url` itself when it cannot be resolved.
   */
  function ensureFullURL(url){

      if (isFullURL(url))
          return url;

      try {
          return new URL(url, REQUEST_URL).href;
      } catch (e) {
          // Leave unresolvable input untouched rather than throw from inside
          // a scraping loop.
          return url;
      }
  }
  125.  
  /**
   * Remove everything that looks like an HTML tag from a string.
   *
   * A tag is any run from `<` up to the next `>`, including runs that span
   * line breaks of any kind (`\n`, `\r\n`, ...). Surrounding text, whitespace
   * included, is left as it is.
   *
   * @param {*} html - Text to clean.
   * @returns {string} The text without tags; `''` when `html` is not a string.
   */
  function stripTags(html){
      if (typeof html !== "string")
          return '';
      // [^>] also matches carriage returns and other line terminators, which
      // the previous (?:.|\n) alternation did not.
      return html.replace(/<[^>]*>/g, '');
  }
  131.  
  132.  
  133. ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  134.  
  135. //run
  // Shared list that requesthandler_AdPage appends to and finally serialises.
  var Ads = [];

  /**
   * Start a scan by requesting the search results page at `url`.
   * The response is delivered to requesthandler_SearchResultsPage.
   *
   * @param {string} url - Address of the search results page.
   * @returns {undefined}
   */
  function scan(url){
      var options = getRequestOptions(url);
      request(options,requesthandler_SearchResultsPage);
  }
  141.  
  var http = require('http');
  // Every incoming HTTP request triggers a fresh scan of REQUEST_URL; the
  // JSON answer is written later by the request handlers through process.res.
  // NOTE(review): process.res and Ads are shared by all requests, so a second
  // request arriving while a scan is still running would interfere with it -
  // confirm that only one client at a time is expected.
  http.createServer(function (req, res) {

      res.writeHead(200, {'Content-Type': 'application/json; charset=utf-8'});
      // Start each scan from an empty result list. Otherwise entries from the
      // previous scan stay in Ads, get returned again, and keep Ads.length from
      // ever matching the new expected total. Emptied in place so that code
      // referring to Ads keeps seeing the same array.
      Ads.length = 0;
      process.res = res;

      scan(REQUEST_URL);

  }).listen(80, 'localhost');
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement