Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- //modules//////////////////////
- var request = require('request');
- var cheerio = require('cheerio');
- //////////////////////////////////
//constants//////////////////////
// HTTP status code treated as a successful fetch.
var HTTP_OK = 200;
// User agent string of an iPhone Safari browser, sent with every outgoing request.
var IPHONE_USER_AGENT = 'Mozilla/5.0 (iPhone; CPU iPhone OS 7_0_6 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11B651 Safari/9537.53';
//////////////////////////////////
//REQUEST//////////////////////
var REQUEST_HOST = 'm.yad2.co.il';
// Starts with '/', so it is appended to the host without an extra separator.
var REQUEST_PATH = '/Cars/Private.php';//better name??
var REQUEST_PROTOCOL = 'http';
// Address of the search-results page that a scan starts from.
var REQUEST_URL = REQUEST_PROTOCOL + '://' + REQUEST_HOST + REQUEST_PATH;
// Headers shared by every outgoing request.
var REQUEST_HEADERS = {'User-Agent': IPHONE_USER_AGENT, 'Content-Type':'text/html; charset=utf-8'};
/**
 * Builds the options object passed to `request` for fetching one page.
 *
 * @param {string} url - Address to fetch; it is run through ensureFullURL first.
 * @returns {{url: string, method: string, headers: Object}} GET options that
 *   carry the shared REQUEST_HEADERS.
 */
function getRequestOptions(url){
  var options = {};
  options.url = ensureFullURL(url);
  options.method = 'GET';
  options.headers = REQUEST_HEADERS;
  return options;
}
- //////////////////////////////////
- //functions//////////////////////
/**
 * `request` callback for a single ad page.
 *
 * On success the ad's key/value details are appended to Ads. On failure the
 * expected total is lowered by one, because a failed ad will never be added
 * and the completion check below would otherwise never match, leaving the
 * HTTP response open forever.
 *
 * @param {*} error - Error reported by `request`, if any.
 * @param {Object} response - Response object from `request`.
 * @param {string} body - Response body (the ad page HTML).
 * @returns {(boolean|undefined)} false when the page could not be fetched,
 *   otherwise undefined.
 */
function requesthandler_AdPage(error,response,body){
  var document = getDocument(error,response,body);
  if (document) {
    Ads.push({details:getAdPageDetails(document)});
  } else {
    // This ad is no longer expected, so stop waiting for it.
    process.totalNumberOfAds--;
  }
  if (Ads.length === process.totalNumberOfAds)//naive check if we're done
    process.res.end(JSON.stringify(Ads));
  if (!document)
    return false;
}
/**
 * `request` callback for the search-results page.
 *
 * On success the page is handed to getSearchResultsPageDetails. On failure
 * no ad request will ever be issued, so the pending HTTP response is closed
 * here with an empty JSON list instead of being left open.
 *
 * @param {*} error - Error reported by `request`, if any.
 * @param {Object} response - Response object from `request`.
 * @param {string} body - Response body (the search-results HTML).
 * @returns {(boolean|undefined)} false when the page could not be fetched,
 *   otherwise undefined.
 */
function requesthandler_SearchResultsPage(error,response,body){
  var document = getDocument(error,response,body);
  if (!document) {
    // Nothing else would end the response once the search page has failed.
    process.res.end(JSON.stringify([]));
    return false;
  }
  getSearchResultsPageDetails(document);
}
/**
 * Extracts the key/value detail rows from an ad page.
 *
 * Every element matching `.clearfix.key-value` contributes one entry built
 * from the text of its `.key` and `.value` children. Rows where either text
 * is empty are skipped.
 *
 * @param {string} document - HTML of the ad page.
 * @returns {Array<{key: string, value: string}>} Detail pairs in page order.
 */
function getAdPageDetails(document){
  var $ = cheerio.load(document);
  var pairs = [];
  $('.clearfix.key-value').each(function(){
    var $row = $(this);
    var label = stripTags($row.find('.key').text());
    var content = stripTags($row.find('.value').text());
    var hasLabel = label != null && label != '';
    var hasContent = content != null && content != '';
    if (hasLabel && hasContent){
      pairs.push({key:label,value:content});
    }
  });
  return pairs;
}
/**
 * Validates the outcome of a `request` call and returns the page body.
 *
 * @param {*} error - Error reported by `request`, if any.
 * @param {Object} response - Response object; only `statusCode` is read, and
 *   only when there is no error.
 * @param {string} body - Response body.
 * @returns {(string|boolean)} The body when there is no error and the status
 *   equals HTTP_OK; otherwise false, after logging 'error'.
 */
function getDocument (error,response,body){
  var succeeded = !error && response.statusCode == HTTP_OK;
  if (succeeded)
    return body;
  console.log('error');
  return false;
}
/**
 * Finds every ad link on a search-results page and requests each ad page.
 *
 * The number of links found is stored in `process.totalNumberOfAds`, which
 * the ad-page handler compares against to decide when the scan is finished.
 * When the page has no ad links, no ad handler would ever run, so the
 * pending HTTP response is closed here with an empty JSON list.
 *
 * @param {string} html - HTML of the search-results page.
 * @returns {undefined}
 */
function getSearchResultsPageDetails(html){
  var $ = cheerio.load(html);
  var $adLinks = $("a[href*='Info.php']");
  process.totalNumberOfAds = $adLinks.length;
  if ($adLinks.length === 0) {
    // Nothing to wait for: answer immediately instead of hanging.
    process.res.end(JSON.stringify([]));
    return;
  }
  $adLinks.each(function(){
    var url = ensureFullURL($(this).attr('href'));
    request(getRequestOptions(url),requesthandler_AdPage);
  });
}
- //////////////////////////////////
- //helper functions//////////////////////
/**
 * Tells whether a value is an absolute http(s) URL.
 *
 * The scheme is matched case-insensitively, since URL schemes are not
 * case-sensitive. Non-string input yields false instead of throwing.
 *
 * @param {*} url - Candidate URL.
 * @returns {boolean} true when `url` is a string starting with `http://` or
 *   `https://`.
 */
function isFullURL(url){//naive
  return typeof url === 'string' && /^https?:\/\//i.test(url);
}
/**
 * Turns an href found on the site into an absolute URL.
 *
 * URLs that are already absolute are returned untouched. Anything else is
 * resolved against REQUEST_URL the way a browser resolves a link on that
 * page: root-relative paths replace the whole path, document-relative paths
 * replace only the last path segment, and protocol-relative `//host/...`
 * references keep their own host.
 *
 * @param {string} url - Absolute or relative reference.
 * @returns {string} Absolute URL.
 */
function ensureFullURL(url){
  if (isFullURL(url))
    return url;
  return new URL(url, REQUEST_URL).href;
}
/**
 * Removes everything that looks like a markup tag from a string.
 *
 * A "tag" is any run from `<` up to the nearest following `>`, and may span
 * line breaks.
 *
 * @param {*} html - Text to clean.
 * @returns {string} The text without tags, or '' when `html` is not a string.
 */
function stripTags(html){
  var tagPattern = /<(?:.|\n)*?>/gm;
  return typeof html === "string" ? html.replace(tagPattern, '') : '';
}
- ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//run
// Ad records collected so far; filled by the ad-page handler and serialized
// into the HTTP response once the scan is considered complete.
var Ads = [];
/**
 * Starts a scan by requesting a search-results page.
 *
 * @param {string} url - Address of the search-results page.
 * @returns {undefined} The result arrives through requesthandler_SearchResultsPage.
 */
function scan(url){
  var searchRequestOptions = getRequestOptions(url);
  request(searchRequestOptions, requesthandler_SearchResultsPage);
}
var http = require('http');
// Every incoming HTTP request triggers a fresh scan; the JSON body is written
// later by the request handlers through `process.res`.
http.createServer(function (req, res) {
  res.writeHead(200, {'Content-Type': 'application/json; charset=utf-8'});
  // Start from an empty list: results left over from a previous request would
  // otherwise be returned again, and the completion check in the ad handler
  // (Ads.length versus the expected total) could never match.
  Ads.length = 0;
  // NOTE(review): scan state (Ads, process.res, process.totalNumberOfAds) is
  // shared, so overlapping requests still interfere with each other.
  process.res = res;
  scan(REQUEST_URL);
}).listen(80, 'localhost');
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement