Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- '''This program scrapes craigslist. Still much to be done
- to make more robust but this is a stable build on its own
- Author: https://www.fiverr.com/n3tr1x
- '''
- print('Loading..')
- from time import time
- from time import strftime
- from time import gmtime
- import os
- import sys
- import traceback
- from threading import Timer
- import datetime
- import re
- ##To check module loading time
- START_TIME = time()
- from sys import path as PATH, stdout as STDOUT
- PATH.insert(0, './lib/python3.5/site-packages')
- ##Colorama module to print colored text on the screen
- from colorama import init
- init()
- from colorama import Fore, Back, Style
##Set browser type
##Supported values are 'CHROME' and 'FIREFOX' (compared case-insensitively
##further down). Chrome is the recommended choice.
TYPE = 'CHROME'
##Captures the run of digits that follows the text "PostingID" (any non-digit
##characters in between are skipped). Written as a raw string so that `\d`
##reaches the regex engine instead of being an invalid string escape.
POSTING_REGEX = r'PostingID[^\d]{0,}([\d]{1,})'
def pprint(string, color=Fore.GREEN, style=Style.RESET_ALL):
    '''
    Wraps a string in colour escape codes for pretty printing.
    The string is returned untouched when stdout is not a terminal or when
    the pieces cannot be concatenated (e.g. `string` is not a str).
    :Args:
    - string - The string to be pretty printed
    - color - The escape code placed before the string
    - style - The escape code placed after the string (optional)
    :Returns:
    - The decorated string, or `string` itself in the cases described above
    '''
    if not STDOUT.isatty():
        return string
    try:
        return color + string + style
    except TypeError:
        # Only a type mismatch is expected here; anything else (Ctrl-C
        # included) must not be swallowed.
        return string
- '''
- [TODO] Implement this function to dynamically load modules if scale
- '''
def load_module(frm, imp, a = ''):
    '''
    Does the job of importing a module from the system library or local library
    and binds the imported object in this module's global namespace.
    Arguments:
    frm - The name to be used after python from keyword
    imp - The name to be used after python import keyword (a single name)
    a (optional) - The as clause to be used
    e.g.
    load_module('pandas', 'read_sql', 'READ_SQL')
    same as: from pandas import read_sql as READ_SQL
    Returns:
    True when the name was imported and bound, False otherwise (the error
    is printed).
    '''
    import importlib

    try:
        module = importlib.import_module(frm)
        try:
            target = getattr(module, imp)
        except AttributeError:
            # `from package import submodule` is legal even when the
            # submodule has not been imported yet, so try that form too.
            target = importlib.import_module(frm + '.' + imp)
        # exec() inside a function cannot create module-level names in
        # Python 3, so the binding is made explicitly.
        globals()[a if a else imp] = target
        return True
    except Exception as e:
        print(pprint('Error', Fore.RED), pprint('while loading module: '), pprint(frm, Fore.YELLOW), str(e))
        return False
- '''
- [TODO] Implement this function to dynamically load modules if scale
- '''
def load_modules(deps = []):
    '''
    Loads every dependency described in `deps` through load_module.
    Arguments:
    deps - A sequence of entries, each either (frm, imp) or (frm, imp, a).
    Entries of any other length are ignored.
    The program is terminated as soon as one entry fails to load; a
    "checked" line is printed for every entry that loads.
    '''
    for entry in deps:
        size = len(entry)
        if size == 2:
            loaded = load_module(entry[0], entry[1])
        elif size == 3:
            loaded = load_module(entry[0], entry[1], entry[2])
        else:
            continue
        if not loaded:
            exit()
        print( pprint('[{0}] checked.. ').format(entry[1]), pprint('Loaded: {0}s', color=Fore.YELLOW).format(time() - START_TIME) )
def _report_loaded(label):
    '''Prints the "[label] checked.." line with the seconds elapsed since START_TIME.'''
    print( pprint('[{0}] checked.. ').format(label), pprint('Loaded: {0}s', color=Fore.YELLOW).format(time() - START_TIME) )


def _abort_load(label, error):
    '''Prints the import failure for `label` and terminates the program.'''
    print(pprint('Error', Fore.RED), pprint('while loading module: '), pprint(label, Fore.YELLOW), str(error))
    exit()


print( pprint("\t\t\t\t\t\t==== SCGL BOT v1.0 ====", Fore.BLUE) )
print( pprint("== CHECKING MODULES ==", Fore.YELLOW) )
##Every import below is attempted on its own so that a missing dependency
##is reported by name before the program stops.
##Import random
try:
    from random import randint as RAND_INT
except Exception as e:
    _abort_load('random.randint', e)
else:
    _report_loaded('random')
##import mysql [Important]
try:
    from mysql.connector import connect as CONNECT
except Exception as e:
    _abort_load('mysql', e)
else:
    _report_loaded('mysql')
##import pandas [Important]
try:
    from pandas import read_sql as RSQL
except Exception as e:
    _abort_load('pandas', e)
else:
    _report_loaded('pandas')
##import time
try:
    from time import sleep
except Exception as e:
    _abort_load('time.sleep', e)
else:
    _report_loaded('time.sleep')
##import webdriver [important]
##NOTE(review): the original indentation was lost; both Firefox imports are
##assumed to belong to the FIREFOX branch - confirm.
if TYPE.upper() == 'FIREFOX':
    try:
        from selenium.webdriver import Firefox
    except Exception as e:
        _abort_load('selenium.webdriver', e)
    else:
        _report_loaded('selenium.webdriver.Firefox')
    ##import firefox-binary
    try:
        from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
    except Exception as e:
        _abort_load('selenium.webdriver.firefox.firefox_binary', e)
    else:
        _report_loaded('selenium.webdriver.firefox.firefox_binary')
##import webdriver [important]
if TYPE.upper() == 'CHROME':
    try:
        from selenium.webdriver import Chrome
    except Exception as e:
        _abort_load('selenium.webdriver', e)
    else:
        _report_loaded('selenium.webdriver.Chrome')
##import By
try:
    from selenium.webdriver.common.by import By
except Exception as e:
    # The failure label used to name a non-existent firefox sub-package.
    _abort_load('selenium.webdriver.common.by', e)
else:
    _report_loaded('selenium.webdriver.common.by')
##import WebDriverWait
try:
    from selenium.webdriver.support.ui import WebDriverWait
except Exception as e:
    _abort_load('selenium.webdriver.support.ui', e)
else:
    _report_loaded('selenium.webdriver.support.ui')
##import Expected-conditions
try:
    from selenium.webdriver.support import expected_conditions as EC
except Exception as e:
    _abort_load('selenium.webdriver.support', e)
else:
    _report_loaded('selenium.webdriver.support')
##import Select
try:
    from selenium.webdriver.support.select import Select
except Exception as e:
    _abort_load('selenium.webdriver.support.select', e)
else:
    _report_loaded('selenium.webdriver.support.select')
##Selenium exception classes (TimeoutException etc.) are referenced by bare
##name further down, hence the star import. It is not guarded.
from selenium.common.exceptions import *
##import re.sub
try:
    from re import sub as REPLACE
except Exception as e:
    _abort_load('re.sub', e)
else:
    _report_loaded('re.sub')
##Shared selenium driver used by every function below. It stays None when
##TYPE is neither 'FIREFOX' nor 'CHROME'.
driver = None
if TYPE.upper() == 'FIREFOX':
    ##check firefox-binary
    binary = None
    # Hard-coded location of the Firefox executable handed to FirefoxBinary
    binary_location = r'/usr/bin/firefox-esr'
    try:
        binary = FirefoxBinary(binary_location)
        print( pprint('[{0}] checked.. ').format('Firefox-binary'), pprint('Loaded: {0}s', color=Fore.YELLOW).format(time() - START_TIME) )
    except Exception as e:
        print(pprint('Error', Fore.RED), pprint('while loading module: '), pprint('Firefox-binary', Fore.YELLOW), str(e))
        exit()
    #Check firefox driver
    try:
        driver = Firefox(firefox_binary=binary)
        print( pprint('[{0}] checked.. ').format('Firefox-driver'), pprint('Loaded: {0}s', color=Fore.YELLOW).format(time() - START_TIME) )
    except Exception as e:
        print(pprint('Error', Fore.RED), pprint('while loading module: '), pprint("Firefox-driver. \nPlease download geckodriver and place in PATH", Fore.YELLOW), str(e))
        exit()
if TYPE.upper() == 'CHROME':
    # Name of the chromedriver executable passed to Chrome()
    path_to_cdriver = 'chromedriver'
    print( pprint('Loading [{0}].. ').format('Chrome browser'), pprint('Loaded: {0}s', color=Fore.YELLOW).format(time() - START_TIME) )
    # Unlike the Firefox branch, a failure here is not caught
    driver = Chrome(path_to_cdriver)
# NOTE(review): the original indentation was lost; the two blank prints are
# assumed to run for either browser - confirm.
print('')
print('')
##WebWait object
##Explicit-wait helper bound to the shared driver, constructed with 20 as its timeout argument
wait = WebDriverWait(driver, 20)
def wait_load():
    """Pauses for a random whole number of seconds between 1 and 3 so the
    automation looks less like a bot to craigslist anti-scraping software.
    Returns whatever sleep() returns."""
    pause_seconds = RAND_INT(1, 3)
    return sleep(pause_seconds)
##Text looked for in the page source by cr_miscellany to recognise an IP block page
BLOCKED_STUB = "This IP has been automatically blocked."
def load_url(url, callabl, name = 'element', counts=5, **args):
    '''
    Loads a URL and/or waits for a condition using the shared WebDriverWait
    : Args:
    - url - The url to load or '' to just wait until element is found
    - callabl - The callable object that WebWait calls
    - name - The name to be printed as message
    - counts - The amount of retries after an element is not found
    - **args - Forwarded untouched to cr_miscellany after every failed try
    : Returns:
    - True as soon as the wait succeeds, False once all tries are used up
    (always False when counts <= 0, because the wait is never attempted)
    The program exits when RemoteDriverServerException or TimeoutException
    escapes the retry loop (i.e. from driver.get or from cr_miscellany).
    NOTE(review): cr_miscellany itself calls load_url, so a condition that
    never becomes true can nest these calls deeply - confirm this is bounded.
    '''
    try:
        if url != '':
            driver.get(url)
        while counts > 0:
            try:
                wait.until(callabl)
                return True
            except Exception:
                # Exception rather than a bare except, so that Ctrl-C and
                # the SystemExit raised by a nested exit() are not swallowed
                # and retried.
                cr_miscellany(**args)
                counts -= 1
                print(pprint("\n[DEBUG] ", Fore.YELLOW), pprint(name, Fore.GREEN))
                print('ERROR URL: %s'%(url))
                print(pprint('Trying again.. ' + str(counts) + ' more tries left', Fore.YELLOW))
        return False
    except (RemoteDriverServerException, TimeoutException) as e:
        print(pprint('[Error]', Fore.RED), pprint(str(e), Fore.YELLOW))
        exit()
def url_changed(url):
    '''
    Tells whether the selenium driver has moved away from `url`.
    Returns True when `url` is not a substring of the driver's current URL.
    '''
    present_url = driver.current_url
    return url not in present_url
- '''
- [TODO] Saved state
- def get_jobs():
- fname = os.path.join("jobs", "uncompleted_tasks")
- if( not os.path.exists(fname)):
- print(pprint('Path: ' + fname + ' does not exist. Creating..', Fore.YELLOW))
- os.makedirs("jobs")
- with open(fname, mode="w", encoding='utf-8') as f:
- f.write('')
- if( not os.access(fname, os.R_OK)):
- print(pprint('Permission denied: ' + fname, Fore.RED))
- return False
- with open(fname, encoding='utf-8') as a_file:
- return a_file.read()
- def put_job(name):
- fname = os.path.join("jobs", "uncompleted_tasks")
- with open(fname, mode='a+', encoding='utf-8') as f:
- f.write(name + "\n")
- def discard_jobs():
- fname = os.path.join("jobs", "uncompleted_tasks")
- with open(fname, mode="w", encoding='utf-8') as f:
- f.write('')
- def handle_unfinished(info, scrnsht):
- jobs_set = set()
- try:
- for job in get_jobs().split():
- jobs_set.add(job)
- if len(jobs_set) > 0:
- print(pprint( '[DEBUG] ' + str(len(jobs_set)) + ' uncompleted ads detected.. Proceeding to complete them.'))
- for j in jobs_set:
- j = j.strip()
- print(pprint('[INFO] Handling unfinished job: ' + j, Fore.YELLOW))
- j_s = j.split('[===]')
- if len(j_s) < 4:
- return
- email_login(unfinished=j_s[0], username=j_s[1], password=j_s[2], ID_MSG = j_s[3] )
- complete_post(j_s[0],info, scrnsht)
- remove_job(j_s[0])
- jobs_set.discard(j)
- except Exception as e:
- print(pprint('[DEBUG] Cannot handle unfinished jobs'))
- print(e)
- finally:
- write_jobs(jobs_set)
- def remove_job(url):
- jobs_set = set()
- try:
- for job in get_jobs().split():
- jobs_set.add(job)
- for j in jobs_set:
- j = j.strip()
- j_s = j.split('[===]')
- if len(j_s) < 4:
- return
- if url in j_s[0]:
- jobs_set.discard(j)
- except Exception as e:
- print(pprint('[DEBUG] Cannot handle unfinished jobs'))
- print(e)
- finally:
- write_jobs(jobs_set)
- def write_jobs(js):
- discard_jobs()
- for j in js:
- put_job(j)
- '''
##Single-slot list; index 0 is the value update_posts_db stores in the `client` column
CLIENT = ['']
def switch_to_email_tab(user):
    '''
    Focuses the browser tab that shows the webmail of `user`.
    Every window handle is visited in turn. A tab qualifies when its title
    contains 'Workspace Webmail' and the text of its "username" span
    contains `user`.
    Returns True when such a tab is found (it stays focused). Returns False
    when no tab qualifies or when reading the username span fails.
    '''
    print('Searching for the email tab for user: ' + user)
    for tab in driver.window_handles:
        driver.switch_to.window(tab)
        if 'Workspace Webmail' not in driver.title:
            continue
        try:
            shown_user = driver.find_element_by_xpath('//span[@id="username"]').text
            if user in shown_user:
                print('Found the tab for user: ' + user)
                return True
        except:
            # any lookup problem ends the search immediately
            return False
    return False
def switch_to_post_tab(link, func=None
):
    '''
    Focuses the first browser tab whose URL contains `link`.
    : Args:
    - link - Substring looked for in each tab's current URL
    - func - Optional zero-argument callable run once on the matching tab
    Errors raised by `func` are ignored (best effort). Nothing is returned.
    When no tab matches, the last visited tab stays focused.
    '''
    print('Searching for the post tab for link: ' + link)
    for handle in driver.window_handles:
        driver.switch_to.window(handle)
        if link in driver.current_url:
            # Only call func when one was supplied; previously the default
            # None was called and the resulting TypeError silently hidden.
            if func is not None:
                try:
                    func()
                except Exception:
                    # best effort: the callback is optional extra work
                    pass
            break
##Maps a webmail username to True once email_login has signed it in, False otherwise
LOGIN_CONTROLLER = dict()
def email_login(username='sales39@containerking.com', password='falcon00', name = 'I', ID_MSG = 'POST/EDIT/DELETE'):
    '''
    Signs in to the GoDaddy webmail and follows the craigslist verification
    link found in an inbox message.
    : Args:
    - username - Webmail account name
    - password - Webmail password
    - name - Only used in the "Please wait while .. check your mail" message
    - ID_MSG - Text a message's third-column span must contain to be opened
    : Returns:
    - '' when the current craigslist page does not ask for verification
    - the href of the clicked 'post.craigslist' link on success
    - False when no matching message or link was found
    NOTE(review): the defaults hard-code what look like real credentials.
    NOTE(review): the original indentation was lost; where the "not logged
    in" branch ends is a best guess - confirm.
    '''
    current_url = driver.current_url
    # Nothing to do when we are on craigslist and the page does not mention a verification mail
    if ('craigslist.org' in current_url) and (not 'You should receive an email shortly' in driver.page_source):
        print(pprint("No verification mail needed. Returning.. :) "))
        return ''
    else:
        print(pprint("Email verification required.. "))
    if not (username in LOGIN_CONTROLLER.keys()):
        LOGIN_CONTROLLER[username] = False
    if not LOGIN_CONTROLLER[username]:
        print('You are not logged in with: ' + username)
        driver.get('https://sso.godaddy.com/login?app=email&realm=pass')
        print(pprint("\n==Please wait while " + name + " check your mail=="))
        if not load_url('',
                EC.presence_of_element_located((By.ID, 'password')),
                'Cannot find password field'):
            print('An error occured, Please retry')
        uname = driver.find_element_by_id('username')
        uname.clear()
        uname.send_keys(username)
        pword = driver.find_element_by_id('password')
        pword.clear()
        pword.send_keys(password)
        driver.find_element_by_xpath('//*[@id="submitBtn"]').click()
    # No webmail tab for this user: forget the login state and start over (recursive retry, no retry limit)
    if not switch_to_email_tab(username):
        LOGIN_CONTROLLER[username] = False
        return email_login(username, password, name, ID_MSG)
    if not load_url('',
            EC.presence_of_element_located((By.XPATH, '//*[@id="foldertree_INBOX"]//span')),
            'Cannot find Inbox button'):
        print('An error occured, Please retry')
    # Close the first tab whose URL contains 'post.craigslist', then return to the mail tab
    switch_to_post_tab('post.craigslist', lambda : driver.close())
    switch_to_email_tab(username)
    LOGIN_CONTROLLER[username] = True
    elem = driver.find_element_by_xpath('//*[@id="foldertree_INBOX"]//span')
    # Substring test: passes when elem.text is any substring of 'Inbox', including ''
    if not elem.text in 'Inbox':
        print(pprint('Godaddy updated their code, Please update email login code', Fore.RED))
    elem.click()
    if not load_url('',
            EC.presence_of_element_located((By.XPATH, '//*[@id="tbody_mailindex"]')),
            'Inbox display'):
        print(pprint('An error occured, Please retry', Fore.RED))
    inbox = None
    messages = []
    # Poll once per second until the mail index has more than one row; there is no timeout
    while len(messages) <= 1:
        sleep(1)
        inbox = driver.find_element_by_id("tbody_mailindex")
        messages = inbox.find_elements_by_tag_name('tr')
    print(pprint("Looking for unread messages with the keyword: " + ID_MSG, Fore.YELLOW))
    print(pprint("\nThere are " + str(len(messages)) + " emails.", Fore.YELLOW))
    for message in messages:
        tmp_id = message.get_attribute('id')
        temp = None
        try:
            temp = driver.find_element_by_xpath('//tr[@id="' + tmp_id + '"]/td[3]/div/span')
        except Exception as e:
            print(e)
            continue
        if not (ID_MSG in temp.text):
            continue
        # Rows without 'bold' in their class are skipped - presumably bold marks unread mail; confirm
        if (not 'bold' in message.get_attribute('class')):
            continue
        temp.click()
        if not load_url('',EC.presence_of_element_located((By.XPATH, '//*[@id="wmMessage"]')),'Inbox message check', counts=2):
            print(pprint('An error occured, Please retry', Fore.RED))
            return email_login(username, password, name, ID_MSG)
        wm = driver.find_element_by_id('wmMessage')
        links = wm.find_elements_by_tag_name('a')
        for link in links:
            try:
                if 'post.craigslist' in link.get_attribute('href'):
                    href = link.get_attribute('href')
                    print( pprint('[INFO] Found verification link: ' + href, Fore.YELLOW))
                    link.click()
                    wait_load()
                    return href
            except TypeError as e:
                # raised by the `in` test when the anchor has no href (get_attribute returned None)
                pass
            except:
                pass
    return False
def cr_miscellany(**args):
    '''
    Deals with the interstitial pages craigslist may show between steps.
    Called by load_url after every failed wait, with the keyword arguments
    load_url received.
    : Args (all optional keywords):
    - email - Webmail account used when an email verification is requested
    - password - Password of that account
    - ID_MSG - Keyword identifying the verification mail ('title' is
    accepted as a fallback key)
    Missing keywords fall back to the defaults of email_login.
    Steps, in order: accept the terms page when the URL ends with 'pn' or
    'tou'; choose the credit-card option and submit; run the email
    verification when the page asks for it; step back and forward when the
    IP-block text is shown.
    Raises Exception when the terms buttons cannot be found (treated as a
    phone verification) or when the email verification fails.
    NOTE(review): this function calls load_url, which calls this function
    again on failure - confirm the nesting is bounded in practice.
    '''
    # exec() cannot create local variables inside a function in Python 3,
    # so the values are read from the keyword dict directly.
    login_kwargs = {}
    if 'email' in args:
        login_kwargs['username'] = args['email']
    if 'password' in args:
        login_kwargs['password'] = args['password']
    if 'ID_MSG' in args:
        login_kwargs['ID_MSG'] = args['ID_MSG']
    elif 'title' in args:
        login_kwargs['ID_MSG'] = args['title']
    current_url = driver.current_url
    if current_url.endswith('pn') or current_url.endswith('tou'):
        if not load_url('', EC.presence_of_element_located((By.XPATH,
                '//*[@class="previewButtons"]')),
                'Accept Terms and Conditions', counts=1):
            raise Exception('Phone number verification stuff encountered') #signify phone number verification needed
        #Click the accept terms and conditions button
        try:
            driver.find_element_by_xpath('//section[@class="body"]//section[@class="previewButtons"]//button[@type="submit"]').click()
        except Exception:
            pass
        wait_load()
    #necessary in case you're logged out
    if not load_url('',EC.visibility_of_element_located((By.XPATH, '//*[@name="an"]')),'Continue with credit card', counts=1):
        print(pprint('An error occured, Please retry', Fore.RED))
        return
    # Both clicks are best effort: the page may not offer these controls
    try:
        driver.find_element_by_xpath('//form[1]//input[@value="ccard"]').click()
        wait_load()
    except Exception:
        pass
    try:
        driver.find_element_by_xpath('//form[1]//button[@type="submit"]').click()
    except Exception:
        pass
    #deal with confirmation after billing page
    current_url = driver.current_url
    if (('craigslist.org' in current_url) and ('You should receive an email shortly'
            in driver.page_source)) or current_url.endswith('mailoop'):
        print(pprint("Email verification required.. "))
        state = email_login(**login_kwargs)
        if state is False:
            raise Exception('Error occured while verifying email')
        wait_load()
        try:
            for handle in driver.window_handles:
                driver.switch_to.window(handle)
                # Compare against the tab just switched to (as publish_post
                # does), not the URL captured before the verification.
                if state in driver.current_url:
                    break
        except Exception as e:
            print(e)
    if BLOCKED_STUB in driver.page_source:
        print('Attempting to use pre-coded algorithm to correct IP block')
        driver.back()
        sleep(2)
        driver.forward()
def complete_post(url, infos, scrnsht, email, password, title):
    '''
    Pays for a drafted post by filling in the credit card form.
    : Args:
    - url - Not used by this function
    - infos - Mapping of form field name -> value to type in; the key
    'pcard' is skipped
    - scrnsht - Not used by this function
    - email, password - Craigslist account; also forwarded to load_url for
    a possible email verification
    - title - Forwarded to load_url as ID_MSG
    : Returns:
    - the posting id (str) read from the invoice table on success
    - None when the purchase is counted as successful but no id is found
    - False when filling the form or clicking purchase failed, or when the
    purchase button is still present afterwards
    NOTE(review): the original indentation was lost; the second click on
    'go' is assumed to be unconditional, like the first - confirm.
    '''
    SUCCESSFUL_PURCHASE = False
    if len(infos) == 0:
        print('Empty credit card info sent')
    #attempt login incase login wasn't persisted
    craigslist_login(email, password)
    #click continue on billing page
    if not load_url('',EC.presence_of_element_located((By.NAME, 'go')),'Continue with credit card button', counts=5, email=email, password=password, ID_MSG=title):
        print(pprint('An error occured, Please retry', Fore.RED))
    driver.find_element_by_name('go').click()
    if not load_url('',EC.presence_of_element_located((By.XPATH, '//*[@id="ccinfo"]')),'Credit card information'):
        print(pprint('An error occured, Please retry', Fore.RED))
    driver.find_element_by_name('go').click()
    try:
        for info in infos:
            if info == 'pcard':
                continue
            if not load_url('',EC.presence_of_element_located((By.NAME, info)),'Providing credit card information: ' + info):
                print(pprint('An error occured, Please retry', Fore.RED))
            # NOTE(review): `in` is a substring test, so any key that is a
            # substring of 'cardCountry' takes this branch - confirm intent.
            if (info in 'cardCountry'):
                # Only the option whose value is 'US' is ever chosen
                for ctr in driver.find_elements_by_name(info):
                    if ctr.get_attribute('value') == 'US':
                        print(pprint('[INFO] Filling: ' + info + ": " + infos[info], Fore.YELLOW))
                        ctr.click()
                        print(pprint('[INFO] Filled: ' + info + ": " + infos[info], Fore.YELLOW))
                        break
                continue
            if (info in 'cardState'):
                stag = Select(driver.find_element_by_name(info))
                stag.select_by_visible_text(infos[info])
                # Spin until the select reports a selected option; there is
                # no timeout on this loop
                while True:
                    try:
                        print(pprint('[INFO] Filling: ' + info + ": " + infos[info], Fore.YELLOW))
                        stag.first_selected_option
                        print(pprint('[INFO] Filled: ' + info + ": " + infos[info], Fore.YELLOW))
                        break
                    except Exception:
                        continue
                continue
            print('Filling: ' + info + ": " + str(infos[info]))
            driver.find_element_by_name(info).send_keys(infos[info])
            print('Filled: ' + info + ": " + str(infos[info]))
    except Exception:
        # Show the real traceback instead of the repr of a traceback object
        traceback.print_exc()
        return False
    purchase_button = driver.find_element_by_xpath('//div[@id="presubmit"]/button')
    try:
        purchase_button.click()
        wait_load()
        # The purchase is counted as done when the button is gone afterwards
        if not load_url('',EC.presence_of_element_located((By.XPATH, '//div[@id="presubmit"]/button')),'Using guess to determine if post was successful', counts=1):
            SUCCESSFUL_PURCHASE = True
            print('Counting the post as success. Checks passed.. If this is wrong, please contact admin')
    except Exception as e:
        print(e)
        return False
    #driver.get_screenshot_as_file( str(time()) + '_SUCCESS_' + infos['cardLastName'])
    if not SUCCESSFUL_PURCHASE:
        return False
    for inv_elem in driver.find_elements_by_xpath('//table[@id="postingInvoice"]//td'):
        m = re.search(POSTING_REGEX, inv_elem.text)
        if m:
            return m.group(1)
    # The colour belongs to pprint, not to print
    print(pprint('[DEBUG] Posting ID not found.', Fore.RED))
    return None
def breakloop():
    '''
    Always raises SyntaxError: `break` is a statement, so it cannot be
    evaluated as an expression by eval(), and a function cannot break a
    loop of its caller in any case.
    NOTE(review): the intended purpose is unclear and nothing in the
    visible code calls this function - confirm before relying on it.
    '''
    eval('break')
def is_logged_in(username):
    '''
    Tells whether `username` is the account recorded in
    CRAIGSACC_CONTROLLER, i.e. the last one craigslist_login signed in.
    '''
    active_account = CRAIGSACC_CONTROLLER[0]
    return username == active_account


##Single-slot list; index 0 holds the craigslist username recorded by craigslist_login
CRAIGSACC_CONTROLLER = ['']
def craigslist_login(username, password):
    '''
    Signs `username` in to craigslist unless it is already recorded as the
    active account.
    : Args:
    - username - Craigslist account name (typed into 'inputEmailHandle')
    - password - Craigslist password (typed into 'inputPassword')
    Nothing is returned. Every error is caught, reported with its
    traceback, and the caller continues regardless.
    NOTE(review): the original indentation was lost; the nesting of the
    fallback branch below is a best guess - confirm.
    '''
    try:
        if (not is_logged_in(username)):
            try:
                driver.find_element_by_partial_link_text('log in').click()
            except:
                # No 'log in' link on the current page: open the login page directly
                driver.get('https://accounts.craigslist.org/login/home')
                # load_url never attempts the wait when counts=0 and returns False,
                # so the 'log out' click below is never reached
                if load_url('',EC.presence_of_element_located((By.ID,
                        'paginator')), '', counts=0):
                    try:
                        driver.find_element_by_partial_link_text('log out').click()
                    except:
                        pass
        else:
            print(pprint('You are already logged into current city', Fore.BLUE))
            return
        driver.find_element_by_id('inputEmailHandle').send_keys(username)
        driver.find_element_by_id('inputPassword').send_keys(password)
        wait_load()
        driver.find_element_by_xpath('//*[@class="accountform-actions"]/button').click()
        # The element with id 'paginator' is used as the sign that the login worked
        if not load_url('',EC.presence_of_element_located((By.ID,
                'paginator')), '', counts=5):
            print(pprint('Login unsuccessful', Fore.RED))
            raise Exception
        # Remember the account so is_logged_in() short-circuits next time
        CRAIGSACC_CONTROLLER[0] = username
    except:
        print("Encountered an error while I was logging in, Proceeding with error..")
        traceback.print_exception(*sys.exc_info())
def getinput(prompt, typeCheck=False, obj=None, timeout=30.0):
    '''
    Asks the user for a value on the command prompt.
    : Args:
    - prompt - Text shown to the user (a /QUIT hint is appended)
    - typeCheck - When True the answer is converted with `obj` and the
    question is repeated until the conversion succeeds
    - obj - Callable used for the conversion, e.g. int
    - timeout - Accepted for compatibility; not used
    : Returns:
    - the answer (converted when typeCheck is True)
    - False when the answer contains /QUIT (any letter case) or when input
    is interrupted or closed
    '''
    prompt = prompt + "\n\t\t/QUIT to cancel command prompt\n<> "
    while True:
        try:
            inp = input(prompt)
        except (EOFError, KeyboardInterrupt):
            return False
        # The test used to be inverted, which made every normal answer
        # return False.
        if '/QUIT' in inp.strip().upper():
            return False
        if not typeCheck:
            return inp
        try:
            return obj(inp)
        except Exception:
            # conversion failed: ask again
            continue
def prettify(inp):
    '''
    Normalises line endings of a text before it is typed into a form.
    Every run of carriage returns followed by a run of newlines becomes a
    single newline. None is turned into an empty string.
    '''
    return '' if inp is None else REPLACE(r'[\r]{1,}[\n]{1,}', '\n', inp)
def craigslist_handler(url, typ, cat, market, title, body, postal, price, email, password, images, pn, sale_man, sale_mod, sale_size):
    """Main workhorse of the script. This function uses Selenium to automate
    posting in the supplied location and category.

    Steps, in order: log in, open the posting page of `url`, pick the type
    `typ`, the category `cat` and (when asked) a subarea matching `market`,
    fill in the posting form, upload the images and press publish.

    : Args:
    - url - Craigslist site page holding the 'post to classifieds' link
    - typ, cat - Text matched (case-insensitively, as a substring) against
      the option labels of the type and category pickers
    - market - Subarea text; when empty the first subarea offered is taken
    - title, body, postal, price, pn, sale_man, sale_mod, sale_size -
      Values typed into the posting form
    - email, password - Craigslist account; email also fills the reply-to fields
    - images - Image paths joined with '<==>', or a false value for none

    Nothing is returned. Selenium errors outside the guarded sections
    propagate to the caller.

    NOTE(review): the original indentation was lost; the nesting below is
    a best-effort reconstruction - confirm against a pristine copy.
    """
    #images='/root/Documents/slar/crawler/A.jpeg<==>/root/Documents/slar/crawler/B.jpeg<==>/root/Documents/slar/crawler/C.jpeg'
    craigslist_login(email, password)
    if not load_url(url, EC.presence_of_element_located((By.ID, 'post')),
            "I'm unable to find the link to post to classifieds\n", counts=5):
        print(pprint('An error occured, Please retry', Fore.RED))
    #if not load_url('https://www.craigslist.org/about/sites',
    # EC.presence_of_element_located((By.PARTIAL_LINK_TEXT, market)),
    # 'location url'):
    # print('An error occured, Please retry')
    #
    # Three ways of reaching the posting page, tried from most to least specific
    try:
        driver.find_element_by_partial_link_text('post to classifieds').click()
    except:
        try:
            driver.find_element_by_xpath('//*[@class="page-container"]//div[@id="leftbar"]//ul[@id="postlks"]//li//a[@id="post"]').click()
        except:
            driver.find_element_by_id('post').click()
    wait_load()
    if not load_url('', EC.presence_of_element_located((By.CLASS_NAME, 'picker')),
            "Unable to select type", counts=5):
        print(pprint('An error occured, Please retry', Fore.RED))
    select_opts = driver.find_elements_by_xpath('//form[@class="picker"]//ul[@class="selection-list"]//label')
    print(pprint("[INFO] Selecting a type", Fore.YELLOW))
    for opt in select_opts:
        if typ.strip().lower() in opt.find_element_by_class_name('right-side').text.lower():
            sleep(3)
            opt.find_element_by_tag_name('input').click()
            break
    wait_load()
    # Still on the type page: give it five more seconds, then press 'go'
    if driver.current_url.endswith('type'):
        sleep(5)
    if driver.current_url.endswith('type'):
        driver.find_element_by_name('go').click()
    print(pprint("[INFO] Selected type: " + typ, Fore.YELLOW))
    # Busy-wait (no sleep, no timeout) until the category page is reached
    while True:
        if(driver.current_url.endswith('cat')):
            break
    print(pprint("[INFO] Selecting a category", Fore.YELLOW))
    select_opts = driver.find_elements_by_xpath('//form[@class="picker"]//ul[@class="selection-list"]//label')
    for opt in select_opts:
        if cat.strip().lower() in opt.find_element_by_class_name('right-side').text.lower():
            wait_load()
            opt.find_element_by_tag_name('input').click()
            break
    wait_load()
    if driver.current_url.endswith('cat'):
        sleep(5)
    if driver.current_url.endswith('cat'):
        driver.find_element_by_name('go').click()
    print(pprint("[INFO] Selected category: " + cat.strip(), Fore.YELLOW))
    # MARKET records whether a subarea has already been chosen
    MARKET = False
    msg = "Subarea is required"
    count = 2
    # Walk through whatever picker pages craigslist shows until the edit
    # form is reached; any error simply ends this phase
    try:
        while True:
            if driver.current_url.endswith('edit'):
                break
            if not load_url('', EC.presence_of_element_located((By.CLASS_NAME, 'picker')),
                    msg, counts=count):
                break
            if(driver.current_url.endswith('subarea')) and (not MARKET):
                print(pprint('[INFO] Selecting a subarea', Fore.YELLOW))
                if not market:
                    elems = driver.find_elements_by_xpath('//form[@class="picker"]//ul[@class="selection-list"]//label//input')
                    print(pprint('Subarea is required but isn\'t provided', Fore.YELLOW))
                    print(pprint('Using pre-coded guess algorithm to pick one subarea', Fore.YELLOW))
                    elems[0].click()
                    MARKET = True
                    continue
                select_opts = driver.find_elements_by_xpath('//form[@class="picker"]//ul[@class="selection-list"]//label')
                for opt in select_opts:
                    if market.strip().lower() in opt.text.lower():
                        opt.find_element_by_tag_name('input').click()
                        MARKET = True
                        break
                if not MARKET:
                    # No label matched: fall back to the first option
                    elems = driver.find_elements_by_xpath('//form[@class="picker"]//ul[@class="selection-list"]//label//input')
                    if len(elems) > 0:
                        elems[0].click()
                msg = "Using pre-coded guess algorithm to check for any unexpected input from craigslist"
                count = 1
                print(pprint("[INFO] Selected subarea: " + market, Fore.YELLOW))
            else:
                if driver.current_url.endswith('edit'):
                    break
                if driver.current_url.endswith('subarea') and MARKET:
                    continue
                # Unknown picker page: take the first option offered
                elems = driver.find_elements_by_xpath('//form[@class="picker"]//ul[@class="selection-list"]//label//input')
                if len(elems) > 0:
                    elems[0].click()
                else:
                    break
    except:
        pass
    wait_load()
    # Busy-wait (no sleep, no timeout) until the edit page is reached
    while True:
        if(driver.current_url.endswith('edit')):
            break
    print(pprint('[INFO] Filling post information', Fore.YELLOW))
    if not load_url('', EC.presence_of_element_located((By.ID, 'postingForm')),
            "Unable to get the posting form", counts=5):
        print(pprint('An error occured, Please retry', Fore.RED))
    driver.find_element_by_name('PostingTitle').send_keys(title)
    driver.find_element_by_name('Ask').send_keys(price)
    driver.find_element_by_name('postal').send_keys(postal)
    driver.find_element_by_name('GeographicArea').send_keys(market)
    driver.find_element_by_name('PostingBody').send_keys(prettify(body))
    driver.find_element_by_name('sale_manufacturer').send_keys(sale_man)
    driver.find_element_by_name('sale_model').send_keys(sale_mod)
    driver.find_element_by_name('sale_size').send_keys(sale_size)
    driver.find_element_by_name('wantamap').click()
    # The e-mail fields are filled on a best-effort basis
    try:
        driver.find_element_by_name('FromEMail').send_keys(prettify(email))
        driver.find_element_by_name('ConfirmEMail').send_keys(prettify(email))
    except:
        pass
    driver.find_element_by_xpath(
        '//*[@id="contact_phone"]').send_keys(pn)
    sleep(2)
    print(pprint('[INFO] Sending post information', Fore.YELLOW))
    driver.find_element_by_name('go').click()
    wait_load()
    '''
    while True:
        if(driver.current_url.endswith('geoverify')):
            break
    wait_load()
    print(pprint('[INFO] Confirming post location', Fore.YELLOW))
    if not load_url('', EC.presence_of_element_located((By.ID, 'leafletForm')),
    "Unable to get the posting form", counts=5):
        print(pprint('An error occured, Please retry', Fore.RED))
    driver.find_element_by_id('leafletForm').find_element_by_xpath('//button[@class="continue bigbutton"]').click()
    print(pprint("[INFO] Post location confirmed ", Fore.YELLOW))
    wait_load()
    '''
    try:
        if load_url('', EC.presence_of_element_located((By.ID, 'uploader')),
                "Unable to get the posting images", counts=2):
            # Switch to the classic uploader when the link is offered
            try:
                driver.find_element_by_id('classic').click()
            except:
                pass
            if not load_url('', EC.presence_of_element_located((By.XPATH, '//form[@class="add"]//input[@name="file"]')),
                    "Unable to get the posting form", counts=5):
                print(pprint('An error occured while processing upload, Please retry', Fore.YELLOW))
            if images:
                image_arr = images.split('<==>')
                for image in image_arr:
                    image = image.strip()
                    if not (os.path.exists(image)):
                        print(pprint('[DEBUG]'), ' Image: ' + image + ' does not exist.. Proceeding')
                        continue
                    inps = driver.find_elements_by_tag_name('input')
                    for inp in inps:
                        if 'file' in inp.get_attribute('type'):
                            try:
                                print(pprint('Uploading image: ' + image, Fore.BLUE))
                                inp.clear()
                                inp.send_keys(image)
                                driver.find_element_by_xpath('//form[@class="add"]').submit()
                                if not load_url('', EC.presence_of_element_located((By.XPATH, '//div[@class="imgwrap"]//img')), "Uploading image..", counts=6):
                                    print(pprint('An error occured while processing upload, Please retry', Fore.YELLOW))
                                print(pprint('Image: ' + image + ' uploaded', Fore.BLUE))
                            except Exception as e:
                                # `e` used to be unbound here and a traceback
                                # object was concatenated to a str; both made
                                # the handler itself fail, which aborted all
                                # remaining uploads.
                                print(pprint('Error: ' + str(e) + ' occured while uploading.', Fore.RED))
                                print(pprint("[DEBUG] More info:\n\t\t " + traceback.format_exc(), Fore.YELLOW))
                            # Only the first file input is used for each image
                            break
            driver.find_element_by_xpath('//button[@class="done bigbutton"]').click()
            wait_load()
    except:
        print(pprint('[DEBUG] Image(s) not uploaded. Proceeding that way'))
        traceback.print_exception(*sys.exc_info())
    print(pprint('Publishing..'))
    if not load_url('', EC.presence_of_element_located((By.NAME, 'go')), "Publish button", counts=5):
        print(pprint('An error occured while processing upload, Please retry', Fore.YELLOW))
    driver.find_element_by_name('go').click()
    print(pprint("\nI've published a draft to post the ad: " + title, Fore.YELLOW))
    print(pprint("Proceeding to verify the post.. Please be patient", Fore.YELLOW))
def publish_post(url, typ, cat, market, title, body, postal, price,
        email='sales39@containerking.com', password='falcon00', info={}, scrnsht=str(time()) + 'screenshot.png',
        images=False, pn='', sale_man='', sale_mod='', sale_size=''):
    '''
    Publishes one ad from start to finish: drafts it with
    craigslist_handler, verifies it by email and pays with complete_post.
    : Args:
    - url .. price, images, pn, sale_* - Forwarded to craigslist_handler
    - email, password - Account used for craigslist and for the webmail
    - info - Credit card form values forwarded to complete_post
    - scrnsht - Forwarded to complete_post
    : Returns a (status, seconds) tuple:
    - (False, elapsed) when the email verification fails
    - (result of complete_post, elapsed) otherwise
    - (False, None) when any step raises; the traceback is printed
    '''
    # [TODO] handle_unfinished(info, scrnsht)
    try:
        started_at = time()
        craigslist_handler(url, typ, cat, market, title, body,
                           postal, price, email, password, images, pn, sale_man,
                           sale_mod, sale_size)
        # [TODO] put_job(href + '[===]' + email + '[===]' + password + '[===]' + title)
        verification = email_login(email, password, ID_MSG = title)
        if verification == False:
            return False, (time() - started_at)
        wait_load()
        # Focus the first tab whose URL contains the verification link
        try:
            for tab in driver.window_handles:
                driver.switch_to.window(tab)
                if verification in driver.current_url:
                    break
        except Exception as err:
            print(err)
        outcome = complete_post(verification, info, scrnsht, email, password, title)
        return outcome, (time() - started_at)
    except:
        traceback.print_exception(*sys.exc_info())
        return False, None
def cleanup_tabs():
    '''
    Closes every browser tab whose URL contains 'post.craigslist',
    announcing the title of each tab before it is closed.
    '''
    for tab in driver.window_handles:
        driver.switch_to.window(tab)
        if 'post.craigslist' not in driver.current_url:
            continue
        print(pprint('[INFO] Closing: ' + driver.title, Fore.YELLOW))
        driver.close()
#all post objects go in here
#(module-level dict, starts empty)
POSTS_CONTROLLER = {}
def check_market(post_title,post_id, market_url, city, limit = 5):
    '''
    Checks whether a post is among the first `limit` results of a market page.
    : Args:
    - post_title - Only used in the log messages
    - post_id - Posting id compared with each result's data-pid attribute
    - market_url - Listing page to load
    - city - Only used in the log messages
    - limit - How many top results to inspect; 0 means the default of 5
    : Returns:
    - True when one of the inspected results carries exactly this id,
    False otherwise
    '''
    if limit == 0:
        limit = 5
    wanted_id = str(post_id)
    print(pprint('[INFO] Checking market if ' + str(post_title)+ " (" + wanted_id + ') post is in ' + city + ' top ' + str(limit)))
    if not load_url(market_url, EC.presence_of_element_located((By.ID, 'sortable-results')) ,"Markets result info", counts=5):
        print(pprint('An error occured while checking the market: ' + market_url + ', Please retry', Fore.YELLOW))
    total_res = len(driver.find_elements_by_xpath('//div[@id="sortable-results"]/ul[@class="rows"]/li[@class="result-row"]/p[@class="result-info"]'))
    print(pprint('[INFO] I can see ' + str(total_res) + ' ads on ' + city + '\'s page', Fore.YELLOW))
    XP = '//div[@id="sortable-results"]/ul[@class="rows"]/li[@class="result-row"][position() < ' + str(limit + 1) + ']'
    top_posts = driver.find_elements_by_xpath(XP)
    print(pprint('[INFO] Looking for the post: ' + str(post_title)+ " (" + wanted_id + ') in ' + city, Fore.YELLOW))
    for row in top_posts[:limit]:
        top_post = row.get_attribute('data-pid')
        # Exact comparison: a substring test would let e.g. '123' match the
        # unrelated id '61234', and fails outright when the attribute is
        # missing (None).
        if top_post is not None and top_post == wanted_id:
            print(wanted_id + ' in top: ' + str(limit))
            return True
        print (top_post)
    return False
# One-element list holding the shared database connection (None until set).
SERVER_CONNECTOR = [None]


def update_posts_db(city, state, cat, time, pid, posting_id):
    '''
    Insert a row describing a published post into CraigslistPoster.posts
    and commit it.

    :Args:
    - city - Value for the market_city column
    - state - Value for the market_state column
    - cat - Value for the category column
    - time - Value for the POST_TIME column
    - pid - Value for the post_id column
    - posting_id - Value for the postingID column

    Errors are printed as a traceback and not re-raised. Nothing is
    written when no connection has been stored in SERVER_CONNECTOR.
    '''
    conn = SERVER_CONNECTOR[0]
    if not conn:
        print(pprint('[DEBUG] Cannot find the server connector object'))
        return
    try:
        cur = conn.cursor()
        cur.execute(
            "INSERT INTO CraigslistPoster.posts(`client`, `postingID`, `post_id`, "
            "`market_city`, `market_state`, `category`, `POST_TIME`) "
            "VALUES(%s, %s, %s, %s, %s, %s, %s)",
            (CLIENT[0], str(posting_id), str(pid), str(city), str(state), str(cat), time))
        # Commit before logging: the previous log line referenced an
        # undefined name, so the commit was never reached.
        conn.commit()
        print('Post db updated %s'%(str(cur.rowcount)))
    except Exception:
        traceback.print_exception(*sys.exc_info())
def handle_dump(pdump):
    '''
    Publish the ad described by the mapping `pdump`.

    Every entry below is read from `pdump` and forwarded to publish_post
    under the paired keyword name; the result of publish_post is returned
    unchanged.
    '''
    # (publish_post keyword, pdump key), in the order the keys are read.
    forwarded = (
        ('url', 'ad_url'),
        ('typ', 'typ'),
        ('cat', 'cat'),
        ('market', 'market'),
        ('title', 'title'),
        ('body', 'body'),
        ('postal', 'postal'),
        ('price', 'price'),
        ('email', 'email'),
        ('password', 'password'),
        ('images', 'images'),
        ('info', 'info'),
        ('pn', 'pn'),
        ('sale_man', 'sale_man'),
        ('sale_mod', 'sale_mod'),
        ('sale_size', 'sale_size'),
    )
    arguments = {}
    for keyword, key in forwarded:
        arguments[keyword] = pdump[key]
    return publish_post(**arguments)
def check_posts_db(pid, city, delay):
    '''
    Decide from the posts table whether an ad may be (re)posted.

    :Args:
    - pid - Ad id matched against the post_id column
    - city - Market city, matched with LIKE '%city%' on market_city
    - delay - Minimum age in seconds the latest post must have

    :Returns:
    - True when no row matches the city at all
    - the postingID of the most recent row for `pid` when that row is
      older than `delay` seconds
    - False when the most recent row is too recent or no row exists for `pid`
    - None when the connection is missing or a database error occurred
    '''
    conn = SERVER_CONNECTOR[0]
    if not conn:
        print(pprint('[DEBUG] Cannot find the server connector object'))
        return None
    try:
        cur = conn.cursor(buffered=True)
        # Bound parameter instead of string formatting, so a quote in the
        # city name can neither break nor alter the query.
        cur.execute("SELECT 1 FROM `posts` WHERE `market_city` LIKE %s LIMIT 1",
                    ('%' + str(city) + '%',))
        if cur.fetchone() is None:
            print(pprint('Not yet registered.. Proceeding', Fore.BLUE))
            return True
        cur.execute("SELECT POST_TIME, postingID FROM CraigslistPoster.posts WHERE `post_id` = %s ORDER BY `POST_TIME` DESC LIMIT 1", (str(pid),))
        latest = cur.fetchone()
        if latest is None:
            return False
        posted_at, posting_id = latest
        elapsed = (datetime.datetime.now() - posted_at).total_seconds()
        if elapsed > delay:
            return posting_id
        print(pprint('%d minutes left..'%((delay/60) - (elapsed/60)), Fore.BLUE))
        return False
    except Exception:
        traceback.print_exception(*sys.exc_info())
        return None
# Bookkeeping lists of ad labels.
US_ADS = list()   # unsuccessful ads
SK_ADS = list()   # skipped ads
SU_ADS = list()   # successful ads
INC_ADS = list()  # incomplete ads

# One-element list used as a mutable "an ad is being posted" flag.
AD_IN_PROGRESS = [False]
def _settle_ad(label, bucket):
    '''Take `label` out of INC_ADS (if present) and record it in `bucket`.'''
    if label in INC_ADS:
        INC_ADS.remove(label)
    bucket.append(label)


def handle_market(ads):
    '''
    Post one ad to its market if it is due, and record the outcome.

    :Args:
    - ads - Mapping describing a single ad; the keys 'm_url', 'm_lim',
      'del', 'm_cit', 'm_sta', 'pid', 'title' and 'cat' are read here and
      the whole mapping is handed to handle_dump.

    The ad is listed in INC_ADS while it is being handled and ends up in
    exactly one of SK_ADS (not due yet / still in the top results),
    US_ADS (posting failed or raised) or SU_ADS (posted).
    '''
    status = None
    t_o_c = None
    murl = ads['m_url']
    mlim = ads['m_lim']
    delay = ads['del'] * 60
    city = ads['m_cit']
    state = ads['m_sta']
    post_id = ads['pid']
    label = "City: %s; PID: %s"%(city,str(post_id))
    INC_ADS.append(label)
    try:
        postingID = check_posts_db(post_id, city, delay)
        # Skip when the delay has not elapsed, or when the previous post
        # is still within the top results of the market page.
        if not postingID or check_market(ads['title'], postingID, murl, city, mlim):
            print(pprint('[INFO] %s\'s not ready. Running to the next city :)'%(city)))
            _settle_ad(label, SK_ADS)
            return
        while AD_IN_PROGRESS[0]:
            print(pprint('[INFO] An ad in progress.. Queueing'))
            sleep(5)
        AD_IN_PROGRESS[0] = True
        try:
            status, t_o_c = handle_dump(ads)
            if status:
                #status here is posting_id [TODO] Clean up later
                update_posts_db(city, state, ads['cat'], datetime.datetime.now(), post_id, status)
        finally:
            # Always release the flag; otherwise an exception here would
            # leave every later ad waiting in the loop above forever.
            AD_IN_PROGRESS[0] = False
        if not status:
            print(pprint('[ERROR] An error occured while posting to: ' + city, Fore.RED))
            _settle_ad(label, US_ADS)
            return
    except Exception:
        traceback.print_exception(*sys.exc_info())
        _settle_ad(label, US_ADS)
        return
    print(pprint('Ad posted to: {0} in: {1}'.format(city,t_o_c)))
    _settle_ad(label, SU_ADS)
def manage_posts(ads):
    '''
    Run handle_market for every ad in `ads`, one after another.

    :Args:
    - ads - Sequence of ad mappings; 'm_cit' and 'del' are read here for
      the progress message.

    A failure while handling one ad is printed as a traceback and does
    not stop the remaining ads from being processed.
    '''
    print('%d ads/market recorded for: %s'%(len(ads), CLIENT[0]))
    for number, ad in enumerate(ads, 1):
        print(pprint('======================= ad: %d ========================'%(number)), end="\n\n")
        try:
            handle_market(ad)
            print("Leaving " + str(ad['m_cit']) + " Delay time: " + str(ad['del']) + "mins")
        except Exception:
            # Report instead of silently dropping the error.
            traceback.print_exception(*sys.exc_info())
- '''
- [TODO]
- class Post:
- \'''
- A portable object representation of a post
- e.g. with field types for the use of publish post
- \'''
- def __init__(self):
- #Post id should be replaced by a real hash code generator
- post_id = None
- self.url = ''
- self.typ = ''
- self.cat = ''
- self.market = ''
- self.title = ''
- self.body = ''
- self.postal = ''
- self.price = ''
- self.email = ''
- self.password = ''
- self.images = []
- self.ccinfo = {}
- def setType(self, t):
- self.typ = t
- def setUrl(self, u):
- self.post_url = u
- def setCat(self, c):
- self.cat = c
- def setMarket(self, c):
- self.market = c
- def setTitle(self, c):
- self.title = c
- def setBody(self, c):
- self.body = c
- def setPostal(self, c):
- self.postal = c
- def setPrice(self, c):
- self.price = c
- def setEmail(self, c):
- self.email = c
- def setPassword(self, c):
- self.password = c
- def addImage(self, c):
- self.images.append(c)
- def setImages(self, c):
- self.images = c
- def setCcinfo(self, c):
- self.ccinfo = c
- def appendCcinfo(self, field, value):
- self.ccinfo[field] = value
- '''
def read_data_from_sql_db(connection, sql):
    '''
    Run `sql` on `connection` through RSQL and return the result
    converted with its to_dict() method.
    '''
    return RSQL(con=connection, sql=sql).to_dict()
def prettify(inp):
    '''
    Return `inp` with surrounding whitespace stripped and every carriage
    return removed; None becomes an empty string.
    '''
    return '' if inp is None else inp.strip().replace('\r', '')
def handle_ads_data(dic):
    '''
    Convert the column-oriented ads/markets query result `dic` into a list
    of per-ad dicts by delegating to parse_db_item.

    The number of rows is taken from the 'search_url' column. Each entry
    below maps an output key to [source column] or [source column, type].
    '''
    row_count = len(dic['search_url'].values())
    if row_count < 0:
        return False
    field_specs = dict([
        ('typ', ['type']),
        ('cat', ['category']),
        ('market', ['tags']),
        ('title', ['title']),
        ('body', ['description']),
        ('password', ['password']),
        ('price', ['price']),
        ('postal', ['postalcode']),
        ('email', ['email']),
        ('images', ['images']),
        ('sale_man', ['tag1']),
        ('sale_mod', ['tag2']),
        ('sale_size', ['tag3']),
        ('pid', ['market', int]),
        ('del', ['delay', float]),
        ('m_url', ['search_url']),
        ('m_lim', ['post_above', int]),
        ('m_cit', ['market_city']),
        ('m_sta', ['market_state']),
        ('ad_url', ['post_url']),
        ('pn', ['phone_no']),
    ])
    return parse_db_item(dic, row_count, field_specs)
def handle_cc_data(dic, ads):
    '''
    Attach the primary payment card to every ad.

    The payments query result `dic` is parsed with parse_db_item. The
    first card whose 'pcard' value is not False gets 'cardCountry' and
    'contactName' added and is stored under the "info" key of each entry
    in `ads`. When no such card exists the ads are left as they are.
    `ads` is returned.
    '''
    card_count = len(dic['number'].values())
    if card_count < 0:
        return False
    card_fields = dict([
        ('cardNumber', ['number', int]),
        ('cvNumber', ['cvv', int]),
        ('expMonth', ['exp_month', int]),
        ('expYear', ['exp_year', int]),
        ('cardFirstName', ['firstname']),
        ('cardLastName', ['lastname']),
        ('cardAddress', ['address']),
        ('cardCity', ['city']),
        ('cardState', ['state']),
        ('cardPostal', ['postalcode', int]),
        ('contactPhone', ['phone']),
        ('pcard', ['primarycard', bool]),
    ])
    cards = parse_db_item(dic, card_count, card_fields)
    primary = next((card for card in cards if card['pcard'] != False), None)
    if primary is not None:
        primary['cardCountry'] = "US"
        primary['contactName'] = primary['cardFirstName'] + " " + primary['cardLastName']
        for ad in ads:
            ad["info"] = primary
    return ads
def parse_db_item(dic, total_ads, parse_dict):
    '''
    Turn a column-oriented mapping into a list of row dicts.

    :Args:
    - dic - Mapping of column name -> mapping/sequence indexed by row number
    - total_ads - Number of rows to produce (rows 0 .. total_ads - 1)
    - parse_dict - Mapping of output key -> [column] or [column, converter]

    :Returns:
    A list with one dict per row. For each output key the value is read
    from dic[column][row]. A missing or None value becomes the converter's
    zero value (0 for int, 0.0 for float, False for bool) or '' when no
    converter is given. Strings are stripped, then the converter is
    applied; if conversion fails the unconverted value is kept.
    '''
    rows = []
    for row_idx in range(total_ads):
        parsed = {}
        for field, spec in parse_dict.items():
            column = spec[0]
            converter = spec[1] if len(spec) > 1 else None
            try:
                value = dic[column][row_idx]
            except (KeyError, IndexError, TypeError):
                value = None
            if value is None:
                value = ''
                if converter is not None:
                    try:
                        # Zero value of the target type: int() -> 0, etc.
                        value = converter()
                    except TypeError:
                        value = ''
            if isinstance(value, str):
                value = value.strip()
            if converter is not None:
                try:
                    value = converter(value)
                except (TypeError, ValueError, OverflowError):
                    # Best effort: keep the raw value when it cannot be
                    # converted.
                    pass
            parsed[field] = value
        rows.append(parsed)
    return rows
def account():
    '''
    Print a summary of the unsuccessful, incomplete, skipped and
    successful ads: a count line per category followed by one indented
    line per recorded ad.
    '''
    sections = (
        ('UNSUCCESSFUL ADS: %d.', US_ADS),
        ('INCOMPLETE ADS: %d.', INC_ADS),
        ('SKIPPED ADS: %d.', SK_ADS),
        ('SUCCESSFUL ADS: %d.', SU_ADS),
    )
    for heading, entries in sections:
        print(pprint(heading%(len(entries)), Fore.BLUE))
        for position in range(len(entries)):
            print(pprint("\t\t%s"%(entries[position]), Fore.BLUE))
def initAll():
    '''
    Empty the four module-level result lists (US_ADS, SK_ADS, SU_ADS and
    INC_ADS) so a new round starts with clean statistics.

    The lists are cleared in place: plain assignment inside this function
    would only create local names and leave the module-level lists as
    they were.
    '''
    for bucket in (US_ADS, SK_ADS, SU_ADS, INC_ADS):
        del bucket[:]
def main():
    '''
    Run the posting loop forever.

    Each round resets the statistics, connects to the database, loads the
    ads and payment data for the client, posts the ads, prints a summary
    and then rests for RESTING_TIME seconds. An error during a round is
    printed as a traceback; the loop then rests and starts the next round.
    '''
    RESTING_TIME = 120 #time to rest in secs after a round of post
    bot_name = 'Gideon'
    print(pprint('Hi, I\'m %s. Nice to meet you.'%(bot_name), Fore.BLUE))
    while True:
        try:
            initAll()
            # Close the connection left over from the previous round so
            # connections do not pile up round after round.
            stale = SERVER_CONNECTOR[0]
            if stale is not None:
                SERVER_CONNECTOR[0] = None
                try:
                    stale.close()
                except Exception:
                    traceback.print_exception(*sys.exc_info())
            print('')
            print(pprint('[INFO] Trying to reach the server'))
            # NOTE(review): credentials are hard-coded in source; they
            # should be moved to configuration and rotated.
            conn = CONNECT(user='admin', password='noobzilla1', database='CraigslistPoster', port='3306',
                           host='digitechautomation.cywrzfld2wji.us-west-2.rds.amazonaws.com')
            print(pprint('Server reached..'))
            print('')
            SERVER_CONNECTOR[0] = conn
            CLIENT[0] = 'Container King'
            container_data = read_data_from_sql_db(conn, sql=(
                "SELECT ads.id, ads.market, ads.client, ads.type, ads.category, ads.tags, "
                "ads.tag1, ads.tag2, ads.tag3, ads.title, ads.description, ads.price, "
                "ads.postalcode, ads.email, ads.password, ads.images, "
                "markets.market_city, markets.market_state, markets.phone_no, "
                "markets.delay, markets.post_above, markets.post_url, markets.search_url "
                "FROM ads INNER JOIN markets ON ads.market = markets.id "
                "WHERE ads.client = '%s'")%(CLIENT[0]))
            payments_data = read_data_from_sql_db(conn, sql="SELECT * FROM CraigslistPoster.payments WHERE client = '%s'"%(CLIENT[0]))
            ads = handle_ads_data(container_data)
            ads = handle_cc_data(payments_data, ads)
            manage_posts(ads)
        except Exception:
            # Show what went wrong; Ctrl-C is deliberately not caught here
            # so the bot can be stopped.
            traceback.print_exception(*sys.exc_info())
        #rest after every round, including failed ones, to avoid a tight retry loop
        print(pprint('Sleeping.. I\'ll be awake in %d mins'%(RESTING_TIME/60), Fore.BLUE))
        account()
        sleep(RESTING_TIME)
        print('')
        print(pprint('I\'m awake now', Fore.BLUE))
# Script entry point: run the bot, then terminate the interpreter.
if __name__ == "__main__":
    main()
    # sys.exit() rather than the interactive-shell helper exit().
    sys.exit()
- #email_login()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement