Guest User

Untitled

a guest
Jan 5th, 2018
393
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 40.41 KB | None | 0 0
  1. '''This program scrapes craigslist. Still much to be done
  2. to make more robust but this is a stable build on its own
  3.  
  4. Author: https://www.fiverr.com/n3tr1x
  5. '''
  6. print('Loading..')
  7. import os
  8. import sys
  9. import traceback
  10. import re
  11. import random
  12. import time
  13. import datetime
  14. import re
  15. from sys import path as PATH, stdout as STDOUT, argv # MARK
  16.  
  17.  
  18. print("\t\t\t\t\t\t==== SCGL BOT v1.0 ====")
  19. print("== CHECKING MODULES ==")
  20.  
  21.  
  22. from re import sub as REPLACE # [MARK]
  23.  
  24. from mysql.connector import connect as CONNECT
  25. from pandas import read_sql as RSQL
  26. from settings import *
  27.  
  28.  
  29. from selenium.webdriver.common.by import By
  30. from selenium.webdriver.support.ui import WebDriverWait
  31. from selenium.webdriver.support import expected_conditions as EC
  32. from selenium.webdriver.support.select import Select
  33. from selenium.common.exceptions import *
  34.  
  35. driver = None # MARK
  36.  
  37.  
  38. if TYPE.upper() == 'FIREFOX':
  39. from selenium.webdriver import Firefox
  40. from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
  41. binary = None
  42. binary_location = r'/usr/bin/firefox-esr'
  43. binary = FirefoxBinary(binary_location)
  44. driver = Firefox(firefox_binary=binary)
  45.  
  46.  
  47. if TYPE.upper() == 'CHROME':
  48. from selenium.webdriver import Chrome
  49. path_to_cdriver = 'chromedriver'
  50. driver = Chrome(path_to_cdriver)
  51.  
  52.  
  53. print('')
  54. print('')
  55.  
  56.  
  57. # WebWait object
  58. web_wait_object = WebDriverWait(driver, 20) # MARK
  59.  
  60.  
  61. def wait_load():
  62. """Throws in random waits to not appear as a bot to
  63. craigslist anti-scraping software"""
  64. return time.sleep(random.randint(1, 3))
  65.  
  66.  
  # Text that cr_miscellany() searches for in driver.page_source to decide
  # whether the current page is the IP-block notice.
  67. BLOCKED_STUB = "This IP has been automatically blocked."
  68.  
  69.  
  70. def load_url(url, callabl, name='element', counts=5, **args):
  71. '''
  72. Loads a URL and or check for the callable using webwait
  73. : Args:
  74. - url - The url to load or '' to just wait until element is found
  75. - callabl - The callable object that WebWait calls
  76. - name - The name to be printed as message
  77. - count - The amount of retries after an element is not found
  78. '''
  79. try:
  80. if url != '':
  81. driver.get(url)
  82. while counts > 0:
  83. try:
  84. web_wait_object.until(callabl)
  85. return True
  86. except:
  87. cr_miscellany(**args)
  88. counts -= 1
  89. print("\n[DEBUG] ", name)
  90. print('ERROR URL: %s' % (url))
  91. print('Trying again..', counts, 'more tries left')
  92. return False
  93. except (RemoteDriverServerException, TimeoutException) as e:
  94. print('[Error]', e)
  95. exit()
  96.  
  97.  
  98. def url_changed(url):
  99. '''
  100. Checks if URL of the selenium driver has changed
  101. '''
  102. return (url not in driver.current_url)
  103.  
  104.  
  105. def switch_to_email_tab(user):
  106. print('Searching for the email tab for user:', user)
  107. for handle in driver.window_handles:
  108. driver.switch_to.window(handle)
  109. if 'Workspace Webmail' in driver.title:
  110. try:
  111. if user in driver.find_element_by_xpath('//span[@id="username"]').text:
  112. print('Found the tab for user:', user)
  113. return True
  114. else:
  115. continue
  116. except:
  117. return False
  118. return False
  119.  
  120.  
  121. def switch_to_post_tab(link, func=None):
  122. print('Searching for the post tab for link: ', link)
  123. for handle in driver.window_handles:
  124. driver.switch_to.window(handle)
  125. if link in driver.current_url:
  126. try:
  127. func()
  128. except:
  129. pass
  130. break
  131.  
  132.  
  # Maps a webmail username to a bool that email_login() sets to True once
  # it has reached that user's inbox and to False when it has not.
  133. LOGIN_CONTROLLER = dict()
  134.  
  135.  
  # email_login: open the GoDaddy Workspace webmail inbox, find the unread
  # craigslist mail whose subject contains ID_MSG and click its
  # 'post.craigslist' link.
  #
  # Returns:
  #   ''    - the current craigslist page does not contain
  #           'You should receive an email shortly' (nothing to verify)
  #   href  - the verification link that was found and clicked
  #   False - no matching link was found, or any exception in the mail loop
  #
  # Parameters: username/password are the webmail credentials; name is only
  # used in a progress message; ID_MSG is the subject keyword to look for.
  #
  # NOTE(review): real-looking credentials are hard-coded as defaults below;
  # they should come from configuration instead.
  # NOTE(review): indentation was lost in this paste, so the nesting of the
  # statements below has to be inferred from context.
  136. def email_login(username='sales39@containerking.com', password='falcon00', name='I', ID_MSG='POST/EDIT/DELETE'):
  137. current_url = driver.current_url
  138. if ('craigslist.org' in current_url) and (not 'You should receive an email shortly' in driver.page_source):
  139. print("No verification mail needed. Returning.. :) ")
  140. return ''
  141. else:
  142. print("Email verification required.. ")
  143.  
  # First time this username is seen: record it as not logged in.
  144. if not (username in LOGIN_CONTROLLER.keys()):
  145. LOGIN_CONTROLLER[username] = False
  146.  
  # Not logged in yet: go through the GoDaddy SSO login form.
  147. if not LOGIN_CONTROLLER[username]:
  148. print('You are not logged in with: ' + username)
  149. driver.get('https://sso.godaddy.com/login?app=email&realm=pass')
  150. print("\n==Please wait while " + name + " check your mail==")
  151. if not load_url('',
  152. EC.presence_of_element_located((By.ID, 'password')),
  153. 'Cannot find password field'):
  154. print('An error occured, Please retry')
  155.  
  156. uname = driver.find_element_by_id('username')
  157. uname.clear()
  158. uname.send_keys(username)
  159. pword = driver.find_element_by_id('password')
  160. pword.clear()
  161. pword.send_keys(password)
  162. driver.find_element_by_xpath('//*[@id="submitBtn"]').click()
  163.  
  # No webmail tab for this user: mark as logged out and start over.
  # NOTE(review): this recursion has no depth limit visible here.
  164. if not switch_to_email_tab(username):
  165. LOGIN_CONTROLLER[username] = False
  166. return email_login(username, password, name, ID_MSG)
  167.  
  168. if not load_url('',
  169. EC.presence_of_element_located(
  170. (By.XPATH, '//*[@id="foldertree_INBOX"]//span')),
  171. 'Cannot find Inbox button'):
  172. print('An error occured, Please retry')
  173.  
  # Close the craigslist post tab, then return focus to the webmail tab.
  174. switch_to_post_tab('post.craigslist', lambda: driver.close())
  175. switch_to_email_tab(username)
  176. LOGIN_CONTROLLER[username] = True
  177. elem = driver.find_element_by_xpath('//*[@id="foldertree_INBOX"]//span')
  # Note: this tests whether the element text is a substring of 'Inbox'.
  178. if not elem.text in 'Inbox':
  179. print('Godaddy updated their code, Please update email login code')
  180. elem.click()
  181. if not load_url('',
  182. EC.presence_of_element_located(
  183. (By.XPATH, '//*[@id="tbody_mailindex"]')),
  184. 'Inbox display'):
  185. print('An error occured, Please retry')
  186.  
  # Poll once per second until the mail table has more than one row.
  # NOTE(review): no timeout is visible for this loop.
  187. inbox = None
  188. messages = []
  189. while len(messages) <= 1:
  190. time.sleep(1)
  191. inbox = driver.find_element_by_id("tbody_mailindex")
  192. messages = inbox.find_elements_by_tag_name('tr')
  193. print("Looking for unread messages with the keyword: " + ID_MSG)
  194. print("\nThere are " + str(len(messages)) + " emails.")
  195. for message in messages:
  196. tmp_id = message.get_attribute('id')
  197. # print(tmp_id)
  198. temp = None
  199. try:
  # The span in the third cell of the row holds the text matched against ID_MSG.
  200. temp = driver.find_element_by_xpath(
  201. '//tr[@id="' + tmp_id + '"]/td[3]/div/span')
  202. except Exception as e:
  203. # print(e)
  204. continue
  205. print(temp.text)
  206. if not (ID_MSG in temp.text):
  207. #print ("A tpe continue in email")
  208. continue
  # Rows without 'bold' in their class attribute are skipped
  # (presumably bold marks unread mail).
  209. if (not 'bold' in message.get_attribute('class')):
  210. #print ("B type")
  211. continue
  212. # print('here')
  213. temp.click()
  214. if not load_url('', EC.presence_of_element_located((By.XPATH, '//*[@id="wmMessage"]')), 'Inbox message check', counts=2):
  215. print('An error occured, Please retry')
  216. return email_login(username, password, name, ID_MSG)
  217.  
  # Scan the opened message for the first link pointing at post.craigslist.
  218. wm = driver.find_element_by_id('wmMessage')
  219. links = wm.find_elements_by_tag_name('a')
  220. for link in links:
  221. try:
  222. if 'post.craigslist' in link.get_attribute('href'):
  223. href = link.get_attribute('href')
  224. print('[INFO] Found verification link: ' + href)
  225. link.click()
  226. wait_load()
  227. return href
  # A link without an href yields None, making the `in` test raise TypeError.
  228. except TypeError as e:
  229. pass
  # NOTE(review): this bare except hides every other failure.
  230. except:
  231. pass
  232. return False
  233.  
  234.  
  235. def cr_miscellany(**args):
  236. # dynamically assign variables
  237. for a in args:
  238. exec("%s = '%s'" % (a, args[a]))
  239. try:
  240. del a, args # clear variables assigned
  241. except:
  242. pass
  243.  
  244. current_url = driver.current_url
  245. if current_url.endswith('pn') or current_url.endswith('tou'):
  246. if not load_url('', EC.presence_of_element_located((By.XPATH,
  247. '//*[@class="previewButtons"]')),
  248. 'Accept Terms and Conditions', counts=1):
  249. # signify phone number verification needed
  250. raise Exception('Phone number verification stuff encountered')
  251. # Click the accept terms and conditions button
  252. try:
  253. driver.find_element_by_xpath(
  254. '//section[@class="body"]//section[@class="previewButtons"]//button[@type="submit"]').click()
  255. except:
  256. pass
  257. wait_load()
  258.  
  259. # necessary in case you're logged out
  260. if not load_url('', EC.visibility_of_element_located((By.XPATH, '//*[@name="an"]')), 'Continue with credit card', counts=1):
  261. print('An error occured, Please retry')
  262. return
  263. try:
  264. driver.find_element_by_xpath(
  265. '//form[1]//input[@value="ccard"]').click()
  266. wait_load()
  267. except:
  268. pass
  269. try:
  270. driver.find_element_by_xpath(
  271. '//form[1]//button[@type="submit"]').click()
  272. except:
  273. pass
  274.  
  275. # deal with confirmation after billing page
  276. current_url = driver.current_url
  277. if (('craigslist.org' in current_url) and ('You should receive an email shortly'
  278. in driver.page_source)) or current_url.endswith('mailoop'):
  279. print("Email verification required.. ")
  280. state = email_login(email, password, ID_MSG=title)
  281. if state == False:
  282. raise Exception('Error occured while verifying email')
  283. wait_load()
  284. try:
  285. for handle in driver.window_handles:
  286. driver.switch_to.window(handle)
  287. if state in current_url:
  288. break
  289. except Exception as e:
  290. print(e)
  291.  
  292. if BLOCKED_STUB in driver.page_source:
  293. print('Attempting to use pre-coded algorithm to correct IP block')
  294.  
  295. driver.back()
  296. time.sleep(2)
  297. driver.forward()
  298.  
  299.  
  # complete_post: fill in the credit-card form for a drafted post and
  # submit the purchase.
  #
  # Parameters:
  #   url, scrnsht    - not referenced anywhere in the body below
  #   infos           - mapping of form-field name -> value; the key 'pcard'
  #                     is skipped
  #   email, password - passed to craigslist_login() and, with title, on to
  #                     load_url() as recovery arguments
  #   title           - forwarded as ID_MSG
  # Returns:
  #   the posting id captured by POSTING_REGEX group 1 from the invoice table,
  #   False when filling or submitting raised or the purchase was not judged
  #   successful, and (implicitly) None when the purchase looked successful
  #   but no invoice cell matched.
  # POSTING_REGEX is not defined in this file; presumably it comes from the
  # `from settings import *` star import.
  # NOTE(review): indentation was lost in this paste; in particular it is not
  # recoverable whether each click on 'go' sits inside the preceding `if`.
  300. def complete_post(url, infos, scrnsht, email, password, title):
  301. SUCCESSFUL_PURCHASE = False
  # NOTE(review): `is 0` compares identity with an int literal; `== 0` or
  # `not infos` is the reliable spelling.
  302. if len(infos) is 0:
  303. print('Empty credit card info sent')
  304.  
  305. # attempt login incase login wasn't persisted
  306. craigslist_login(email, password)
  307.  
  308. # click continue on billing page
  309. if not load_url('', EC.presence_of_element_located((By.NAME, 'go')), 'Continue with credit card button', counts=5, email=email, password=password, ID_MSG=title):
  310. print('An error occured, Please retry')
  311. driver.find_element_by_name('go').click()
  312.  
  313. if not load_url('', EC.presence_of_element_located((By.XPATH, '//*[@id="ccinfo"]')), 'Credit card information'):
  314. print('An error occured, Please retry')
  315. driver.find_element_by_name('go').click()
  316. try:
  # One pass per form field named by a key of infos.
  317. for info in infos:
  318. if info == 'pcard':
  319. continue
  320. if not load_url('', EC.presence_of_element_located((By.NAME, info)), 'Providing credit card information: ' + info):
  321. print('An error occured, Please retry')
  322.  
  # NOTE(review): `info in 'cardCountry'` is a substring test, so any key
  # that is a fragment of 'cardCountry' takes this branch as well.
  323. if (info in 'cardCountry'):
  324. ccntr = driver.find_elements_by_name(info)
  325. for ctr in ccntr:
  # Only the option whose value is 'US' is ever clicked.
  326. if ctr.get_attribute('value') == 'US':
  327. print('[INFO] Filling: ' + info + ": " + infos[info])
  328. ctr.click()
  329. print('[INFO] Filled: ' + info + ": " + infos[info])
  330. break
  331. continue
  # NOTE(review): same substring caveat as for 'cardCountry' above.
  332. if (info in 'cardState'):
  333. cst = driver.find_element_by_name(info)
  334. st = infos[info]
  335. stag = Select(cst)
  336. stag.select_by_visible_text(infos[info])
  # Retries reading the selected option until it stops raising.
  # NOTE(review): no retry limit is visible for this loop.
  337. while True:
  338. try:
  339. print('[INFO] Filling: ' + info + ": " + infos[info])
  340. stag.first_selected_option
  341. print('[INFO] Filled: ' + info + ": " + infos[info])
  342. break
  343. except:
  344. continue
  345. continue
  346. print('Filling: ' + info + ": " + str(infos[info]))
  347. driver.find_element_by_name(info).send_keys(infos[info])
  348. print('Filled: ' + info + ": " + str(infos[info]))
  349. except Exception as e:
  # NOTE(review): this prints the traceback object's repr, not a formatted
  # traceback.
  350. tb = sys.exc_info()[2]
  351. print(tb)
  352. return False
  353. purchase_button = driver.find_element_by_xpath(
  354. '//div[@id="presubmit"]/button')
  355. try:
  356. purchase_button.click()
  357. wait_load()
  # Success is inferred from the purchase button no longer being found.
  358. if not load_url('', EC.presence_of_element_located((By.XPATH, '//div[@id="presubmit"]/button')), 'Using guess to determine if post was successful', counts=1):
  359. SUCCESSFUL_PURCHASE = True
  360. print(
  361. 'Counting the post as success. Checks passed.. If this is wrong, please contact admin')
  362. except Exception as e:
  363. print(e)
  364. return False
  365. #driver.get_screenshot_as_file( str(time()) + '_SUCCESS_' + infos['cardLastName'])
  366. if SUCCESSFUL_PURCHASE:
  # Scan every invoice cell for the posting id.
  367. invoice_elem = driver.find_elements_by_xpath(
  368. '//table[@id="postingInvoice"]//td')
  369. for inv_elem in invoice_elem:
  370. inv_text = inv_elem.text
  371. m = re.search(POSTING_REGEX, inv_text)
  372. if m:
  373. posting_id = m.group(1)
  374. return posting_id
  375. print('[DEBUG] Posting ID not found.')
  376. else:
  377. return False
  378.  
  379.  
  # NOTE(review): eval() only accepts expressions and `break` is a statement,
  # so calling this function raises SyntaxError. A function also cannot
  # break out of its caller's loop. Nothing in the visible part of the file
  # calls it.
  380. def breakloop():
  381. eval('break')
  382.  
  383.  
  384. def is_logged_in(username):
  385. return (username == CRAIGSACC_CONTROLLER[0])
  386.  
  387.  
  388. CRAIGSACC_CONTROLLER = ['']
  389.  
  390.  
  391. def craigslist_login(username, password):
  392. try:
  393. if (not is_logged_in(username)):
  394. try:
  395. driver.find_element_by_partial_link_text('log in').click()
  396. except:
  397. driver.get('https://accounts.craigslist.org/login/home')
  398. if load_url('', EC.presence_of_element_located((By.ID,
  399. 'paginator')), '', counts=0):
  400. try:
  401. driver.find_element_by_partial_link_text('log out').click()
  402. except:
  403. pass
  404. else:
  405. print('You are already logged into current city')
  406. return
  407.  
  408. driver.find_element_by_id('inputEmailHandle').send_keys(username)
  409. driver.find_element_by_id('inputPassword').send_keys(password)
  410. wait_load()
  411. driver.find_element_by_xpath(
  412. '//*[@class="accountform-actions"]/button').click()
  413. if not load_url('', EC.presence_of_element_located((By.ID,
  414. 'paginator')), '', counts=5):
  415. print('Login unsuccessful')
  416. raise Exception
  417. CRAIGSACC_CONTROLLER[0] = username
  418. except:
  419. print("Encountered an error while I was logging in, Proceeding with error..")
  420. traceback.print_exception(*sys.exc_info())
  421.  
  422.  
  423. def getinput(prompt, typeCheck=False, obj=None, timeout=30.0):
  424. prompt = prompt + "\n\t\t/QUIT to cancel command prompt\n<> "
  425. retval = False
  426. try:
  427. while True:
  428. inp = input(prompt)
  429. if '/QUIT' not in inp.strip().upper():
  430. return retval
  431. if typeCheck:
  432. try:
  433. retval = obj(inp)
  434. break
  435. except:
  436. continue
  437. else:
  438. retval = inp
  439. break
  440. finally:
  441. return retval
  442.  
  443.  
  444. def prettify(inp):
  445. if inp is None:
  446. return ''
  447. inp = REPLACE(r'[\r]{1,}[\n]{1,}', '\n', inp)
  448. return inp
  449.  
  450.  
  # craigslist_handler: drive the browser through the craigslist posting
  # wizard. It logs in, opens `url`, clicks "post to classifieds", picks the
  # type, category and subarea, fills in the posting form, uploads images and
  # clicks the final 'go' button. No value is returned.
  #
  # Parameters (as used in the body):
  #   url       - page loaded first; the code waits for an element with id 'post'
  #   typ, cat  - matched case-insensitively as substrings of option labels
  #   market    - typed into the 'GeographicArea' field
  #   subarea   - substring matched against subarea labels; when falsy the
  #               first option is clicked
  #   title, body, price, postal, pn, sale_man, sale_mod, sale_size
  #             - values typed into the posting form
  #   randomize - images are shuffled when int(randomize) == 1
  #   email, password - craigslist login; email also fills FromEMail/ConfirmEMail
  #   images    - '<==>'-separated file paths, or a falsy value for none
  #
  # NOTE(review): indentation was lost in this paste, so the nesting of the
  # statements below has to be inferred from context.
  451. def craigslist_handler(url, typ, cat, market, subarea, title, body, randomize, postal, price, email, password, images, pn, sale_man, sale_mod, sale_size):
  452. # images='/root/Documents/slar/crawler/A.jpeg<==>/root/Documents/slar/crawler/B.jpeg<==>/root/Documents/slar/crawler/C.jpeg'
  453. craigslist_login(email, password)
  # NOTE(review): this string comes after a statement, so Python does not
  # treat it as the function's docstring.
  454. """Main workhorse of the script. This function uses Selenium to automate
  455. posting in the supplied location and category. """
  456. if not load_url(url, EC.presence_of_element_located((By.ID, 'post')),
  457. "I'm unable to find the link to post to classifieds\n", counts=5):
  458. print('An error occured, Please retry')
  459.  
  460. # if not load_url('https://www.craigslist.org/about/sites',
  461. # EC.presence_of_element_located((By.PARTIAL_LINK_TEXT, market)),
  462. # 'location url'):
  463. # print('An error occured, Please retry')
  464. #
  # Three ways of clicking the post link are tried in turn: link text, a
  # full xpath, then the bare id.
  465. try:
  466. driver.find_element_by_partial_link_text('post to classifieds').click()
  467. except:
  468. try:
  469. driver.find_element_by_xpath(
  470. '//*[@class="page-container"]//div[@id="leftbar"]//ul[@id="postlks"]//li//a[@id="post"]').click()
  471. except:
  472. driver.find_element_by_id('post').click()
  473. wait_load()
  474. if not load_url('', EC.presence_of_element_located((By.CLASS_NAME, 'picker')),
  475. "Unable to select type", counts=5):
  476. print('An error occured, Please retry')
  477.  
  # --- Step: posting type -------------------------------------------------
  478. select_opts = driver.find_elements_by_xpath(
  479. '//form[@class="picker"]//ul[@class="selection-list"]//label')
  480. print("[INFO] Selecting a type")
  481. for opt in select_opts:
  482. if typ.strip().lower() in opt.find_element_by_class_name('right-side').text.lower():
  483. time.sleep(3)
  484. opt.find_element_by_tag_name('input').click()
  485. break
  486. wait_load()
  # If the URL still ends in 'type' after a further 5 seconds, click 'go'.
  487. if driver.current_url.endswith('type'):
  488. time.sleep(5)
  489. if driver.current_url.endswith('type'):
  490. driver.find_element_by_name('go').click()
  491. print("[INFO] Selected type: " + typ)
  492.  
  # --- Step: category -----------------------------------------------------
  # Presumably a wait loop that spins until the URL ends in 'cat'.
  # NOTE(review): no sleep or timeout is visible in it.
  493. while True:
  494. if(driver.current_url.endswith('cat')):
  495. break
  496. print("[INFO] Selecting a category")
  497. select_opts = driver.find_elements_by_xpath(
  498. '//form[@class="picker"]//ul[@class="selection-list"]//label')
  499. for opt in select_opts:
  500. if cat.strip().lower() in opt.find_element_by_class_name('right-side').text.lower():
  501. wait_load()
  502. opt.find_element_by_tag_name('input').click()
  503. break
  504. wait_load()
  505. if driver.current_url.endswith('cat'):
  506. time.sleep(5)
  507. if driver.current_url.endswith('cat'):
  508. driver.find_element_by_name('go').click()
  509. print("[INFO] Selected category: " + cat.strip())
  510.  
  # --- Step: subarea and any further picker pages ---------------------------
  # MARKET records that a subarea option has already been clicked.
  511. MARKET = False
  512. msg = "Subarea is required"
  513. count = 2
  514. print(subarea)
  515. try:
  # Keeps handling picker pages until the URL ends in 'edit' or no picker
  # form is found.
  516. while True:
  517. if driver.current_url.endswith('edit'):
  518. break
  519. if not load_url('', EC.presence_of_element_located((By.CLASS_NAME, 'picker')),
  520. msg, counts=count):
  521. break
  522. if(driver.current_url.endswith('subarea')) and (not MARKET):
  523. print('[INFO] Selecting a subarea')
  524. if not subarea:
  525. elems = driver.find_elements_by_xpath(
  526. '//form[@class="picker"]//ul[@class="selection-list"]//label//input')
  527. print('Subarea is required but isn\'t provided')
  528. print('Using pre-coded guess algorithm to pick one subarea')
  529. elems[0].click()
  530. MARKET = True
  531. continue
  532. select_opts = driver.find_elements_by_xpath(
  533. '//form[@class="picker"]//ul[@class="selection-list"]//label')
  534. for opt in select_opts:
  535. if subarea.strip().lower() in opt.text.lower():
  536. opt.find_element_by_tag_name('input').click()
  537. MARKET = True
  538. break
  # No label matched the requested subarea: fall back to the first option.
  539. if not MARKET:
  540. elems = driver.find_elements_by_xpath(
  541. '//form[@class="picker"]//ul[@class="selection-list"]//label//input')
  542. if len(elems) > 0:
  543. elems[0].click()
  544. msg = "Using pre-coded guess algorithm to check for any unexpected input from craigslist"
  545. count = 1
  546. print("[INFO] Selected subarea: " + subarea)
  547. else:
  548. if driver.current_url.endswith('edit'):
  549. break
  550. if driver.current_url.endswith('subarea') and MARKET:
  551. continue
  # Any other picker page: click its first option.
  552. elems = driver.find_elements_by_xpath(
  553. '//form[@class="picker"]//ul[@class="selection-list"]//label//input')
  554. if len(elems) > 0:
  555. elems[0].click()
  556. else:
  557. break
  # NOTE(review): every failure in the subarea step is silently discarded here.
  558. except:
  559. pass
  560.  
  # --- Step: posting form ---------------------------------------------------
  561. wait_load()
  # Presumably a wait loop that spins until the URL ends in 'edit'.
  # NOTE(review): no sleep or timeout is visible in it.
  562. while True:
  563. if(driver.current_url.endswith('edit')):
  564. break
  565. print('[INFO] Filling post information')
  566. if not load_url('', EC.presence_of_element_located((By.ID, 'postingForm')),
  567. "Unable to get the posting form", counts=5):
  568. print('An error occured, Please retry')
  569.  
  570. driver.find_element_by_name('PostingTitle').send_keys(title)
  571. driver.find_element_by_name('Ask').send_keys(price)
  572. driver.find_element_by_name('postal').send_keys(postal)
  573. driver.find_element_by_name('GeographicArea').send_keys(market)
  574. driver.find_element_by_name('PostingBody').send_keys(prettify(body))
  575. driver.find_element_by_name('sale_manufacturer').send_keys(sale_man)
  576. driver.find_element_by_name('sale_model').send_keys(sale_mod)
  577. driver.find_element_by_name('sale_size').send_keys(sale_size)
  578. driver.find_element_by_name('wantamap').click()
  # The e-mail fields are optional: a failure to find them is ignored.
  579. try:
  580. driver.find_element_by_name('FromEMail').send_keys(prettify(email))
  581. driver.find_element_by_name('ConfirmEMail').send_keys(prettify(email))
  582. except:
  583. pass
  584. driver.find_element_by_xpath(
  585. '//*[@id="contact_phone"]').send_keys(pn)
  586. time.sleep(2)
  587. print('[INFO] Sending post information')
  588. driver.find_element_by_name('go').click()
  589. wait_load()
  # The triple-quoted string below is disabled location-confirmation code
  # kept as a bare string literal; it is never executed.
  590. '''
  591. while True:
  592. if(driver.current_url.endswith('geoverify')):
  593. break
  594. wait_load()
  595. print('[INFO] Confirming post location')
  596. if not load_url('', EC.presence_of_element_located((By.ID, 'leafletForm')),
  597. "Unable to get the posting form", counts=5):
  598. print('An error occured, Please retry')
  599.  
  600. driver.find_element_by_id('leafletForm').find_element_by_xpath('//button[@class="continue bigbutton"]').click()
  601. print("[INFO] Post location confirmed ")
  602. wait_load()
  603. '''
  # --- Step: image upload ---------------------------------------------------
  604. try:
  605. if load_url('', EC.presence_of_element_located((By.ID, 'uploader')),
  606. "Unable to get the posting images", counts=2):
  # Clicks the element with id 'classic' when it exists.
  607. try:
  608. driver.find_element_by_id('classic').click()
  609. except:
  610. pass
  611. if not load_url('', EC.presence_of_element_located((By.XPATH, '//form[@class="add"]//input[@name="file"]')),
  612. "Unable to get the posting form", counts=5):
  613. print('An error occured while processing upload, Please retry')
  614. if images:
  615. image_arr = images.split('<==>')
  616. if int(randomize) == 1:
  617. random.shuffle(image_arr)
  618. for image in image_arr:
  619. image = image.strip()
  # Paths are forced to be absolute by prepending '/' when it is missing.
  620. if not image.startswith('/'):
  621. image = '/' + image
  622. if not (os.path.exists(image)):
  623. print('[DEBUG] Image: ' + image +
  624. ' does not exist.. Proceeding')
  625. continue
  626. try:
  627. inp = driver.find_element_by_xpath(
  628. '//input[@type="file"]')
  629. print('Uploading image: ' + image)
  630. inp.clear()
  631. inp.send_keys(image)
  632. if not load_url(driver.current_url + '?s=editimage', EC.presence_of_element_located((By.XPATH, '//form[@class="add"]')), "Upload button..", counts=6):
  633. print('An error occured while locating upload button')
  634. driver.find_element_by_xpath(
  635. '//form[@class="add"]').submit()
  636. if not load_url('', EC.presence_of_element_located((By.XPATH, '//div[@class="imgwrap"]//img')), "Uploading image..", counts=6):
  637. print(
  638. 'An error occured while processing upload, Please retry')
  639. print('Image: ' + image + ' uploaded')
  640. except StaleElementReferenceException:
  641. print('Stale reference')
  642. except Exception as e:
  643. print('Error: ' + str(e) + ' occured while uploading.')
  # NOTE(review): sys.exc_info()[2] is a traceback object; concatenating
  # it to a str on the next line raises TypeError.
  644. tb = sys.exc_info()[2]
  645. print("[DEBUG] More info:\n\t\t " + tb)
  646. driver.find_element_by_xpath(
  647. '//button[@class="done bigbutton"]').click()
  648. wait_load()
  # Any failure above still tries to leave the upload page via the done button.
  649. except:
  650. driver.find_element_by_xpath(
  651. '//button[@class="done bigbutton"]').click()
  652. wait_load()
  653. print('[DEBUG] Image(s) not uploaded. Proceeding that way')
  654. traceback.print_exception(*sys.exc_info())
  # --- Step: publish the draft ----------------------------------------------
  655. print('Publishing..')
  656. if not load_url('', EC.presence_of_element_located((By.NAME, 'go')), "Publish button", counts=5):
  657. print('An error occured while processing upload, Please retry')
  658. driver.find_element_by_name('go').click()
  659. print("\nI've published a draft to post the ad: " + title)
  660. print("Proceeding to verify the post.. Please be patient")
  660. print("Proceeding to verify the post.. Please be patient")
  661.  
  662.  
  663. def publish_post(url, typ, cat, market, subarea, title, body, randomize, postal, price,
  664. email='sales39@containerking.com', password='falcon00', info={}, scrnsht=str(time.time()) + 'screenshot.png',
  665. images=False, pn='', sale_man='', sale_mod='', sale_size=''):
  666. # [TODO] handle_unfinished(info, scrnsht)
  667. try:
  668. status = False
  669. post_time = time.time()
  670. craigslist_handler(url, typ, cat, market, subarea, title, body, randomize,
  671. postal, price, email, password, images, pn, sale_man,
  672. sale_mod, sale_size)
  673. # [TODO] put_job(href + '[===]' + email + '[===]' + password + '[===]' + title)
  674. state = email_login(email, password)
  675. if state == False:
  676. completed_time = time.time()
  677. return status, (completed_time - post_time)
  678. wait_load()
  679. try:
  680. for handle in driver.window_handles:
  681. driver.switch_to.window(handle)
  682. if state in driver.current_url:
  683. break
  684. except Exception as e:
  685. print(e)
  686. status = complete_post(state, info, scrnsht, email, password, title)
  687. completed_time = time.time()
  688. return status, (completed_time - post_time)
  689. except:
  690. traceback.print_exception(*sys.exc_info())
  691. return False, None
  692.  
  693.  
  694. def cleanup_tabs():
  695. for handle in driver.window_handles:
  696. driver.switch_to.window(handle)
  697. if 'post.craigslist' in driver.current_url:
  698. print('[INFO] Closing: ' + driver.title)
  699. driver.close()
  700.  
  701.  
  702. def close_tabs():
  703. for handle in driver.window_handles:
  704. driver.switch_to.window(handle)
  705. driver.close()
  706.  
  707.  
  708. # all post objects go in here
  709. POSTS_CONTROLLER = {}
  710.  
  711.  
  712. def check_market(post_title, post_id, market_url, city, limit=5):
  713. if limit == 0:
  714. limit = 5
  715. print('[INFO] Checking market if ' + str(post_title) +
  716. " (" + str(post_id) + ') post is in ' + city + ' top ' + str(limit))
  717. if not load_url(market_url, EC.presence_of_element_located((By.ID, 'sortable-results')), "Markets result info", counts=5):
  718. print('An error occured while checking the market: ' +
  719. market_url + ', Please retry')
  720. total_res = len(driver.find_elements_by_xpath(
  721. '//div[@id="sortable-results"]/ul[@class="rows"]/li[@class="result-row"]/p[@class="result-info"]'))
  722.  
  723. print('[INFO] I can see ' + str(total_res) +
  724. ' ads on ' + city + '\'s page')
  725.  
  726. XP = '//div[@id="sortable-results"]/ul[@class="rows"]/li[@class="result-row"][position() < ' + \
  727. str(limit + 1) + ']'
  728. top_posts = driver.find_elements_by_xpath(XP)
  729. print('[INFO] Looking for the post: ' + str(post_title) +
  730. " (" + str(post_id) + ') in ' + city)
  731. for i in range(0, len(top_posts)):
  732. if i >= limit:
  733. break
  734. top_post = top_posts[i].get_attribute('data-pid')
  735. if top_post in str(post_id):
  736. print(post_id + ' in top: ' + str(limit))
  737. return True
  738. print (top_post)
  739. return False
  740.  
  741.  
  # One-element list holding the database connection object used by the
  # *_db helpers below; None until a connection is stored in slot 0.
  742. SERVER_CONNECTOR = [None]
  743.  
  744.  
  745. def update_posts_db(city, state, cat, time, pid, posting_id, ad_id):
  746. try:
  747. if not SERVER_CONNECTOR[0]:
  748. print('[DEBUG] Cannot find the server connector object')
  749. cur = SERVER_CONNECTOR[0].cursor()
  750. cur.execute("INSERT INTO CraigslistPoster.posts(`client`, `postingID`, `post_id`, `ad`, `market_city`, `market_state`, `category`, `POST_TIME`) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)",
  751. (current_client[0], str(posting_id), str(pid), str(ad_id), str(city), str(state), str(cat), time))
  752. #print('Post db updated %s'%(str(postingID)))
  753. SERVER_CONNECTOR[0].commit()
  754. except:
  755. traceback.print_exception(*sys.exc_info())
  756.  
  757.  
  758. def handle_dump(pdump):
  759. return publish_post(url=pdump['ad_url'], typ=pdump['typ'],
  760. cat=pdump['cat'], market=pdump['market'], subarea=pdump['subarea'], title=pdump['title'],
  761. body=pdump['body'], randomize=pdump['random'],
  762. postal=pdump['postal'], price=pdump['price'], email=pdump['email'],
  763. password=pdump['password'], images=pdump['images'], info=pdump['info'],
  764. pn=pdump['pn'], sale_man=pdump['sale_man'], sale_mod=pdump['sale_mod'],
  765. sale_size=pdump['sale_size'])
  766.  
  767.  
  768. def check_posts_db(pid, city, delay):
  769. try:
  770. if not SERVER_CONNECTOR[0]:
  771. print('[DEBUG] Cannot find the server connector object')
  772. cur = SERVER_CONNECTOR[0].cursor(buffered=True)
  773. cur.execute(
  774. "SELECT * FROM `posts` WHERE `market_city` LIKE '%%%s%%'" % (city))
  775. NON_EMPTY = False
  776. for x in cur:
  777. NON_EMPTY = True
  778. if not NON_EMPTY:
  779. print('Not yet registered.. Proceeding')
  780. return True
  781. cur.execute(
  782. "SELECT POST_TIME, postingID FROM CraigslistPoster.posts WHERE `post_id` = %s ORDER BY `POST_TIME` DESC LIMIT 1", (str(pid),))
  783. for then, pid in cur:
  784. now = datetime.datetime.now()
  785. if ((now - then).total_seconds()) > delay:
  786. return pid
  787. else:
  788. print('%d minutes left..' %
  789. ((delay / 60) - ((now - then).total_seconds() / 60)))
  790. return False
  791. except:
  792. traceback.print_exception(*sys.exc_info())
  793.  
  794.  
  795. # Unsuccessful ads
  796. US_ADS = []
  797.  
  798. # Skipped ads
  799. SK_ADS = []
  800.  
  801. # Successful ads
  802. SU_ADS = []
  803.  
  804. # Incomplete ads
  805. INC_ADS = []
  806.  
  807.  
  # One-element flag list: handle_market() sets slot 0 to True while it is
  # posting an ad and waits in a sleep loop while it is already True.
  808. AD_IN_PROGRESS = [False]
  809.  
  810.  
  811. def handle_market(ads):
  812. status = None
  813. t_o_c = None
  814. murl = ads['m_url']
  815. mlim = ads['m_lim']
  816. delay = ads['del'] * 60
  817. city = ads['m_cit']
  818. state = ads['m_sta']
  819. post_id = ads['pid']
  820. ad_id = ads['ad_id']
  821. INC_ADS.append("City: %s; PID: %s" % (city, str(post_id)))
  822. try:
  823. postingID = check_posts_db(post_id, city, delay)
  824. if postingID:
  825. if (not check_market(ads['title'], postingID, murl, city, mlim)) or (postingID is True):
  826. while AD_IN_PROGRESS[0]:
  827. print('[INFO] An ad in progress.. Queueing')
  828. time.sleep(5)
  829. AD_IN_PROGRESS[0] = True
  830. status, t_o_c = handle_dump(ads)
  831. else:
  832. print('[INFO] %s\'s not ready. Running to the next city :)' %
  833. (city))
  834. INC_ADS.pop()
  835. SK_ADS.append("City: %s; PID: %s" % (city, str(post_id)))
  836. return
  837. else:
  838. print('[INFO] %s\'s not ready. Running to the next city :)' %
  839. (city))
  840. INC_ADS.pop()
  841. SK_ADS.append("City: %s; PID: %s" % (city, str(post_id)))
  842. return
  843. if not status:
  844. print('[ERROR] An error occured while posting to: ' + city)
  845. AD_IN_PROGRESS[0] = False
  846. INC_ADS.pop()
  847. US_ADS.append("City: %s; PID: %s" % (city, str(post_id)))
  848. return
  849. else:
  850. # status here is posting_id [TODO] Clean up later
  851. update_posts_db(
  852. city, state, ads['cat'], datetime.datetime.now(), post_id, status, ad_id)
  853. AD_IN_PROGRESS[0] = False
  854. except:
  855. traceback.print_exception(*sys.exc_info())
  856. INC_ADS.pop()
  857. US_ADS.append("City: %s; PID: %s" % (city, str(post_id)))
  858. return
  859. print('Ad posted to: {0} in: {1}'.format(city, t_o_c))
  860. INC_ADS.pop()
  861. SU_ADS.append("City: %s; PID: %s" % (city, str(post_id)))
  862.  
  863.  
  864. def manage_posts(ads):
  865. print('%d ads/market recorded for: %s' % (len(ads), current_client[0]))
  866. if (len(ads) == 0) or (not authorize_post()):
  867. print("[INFO] Posts not authorized! Moving on..")
  868. return
  869. for i in range(0, len(ads)):
  870. print('======================= ad: %d ========================' %
  871. (i + 1), end="\n\n")
  872. try:
  873. handle_market(ads[i])
  874. print("Leaving " + str(ads[i]['m_cit']) +
  875. " Delay time: " + str(ads[i]['del']) + "mins")
  876. except:
  877. pass
  878.  
  879.  
  880. def authorize_post():
  881. if ('Prepaid Ads'.lower() in user_data[0]['paymentPlan'][0].lower()) and (user_data[0]['purchasedAds'][0] <= 0):
  882. return False
  883. return True
  884.  
  885.  
  886. def decrease_posts_left():
  887. ads_left = user_data[0]['purchasedAds'][0]
  888. ads_left_temp = ads_left - 1
  889. try:
  890. if not SERVER_CONNECTOR[0]:
  891. print('[DEBUG] Cannot find the server connector object')
  892. cur = SERVER_CONNECTOR[0].cursor()
  893. cur.execute("UPDATE users SET purchasedAds = %d WHERE client = %s",
  894. (ads_left_temp, current_client[0]))
  895. print('User db updated %s:%d'%(current_client[0], ads_left_temp))
  896. SERVER_CONNECTOR[0].commit()
  897. ads_left = ads_left_temp
  898. except:
  899. print('[ERROR] Unable to update purchased Ads')
  900. traceback.print_exception(*sys.exc_info())
  901.  
  902.  
  903. def read_data_from_sql_db(connection, sql):
  904. df = RSQL(con=connection, sql=sql)
  905. dataframe_dict = df.to_dict()
  906. return dataframe_dict
  907.  
  908.  
  909. def prettify(inp):
  910. if inp is None:
  911. return ''
  912. inp = inp.strip()
  913. inp = inp.replace('\r', '')
  914. return inp
  915.  
  916.  
  917. def handle_ads_data(dic):
  918. total_ads = len(dic['search_url'].values())
  919. if total_ads < 0:
  920. return False
  921. ad_dict = {'typ': ['type'], 'cat': ['category'], 'market': ['tags'], 'subarea': ['market_area'], 'title': ['title'],
  922. 'body': ['description'], 'password': ['password'], 'price': ['price'], 'postal': ['postalcode'],
  923. 'email': ['email'], 'images': ['images'], 'sale_man': ['tag1'], 'sale_mod': ['tag2'],
  924. 'sale_size': ['tag3'], 'pid': ['market', int], 'del': ['delay', float], 'm_url': ['search_url'],
  925. 'm_lim': ['post_above', int], 'm_cit': ['market_city'], 'm_sta': ['market_state'],
  926. 'random': ['randomize_images', int], 'ad_url': ['post_url'], 'pn': ['phone_no'], 'ad_id': ['id', int]}
  927. return parse_db_item(dic, total_ads, ad_dict)
  928.  
  929.  
  930. def handle_cc_data(dic, ads):
  931. total_ads = len(dic['number'].values())
  932. if total_ads < 0:
  933. return False
  934. ad_dict = {'cardNumber': ['number', int], 'cvNumber': ['cvv', int], 'expMonth': ['exp_month', int],
  935. 'expYear': ['exp_year', int], 'cardFirstName': ['firstname'], 'cardLastName': ['lastname'],
  936. 'cardAddress': ['address'], 'cardCity': ['city'], 'cardState': ['state'], 'cardPostal': ['postalcode', int],
  937. 'contactPhone': ['phone'], 'pcard': ['primarycard', bool]}
  938. ret = parse_db_item(dic, total_ads, ad_dict)
  939. for i in range(0, len(ret)):
  940. if ret[i]['pcard'] == False:
  941. continue
  942. ret[i]['cardCountry'] = "US"
  943. ret[i]['contactName'] = ret[i]['cardFirstName'] + \
  944. " " + ret[i]['cardLastName']
  945. cardinfo = ret[i]
  946. for i in range(0, len(ads)):
  947. ads[i]["info"] = cardinfo
  948. break
  949. return ads
  950.  
  951.  
  952. def parse_db_item(dic, total_ads, parse_dict):
  953. ret = list()
  954. for i in range(0, total_ads):
  955. retval = dict()
  956. for item in parse_dict:
  957. parse_val = parse_dict[item]
  958. item_idx = parse_val[0]
  959. item_val = None
  960. try:
  961. item_val = dic[item_idx][i]
  962. except:
  963. pass
  964. if item_val is None:
  965. if len(parse_val) > 1:
  966. if parse_val[1] is int:
  967. item_val = 0
  968. if parse_val[1] is float:
  969. item_val = 0.0
  970. else:
  971. item_val = ''
  972. if isinstance(item_val, str):
  973. item_val = item_val.strip()
  974. if len(parse_val) > 1:
  975. try:
  976. parse_val[1](item_val)
  977. item_val = parse_val[1](item_val)
  978. except:
  979. pass
  980. retval[item] = item_val
  981. ret.append(retval)
  982. return ret
  983.  
  984.  
  985. # provide info about unsuccessful, successful and incomplete ads
  986. def account():
  987. print('UNSUCCESSFUL ADS: %d.' % (len(US_ADS)))
  988. for i in range(0, len(US_ADS)):
  989. # print("\t\t%s"%(US_ADS[i]))
  990. pass
  991.  
  992. print('INCOMPLETE ADS: %d.' % (len(INC_ADS)))
  993. for i in range(0, len(INC_ADS)):
  994. # print("\t\t%s"%(INC_ADS[i]))
  995. pass
  996.  
  997. print('SKIPPED ADS: %d.' % (len(SK_ADS)))
  998. for i in range(0, len(SK_ADS)):
  999. # print("\t\t%s"%(SK_ADS[i]))
  1000. pass
  1001.  
  1002. print('SUCCESSFUL ADS: %d.' % (len(SU_ADS)))
  1003. for i in range(0, len(SU_ADS)):
  1004. # print("\t\t%s"%(SU_ADS[i]))
  1005. pass
  1006.  
  1007.  
  1008. def initAll():
  1009. global US_ADS, SK_ADS, SU_ADS, INC_ADS
  1010. # Unsuccessful ads
  1011. US_ADS = []
  1012.  
  1013. # Skipped ads
  1014. SK_ADS = []
  1015.  
  1016. # Successful ads
  1017. SU_ADS = []
  1018.  
  1019. # Incomplete ads
  1020. INC_ADS = []
  1021.  
  1022.  
  1023. '''
  1024. Get the arguments from the shell
  1025. '''
  1026.  
  1027.  
  1028. def get_args():
  1029. parser = argparse.ArgumentParser(
  1030. description='Run craigslist till they give up')
  1031. group = parser.add_argument_group('The following are required')
  1032. group.add_argument('--client', '-c', metavar="CLIENT", type=str, nargs=1,
  1033. help="specify the clients name\n", required=True)
  1034. args = parser.parse_args()
  1035. return args
  1036.  
  1037.  
  1038. import argparse
  1039. current_client = ['']
  1040. user_data = [{}]
  1041.  
  1042. def main():
  1043. initAll()
  1044. print('')
  1045. print('[INFO] Trying to reach the server')
  1046. conn = CONNECT(user=DB_USER, password=DB_PASS,
  1047. database=DB_NAME, port=DB_PORT, host=DB_HOST)
  1048. print('Server reached..')
  1049. print('')
  1050. SERVER_CONNECTOR[0] = conn
  1051. current_client[0] = get_args().client[0]
  1052. container_data = read_data_from_sql_db(
  1053. conn, sql="SELECT ads.id, ads.randomize_images, ads.market, ads.client, ads.type, ads.category, ads.tags, ads.tag1, ads.tag2, ads.tag3, ads.title, ads.description, ads.price, ads.postalcode, ads.email, ads.password, ads.images, markets.market_city, markets.market_state, markets.phone_no, markets.delay, markets.post_above, markets.post_url, markets.search_url, markets.market_area FROM ads INNER JOIN markets ON ads.market = markets.id WHERE ads.active = '1' and ads.client = '%s'" % (current_client[0]))
  1054. payments_data = read_data_from_sql_db(
  1055. conn, sql="SELECT * FROM CraigslistPoster.payments WHERE client = '%s'" % (current_client[0]))
  1056. user_data[0] = read_data_from_sql_db(conn, sql="SELECT * FROM users WHERE client = '%s'" % (current_client[0]))
  1057. ads = handle_ads_data(container_data)
  1058. ads = handle_cc_data(payments_data, ads)
  1059. manage_posts(ads)
  1060. account()
  1061.  
  1062.  
  1063. if __name__ == "__main__":
  1064. try:
  1065. main()
  1066. driver.quit()
  1067. except:
  1068. driver.quit()
  1069.  
  1070. # email_login()
Add Comment
Please, Sign In to add comment