Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python3
- from selenium import webdriver
- from time import sleep
- from selenium.webdriver.chrome.options import Options
- import os
- import csv
- from time import gmtime, strftime
- from bs4 import BeautifulSoup
- import mysql.connector
- from pyvirtualdisplay import Display
def wait_until_csv_file_exists(timeout=None):
    """Block until a ``.csv`` file exists under the current working directory.

    The working directory tree is scanned with ``os.walk`` every half second.

    Args:
        timeout: Optional maximum number of seconds to keep polling. ``None``
            (the default) polls forever, which is the historical behaviour.

    Returns:
        The path of the first ``.csv`` file found, joined with the directory
        it was found in, so it can be opened even when the file lives in a
        sub-directory of the working directory.

    Raises:
        TimeoutError: if ``timeout`` is given and no file appeared in time.
    """
    waits = 0
    while True:
        for root, _dirs, files in os.walk(os.getcwd()):
            for file in files:
                if file.endswith('.csv'):
                    # Join with ``root``: a bare name is only valid for files
                    # sitting directly in the working directory.
                    return os.path.join(root, file)
        if timeout is not None and waits >= timeout:
            raise TimeoutError(
                "no .csv file appeared within {} seconds".format(timeout))
        print("sleeping...." + str(waits))
        sleep(.5)  # give the download time to finish before re-scanning
        waits += .5
def delete_if_csv_exists():
    """Remove every ``.csv`` file found under the current working directory.

    The directory tree is walked with ``os.walk``; each match is removed using
    its path joined with the directory it was found in, so files located in
    sub-directories are deleted as well instead of raising
    ``FileNotFoundError`` (or removing a same-named file in the working
    directory).
    """
    for root, _dirs, files in os.walk(os.getcwd()):
        for file in files:
            if file.endswith('.csv'):
                os.remove(os.path.join(root, file))
def enable_downloading_in_google_headless(driver):
    """Allow file downloads into the current working directory.

    Headless Chrome does not allow file downloads out of the box, see
    https://bugs.chromium.org/p/chromium/issues/detail?id=696481 .
    This is a hacky work-around until chromedriver supports it officially;
    it requires Chrome 62.0.3196.0 or newer.

    Args:
        driver: the Selenium Chrome driver to configure. Its command table is
            extended with a ``send_command`` entry, which is then executed
            with ``Page.setDownloadBehavior``.
    """
    target_dir = os.getcwd()

    # Teach the Selenium command executor about Chrome's "send_command" route.
    command_table = driver.command_executor._commands
    command_table["send_command"] = ("POST", '/session/$sessionId/chromium/send_command')

    behavior = {'behavior': 'allow', 'downloadPath': target_dir}
    payload = {'cmd': 'Page.setDownloadBehavior', 'params': behavior}
    response = driver.execute("send_command", payload)

    # Echo whatever the browser answered, one entry per line.
    print("response from browser:")
    for name in response:
        print("result:" + name + ":" + str(response[name]))
def _sql_quote(value):
    """Render *value* as a single-quoted MySQL string literal.

    Backslashes and single quotes are doubled so scraped text cannot end the
    literal early.
    NOTE(review): this assumes the server's default sql_mode (backslash is an
    escape character); parameterized queries would be the more robust fix but
    would change what this module's callers receive.
    """
    text = str(value).replace("\\", "\\\\").replace("'", "''")
    return "'" + text + "'"


def make_a_query(lot_number, actual_date, description, sales_date, year, high_bid, odometer, fuel_type, color,
                 transmission, engine, drive, vin, lot_cond, v5, has_keys, damage_description, secondary_damage,
                 location_city, make, model_detail, imgs, body):
    """Build the SQL script that stores one vehicle.

    The script consists of one INSERT into ``admin_strona.auta_posts`` (the
    post itself, with ``lot_number`` as ``ID`` and ``post_name``) followed by
    one INSERT into ``admin_strona.auta_postmeta`` per meta key.

    Args:
        lot_number: used as post ID, post name, and ``id_pojazdu`` meta value.
        actual_date: current date, "yyyy-mm-dd hh:mm:ss" style; used for all
            four post date columns.
        description: post content.
        sales_date: stored under ``wpcm_expiration``.
        imgs: str, image URLs separated by commas; stored as ``_car_gallery``.
        make, model_detail: joined with a space to form the post title and
            stored separately as ``marka`` / ``model``.
        (remaining arguments): each is stored verbatim under its meta key.

    Returns:
        str: all statements concatenated, each terminated by ``;`` and a
        newline. Every interpolated value is escaped with ``_sql_quote`` so
        apostrophes or backslashes in scraped text cannot break the SQL.
    """
    lot = _sql_quote(lot_number)
    stamp = _sql_quote(actual_date)

    # (column, SQL literal) pairs for the post row, in column order.
    post_fields = (
        ("ID", lot),
        ("post_author", "'1'"),
        ("post_date", stamp),
        ("post_date_gmt", stamp),
        ("post_content", _sql_quote(description)),
        ("post_title", _sql_quote("{} {}".format(make, model_detail))),
        ("post_excerpt", "''"),
        ("post_status", "'publish'"),
        ("comment_status", "'closed'"),
        ("ping_status", "'closed'"),
        ("post_password", "''"),
        ("post_name", lot),
        ("to_ping", "''"),
        ("pinged", "''"),
        ("post_modified", stamp),
        ("post_modified_gmt", stamp),
        ("post_content_filtered", "''"),
        ("post_parent", "'0'"),
        ("guid", "''"),
        ("menu_order", "'0'"),
        ("post_type", "'wpcm_vehicle'"),
        ("post_mime_type", "''"),
        ("comment_count", "'0'"),
    )

    # (meta_key, SQL literal) pairs, in the order they are emitted.
    # The 'wpcm_pwer_hp' spelling is kept as-is: it is the key the existing
    # data uses.
    meta_fields = (
        ("wpcm_expiration", _sql_quote(sales_date)),
        ("wpcm_sold", "0"),
        ("wpcm_featured", "0"),
        ("wpcm_condition", "'new'"),
        ("wpcm_make", "0"),
        ("wpcm_model", "0"),
        ("wpcm_frdate", _sql_quote(year)),
        ("wpcm_price", _sql_quote(high_bid)),
        ("wpcm_mileage", _sql_quote(odometer)),
        ("wpcm_fuel_type", _sql_quote(fuel_type)),
        ("wpcm_color", _sql_quote(color)),
        ("wpcm_transmission", _sql_quote(transmission)),
        ("wpcm_doors", "0"),
        ("wpcm_engine", _sql_quote(engine)),
        ("wpcm_power_kw", "0"),
        ("wpcm_pwer_hp", "0"),
        ("wpcm_test", "0"),
        ("zawieszenie", _sql_quote(drive)),
        ("vin", _sql_quote(vin)),
        ("kategoria_abi", _sql_quote(lot_cond)),
        ("dowod_rejestracyjny_v5", _sql_quote(v5)),
        ("kluczyki", _sql_quote(has_keys)),
        ("weryfikacja", "'-'"),
        ("uszkodzenia", _sql_quote("{} {}".format(damage_description, secondary_damage))),
        ("data_zakonczenia", "'test'"),
        ("miejscowosc", _sql_quote(location_city)),
        ("id_pojazdu", lot),
        ("marka", _sql_quote(make)),
        ("model", _sql_quote(model_detail)),
        ("wpcm_body_style", _sql_quote(body)),
        ("_car_gallery", _sql_quote(imgs)),
    )

    post_sql = (
        "INSERT INTO `admin_strona`.`auta_posts` (\n"
        + " ,\n".join("`{}`".format(column) for column, _literal in post_fields)
        + "\n)\nVALUES (\n"
        + ", ".join(literal for _column, literal in post_fields)
        + "\n);\n\n"
    )

    meta_template = ("INSERT INTO `admin_strona`.`auta_postmeta`"
                     "(`meta_id`, `post_id`, `meta_key`, `meta_value`) "
                     "VALUES (NULL, {},'{}',{});\n")
    meta_sql = "".join(meta_template.format(lot, key, literal)
                       for key, literal in meta_fields)

    return post_sql + meta_sql
def get_sales_date(date):
    """Re-punctuate a sale date string by fixed character positions.

    Characters 0-1, 3-4, 6-9, 11-12 and 14-15 of ``date`` are kept; the
    characters in between are replaced so the result reads
    ``"XX.XX.XXXX XX:XX"``. For example ``"10/12/2018 09:30"`` becomes
    ``"10.12.2018 09:30"``. The input is echoed to stdout first.

    Raises:
        IndexError: if ``date`` has fewer than 16 characters.
    """
    print('DATA', date)
    first = date[0] + date[1]
    second = date[3] + date[4]
    third = date[6] + date[7] + date[8] + date[9]
    hours = date[11] + date[12]
    minutes = date[14] + date[15]
    return "{}.{}.{} {}:{}".format(first, second, third, hours, minutes)
def get_actual_date():
    """Return the current UTC time formatted as ``yyyy-mm-dd hh:mm``.

    Example result: ``2015-10-15 07:49`` (no seconds component).
    """
    utc_now = gmtime()
    return strftime("%Y-%m-%d %H:%M", utc_now)
def get_vin(soup):
    """Return the text of the VIN element of a parsed lot page.

    The ``lotdetailVin`` span is tried first; if that lookup fails for any
    reason the ``lotdetailVinvalue`` span is used instead, and an error from
    that second lookup propagates to the caller.
    """
    try:
        vin_node = soup.select('span[data-uname="lotdetailVin"]')[0]
        return vin_node.text
    except Exception:
        fallback_node = soup.select('span[data-uname="lotdetailVinvalue"]')[0]
        return fallback_node.text
def get_current_bid(soup):
    """Return the bid text shown on a parsed lot page.

    The ``span`` following the "Current Bid" label is preferred, then the one
    following the "Starting Bid" label; if neither lookup succeeds the string
    ``"0"`` is returned.
    """
    label_selectors = ('label[for="Current Bid"]', 'label[for="Starting Bid"]')
    for selector in label_selectors:
        try:
            return soup.select(selector)[0].find_next_sibling('span').text
        except Exception:
            continue
    return "0"
def get_odometer(soup):
    """Return the odometer text of a parsed lot page.

    The ``lotdetailOdometervalue`` span is tried first; if that lookup fails
    for any reason, the ``span`` following the "mileage" label is used, and an
    error from that second lookup propagates to the caller.
    """
    try:
        reading = soup.select('span[data-uname="lotdetailOdometervalue"]')[0]
        return reading.text
    except Exception:
        mileage_label = soup.select('label[for="mileage"]')[0]
        return mileage_label.find_next_sibling('span').text
def get_description(soup, ifd):
    """Return the additional lot notes of a parsed page as one string.

    Args:
        soup: parsed lot page.
        ifd: falsy when no notes are available; ``'-'`` is returned at once.

    Returns:
        The right-stripped text of every ``lotNotes`` div joined with
        ``",\\n"``, or ``'-'`` if ``ifd`` is falsy or the lookup fails.
    """
    if not ifd:
        return '-'
    try:
        note_nodes = soup.select('div[ng-repeat="lotNotes in additionalLotNotes"]')
        return ",\n".join(node.text.rstrip() for node in note_nodes)
    except Exception:
        return '-'
def get_imgs(soup):
    """Return the ``.JPG`` image URLs of a parsed page, comma separated.

    Every ``img`` tag with class ``img-responsive`` is inspected; tags whose
    ``src`` ends with ``".JPG"`` contribute their ``src``. A tag that cannot
    be inspected (for example one without ``src``) is printed together with
    the error and skipped.
    """
    urls = []
    for tag in soup.find_all("img", class_="img-responsive"):
        try:
            if tag['src'].endswith(".JPG"):
                urls.append(tag['src'])
        except Exception as err:
            print(tag, err)
    return ','.join(urls)
def get_price(price):
    """Return only the digit characters of ``price``, in their original order."""
    digits = [char for char in price if char.isdigit()]
    return ''.join(digits)
# ---------------------------------------------------------------------------
# Script entry: scrape the Copart watch list and replace the cars in the DB.
# ---------------------------------------------------------------------------

# NOTE(review): these credentials were hard-coded in the script (and have been
# published); they are kept as fall-backs so existing deployments keep
# working, but they should be rotated and supplied via the environment only.
COPART_USERNAME = os.environ.get("COPART_USERNAME", "dabrowskikrzysztof44@gmail.com")
COPART_PASSWORD = os.environ.get("COPART_PASSWORD", "AutaLawik2")
DB_CONFIG = {
    'user': os.environ.get("AUTA_DB_USER", 'admin_user'),
    'password': os.environ.get("AUTA_DB_PASSWORD", 'Laurka12'),
    'host': os.environ.get("AUTA_DB_HOST", '185.243.55.83'),
    'database': os.environ.get("AUTA_DB_NAME", 'admin_strona'),
}

# Every meta key written for a vehicle (plus the underscore-prefixed variants
# and legacy spellings the original statement listed). 'wpcm_condition' was
# missing from the original DELETE although it is inserted for every lot, so
# those rows piled up on each run.
VEHICLE_META_KEYS = (
    'wpcm_expiration', 'wpcm_sold', 'wpcm_featured', 'wpcm_condition',
    'wpcm_make', 'wpcm_model', 'wpcm_frdate', 'wpcm_price', 'wpcm_mileage',
    'wpcm_fuel_type', 'wpcm_color', 'wpcm_transmission', 'wpcm_doors',
    'wpcm_engine', 'wpcm_power_kw', 'wpcm_power_hp',
    'wpcm_pwer_hp', '_wpcm_pwer_hp', 'wpcm_test',
    'wpcm_solds', '_wpcm_solds', 'wpcm_body_style', '_wpcm_body_style',
    'zawieszenie', '_zawieszenie', 'vin', '_vin',
    'kategoria_abi', '_kategoria_abi',
    'dowod_rejestracyjny_v5', '_dowod_rejestracyjny_v5',
    'kluczyki', '_kluczyki', 'weryfikacja', '_weryfikacja',
    'uszkodzenia', '_uszkodzenia', 'data_zakonczenia', '_data_zakonczenia',
    'miejscowosc', '_miejscowosc', 'id_pojazdu', '_id_pojazdu',
    'marka', '_marka', 'model', '_model', '_car_gallery',
)
DELETE_CARS1 = "DELETE FROM auta_postmeta WHERE meta_key IN ({})".format(
    ", ".join("'{}'".format(key) for key in VEHICLE_META_KEYS))
DELETE_CARS2 = "DELETE FROM auta_posts WHERE post_type='wpcm_vehicle';"


def _start_browser():
    """Start Chromium through the chromedriver binary in the working directory."""
    options = webdriver.ChromeOptions()
    options.add_argument('--no-sandbox')
    options.add_argument('--window-size=1366x768')
    # '--headless' is intentionally not passed; a virtual display is used.
    options.add_argument('--disable-gpu')
    options.binary_location = '/usr/bin/chromium'
    path = os.getcwd() + r"/chromedriver"
    return webdriver.Chrome(executable_path=path, options=options)


def _log_in(driver):
    """Open the Copart login page and submit the account credentials."""
    driver.get("https://www.copart.co.uk/login")
    try:
        username = driver.find_element_by_id("username")
        password = driver.find_element_by_id("password")
    except Exception:
        # The form fields were not found; wait and retry once.
        sleep(5)
        username = driver.find_element_by_id("username")
        password = driver.find_element_by_id("password")
    username.send_keys(COPART_USERNAME)
    password.send_keys(COPART_PASSWORD)
    sleep(2)
    driver.find_element_by_class_name("loginfloatright").click()
    sleep(2)


def _scrape_lot(driver, row):
    """Open the lot page of one CSV row and return its SQL script.

    Columns used: 0 = lot URL, 1 = lot number, 3 = sale date, 4 = year,
    5 = make, 6 = model.
    """
    driver.get(row[0])
    sleep(2)
    try:
        driver.find_element_by_css_selector('a[ng-click="showAllPhotosView($event)"]').click()
    except Exception:
        pass  # best effort: the link is not present on every page
    sleep(2)
    try:
        driver.find_element_by_css_selector('a[ng-click="showAdditionalLotNotes()"]').click()
        has_notes = True
    except Exception:
        has_notes = False

    soup = BeautifulSoup(driver.page_source)

    def span_text(selector):
        return soup.select(selector)[0].text

    def text_after_label(selector, index=0):
        return soup.select(selector)[index].find_next_sibling('span').text

    return make_a_query(
        lot_number=row[1],
        actual_date=get_actual_date(),
        description=get_description(soup, has_notes),
        sales_date=get_sales_date(row[3]),
        year=row[4],
        high_bid=get_price(get_current_bid(soup)),
        odometer=get_odometer(soup),
        fuel_type=span_text('span[data-uname="lotdetailFuelvalue"]'),
        color=span_text('span[data-uname="lotdetailColorvalue"]'),
        # The transmission label carries no data-uname; it is the third
        # label with an empty one.
        transmission=text_after_label('label[data-uname=""]', 2),
        engine=span_text('span[data-uname="lotdetailEnginetype"]'),
        drive=span_text('span[data-uname="DriverValue"]'),
        vin=get_vin(soup),
        lot_cond=text_after_label('label[for="category"]'),
        v5=span_text('span[data-uname="lotdetailNotes"]'),
        has_keys=span_text('span[data-uname="lotdetailKeyvalue"]'),
        damage_description=span_text('span[data-uname="lotdetailPrimarydamagevalue"]'),
        secondary_damage=span_text('span[data-uname="lotdetailSecondarydamagevalue"]'),
        location_city=text_after_label('label[for="location"]'),
        make=row[5],
        model_detail=row[6],
        imgs=get_imgs(soup),
        body=text_after_label('label[data-uname="lotdetailBodystyle"]'),
    )


def _scrape_watchlist(driver, queries):
    """Download the watch-list CSV and append one SQL script per lot to *queries*.

    Results are appended to the caller's list so lots scraped before an
    unexpected failure are not lost.
    """
    _log_in(driver)
    driver.get("https://www.copart.co.uk/watchList/")
    sleep(2)
    driver.find_element_by_css_selector('a[ng-csv="getCSVResult()"]').click()
    filename = wait_until_csv_file_exists()

    with open(filename, 'r', encoding="utf-8") as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        for line_count, row in enumerate(csv_reader):
            if line_count == 0:
                # Header row: show the column layout for debugging.
                print('Column names are {}'.format(", ".join(row)))
                for i, cell in enumerate(row):
                    print(i, cell)
                continue
            try:
                queries.append(_scrape_lot(driver, row))
            except Exception as e:
                # One bad lot must not abort the whole run.
                print("ERROR id:", row[1] if len(row) > 1 else row)
                print(e)
                print()


def _replace_cars_in_db(queries):
    """Delete all stored vehicles and insert the freshly scraped ones.

    Everything runs on one connection and is committed once at the end;
    mysql.connector does not autocommit by default, so without the commit the
    changes are discarded on transactional tables.
    """
    cnx = mysql.connector.connect(**DB_CONFIG)
    try:
        cursor = cnx.cursor()
        for statement in (DELETE_CARS1, DELETE_CARS2):
            cursor.execute(statement)
            print(statement)
        for q in queries:
            try:
                # Each query holds many statements; plain execute() rejects
                # that. NOTE(review): ``multi=True`` is the Connector/Python
                # 8.x API - confirm against the installed version.
                for _result in cnx.cursor().execute(q, multi=True):
                    pass
                print(q)
            except Exception as e:
                print(e)  # best effort: keep inserting the remaining lots
        cnx.commit()
    except Exception as e:
        print(e)
        cnx.rollback()
    finally:
        cnx.close()


queries = []
display = None
driver = None
try:
    delete_if_csv_exists()
    display = Display(visible=0, size=(1366, 768))
    display.start()
    driver = _start_browser()
    enable_downloading_in_google_headless(driver)
    _scrape_watchlist(driver, queries)
except Exception as e:
    print("scraping aborted after {} lot(s): {!r}".format(len(queries), e))
finally:
    # Guarded: either object may not exist if start-up itself failed.
    if driver is not None:
        driver.quit()  # quit() also ends the chromedriver session
    if display is not None:
        display.stop()

if queries:
    _replace_cars_in_db(queries)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement