Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Iterate through every batch in update_dictionary. Should this operation get interrupted, re-run generate_update_ids.py to generate the latest list of IDs to be updated, and run this script again.
- for key in update_dictionary:
- # Declare a variable start_time as the current time.
- start_time = time()
- # Declare a list to which we append the output from running the update() function.
- results = []
- # Obtain the list of IDs corresponding to the key in the current iteration.
- batch = update_dictionary[key]
- # Randomise sorting of IDs to make scraping less suspicious to the server.
- id_randomised = random.sample(batch, len(batch))
- # Perform update() function on all car IDs in the list; pause for a random period of time to reduce suspicion and the frequency of get requests.
- for car_id in id_randomised:
- output = update(str(car_id))
- if output != None:
- results.append(output)
- sleep(random.uniform(0.05,0.1))
- # Declare a list sold_list to which sold listings will be appended.
- sold_list = []
- # Declare a list update_list to which available-for-sale listings will be appended; the latest listing data will be overwritten into the DB.
- update_list = []
- # Using the second element in the list output of update(), we sort the sold listings and available-for-sale listings into the respective lists.
- # Sold listings have the list element in index 1 (second element) set as the listing ID (int(car_id)) with type() = int, while available-for-sale listings have it set as the vehicle availability (avail) with type() = str.
- for result in results:
- if type(result[1]) == int:
- sold_list.append(result)
- elif type(result[1]) == str:
- update_list.append(result)
- # Print sold_list, update_list and their respective lengths for quick checking
- print(sold_list)
- print('Length of sold_list: ' + str(len(sold_list)))
- print(update_list)
- print('Length of update_list: ' + str(len(update_list)))
- # Connect to the DB and create a Cursor object and use it to execute SQL commands.
- conn = sqlite3.connect('db.db')
- c = conn.cursor()
- # Execute to DB updates for sold vehicles.
- try:
- for car in sold_list:
- c.execute("UPDATE cars SET avail=?, date_sold=?, last_edited=? WHERE id=?", ['Sold',car[0],datetime.now().date(),car[1]])
- except:
- None
- # Execute to DB updates for available vehicles. Very tedious method used in matching the list elements to their respective columns in the DB. Was not able to find a simplified way which worked.
- for car in update_list:
- c.execute('''UPDATE cars SET make_model=?, price=?, depre=?, reg_date=?, manu_yr=?, mileage=?, tranny=?, eng_cap=?,
- road_tax=?, power=?, weight=?, features=?, acc=?, coe_left=?, coe_price=?, omv=?, arf=?, owners=?, veh_type=?,
- cat=?, date_posted=?, updated=?, tags=?, last_edited=?, desc=? WHERE id=?''',[car[2],car[3],car[4],car[5],car[6],car[7],car[8],car[9],car[10],
- car[11],car[12],car[13],car[14],car[15],car[16],car[17],car[18],
- car[19],car[20],car[21],car[22],car[23],car[24],datetime.now().date(),car[26],car[0]])
- # Confirm the changes to be made to the DB and close the connection to the DB.
- conn.commit()
- conn.close()
- # Calculate time taken for the whole operation
- elapsed = round(time() - start_time, 2)
- print("time elapsed = " + str(elapsed))
- print("time taken per page = " + str(round(elapsed/(len(batch)),2)))
Add Comment
Please, Sign In to add comment