Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#! python
"""Convert each URL listed in a CSV file to a PDF.

Reads URLs from INPUT_FILE (one row per record, URL in the second column)
and writes one PDF per URL into OUTPUT_PATH via pdfkit, which requires
WKHTMLTOPDF to be installed.

Created for u/aggiefury101 on Reddit by u/MyNamesNotReallyDave.
Tested with a csv file of randomly generated Google searches
([0,https://www.google.com/search?q=833]).
"""
import csv
import os

# Input file assumes the csv is in the same folder; provide the absolute
# path if necessary.
INPUT_FILE = 'urls.csv'
# Downloaded PDFs are stored in the 'PDFs' sub-directory; os.path.join keeps
# this portable (the original hard-coded Windows '\\' separators).
OUTPUT_PATH = os.path.abspath(os.path.join('.', 'PDFs'))
# Which CSV column holds the URL; change as required.
URL_COLUMN = 1


def output_filename_for(url, output_path=OUTPUT_PATH):
    """Return the destination PDF path for *url* under *output_path*.

    The last 3 characters of the URL serve as the unique stem (matching the
    original script); swap in a counter or a proper slug for real-world input
    where URL tails may collide.
    """
    return os.path.join(output_path, f'{url[-3:]}.pdf')


def main():
    """Iterate over the CSV and convert each URL to a PDF."""
    # Third-party, and dependent on WKHTMLTOPDF being installed; imported
    # here so the module can be imported/tested without the dependency.
    import pdfkit

    # Ensure the destination exists — the original crashed when it didn't.
    os.makedirs(OUTPUT_PATH, exist_ok=True)

    # newline='' is the csv module's documented open() requirement.
    with open(INPUT_FILE, 'r', newline='') as file:
        reader = csv.reader(file, delimiter=',')
        for row in reader:
            # Skip blank rows so a trailing newline doesn't raise IndexError.
            if not row:
                continue
            url = row[URL_COLUMN]
            # Run the pdfkit converter on the given URL.
            pdfkit.from_url(url, output_filename_for(url))


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement