MyNamesNotReallyDave

u/aggiefury101

Jan 22nd, 2020
#! python

"""Simple script to iterate over a csv file of URLs and convert those pages to PDF.
Created for u/aggiefury101 on Reddit by u/MyNamesNotReallyDave.
Tested with a csv file of randomly generated Google searches ([0,https://www.google.com/search?q=833]).
"""

# Necessary imports; dependent on wkhtmltopdf being installed
import csv
import pdfkit

# Additionally import os for filepath management
import os

# Input file assumes the csv is in the same folder; provide the absolute path if necessary
INPUT_FILE = 'urls.csv'

# Downloaded PDFs will be stored in the 'PDFs' sub-directory; provide the absolute path if necessary
OUTPUT_PATH = os.path.abspath('PDFs')

# Create the output directory if it does not already exist
os.makedirs(OUTPUT_PATH, exist_ok=True)

# Context manager for the input file (newline='' as the csv module recommends)
with open(INPUT_FILE, 'r', newline='') as file:

    # Create the csv reader object
    reader = csv.reader(file, delimiter=',')

    # Iterate over each row in the csv
    for row in reader:

        # Column 1 holds the URL; change the index as required
        url = row[1]

        # Unique filename from the last three characters of the URL;
        # the unique part of the URL is recommended, but a counter etc. would also work
        output_filename = os.path.join(OUTPUT_PATH, f'{url[-3:]}.pdf')

        # Run the pdfkit converter on the given URL
        pdfkit.from_url(url, output_filename)
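One caveat with naming files from the last three characters of the URL: any two URLs that end the same way silently overwrite each other. A minimal sketch of a sturdier alternative, hashing the full URL so each distinct URL gets a distinct, filesystem-safe name (the `pdf_filename` helper is an illustration, not part of the original script):

```python
import hashlib

def pdf_filename(url: str) -> str:
    """Derive a stable, collision-resistant PDF filename from a URL."""
    # Short SHA-1 digest of the full URL; the same URL always maps
    # to the same name, and distinct URLs virtually never collide
    digest = hashlib.sha1(url.encode('utf-8')).hexdigest()[:10]
    return f'{digest}.pdf'

# Drop-in replacement inside the loop above:
#     output_filename = os.path.join(OUTPUT_PATH, pdf_filename(url))
print(pdf_filename('https://www.google.com/search?q=833'))
```

Hex digests contain only `[0-9a-f]`, so the result is safe on any filesystem, unlike raw URL fragments that may contain `?`, `/`, or `:`.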