Untitled

#!/usr/bin/env python3

import argparse
import os
import sys
import re
import urllib.request
# lxml is the only third-party dependency; fail early with install
# instructions instead of a raw traceback when it is missing.
try:
    import lxml.html
except ModuleNotFoundError:
    # One concatenated string: passing several arguments to print() would
    # insert a separator space at the start of every continuation line.
    print("The lxml module is not installed.\n"
          "To install it, please enter this command:\n"
          "python -m pip install lxml",
          file=sys.stderr)
    sys.exit(1)

# Accepted thread URLs: https://boards.4chan.org/<board>/thread/<number>
# (or the 4channel.org domain). The dots are escaped so they only match a
# literal "." and board names may contain digits (e.g. the "3" board).
regex = r"^https:\/\/boards\.4chan(nel)?\.org\/[a-z0-9]+\/thread\/[0-9]+$"


def apply_parser(argv=None):
    """Build the command-line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default) argparse reads ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` with the attributes ``URL``, ``output``,
        ``no_output_folder`` and ``no_interactive``.

    On invalid arguments argparse prints a usage message and exits.
    """
    def thread_url(arg):
        # fullmatch also rejects a trailing newline, which "$" alone
        # would let through with match().
        if not re.fullmatch(regex, arg):
            raise argparse.ArgumentTypeError(
                f"URL has to match '{regex}'")
        return arg

    parser = argparse.ArgumentParser(
        description="Downloads all images from a 4chan thread"
    )
    parser.add_argument("URL", help="4chan thread URL", type=thread_url)
    # No nargs=1 here: it would turn a user-supplied value into a
    # one-element list while the default stays a plain string.
    parser.add_argument("-o", "--output", default=os.getcwd(),
                        help="Destination folder", type=str)
    parser.add_argument("--no-output-folder", action="store_true",
                        help="If set, saves images directly on output folder")
    parser.add_argument("-f", "--no-interactive", action="store_true",
                        help="If set, run the script w/o prompt")
    return parser.parse_args(argv)


def run(args):
    """Download every image linked from the thread given in *args*.

    Args:
        args: Parsed command-line namespace providing ``URL``, ``output``,
            ``no_output_folder`` and ``no_interactive``.

    The thread page is fetched, the ``href`` of every ``a.fileThumb`` link
    is collected and each image is saved in the destination folder. Files
    that already exist there are skipped (and not counted as downloaded).

    Exits with status 0 when the user declines the confirmation prompt and
    with status 1 when the destination folder cannot be created.
    """
    from urllib.parse import urljoin

    # argparse yields a one-element list for options declared with
    # nargs=1, while the default value is a plain string; accept both.
    output = args.output
    if isinstance(output, (list, tuple)):
        output = output[0]

    url_parts = args.URL.split('/')
    thread_board = url_parts[-3]
    thread_number = url_parts[-1]
    if args.no_output_folder:
        dest_folder = output
    else:
        dest_folder = os.path.join(
            output, f"4ch-{thread_board}-{thread_number}"
        )

    request = urllib.request.Request(args.URL,
                                     headers={'User-Agent': 'Mozilla/5.0'})
    # Close the HTTP response as soon as the page has been read.
    with urllib.request.urlopen(request) as response:
        content = response.read()
    img_links = lxml.html.fromstring(content).xpath(
        '//a[@class="fileThumb"]/@href'
    )
    nb_imgs = len(img_links)

    if not args.no_interactive:
        print(
            f"The script will download {nb_imgs}"
            + f" images in the folder '{dest_folder}'")
        choice = input("Continue ? [y/N] ")
        if choice.strip().lower() not in ("y", "yes"):
            sys.exit(0)

    # isdir (not exists): a regular file sitting at the destination path
    # makes makedirs fail here instead of every download failing later.
    if not os.path.isdir(dest_folder):
        try:
            os.makedirs(dest_folder)
        except OSError as e:
            print(e)
            sys.exit(1)
        print(f"Folder {dest_folder} created")

    counter = 0
    for link in img_links:
        filepath = os.path.join(dest_folder, link.split('/')[-1])
        if not os.path.exists(filepath):
            # Resolve the link against the thread URL so that
            # protocol-relative links ("//host/path") inherit its https
            # scheme and absolute links are used unchanged.
            img_url = urljoin(args.URL, link)
            # Download under a temporary name and rename when complete, so
            # an interrupted transfer never leaves a truncated file that a
            # later run would skip as already downloaded.
            partial_path = filepath + ".part"
            urllib.request.urlretrieve(img_url, partial_path)
            os.replace(partial_path, filepath)
            counter += 1
        print(f"Images downloaded: {counter}/{nb_imgs}", end='\r')
    print("\nAll images downloaded!")

# Script entry point: parse the command line, then download the images.
if __name__ == "__main__":
    cli_args = apply_parser()
    run(cli_args)