Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import csv
import urllib.request
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
def get_links(first_page: int = 1, last_page: int = 359) -> list:
    """Build the listing-page URLs to scrape.

    Args:
        first_page: Number of the first listing page (inclusive).
        last_page: Number of the last listing page (inclusive). The default
            of 359 reproduces the previously hard-coded ``range(1, 360)``.

    Returns:
        A list of URL strings, one per page number in ascending order.
        The list is empty when ``last_page`` is smaller than ``first_page``.
    """
    base = 'http://www.iafd.com/lookupethnic.rme/ethnic=latin/pix=1/page={}'
    return [base.format(page) for page in range(first_page, last_page + 1)]
def src_download():
    """Scrape every listing page and record the JPEG image URLs found.

    For each URL returned by ``get_links()`` the page is fetched and parsed
    with BeautifulSoup's "lxml" parser. The ``src`` of every ``<img>`` tag is
    resolved against the site root, and each resulting URL that ends in
    ``.jpg`` is written to ``latin.csv`` (followed by ``', '``) and echoed
    to stdout.

    A page that cannot be fetched is reported on stdout and skipped, so a
    single failed request does not abort the whole run.
    """
    base_url = 'http://www.iafd.com'
    # Explicit UTF-8 so a non-ASCII URL can always be written instead of
    # being dropped by a platform-dependent default encoding.
    with open('latin.csv', 'w', encoding='utf-8') as out_file:
        for page_url in get_links():
            try:
                # Without a timeout a stalled connection would block forever.
                response = requests.get(page_url, timeout=30)
            except requests.RequestException as exc:
                print('skipping {}: {}'.format(page_url, exc))
                continue

            soup = BeautifulSoup(response.text, "lxml")
            for img in soup.find_all('img'):
                src = img.get('src')
                if not src:
                    # <img> tag without a usable src: nothing to record.
                    continue
                # urljoin handles root-relative, relative and absolute src
                # values; plain concatenation only worked for the first.
                complete = urljoin(base_url, src)
                if not complete.endswith('.jpg'):
                    continue
                out_file.write(complete + ', ')
                try:
                    print(complete)
                except UnicodeEncodeError:
                    # The console cannot render this URL; it has already
                    # been saved to the file, so only the echo is skipped.
                    pass
# Start the scrape only when this file is executed as a script, so that
# importing the module does not trigger network requests or file writes.
if __name__ == "__main__":
    src_download()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement