Advertisement
Guest User

Untitled

a guest
Aug 15th, 2018
122
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.02 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. import requests
  3. import urllib.request
  4. import csv
  5.  
  6. def get_links():
  7.     #listOfYears = [i for i in range(1920, 2000)]
  8.     listOfPages = [i for i in range(1, 360)]
  9.     BASE = 'http://www.iafd.com/lookupethnic.rme/ethnic=latin/pix=1/page={}'
  10.     listOfLinks = [BASE.format(p) for p in listOfPages]
  11.     return listOfLinks
  12.  
  13. def src_download():
  14.     with open('latin.csv', 'w') as pornFile:
  15.         for i in get_links():
  16.             source_code = requests.get(i)
  17.             plain_text = source_code.text
  18.             soup = BeautifulSoup(plain_text, "lxml")
  19.             for link in soup.findAll('img'):
  20.                 srcImg = link.get('src')
  21.                 baseUrl = 'http://www.iafd.com'
  22.                 complete = baseUrl + srcImg
  23.                 if complete.endswith('.jpg'):
  24.                     try:
  25.                         pornFile.write(complete + ', ')
  26.                         print(complete)
  27.                     except UnicodeEncodeError:
  28.                         pass
  29.  
  30. src_download()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement