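# Joke scraper: collects "Q:"/"A:" joke lines from ahajokes.com pages into a CSV file and downloads the images found on those pages.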

import csv
import os
from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup


# Output: scraped jokes go to kid_jokes.csv, starting with a header row.
# Root of the site being scraped; joke pages live directly under it.
BASE_URL = 'http://www.ahajokes.com/'
# CSV file that receives the scraped question/answer lines.
OUTPUT_CSV = 'kid_jokes.csv'
# Joke pages are numbered kani07.html .. kani90.html.
FIRST_PAGE = 7
LAST_PAGE = 90
# Seconds to wait for any single HTTP request before giving up on it.
REQUEST_TIMEOUT = 30


def build_page_urls(first=FIRST_PAGE, last=LAST_PAGE):
    """Return the joke page URLs numbered ``first`` to ``last`` inclusive.

    Page numbers are zero-padded to two digits, e.g. ``kani07.html``.
    """
    return [
        '{}kani{:02d}.html'.format(BASE_URL, number)
        for number in range(first, last + 1)
    ]


def extract_qa_rows(page_text):
    """Return the CSV rows for the joke lines found in ``page_text``.

    Each line starting with "Q:" becomes ``[line, ""]`` and each line
    starting with "A:" becomes ``["", line]``, in the order they appear.
    All other lines are ignored.
    """
    rows = []
    for line in page_text.split('\n'):
        if line.startswith('Q:'):
            rows.append([line, ''])
        elif line.startswith('A:'):
            rows.append(['', line])
    return rows


def resolve_image_url(page_url, src):
    """Return the absolute URL for an ``<img>`` ``src`` found on ``page_url``.

    Returns ``None`` when ``src`` is missing or blank. Absolute sources are
    returned unchanged; relative, root-relative and protocol-relative
    sources are resolved against the page URL.
    """
    if not src or not src.strip():
        return None
    return urljoin(page_url, src.strip())


def image_filename(image_url):
    """Return the last path component of ``image_url``.

    The query string and fragment are ignored. The result is an empty
    string when the URL path ends with a slash.
    """
    return os.path.basename(urlparse(image_url).path)


def download_image(image_url):
    """Download ``image_url`` into the current working directory.

    Returns ``True`` when the image was saved and ``False`` when it was
    skipped (no usable file name, request error, or non-success status).
    """
    name = image_filename(image_url)
    if not name:
        print('Skipping image without a file name: {}'.format(image_url))
        return False

    try:
        response = requests.get(image_url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as error:
        # One broken image should not abort the whole scrape.
        print('Could not download {}: {}'.format(image_url, error))
        return False

    if not response.ok:
        # Do not save an HTML error page under an image file name.
        print('Skipping {} (HTTP {})'.format(image_url, response.status_code))
        return False

    with open(name, 'wb') as image_file:
        image_file.write(response.content)
    return True


def main():
    """Scrape every joke page into OUTPUT_CSV and download the page images."""
    # Images shared between pages are fetched only once.
    seen_images = set()

    # newline='' is what the csv module expects of its output file; the
    # with-block guarantees the rows are flushed and the file is closed.
    with open(OUTPUT_CSV, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['Q', 'A'])

        for page_url in build_page_urls():
            try:
                page = requests.get(page_url, timeout=REQUEST_TIMEOUT)
            except requests.RequestException as error:
                print('Could not fetch {}: {}'.format(page_url, error))
                continue
            if not page.ok:
                # Missing page numbers are skipped.
                continue

            soup = BeautifulSoup(page.text, 'html.parser')

            # Jokes: one CSV row per "Q:" / "A:" line of the page text.
            for row in extract_qa_rows(soup.text):
                print(row[0] or row[1])
                writer.writerow(row)

            # Images: download every <img> on the page.
            for img in soup.find_all('img'):
                src = img.get('src')
                print(src)
                image_url = resolve_image_url(page_url, src)
                if image_url is None or image_url in seen_images:
                    continue
                seen_images.add(image_url)
                download_image(image_url)


if __name__ == '__main__':
    main()