Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Download a run of consecutive items from a CONTENTdm-hosted JSON API and
# print the 'text' field of each one, followed by a separator line.
import json

import requests

# Original URL for reference:
# https://cdm22007.contentdm.oclc.org/digital/api/collections/p22007coll9/items/491899/false

# Number of consecutive items to request.
page_count = 512
url_prefix = "https://cdm22007.contentdm.oclc.org/digital/api/collections/p22007coll9/items/"
# Item id taken from the reference URL above.
url_page_count = 491899
url_suffix = "/false"
# Seconds to wait on a single request before giving up; without a timeout a
# stalled connection would block the whole run indefinitely.
request_timeout = 30

# The first id requested is url_page_count + 1, so the reference item itself
# (491899) is not fetched.
# NOTE(review): this matches what the script has always requested; confirm
# that skipping the reference item is intended.
first_item_id = url_page_count + 1

# Work queue: one URL per item, built from prefix + item id + suffix.
urls = [
    url_prefix + str(item_id) + url_suffix
    for item_id in range(first_item_id, first_item_id + page_count)
]

# A single session reuses the underlying connection across all requests to
# the same host instead of reconnecting for every item.
with requests.Session() as session:
    for url in urls:
        response = session.get(url, timeout=request_timeout)
        # Fail with a clear HTTP error instead of a confusing JSON/KeyError
        # further down when the server answers with an error status.
        response.raise_for_status()
        json_data = json.loads(response.text)
        print(json_data['text'])
        print("__________________________________")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement