Advertisement
Guest User

web scrap akoam.com

a guest
Apr 10th, 2020
195
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.35 KB | None | 0 0
  1. import requests
  2. from bs4 import BeautifulSoup
  3.  
  4.  
  5. url = 'https://www.akoam.net/cat/156/%D8%A7%D9%84%D8%A3%D9%81%D9%84%D8%A7%D9%85-%D8%A7%D9%84%D8%A7%D8%AC%D9%86%D8%A8%D9%8A%D8%A9'
  6. headers={"User-Agent":"Mozilla/5.0  AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"}
  7. s = requests.Session()
  8. s.headers.update(headers)
  9. r = s.get(url)
  10. soup = BeautifulSoup(r.content,"lxml")
  11.  
  12.  
  13. for div in soup.find_all('div',class_='subject_box'):
  14.     a = div.find('a')
  15.     surl= a['href']
  16.     r2 = s.get(surl)
  17.     soup2 = BeautifulSoup(r2.content,"lxml")
  18.     for div2 in soup2.find_all('div',class_='sub_desc sub_extra_desc'):
  19.         a2 = div2.find('a')
  20.         url2= a2['href']
  21.         r3 = s.get(url2)
  22.         soup3 = BeautifulSoup(r3.content,"lxml")
  23.         a3 = soup3.find('a',class_="link-btn link-download d-flex align-items-center px-3")
  24.         url3= a3['href']
  25.         r4 = s.get(url3)
  26.         soup4 = BeautifulSoup(r4.content,"lxml")
  27.         a4 = soup4.find('a',class_="download-link")
  28.         url4 = a4['href']
  29.         r5 = s.get(url4)
  30.         soup5 = BeautifulSoup(r5.content,"lxml")
  31.         a5 = soup5.find('a',class_="font-size-16 text-muted")
  32.         url5 = a5['href']
  33.         text = open('fileidm.txt','a+')
  34.         text.write(url5+'\r\n')
  35.         text.close()
  36.         print(url5)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement