Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from bs4 import BeautifulSoup
- import requests
- import xlwt
def main():
    """Scrape temple listings from directory.krishna.com and save them to learn.xls.

    Fetches pages 0-3 of the directory, extracts up to 30 temple listings per
    page (title, city, phone, temple name, country, website URL), and writes
    each page to its own sheet of an xlwt workbook via ``write``.
    """
    wb = xlwt.Workbook()
    headers = ("Title", "City", "Phone", "Temple Name", "Country", "Website")
    try:
        for pg_num in range(0, 4):
            ws = wb.add_sheet("page_{0}".format(pg_num + 1), cell_overwrite_ok=True)
            for col, header in enumerate(headers):
                ws.write(0, col, header)
            req = requests.get("http://directory.krishna.com/temples?page=" + str(pg_num))
            soup = BeautifulSoup(req.content, "lxml")
            # Listings are numbered 1..30; the site tags each row div with an
            # odd/even class, plus -first on row 1 and -last on row 30.
            for row_num in range(1, 31):
                parity = "even" if row_num % 2 == 0 else "odd"
                css = "views-row views-row-{0} views-row-{1}".format(row_num, parity)
                if row_num == 1:
                    css += " views-row-first"
                elif row_num == 30:
                    css += " views-row-last"
                for temple_div in soup.find_all("div", css):
                    detail = _temple_details(temple_div)
                    detail.append(row_num)  # spreadsheet row index (header is row 0)
                    detail.append(ws)       # target sheet, consumed by write()
                    write(detail)
    except Exception as exc:
        # The original code bailed out of the whole scrape on any parse error;
        # keep that best-effort behavior but surface what went wrong.
        print("done")
        print("scrape stopped early: {0}".format(exc))
    wb.save("learn.xls")


def _temple_details(temple_div):
    """Extract the six detail fields from one temple listing div.

    Returns [title, city, phone, temple name, country, website URL]; the URL
    falls back to a placeholder string when the listing has no link.
    """
    soup = BeautifulSoup(temple_div.prettify(), "lxml")
    details = []
    title = soup.find_all("div", "views-field views-field-title")
    details.append(title[0].a.string.strip())
    # The four plain-text fields share the same span.field-content structure.
    for field in ("city", "phone", "name", "country"):
        block = soup.find_all("div", "views-field views-field-" + field)
        inner = BeautifulSoup(block[0].prettify(), "lxml")
        span = inner.find_all("span", "field-content")
        details.append(span[0].contents[0].strip())
    website = soup.find_all("div", "views-field views-field-field-website-url")
    inner = BeautifulSoup(website[0].prettify(), "lxml")
    span = inner.find_all("span", "field-content")
    links = BeautifulSoup(span[0].prettify(), "lxml").find_all("a")
    try:
        details.append(links[0]["href"])
    except (IndexError, KeyError):
        details.append("No website URL found!")
    return details
def write(tpl):
    """Write one temple's details into a worksheet.

    ``tpl`` layout: indices 0-5 are the six detail values, index 6 is the
    target row number, index 7 is the xlwt worksheet to write into.
    """
    sheet = tpl[7]
    row = tpl[6]
    print(row)
    for col, value in enumerate(tpl[:6]):
        sheet.write(int(row), col, value)
# Run the scraper only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Add Comment
Please, Sign In to add comment