Iskon Scrape
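
A quick BeautifulSoup + requests + xlwt scraper: it walks the first four pages of http://directory.krishna.com/temples and writes each page's temple listings (title, city, phone, temple name, country and website) to its own sheet of learn.xls.
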
from bs4 import BeautifulSoup
import requests
import xlwt

BASE_URL = "http://directory.krishna.com/temples?page={0}"
COLUMNS = ("Title", "City", "Phone", "Temple Name", "Country", "Website")


def extract_details(row):
    """Pull the six listing fields out of one 'views-row' div."""

    def field_text(suffix):
        # Plain-text fields live in <div class="views-field views-field-SUFFIX">
        # wrapping a <span class="field-content">; return "" if either is missing
        # instead of crashing on an incomplete listing.
        div = row.find("div", class_="views-field views-field-" + suffix)
        span = div.find("span", class_="field-content") if div else None
        if span and span.contents:
            return span.contents[0].strip()
        return ""

    # The title field wraps its text in a link rather than a field-content span.
    title = row.find("div", class_="views-field views-field-title")
    details = [title.a.get_text(strip=True) if title and title.a else ""]
    for suffix in ("city", "phone", "name", "country"):
        details.append(field_text(suffix))

    # The website field holds a link rather than plain text, and not every
    # temple has one.
    website = row.find("div", class_="views-field views-field-field-website-url")
    link = website.find("a") if website else None
    if link is not None and link.has_attr("href"):
        details.append(link["href"])
    else:
        details.append("No website URL found!")
    return details


def write_row(ws, row_num, values):
    # Write one spreadsheet row: one column per value.
    for col, value in enumerate(values):
        ws.write(row_num, col, value)


def main():
    wb = xlwt.Workbook()
    for pg_num in range(0, 4):
        ws = wb.add_sheet("page_{0}".format(pg_num + 1), cell_overwrite_ok=True)
        write_row(ws, 0, COLUMNS)

        req = requests.get(BASE_URL.format(pg_num))
        req.raise_for_status()
        soup = BeautifulSoup(req.content, "lxml")

        # Every temple listing is a div whose class list contains "views-row"
        # (alongside views-row-N and odd/even/first/last markers); matching on
        # the shared class replaces separate first/odd/even/last lookups and
        # copes with pages that hold fewer than 30 temples.
        for row_num, temple in enumerate(soup.find_all("div", class_="views-row"), start=1):
            write_row(ws, row_num, extract_details(temple))

    wb.save("learn.xls")


if __name__ == "__main__":
    main()
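
A minimal smoke test of extract_details against a hand-written snippet of the "views" markup the scraper expects. The sample HTML and its field values are placeholders modeled on the class names used above, not a capture of the live directory page, and the snippet assumes extract_details from the script is already defined in the session.

from bs4 import BeautifulSoup

SAMPLE = """
<div class="views-row views-row-1 views-row-odd views-row-first">
  <div class="views-field views-field-title"><a href="/temple/1">Example Temple</a></div>
  <div class="views-field views-field-city"><span class="field-content">Example City</span></div>
  <div class="views-field views-field-phone"><span class="field-content">+1 555 0100</span></div>
  <div class="views-field views-field-name"><span class="field-content">ISKCON Example</span></div>
  <div class="views-field views-field-country"><span class="field-content">Exampleland</span></div>
  <div class="views-field views-field-field-website-url">
    <span class="field-content"><a href="http://example.org">website</a></span>
  </div>
</div>
"""

row = BeautifulSoup(SAMPLE, "lxml").find("div", class_="views-row")
print(extract_details(row))
# -> ['Example Temple', 'Example City', '+1 555 0100', 'ISKCON Example', 'Exampleland', 'http://example.org']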