# Parser: scrapes meta content and e-mail addresses from bundes-telefonbuch.de.

# -*- coding: utf-8 -*-
import requests
from lxml import html, etree
import socks
import csv
import requests.exceptions
import re

# Module-level accumulators. The scraping loop fills ``hrefs`` with links found
# on the base listing page and ``hrefs2`` with links found on the pages those
# point to. ``data`` is not referenced anywhere in the visible script.
hrefs2, hrefs, data = [], [], []

# SOCKS5 proxies, in the order they are tried. Every entry uses the same port,
# so the URLs are assembled from the bare host addresses.
_PROXY_PORT = 1085
_PROXY_HOSTS = (
    '46.148.112.222',
    '95.85.80.213',
    '185.101.71.76',
    '5.189.201.218',
    '141.101.201.151',
    '146.185.205.15',
    '193.106.31.11',
    '95.181.217.149',
    '5.62.152.73',
)
proxies = list(
    'socks5://%s:%d' % (_host, _PROXY_PORT) for _host in _PROXY_HOSTS
)

# Crawl bundes-telefonbuch.de through the first proxy that completes the run.
#
# The crawl has three levels; relative links are appended to ``url1``:
#   1. base listing page  -> links inside elements of class "listitempage"
#   2. each of those      -> links inside elements of class "panel-title"
#   3. each of those      -> print <meta content="..."> values found under
#                            class "col-xs-12" and e-mail addresses found in
#                            the text of elements of class "table"
base_url = "https://www.bundes-telefonbuch.de/suche/b"
url1 = 'https://www.bundes-telefonbuch.de/'

# Third-level URLs are fetched only when they are longer than this.
# NOTE(review): presumably this filters out short, non-detail links - confirm.
_MIN_DETAIL_URL_LENGTH = 55

# Seconds to wait on a request before giving up on the current proxy; without
# a timeout a stalled proxy would block the script indefinitely.
_REQUEST_TIMEOUT = 30

# Matches a text node that consists solely of an e-mail address. Compiled once
# instead of on every text node.
_EMAIL_RE = re.compile(r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)")


def _get(session, url, proxy_url):
    """Fetch *url* with *session*, routed through *proxy_url*.

    requests selects a proxy by URL scheme, so the proxy is registered for
    both ``http`` and ``https``; with only an ``http`` entry the https:// URLs
    used here would be fetched without the proxy.
    """
    proxy_map = {'http': proxy_url, 'https': proxy_url}
    return session.get(url, proxies=proxy_map, timeout=_REQUEST_TIMEOUT)


def _collect_links(tree, css_class):
    """Return the ``href`` of every ``<a>`` inside elements of *css_class*.

    Anchors without an ``href`` attribute are skipped so that no ``None``
    ends up being glued onto the site root as a URL.
    """
    links = []
    for element in tree.find_class(css_class):
        for anchor in element.findall('.//a'):
            target = anchor.get('href')
            if target is not None:
                links.append(target)
    return links


def _scrape(session, proxy_url):
    """Run the three-level crawl once and print what is found.

    Fills the module-level ``hrefs`` and ``hrefs2`` lists as it goes.
    Connection and timeout errors from requests propagate to the caller.
    """
    listing = html.fromstring(_get(session, base_url, proxy_url).text)
    hrefs.extend(_collect_links(listing, 'listitempage'))

    for url2 in hrefs:
        response2 = _get(session, url1 + url2, proxy_url)
        body = html.fromstring(response2.content.decode('utf-8'))
        hrefs2.extend(_collect_links(body, 'panel-title'))

    for url3 in hrefs2:
        pages_urls2 = url1 + url3
        if len(pages_urls2) <= _MIN_DETAIL_URL_LENGTH:
            continue

        response3 = _get(session, pages_urls2, proxy_url)
        body2 = html.fromstring(response3.content.decode('utf-8'))

        for container in body2.find_class('col-xs-12'):
            for meta in container.findall('.//meta'):
                # Single-argument print(...) gives the same output as the
                # Python 2 print statement and also parses on Python 3.
                print(meta.get('content'))

        for table in body2.find_class('table'):
            for text in table.itertext():
                # The pattern is anchored at both ends, so surrounding
                # whitespace in the text node must be removed first.
                mail = _EMAIL_RE.findall(text.strip())
                if mail:
                    print('; %s' % (mail,))


for prox in proxies:
    session = requests.Session()
    # Start each attempt with empty link lists; otherwise links gathered by a
    # failed attempt would be fetched again and reported twice.
    del hrefs[:]
    del hrefs2[:]
    try:
        _scrape(session, prox)
    except (requests.exceptions.ConnectionError,
            requests.exceptions.Timeout):
        # This proxy is unusable: move on to the next one rather than
        # retrying the same proxy forever. Results already printed by the
        # failed attempt will be printed again by the next one.
        continue
    # The crawl finished; there is nothing left to do with other proxies.
    break
else:
    # Every proxy failed (or the list is empty): say so instead of ending
    # silently. SystemExit with a message writes it to stderr, exit status 1.
    raise SystemExit('bundes-telefonbuch crawl failed: no working proxy')