Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Scrape business headings from a Yellow Pages results page and store them
# in the `info` table of the local `crawl` MySQL database.
#
# Flow: open the DB connection, download the page, collect every
# <div class="info">, keep the ones whose <h2> text matches ACCEPTED, print
# each kept heading and insert its stripped text as one row.
import re

import pika
import pymysql as mysql
import requests
from bs4 import BeautifulSoup as bs

__author__ = "Leyla Agayeva"

url = "https://www.yellowpages.com/austin-tx/plumbers"
query = """INSERT INTO `info`(data) VALUES (%s)"""

# Compiled once instead of on every loop iteration. With .match() this
# accepts text whose first line contains a digit immediately followed by a
# period (presumably the "1." rank prefix of a numbered listing -- confirm
# against the live page markup).
accepted = re.compile(r"^.*[0-9]\.")

# NOTE(review): credentials are hard-coded (root with an empty password);
# consider loading them from configuration or the environment.
conn = mysql.connect(host="localhost", user="root", password="", db="crawl")
try:
    with conn.cursor() as cur:
        # A timeout keeps the script from hanging forever on a stalled
        # connection; raise_for_status() stops us from parsing an HTTP
        # error page as if it were a results page.
        req = requests.get(url, timeout=30)
        req.raise_for_status()
        data = req.content

        soup = bs(data, "lxml")
        links = soup.find_all("div", {"class": "info"})

        for link in links:
            # Some "info" divs may have no <h2>; skip them instead of
            # crashing with AttributeError on None.
            if link.h2 is None:
                continue
            ddata = link.h2.text
            if not accepted.match(ddata):
                continue
            print(ddata)
            # Parameters are passed as a one-element tuple so the driver
            # treats the heading as a single bound value.
            cur.execute(query, (ddata.strip(),))

    # Commit once after all rows are queued; if anything above raises,
    # nothing is committed.
    conn.commit()
finally:
    # Always release the connection, even when the fetch or insert fails.
    conn.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement