Traceback (most recent call last):
  File "D:\python\final2822016.py", line 132, in <module>
    connection.commit()
  File "C:\Users\amanp\AppData\Local\Programs\Python\Python35-32\lib\site-packages\pymysql\connections.py", line 758, in commit
    self._read_ok_packet()
  File "C:\Users\amanp\AppData\Local\Programs\Python\Python35-32\lib\site-packages\pymysql\connections.py", line 737, in _read_ok_packet
    pkt = self._read_packet()
  File "C:\Users\amanp\AppData\Local\Programs\Python\Python35-32\lib\site-packages\pymysql\connections.py", line 946, in _read_packet
    packet_header = self._read_bytes(4)
  File "C:\Users\amanp\AppData\Local\Programs\Python\Python35-32\lib\site-packages\pymysql\connections.py", line 982, in _read_bytes
    2013, "Lost connection to MySQL server during query")
pymysql.err.OperationalError: (2013, 'Lost connection to MySQL server during query')

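The failure is pymysql client error 2013: the MySQL server dropped the connection while the script spent a long time fetching pages between queries (typically the server's wait_timeout elapsing, or an idle socket being closed along the way). Below is a minimal sketch of one way to guard each commit, assuming the script's connection object; the safe_execute helper is not part of the original code:

def safe_execute(connection, sql, params):
    # ping(reconnect=True) transparently re-opens the socket if the
    # server has dropped it. Any uncommitted work on the old connection
    # is lost, so each INSERT is executed and committed in one call.
    connection.ping(reconnect=True)
    with connection.cursor() as cursor:
        cursor.execute(sql, params)
    connection.commit()
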
from bs4 import BeautifulSoup
import urllib.request
import re
import json
import pymysql
import pymysql.cursors


connection = pymysql.connect(host='198.46.81.14',
                             user='kanton5_scrape',
                             password='Heineken4291',
                             db='kanton5_scrape',
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)

# First, fetch the search-results page for the "glassware" keyword
r = urllib.request.urlopen('http://i.cantonfair.org.cn/en/ExpExhibitorList.aspx?k=glassware')
soup = BeautifulSoup(r, "html.parser")

links = soup.find_all("a", href=re.compile(r"expexhibitorlist\.aspx\?categoryno=[0-9]+"))
linksfromcategories = [link["href"] for link in links]

string = "http://i.cantonfair.org.cn/en/"  # base URL to prefix relative hrefs
linksfromcategories = [string + x for x in linksfromcategories]

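# Crawl hierarchy: category pages -> sub-category pages -> sub-sub-category
# pages -> company contact pages -> product pages; companies and their
# products are written to MySQL as they are found.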
for link in linksfromcategories:

    response = urllib.request.urlopen(link)
    soup2 = BeautifulSoup(response, "html.parser")

    links2 = soup2.find_all("a", href=re.compile(r"ExpExhibitorList\.aspx\?categoryno=[0-9]+"))
    linksfromsubcategories = [link["href"] for link in links2]

    linksfromsubcategories = [string + x for x in linksfromsubcategories]
    for link in linksfromsubcategories:

        response = urllib.request.urlopen(link)
        soup3 = BeautifulSoup(response, "html.parser")
        links3 = soup3.find_all("a", href=re.compile(r"ExpExhibitorList\.aspx\?categoryno=[0-9]+"))
        linksfromsubcategories2 = [link["href"] for link in links3]

        linksfromsubcategories2 = [string + x for x in linksfromsubcategories2]
        for link in linksfromsubcategories2:

            response2 = urllib.request.urlopen(link)
            soup4 = BeautifulSoup(response2, "html.parser")
            companylink = soup4.find_all("a", href=re.compile(r"expCompany\.aspx\?corpid=[0-9]+"))
            companylink = [link["href"] for link in companylink]
            companydetail = soup4.find_all("div", id="contact")
            companylink = [string + x for x in companylink]
            my_list = list(set(companylink))  # de-duplicate company links

            for link in my_list:
                print(link)
                response3 = urllib.request.urlopen(link)
                soup5 = BeautifulSoup(response3, "html.parser")
                companydetail = soup5.find_all("div", id="contact")
                for d in companydetail:
                    # The contact <div> lists name, address, city, postal
                    # code and website as successive <li> items; the last
                    # two are not always present.
                    lis = d.find_all('li')
                    companyname = lis[0].get_text().strip()
                    companyaddress = lis[1].get_text().strip()
                    companycity = lis[2].get_text().strip()
                    try:
                        companypostalcode = lis[3].get_text().strip()
                        companypostalcode = companypostalcode.replace(",", "")
                    except IndexError:
                        companypostalcode = 'null'
                    try:
                        companywebsite = lis[4].get_text().strip()
                        companywebsite = companywebsite.replace("\xEF\xBC\x8Cifl...", "")
                    except IndexError:
                        companywebsite = 'null'

                try:
                    with connection.cursor() as cursor:
                        print('saving company details to db')
                        cursor.execute("""INSERT INTO company(
                            companyname,address,city,pincode,website)
                            VALUES (%s, %s, %s, %s, %s)""",
                            (companyname, companyaddress, companycity,
                             companypostalcode, companywebsite))
                        connection.commit()
                finally:
                    # No except clause: a failed INSERT or commit still
                    # prints this message and then re-raises.
                    print("Company Data saved")
                productlink = soup5.find_all("a", href=re.compile(r"ExpProduct\.aspx\?corpid=[0-9]+.categoryno=[0-9]+"))
                productlink = [link["href"] for link in productlink]

                productlink = [string + x for x in productlink]
                productlinkun = list(set(productlink))  # de-duplicate product links
                for link in productlinkun:

                    print(link)
                    responseproduct = urllib.request.urlopen(link)
                    soupproduct = BeautifulSoup(responseproduct, "html.parser")
                    productname = soupproduct.select('div[class="photolist"] li a')
                    for element in productname:
                        print("====================Product Name=======================")
                        productnames = element.get_text().strip()
                        print(productnames)
                        try:
                            with connection.cursor() as cursor:
                                # Create a new record
                                print('saving products to db')
                                cursor.execute("""INSERT INTO products(
                                    companyname,products)
                                    VALUES (%s, %s)""",
                                    (companyname, productnames))
                                connection.commit()
                        finally:
                            # As above, failures still reach this print.
                            print("Products Data Saved")
                            print("===================UshaAman======================")
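
For reference, the two INSERT statements above imply tables shaped roughly like the ones below. This is a guess reconstructed from the column lists in the script; the actual schema is not part of the paste.

# Assumed schema matching the INSERT column lists (names from the script,
# types are guesses):
with connection.cursor() as cursor:
    cursor.execute("""CREATE TABLE IF NOT EXISTS company(
        companyname VARCHAR(255),
        address     VARCHAR(255),
        city        VARCHAR(255),
        pincode     VARCHAR(64),
        website     VARCHAR(255))""")
    cursor.execute("""CREATE TABLE IF NOT EXISTS products(
        companyname VARCHAR(255),
        products    VARCHAR(255))""")
connection.commit()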