Advertisement
Guest User

Untitled

a guest
Nov 19th, 2017
68
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.15 KB | None | 0 0
  1. #! python3
  2.  
  3. # OG Frosh - setting the pace, as usual :)
  4. #Please do not remove, i'm releasing such a nice gift in exchange for nothing
  5. #but a request that you leave these 3 consecutive lines of text.
  6.  
  7.  
  8. import requests, bs4, re
  9.  
  10. emailRegex = re.compile(r'''(
  11. [a-zA-Z0-9._-]+
  12. @
  13. [a-zA-Z0-9.-]+
  14. (\.[a-zA-Z]{2,4})
  15. )''', re.VERBOSE)
  16.  
  17.  
  18.  
  19. resFile = open('Alibaba_leads2.csv', 'a', encoding="UTF-8")
  20. finaldata = ''
  21. print('starting .....')
  22. for i in range(806,29226):
  23. url = "https://chinaexporter118.mingluji.com/node/"+str(i)
  24.  
  25. res = requests.get(url)
  26.  
  27. try:
  28. res.raise_for_status()
  29. except Exception as exc:
  30. continue
  31. mingluji = bs4.BeautifulSoup(res.text)
  32.  
  33. items = mingluji.select('.field-item')
  34.  
  35. found = False
  36.  
  37. for i in range(1, len(items)):
  38. if emailRegex.search(str(items[i].getText)) != None:
  39. found = True
  40. break
  41. if not found:
  42. continue
  43. company = items[0].getText()
  44. name = items[i-5].getText()
  45. email = items[i].getText()
  46.  
  47. finaldata = name + u', ' + email + u', ' + company + u'\n'
  48. print(finaldata)
  49. resFile.write(finaldata)
  50. finaldata = ''
  51. if (i % 100) == 0:
  52. print (i)
  53. print('done')
  54. resFile.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement