Advertisement
Guest User

Untitled

a guest
Jul 28th, 2016
76
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.72 KB | None | 0 0
import os
import re
import threading
from urllib.parse import urljoin
from urllib.request import urlopen

import mysql.connector
from bs4 import BeautifulSoup
from mysql.connector import errorcode
  8. song=0
  9. page=0
  10.  
  11.  
  12. def get_links(url,superpage):
  13. global song
  14. global page
  15. superlength=len(superpage)
  16. try:
  17. ip=urlopen(url)
  18. inp=BeautifulSoup(ip,"html.parser")
  19. ip.close()
  20. except:
  21. print("link not working moveving to next link")
  22. ip=""
  23. inp=BeautifulSoup(ip,"html.parser")
  24.  
  25. l = inp.find_all('a',href=True)
  26. for links in l:
  27. main=links['href']
  28.  
  29. if len(main)>=2:
  30. #remove hashtag pages
  31. if main[0]=="#":
  32. main="hashtag"
  33. elif main[0]==".":
  34. main=main[1:]
  35. #detect http links
  36. elif main[:7]=="http://" or main[:8]=="https://":
  37. main=main
  38. #add http to // links
  39. elif main[:2]=="//":
  40. main="http:"+main
  41. #add paraent to / link
  42. elif main[0]=="/":
  43. main=url+main
  44. #add superpage to other links
  45. else:
  46. main=superpage+"/"+main
  47. #replace space with %20
  48. main=main.replace(" ","%20")
  49. if main[-3:]=="mp3":
  50. song=song+1
  51. pos=main.rfind("/")
  52. name=main[pos+1:]
  53. name=name.replace("%20"," ");
  54. sql = "INSERT INTO music (name,link,parent,super)VALUES ('"+name+"','"+main+"','"+url+"','"+superpage+"')"
  55.  
  56. #create connection
  57. try:
  58. conn=mysql.connector.connect(
  59. user='root',
  60. password='root',
  61. host='localhost',
  62. database='musicSearch',
  63. port='8889'
  64. )
  65. except mysql.connector.Error as e:
  66. print(e)
  67.  
  68. cur=conn.cursor(buffered=True)
  69. cur.execute(sql)
  70. cur.close()
  71. conn.close()
  72.  
  73. #working with otherlinks
  74. else:
  75. #check if page is done
  76. #create connection
  77. try:
  78. conn=mysql.connector.connect(
  79. user='root',
  80. password='root',
  81. host='localhost',
  82. database='musicSearch',
  83. port='8889'
  84. )
  85. except mysql.connector.Error as e:
  86. print(e)
  87.  
  88. cur=conn.cursor(buffered=True)
  89. qq = "SELECT page from done where page='"+main+"'"
  90. cur.execute(qq)
  91. n=cur.rowcount
  92. cur.close()
  93. conn.close()
  94. if n<=0:
  95. try:
  96. conn=mysql.connector.connect(
  97. user='root',
  98. password='root',
  99. host='localhost',
  100. database='musicSearch',
  101. port='8889'
  102. )
  103. except mysql.connector.Error as e:
  104. print(e)
  105.  
  106. cur=conn.cursor(buffered=True)
  107. x = "INSERT INTO done (page) VALUES ('"+main+"')"
  108. cur.execute(x)
  109. conn.commit()
  110. cur.close()
  111. conn.close()
  112. page=page+1;
  113. t=threading.Thread(target=get_links,args=(main,superpage))
  114. t.start()
  115.  
  116.  
  117.  
  118. _=os.system("clear")
  119. print(threading.activeCount())
  120. print("songs:{}".format(song))
  121. print("Pages Done:{}".format(page))
  122.  
  123.  
  124.  
  125. get_links("http://songspk3.co/","http://songspk3.co/")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement