Advertisement
collinsanele

Web scraper for Text and Links

Nov 2nd, 2018
339
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.75 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. import requests
  3. from kivy.uix.textinput import TextInput
  4. from kivy.uix.label import Label
  5. from kivy.app import App
  6. from kivy.uix.floatlayout import FloatLayout
  7. from kivy.uix.button import Button
  8. from kivy.uix.scrollview import ScrollView
  9. from kivy.uix.popup import Popup
  10.  
  11.  
  12.  
  13.  
  14. class RootWidget(FloatLayout):
  15. def __init__(self, **kwargs):
  16. super().__init__(**kwargs)
  17. self.result = ''
  18. self.label_intro = Label(text='Webscrapper', size_hint=(None, None), size=(150,50),
  19. color=(0,0,1,1), pos_hint={'top':1, 'center_x':0.5})
  20. self.label_url = Label(text='URL:', size_hint=(None, None), pos_hint={'top':0.90})
  21. self.txtin_url = TextInput(text='',
  22. size_hint=(None, None), size=(500,200), pos_hint={'top':0.9, 'x':0.15})
  23. self.btn = Button(text='scrape text', size_hint=(None, None), size=(180,60), pos_hint={'top':0.695, 'x':0.150},
  24. on_press=self.scrape)
  25. self.btn3 = Button(text='scrape link',
  26. size_hint=(None, None), size=(180,60), pos_hint={'top':0.695, 'x':0.60}, on_press= self.scrape_link)
  27. self.sv = ScrollView(pos_hint={'top':0.599, 'center_x':0.50}, size_hint=(None, None),
  28. width=700, height=600)
  29. self.lab = TextInput(text='', size_hint_y=None, height=170000)
  30. self.popup1 = Popup(title='Error', content=Label(text='pls enter url'),
  31. size_hint=(None, None), size=(300,300), pos_hint={'top':0.68, 'center_x':0.50})
  32. self.btn2 = Button(text='Clear',
  33. size_hint=(None,None), size=(130,50),
  34. pos_hint={'top':0.06, 'center_x':0.50},
  35. on_press=self.clear)
  36.  
  37.  
  38.  
  39.  
  40.  
  41.  
  42. self.sv.add_widget(self.lab)
  43.  
  44.  
  45. self.add_widget(self.label_intro)
  46. self.add_widget(self.label_url)
  47. self.add_widget(self.txtin_url)
  48. self.add_widget(self.btn)
  49. self.add_widget(self.btn3)
  50. self.add_widget(self.sv)
  51. self.add_widget(self.btn2)
  52.  
  53.  
  54.  
  55. def scrape(self, *args):
  56. #self.popup1.open()
  57. try:
  58. a = self.txtin_url.text
  59. a = str(a)
  60. r = requests.get(a).text
  61. soup = BeautifulSoup(r, 'html.parser')
  62. matches = soup.find_all(['h1', 'h2',
  63. 'h3', 'span', 'strong', 'em', 'p', 'section'])
  64. except:
  65. self.popup1.open()
  66. else:
  67. for match in matches:
  68. self.result += match.text + '\n'
  69. self.lab.text = self.result
  70.  
  71. def clear(self, *args):
  72. self.lab.text = ''
  73. self.result = ''
  74.  
  75. def scrape_link(self, *args):
  76. try:
  77. a = self.txtin_url.text
  78. a = str(a)
  79. r = requests.get(a).text
  80. soup = BeautifulSoup(r, 'html.parser')
  81. links = soup.find_all('a')
  82. except:
  83. self.popup1.open()
  84. else:
  85. for link in links:
  86. self.result += link.text + '\n'
  87. self.result += link.get('href') + '\n'
  88.  
  89. self.lab.text = self.result
  90.  
  91.  
  92.  
  93.  
  94.  
  95. class Main(App):
  96. def build(self, *args):
  97. return RootWidget()
  98.  
  99. app = Main()
  100. app.run()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement