Advertisement
Guest User

Untitled

a guest
Mar 22nd, 2019
108
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.90 KB | None | 0 0
  1. import sqlite3
  2. from Modules.Cleaner import Cleaner
  3.  
  4.  
  5. class Content:
  6.  
  7. def __init__(self, db_file):
  8. self.categories = []
  9. self.conn = sqlite3.connect(db_file)
  10. self.cursor = self.conn.cursor()
  11. self.cleaner = Cleaner()
  12.  
  13. def get_ids(self):
  14. return [id for id in self.cursor.execute('SELECT id FROM wikiData')]
  15.  
  16. def get_urls(self):
  17. return [url for url in self.cursor.execute('SELECT url FROM wikiData')]
  18.  
  19. def get_page_by_id(self, id):
  20. return str(self.cleaner.clean_text(self.cursor.execute('SELECT content FROM wikiData WHERE id=?', id)
  21. .fetchone()[0]))
  22.  
  23. def get_url_by_id(self, id):
  24. return str(self.cursor.execute('SELECT url FROM wikiData WHERE id=?', id).fetchone()[0])
  25.  
  26. def __iter__(self):
  27. for id in self.get_ids():
  28. page = self.get_page_by_id(id)
  29. yield self.cleaner.clean_text(page).split()
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement