SHARE
TWEET

Untitled

a guest Mar 22nd, 2019 67 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import sqlite3
  2. from Modules.Cleaner import Cleaner
  3.  
  4.  
  5. class Content:
  6.  
  7.     def __init__(self, db_file):
  8.         self.categories = []
  9.         self.conn = sqlite3.connect(db_file)
  10.         self.cursor = self.conn.cursor()
  11.         self.cleaner = Cleaner()
  12.  
  13.     def get_ids(self):
  14.         return [id for id in self.cursor.execute('SELECT id FROM wikiData')]
  15.  
  16.     def get_urls(self):
  17.         return [url for url in self.cursor.execute('SELECT url FROM wikiData')]
  18.  
  19.     def get_page_by_id(self, id):
  20.         return str(self.cleaner.clean_text(self.cursor.execute('SELECT content FROM wikiData WHERE id=?', id)
  21.                     .fetchone()[0]))
  22.  
  23.     def get_url_by_id(self, id):
  24.         return str(self.cursor.execute('SELECT url FROM wikiData WHERE id=?', id).fetchone()[0])
  25.  
  26.     def __iter__(self):
  27.         for id in self.get_ids():
  28.             page = self.get_page_by_id(id)
  29.             yield self.cleaner.clean_text(page).split()
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top