Advertisement
Guest User

Untitled

a guest
Aug 24th, 2019
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.11 KB | None | 0 0
  """Load a directory of numbered HTML files into a SQLite ``sections`` table.

  Usage: python <script> <html_dir> <db_name>

  Every ``*.html`` file in ``<html_dir>`` is read in numeric file-name order.
  For each file the ``<body>`` element is kept (re-wrapped in a ``<div>``),
  image sources are prefixed with ``./assets/``, ``<a>`` tags are removed while
  their contents are kept, and the result is stored as one row in
  ``<db_name>.db``.  Rows already present in ``sections`` are deleted first.
  """
  import sys
  import sqlite3
  import glob
  import os
  from bs4 import BeautifulSoup


  def _numeric_key(filename):
      """Return the digits of the file's base name as an int (0 if none).

      Only the base name is inspected, so digits in the directory part of the
      path cannot influence the ordering, and a name without any digit no
      longer makes ``int('')`` raise ValueError.
      """
      base = os.path.basename(filename)
      digits = ''.join(ch for ch in base if ch in '0123456789')
      return int(digits) if digits else 0


  path = sys.argv[1]
  print(path)
  db_name = sys.argv[2]
  db_path = '%s.db' % (db_name,)

  conn = sqlite3.connect(db_path)
  try:
      # Hand TEXT values back as plain ``str`` objects.
      conn.text_factory = str
      c = conn.cursor()
      c.execute('CREATE TABLE IF NOT EXISTS sections\n'
                '(id INTEGER PRIMARY KEY AUTOINCREMENT, number NUMERIC, content TEXT)')
      # Start from an empty table so re-running the script does not duplicate rows.
      c.execute('DELETE FROM sections')

      html_files = sorted(glob.glob(os.path.join(path, '*.html')), key=_numeric_key)
      for i, filename in enumerate(html_files):
          print(filename)
          # ``with`` closes the file even if parsing or the INSERT below fails.
          with open(filename, 'r') as f:
              content = f.read()

          # NOTE(review): no parser is named, so bs4 chooses one itself; the
          # produced markup may differ between machines -- confirm which
          # parser is intended before pinning it.
          soup = BeautifulSoup(content)
          # NOTE(review): find() yields None when the document has no <body>;
          # the next lines would then raise AttributeError -- verify inputs.
          body = soup.find('body')
          links = soup.find_all('a')

          # Point every image at the local assets directory.
          for image in body.find_all('img'):
              src = image.get('src')
              if src is None:
                  # An <img> without a src has nothing to rewrite.
                  continue
              image['src'] = "./assets/%s" % (src,)

          # Drop the <a> wrappers but keep whatever they contained.
          for a in links:
              a.unwrap()

          content = str(body)
          # Only a bare <body> tag (no attributes) is turned into a <div>.
          content = content.replace('<body>', '<div>').replace('</body>', '</div>')
          c.execute("INSERT INTO sections (number, content) VALUES (?, ?)", (i, content))
          # Commit per file so already-imported sections survive a later failure.
          conn.commit()
  finally:
      conn.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement