Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Load numbered HTML section files into the ``sections`` table of an SQLite DB.

Usage: <script> HTML_DIR DB_NAME

Every ``*.html`` file directly inside HTML_DIR is read in the numeric order
given by the digits of its file name. For each file the ``<body>`` element is
extracted, image sources are prefixed with ``./assets/``, ``<a>`` tags are
replaced by their children, and the result is stored as one row of
``sections`` in ``DB_NAME.db``. Existing rows are deleted first.

The whole rebuild runs in a single transaction: if any file fails, the
database keeps the content it had before the run.
"""
import sys
import sqlite3
import glob
import os
from bs4 import BeautifulSoup


def _section_order(filename):
    """Return the sort position encoded by the digits of the file's base name.

    Only the base name is inspected so that digits in directory names cannot
    leak into the ordering.

    Raises:
        ValueError: if the base name contains no digit at all.
    """
    digits = ''.join(ch for ch in os.path.basename(filename) if ch.isdigit())
    if not digits:
        raise ValueError('no section number in file name: %s' % (filename,))
    return int(digits)


def _section_html(markup):
    """Return the ``<body>`` of *markup* as the HTML fragment stored per section.

    Raises:
        ValueError: if the parsed document has no ``<body>`` element.
    """
    # NOTE: no parser is named here, so the result depends on which parser
    # Beautiful Soup selects on the machine running the script.
    soup = BeautifulSoup(markup)
    body = soup.find('body')
    if body is None:
        raise ValueError('document has no <body> element')

    # Point every image at the local assets directory; images without a
    # ``src`` attribute are left untouched.
    for image in body.find_all('img'):
        src = image.get('src')
        if src is not None:
            image['src'] = "./assets/%s" % (src,)

    # Drop the anchors themselves but keep whatever they contained.
    for link in soup.find_all('a'):
        link.unwrap()

    # NOTE: only an attribute-less ``<body>`` opening tag matches this literal
    # replacement; a body tag carrying attributes is stored unchanged.
    return str(body).replace('<body>', '<div>').replace('</body>', '</div>')


if len(sys.argv) < 3:
    sys.exit('usage: %s HTML_DIR DB_NAME' % (sys.argv[0],))

path = sys.argv[1]
print(path)
db_name = sys.argv[2]
db_path = '%s.db' % (db_name,)

conn = sqlite3.connect(db_path)
conn.text_factory = str
try:
    # ``with conn`` commits once on success and rolls back on any exception,
    # so a failed run never leaves a half-populated table behind.
    with conn:
        c = conn.cursor()
        c.execute('''CREATE TABLE IF NOT EXISTS sections
            (id INTEGER PRIMARY KEY AUTOINCREMENT, number NUMERIC, content TEXT)''')
        c.execute('DELETE FROM sections')

        filenames = sorted(glob.glob(os.path.join(path, '*.html')),
                           key=_section_order)
        for i, filename in enumerate(filenames):
            print(filename)
            with open(filename, 'r') as f:
                content = _section_html(f.read())
            c.execute("INSERT INTO sections (number, content) VALUES (?, ?)",
                      (i, content))
finally:
    conn.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement