# Lots of text from the wiki

#!/usr/bin/env python

import sys
import uuid
import string
import re

from urllib import urlopen
from bs4 import BeautifulSoup
import unicodedata
import time
import datetime


# URL requested for every page that gets scraped.
random_wiki_article_url = "https://en.wikipedia.org/wiki/Special:Random"
language_code = "en"

# id attribute of the <div> whose text is extracted from each page.
content_div_id = "mw-content-text"

# Output file name layout: <language>_<local timestamp>_<uuid1>.txt
timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
output_filename = "%s_%s_%s.txt" % (language_code, timestamp, uuid.uuid1())
output_dir = "/home/domi/Desktop/wikiscraps/en/"

# Create (or truncate) the output file up front so later writes can append.
with open(output_dir + output_filename, 'w'):
    pass


# Matches any single ASCII punctuation character. Compiled once here rather
# than on every loop iteration.
_PUNCTUATION_RE = re.compile('[%s]' % re.escape(string.punctuation))


def _clean_text(text):
    """Reduce raw page text to lowercase words separated by single spaces.

    Steps, in order: newlines become spaces and carriage returns are
    dropped; the text is lowercased; the literal "wikipedia" is removed;
    en dashes become spaces; the literal " v t e " is removed; ASCII
    punctuation becomes spaces; digit characters are dropped; runs of
    spaces are collapsed into one.

    :param text: text extracted from the page's content element.
    :return: the cleaned text.
    """
    text = text.replace('\n', ' ').replace('\r', '')
    text = text.lower()
    text = text.replace("wikipedia", "")
    # The en dash is written as an escaped unicode literal: a raw non-ASCII
    # byte string needs a source-encoding declaration and cannot be mixed
    # with unicode text on Python 2.
    text = text.replace(u"\u2013", " ")
    text = text.replace(" v t e ", "")
    text = _PUNCTUATION_RE.sub(' ', text)
    text = ''.join(ch for ch in text if not ch.isdigit())
    while "  " in text:
        text = text.replace("  ", " ")
    return text


for _ in range(0, 1000):
    # Always release the HTTP response, even if reading it fails.
    response = urlopen(random_wiki_article_url)
    try:
        html = response.read()
    finally:
        response.close()

    soup = BeautifulSoup(html, "html.parser")
    content = soup.find("div", {"id": content_div_id})
    if content is None:
        # find() returns None when no matching element exists; skip this
        # page instead of aborting the whole run with an AttributeError.
        print("no content div found, skipping article")
        continue

    # text processing
    text = _clean_text(content.getText())
    print(text)
    print(len(text))

    # Open the file only once there is something to write, in binary append
    # mode because the payload is UTF-8 encoded bytes. The context manager
    # closes the handle even if the write raises.
    with open(output_dir + output_filename, "ab") as fo:
        fo.write(text.encode('utf-8'))

print("do widzenia dzieciaczki")