Create a chunk of text from Aozora Bunko

#!/usr/bin/python
# -*- coding: utf8 -*-

from selenium import webdriver
from bs4 import BeautifulSoup
from random import randint

SITE = 'http://www.aozora.gr.jp'
NUMBER_OF_SENTENCES = 3
# U+3002 IDEOGRAPHIC FULL STOP, written as an escape so this code stays
# pure ASCII and does not depend on the source-encoding declaration.
FULL_STOP = u'\u3002'


def parent_url(url):
    """Return *url* without its last '/'-separated segment.

    A string that contains no '/' is returned unchanged.
    """
    return url.rsplit('/', 1)[0]


def find_html_href(hrefs):
    """Return the first entry of *hrefs* that ends in '.html', or None."""
    for href in hrefs:
        if href.endswith('.html'):
            return href
    return None


def pick_sentences(text, count, pick=randint):
    """Return a run of *count* consecutive sentences taken from *text*.

    A sentence is everything up to and including a FULL_STOP character.

    Args:
        text: the text to sample from.
        count: number of consecutive sentences wanted (at least 1). If the
            text holds fewer sentences, all of them are returned.
        pick: callable ``pick(low, high)`` returning an integer in the
            inclusive range [low, high]; defaults to ``random.randint``.

    Returns:
        The selected substring. If *text* contains no full stop at all,
        *text* itself is returned.

    Raises:
        ValueError: if *count* is smaller than 1.
    """
    if count < 1:
        raise ValueError('count must be at least 1')

    # Position of every full stop. The leading -1 acts as the "full stop"
    # before the first sentence, so each sentence starts at stops[i] + 1.
    stops = [-1] + [i for i, char in enumerate(text) if char == FULL_STOP]
    available = len(stops) - 1
    if available == 0:
        return text

    count = min(count, available)
    # Both bounds of pick() are inclusive, so the highest valid start is
    # available - count; that keeps stops[first + count] inside the list.
    first = pick(0, available - count)
    return text[stops[first] + 1:stops[first + count] + 1]


def load_soup(driver, url):
    """Open *url* in *driver* and return its page source parsed with lxml."""
    print('Opening ' + url)
    driver.get(url)
    return BeautifulSoup(driver.page_source, 'lxml')


def find_required(soup, name, css_class):
    """Return the first *name* element with class *css_class* in *soup*.

    Raises:
        RuntimeError: if the page contains no such element.
    """
    tag = soup.find(name, class_=css_class)
    if tag is None:
        raise RuntimeError(
            'No <%s class="%s"> element found' % (name, css_class))
    return tag


def random_href(container):
    """Return the href of a randomly chosen link inside *container*.

    Raises:
        RuntimeError: if *container* holds no link with an href attribute.
    """
    # href=True skips anchors that have no href attribute at all.
    anchors = container.find_all('a', href=True)
    if not anchors:
        raise RuntimeError('No links to choose from')
    return anchors[randint(0, len(anchors) - 1)]['href']


def main():
    """Print a few consecutive sentences from a randomly chosen work.

    Walks from the front page through a random link in the fourth
    'summary' cell, then a random link of the 'list' table, then the
    '.html' link of the 'download' table, and finally samples
    NUMBER_OF_SENTENCES sentences from the 'main_text' div.
    """
    driver = webdriver.PhantomJS(executable_path=r'./phantomjs')
    try:
        soup = load_soup(driver, SITE)
        summary_cells = soup.find_all('td', class_='summary')
        # NOTE(review): the fourth 'summary' cell is presumably the index
        # of works; this depends on the front-page layout - confirm.
        index_href = random_href(summary_cells[3])

        soup = load_soup(driver, SITE + '/' + index_href)
        card_href = random_href(find_required(soup, 'table', 'list'))
        # Drops the first three characters of the href; presumably a
        # leading '../' relative to the index page - confirm.
        card_url = SITE + '/' + card_href[3:]

        soup = load_soup(driver, card_url)
        download_table = find_required(soup, 'table', 'download')
        hrefs = [a['href'] for a in download_table.find_all('a', href=True)]
        html_href = find_html_href(hrefs)
        if html_href is None:
            raise RuntimeError('No .html download link on ' + card_url)
        # Drops the first two characters of the href; presumably a leading
        # './' relative to the directory of the card page - confirm.
        text_url = parent_url(card_url) + '/' + html_href[2:]

        soup = load_soup(driver, text_url)
        # Read .text once; it is rebuilt from the tree on every access.
        main_text = find_required(soup, 'div', 'main_text').text
        print(pick_sentences(main_text, NUMBER_OF_SENTENCES))
    finally:
        # Always shut the browser down, even when a step above failed.
        driver.quit()


if __name__ == '__main__':
    main()