# Untitled

import requests
from bs4 import BeautifulSoup
from sys import argv


def write(data, filename):
    """Save the string form of *data* to *filename* as UTF-8 text.

    The file is opened in ``'w'`` mode, so an existing file with the same
    name is truncated and overwritten.
    """
    with open(filename, mode='w', encoding='UTF-8') as sink:
        rendered = str(data)
        sink.write(rendered)


def _node_text(node):
    """Return ``node.text``, or an empty string when the element was not found."""
    return '' if node is None else node.text


def _safe_filename(title):
    """Build a ``.txt`` file name from a story title.

    The title is first trimmed at both ends with the same character set the
    script has always used, so titles that already produced a valid name keep
    it.  Characters that are not allowed in file names on common filesystems
    (path separators, ``<>:"|?*`` and control characters) are then replaced
    with ``_`` wherever they occur, not only at the ends.
    """
    trimmed = title.strip('?.-=()!@#$%^&*_')
    forbidden = '<>:"/\\|?*'
    cleaned = ''.join(
        '_' if ch in forbidden or ord(ch) < 32 else ch for ch in trimmed
    )
    if not cleaned.strip():
        # Nothing usable is left (empty or punctuation-only title).
        cleaned = 'untitled'
    return cleaned + '.txt'


def _extract_story(soup):
    """Collect the story fields from a parsed story page into a dict.

    Elements that are absent from the page yield empty strings instead of
    raising ``AttributeError``.
    """
    rating = soup.find('li', class_='topic-rating js-vote')
    likes_node = rating.find('span') if rating is not None else None
    date_part = _node_text(soup.find('span', class_='topic-date'))
    time_part = _node_text(soup.find('span', class_='topic-time'))
    author_node = soup.find(
        'a', class_='userlogo link link-dual link-lead link-clear')
    return {
        'title': _node_text(soup.find('h2', class_='topic-title accent')).strip(),
        'author': _node_text(author_node).strip(),
        'time': date_part + ' ' + time_part,
        'likes': _node_text(likes_node).strip(),
        'comments_count': _node_text(soup.find('span', id='count-comments')),
        'tag': _node_text(soup.find('a', class_='link link-lead link-blue')),
        'text': _node_text(soup.find('div', class_='topic-text')).strip(),
    }


def get_story_data(stories_links, timeout=30):
    """Download every story page and save its fields to ``<title>.txt``.

    Args:
        stories_links: iterable of story page URLs.
        timeout: seconds to wait for each HTTP response; without it a
            stalled server would block the script indefinitely.

    For each URL the page is fetched and parsed with the ``lxml`` parser, the
    fields (title, author, time, likes, comments_count, tag, text) are
    gathered into a dict, and the dict is passed to ``write``, which stores
    its string form.  The file name is derived from the story title; two
    stories with the same title therefore end up in the same file, the later
    one overwriting the earlier.
    """
    for story in stories_links:
        response = requests.get(story, timeout=timeout)
        soup = BeautifulSoup(response.text, 'lxml')
        data = _extract_story(soup)
        write(data, _safe_filename(data['title']))


def get_page_links(pages_count, timeout=30):
    """Return the URLs of the first *pages_count* listing pages.

    Args:
        pages_count: how many listing pages to collect.  The crawl stops when
            the number of collected links equals this value; because counting
            starts at 1, a value below 1 never matches and the crawl goes on
            until a page has no "next page" link.
        timeout: seconds to wait for each HTTP response.

    Returns:
        A list that always starts with ``'http://sramo.org/index/page1/'``,
        followed by the ``href`` of each "next page" anchor that was found.
        The hrefs are used as-is for the following request, so they are
        assumed to be absolute URLs.

    A page is downloaded only when its "next page" link is actually needed,
    so asking for a single page performs no request at all and the last
    collected page is not fetched here.
    """
    links = ['http://sramo.org/index/page1/']
    counter = 1
    current_url = 'http://sramo.org'
    while counter != pages_count:
        html = requests.get(current_url, timeout=timeout).text
        soup = BeautifulSoup(html, 'lxml')
        next_anchor = soup.find('a', class_='js-paging-next-page')
        if next_anchor is None:
            break
        link = next_anchor.get('href')
        if not link:
            # An anchor without a target cannot be followed.
            break
        links.append(link)
        counter += 1
        current_url = link
    return links


def get_stories_links(page_links, timeout=30):
    """Collect the story URLs found on the given listing pages.

    Args:
        page_links: iterable of listing page URLs.
        timeout: seconds to wait for each HTTP response.

    Returns:
        A list with the ``href`` of every
        ``<a class="link link-lead link-clear link-dark">`` element, in page
        order and then document order.  Anchors without an ``href`` are
        skipped so the result never contains ``None``.
    """
    stories_links = []
    for page_link in page_links:
        page = requests.get(page_link, timeout=timeout).text
        soup = BeautifulSoup(page, 'lxml')
        anchors = soup.find_all('a', class_='link link-lead link-clear link-dark')
        for anchor in anchors:
            s_link = anchor.get('href')
            if s_link:
                stories_links.append(s_link)
    return stories_links


def main():
    """Run the scraper: read the page count from the command line and crawl.

    Expects one command-line argument, the number of listing pages to
    process.  A missing or non-integer argument ends the program with a short
    message (via ``SystemExit``) instead of an ``IndexError``/``ValueError``
    traceback.
    """
    if len(argv) < 2:
        prog = argv[0] if argv else 'script'
        raise SystemExit('usage: {} PAGES_COUNT'.format(prog))
    try:
        pages_count = int(argv[1])
    except ValueError:
        raise SystemExit(
            'PAGES_COUNT must be an integer, got {!r}'.format(argv[1])
        ) from None
    page_links = get_page_links(pages_count)
    stories_links = get_stories_links(page_links)
    get_story_data(stories_links)


# Start the scraper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()