web_scraper_stage5
MeowalsoMeow, Jun 21st, 2021

A stage 5 solution to a Web Scraper exercise: for a user-given number of nature.com listing pages, it downloads every article of a user-given type and saves each one as a .txt file inside a per-page Page_N directory.
import os
import string

import requests
from bs4 import BeautifulSoup


def save_article(the_article_type, cwd, url):
    """Save every article of the requested type from one listing page."""
    r = requests.get(url)
    if r.status_code == 200:
        soup = BeautifulSoup(r.content, 'html.parser')
        title_news = []  # file names saved from this page
        puncs = string.punctuation
        for article in soup.find_all('article'):
            # The type label, e.g. <span class="c-meta__type">Research Summary</span>
            article_type = article.find('span', class_='c-meta__type').text
            if article_type == the_article_type:
                title = article.find('a', {'data-track-action': 'view article'}).text
                # Build the file name: spaces become underscores, punctuation is dropped.
                name = title.strip().translate(str.maketrans(' ', '_', puncs)) + '.txt'
                print(name)
                title_news.append(name)
                article_url = f"https://www.nature.com{article.a.get('href')}"
                r2 = requests.get(article_url)
                soup2 = BeautifulSoup(r2.content, 'html.parser')
                text = soup2.find('div', class_='article-item__body').text.strip()
                with open(name, 'wb') as file:
                    file.write(text.encode('utf-8'))


def save_articles():
    pages = int(input())  # number of listing pages to scrape
    _type = input()       # article type to keep, e.g. "Research Summary"
    base_dir = os.getcwd()
    for page in range(1, pages + 1):
        folder = f'Page_{page}'
        url = (f'https://www.nature.com/nature/articles'
               f'?searchType=journalSearch&sort=PubDate&page={page}')
        os.mkdir(folder)
        os.chdir(folder)
        save_article(_type, folder, url)
        # Return to the starting directory (the original used a hard-coded
        # absolute Windows path here, which only works on the author's machine).
        os.chdir(base_dir)


save_articles()
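
The two input() calls read the number of listing pages first, then the article type exactly as it is labelled on nature.com (e.g. "Research Summary"); each saved file name is printed as the script runs. As a quick sanity check for the c-meta__type class the scraper depends on, a small standalone snippet (hypothetical, not part of the original paste) can list the type labels present on the first listing page:

import requests
from bs4 import BeautifulSoup

url = ('https://www.nature.com/nature/articles'
       '?searchType=journalSearch&sort=PubDate&page=1')
soup = BeautifulSoup(requests.get(url).content, 'html.parser')
# Distinct values of the <span class="c-meta__type"> labels that
# save_article() compares against the requested type.
print({span.text.strip() for span in soup.find_all('span', class_='c-meta__type')})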