Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
__author__ = 'ZeLib0ba -> http://surin.ru'
# Usage: just run the script; it produces the file links.html.
# By default roughly the first 70 posts are taken; the number can be changed,
# see the note about the post limit in main().
import requests
from bs4 import BeautifulSoup
# HTML preamble written at the top of links.html.  It declares the document
# as UTF-8, pulls in the Ubuntu web font (latin + cyrillic subsets) and styles
# links as plain black text that is underlined only on hover.  The string ends
# right after the opening <body> tag; main() writes the closing tags itself.
head = """
<!DOCTYPE html>
<html>
<head lang="en">
<link href='http://fonts.googleapis.com/css?family=Ubuntu&subset=latin,cyrillic' rel='stylesheet' type='text/css'>
<meta charset="UTF-8">
<style type="text/css">
body,html {
font-family: 'Ubuntu', sans-serif;
color: #000000;
}
a:link,a:visited,a:active {color: #000000;text-decoration:none;}
a:hover {text-decoration:underline;}
</style>
</head>
<body>
"""
def page_url(url, timeout=30):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.  Without it a stalled server
            would block the script indefinitely.

    Returns:
        The response text when the server answers with status 200,
        ``None`` for any other status code, and the literal string
        ``'Error Parsing'`` when the request itself fails (connection
        error, timeout, malformed URL, ...).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Same sentinel the function has always returned on failure, so
        # existing callers keep working.  Only request errors are caught;
        # KeyboardInterrupt and programming errors now propagate.
        return 'Error Parsing'
    if response.status_code == 200:
        return response.text
    return None
def _is_topic_link(tag):
    """Return True if the anchor *tag* passes the topic-link filter."""
    # Anchors without an href yield None, which would make the ``in`` tests
    # below raise TypeError; treat them as an empty address instead.
    href = tag.get('href') or ''
    return (
        "http://forum.guns.ru/forummessage/9/" in href
        # NOTE(review): addresses containing '-' are presumably links to
        # later pages of a topic - confirm against the forum's URL scheme.
        and "-" not in href
        and not tag.get('target')
        # Skips anchors whose markup contains '>1<' (looks like the
        # "page 1" pager link - TODO confirm).
        and '>1<' not in str(tag)
    )


def main(limit=71):
    """Scrape the topic list of forum section 9 and write it to links.html.

    The page http://forum.guns.ru/forumtopics/9.html is downloaded with
    page_url(), every anchor accepted by _is_topic_link() is collected, and
    the anchors are written as a numbered, lower-cased list between the
    ``head`` preamble and the closing ``</body></html>`` tags.

    Args:
        limit: Maximum number of entries to write.  Numbering starts at 0,
            so the default of 71 produces entries 0..70.  Change this value
            to load a different number of posts.

    Side effects:
        Creates or overwrites ``links.html`` in the current directory and
        prints the parsed page, the anchors and the written entries to
        stdout.
    """
    page = page_url('http://forum.guns.ru/forumtopics/9.html')
    # page_url() returns None for non-200 answers; parse an empty document
    # in that case so the run ends with an empty list instead of a crash.
    # The parser is named explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(page or '', 'html.parser')
    print(soup.text)

    anchors = soup.find_all('a')
    print(anchors)
    topic_links = []
    for anchor in anchors:
        print(anchor)
        if _is_topic_link(anchor):
            topic_links.append(anchor)

    count = 0
    # The context manager guarantees the file is flushed and closed even if
    # writing fails part-way through.
    with open('links.html', 'w', encoding='utf-8') as out:
        out.write(head)
        for anchor in topic_links:
            if count >= limit:
                break
            # Skip the section-rules topic.  The check looks at the anchor's
            # text; ``in`` on the tag itself only compares direct children.
            if 'ПРАВИЛА РАЗДЕЛА' in anchor.get_text():
                continue
            # Drop bold markers so every entry is rendered uniformly.
            markup = str(anchor).replace('<b>', '').replace('</b>', '')
            print(markup)
            out.write(str(count) + '. ' + markup.lower() + "<br>")
            count += 1
        print(count)
        out.write('</body></html>')
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment