Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- import re
- import os
- import pycurl
- import lxml.html
- from lxml import etree
- import StringIO
- import html5lib
- from html5lib import treebuilders
class vk_dwl():
    """Download the audio list of a vk.com account through one pycurl handle.

    The class targets Python 2 (it relies on the ``StringIO`` module imported
    by this file); the syntax used here is also valid on Python 3.

    Typical use::

        dl = vk_dwl(email, password)
        dl.login()
        dl.get_audio(vkid)

    Attributes:
        email, password: credentials sent by ``login()``.
        cookie: value passed to ``pycurl.COOKIEFILE`` on every request.
        crl: the shared ``pycurl.Curl`` handle.
        errlog: ``err.log`` opened for appending; failed downloads are
            recorded there.
    """

    # Control characters removed from fetched pages before HTML parsing.
    # NOTE: an earlier, much broader character-class filter was disabled in
    # favour of this short list.
    filt = re.compile('(?u)[\x00\x03\x10\x13]')

    # File-name clean-up patterns, applied in this order by _fn_corr().
    # fn1 must require at least one space: a pattern that can match the empty
    # string would insert a space between every pair of characters.
    fn1 = re.compile('(?u) +')       # run of spaces -> single space
    fn2 = re.compile('(?u)^ *| *$')  # leading / trailing spaces
    fn3 = re.compile('(?u) -$')      # dangling " -" at the end
    fn4 = re.compile('(?u)/')        # path separator -> "-"

    def __init__(self, email, password):
        """Store the credentials, create the curl handle and open err.log."""
        self.email = email
        self.password = password
        # Per the libcurl documentation an empty COOKIEFILE name enables the
        # cookie engine without reading a file, so cookies received by one
        # request are sent with the following ones on the same handle.
        self.cookie = ''
        self.crl = pycurl.Curl()
        self.errlog = open('err.log', 'a')

    def __del__(self):
        """Close err.log if it was opened."""
        # __init__ may have failed before errlog was assigned (for example in
        # pycurl.Curl()); never raise AttributeError from the finalizer.
        errlog = getattr(self, 'errlog', None)
        if errlog is not None:
            errlog.close()

    def _fetch(self, url, write_cb):
        """Perform one request for *url*, passing the body to *write_cb*."""
        crl = self.crl
        crl.setopt(pycurl.URL, url)
        # A header given as "Name:" with no value removes that header from
        # the request (libcurl CURLOPT_HTTPHEADER behaviour).
        crl.setopt(pycurl.HTTPHEADER, ["Accept:"])
        crl.setopt(pycurl.WRITEFUNCTION, write_cb)
        crl.setopt(pycurl.FOLLOWLOCATION, 1)
        crl.setopt(pycurl.MAXREDIRS, 5)
        crl.setopt(pycurl.COOKIEFILE, self.cookie)
        crl.perform()

    def get_url(self, url):
        """Fetch *url* and parse it as HTML.

        Returns:
            ``(tree, source)`` where *tree* is the html5lib/lxml parse result
            and *source* is the page text with the ``filt`` control
            characters removed, or ``None`` when *url* is empty.
        """
        if not url:
            return None
        buf = StringIO.StringIO()
        self._fetch(url, buf.write)
        src = self.filt.sub('', buf.getvalue())
        tree = html5lib.parse(src, treebuilder="lxml",
                              namespaceHTMLElements=False)
        return tree, src

    def get_dump(self, url, fname):
        """Download *url* into the file *fname*.

        Does nothing when either argument is empty. The file is written in
        binary mode. If the transfer fails, the partially written file is
        removed (so a later run does not mistake it for a finished download)
        and the exception propagates to the caller.
        """
        if not (url and fname):
            return
        out = open(fname, 'wb')
        done = False
        try:
            self._fetch(url, out.write)
            done = True
        finally:
            out.close()
            if not done and os.path.exists(fname):
                os.remove(fname)

    def get_file(self, fname):
        """Parse a local HTML file.

        Returns:
            ``(tree, source)`` with *source* being the file content exactly
            as read.
        """
        with open(fname, 'r') as f:
            res = f.read()
        tree = html5lib.parse(res, treebuilder="lxml",
                              namespaceHTMLElements=False)
        return tree, res

    def login(self):
        """Request the vk.com login URL with the stored credentials.

        The response is currently ignored and nothing is returned.
        """
        # Function-scope import so the module's import block stays untouched.
        try:
            from urllib import urlencode  # Python 2
        except ImportError:
            from urllib.parse import urlencode  # Python 3
        # Encode the values so characters such as '&', '+' or '#' in the
        # e-mail or password cannot break the query string.
        query = urlencode([('email', self.email), ('pass', self.password)])
        self.get_url('http://vk.com/login.php?' + query)
        # TODO: parse the returned script for authorization errors and
        # return an error code.

    @staticmethod
    def _first(items):
        """Return the first element of *items*, or None when it is empty."""
        return items[0] if items else None

    @staticmethod
    def _page_url(base, offset):
        """Return *base* with an ``offset`` query parameter when non-zero."""
        if not offset:
            return base
        sep = '&' if '?' in base else '?'
        return base + sep + 'offset=' + str(offset)

    def _fn_corr(self, fname):
        """Turn "artist - title" text into a usable file name.

        Collapses runs of spaces, strips spaces at both ends, drops a
        trailing " -" and replaces "/" with "-". Returns ``None`` for an
        empty or ``None`` input.
        """
        if not fname:
            return None
        res = self.fn1.sub(' ', fname)
        res = self.fn2.sub('', res)
        res = self.fn3.sub('', res)
        return self.fn4.sub('-', res)

    def _audio_page(self, base, offset):
        """Fetch one page of the audio list.

        Returns ``(tree, links, names)``: the parsed page, its
        ``td.play_btn`` cells and its ``td.info`` cells.
        """
        tree = self.get_url(self._page_url(base, offset))[0]
        links = tree.findall('//td[@class="play_btn"]')
        names = tree.findall('//td[@class="info"]')
        return tree, links, names

    @staticmethod
    def _audio_total(tree):
        """Return the track count shown in the page summary, 0 if absent."""
        texts = tree.xpath(
            '//div[@class="summary"]/b[@id="audio_summary"]/text()')
        if not texts:
            return 0
        found = re.search('(?u)[0-9]+', texts[0])
        return int(found.group(0)) if found else 0

    def _download_track(self, link, info, url_tail, lst):
        """Download the track described by one (play_btn, info) cell pair.

        *url_tail* is the compiled pattern that strips the trailing
        ",<digits>" from the raw URL value; *lst* is the open ``url.lst``
        file that receives one line per successfully downloaded URL.
        """
        try:
            # The URL is read from the third attribute value of the cell's
            # second child. NOTE(review): this depends on the exact page
            # markup -- confirm against a current page.
            raw = link[1].values()[2]
        except IndexError:
            print('ERROR: Unexpected play button markup, row skipped')
            return
        url = url_tail.sub('', raw)

        artist = self._first(
            info.xpath('./div[@class="title_wrap"]/b/a/text()'))
        # The title is either wrapped in a link or plain text in the span.
        title = (self._first(info.xpath('.//span[@class="title"]/a/text()'))
                 or self._first(info.xpath('.//span[@class="title"]/text()')))
        fn = self._fn_corr(' - '.join([p for p in (artist, title) if p]))
        if not fn:
            return
        fn += '.mp3'

        if os.path.exists(fn):
            print('PASS: ' + fn)
            return

        print('Download: ' + url + ' -> ' + fn)
        try:
            self.get_dump(url, fn)
            lst.write(url + '\n')
        except Exception:
            # Best effort: report the failure and carry on with the next row.
            print('ERROR: Can\'t download file ' + fn)
            try:
                self.errlog.write('ERROR: Can\'t download ' + url +
                                  ' to file ' + fn + '\n')
            except Exception:
                print('Can\'t write filename ' + fn + ' to logfile')

    def get_audio(self, vkid=None):
        """Download every track of the audio list into the current directory.

        Args:
            vkid: value for the ``friend`` query parameter; when empty the
                plain ``http://vk.com/audio`` list is used.

        Files that already exist are skipped. URLs downloaded in this run are
        written to ``url.lst`` (one per line); failures go to ``err.log``.
        """
        # TODO: keep the URL list between runs -- load url.lst at start-up
        # and check each URL against it before downloading.
        base = 'http://vk.com/audio'
        if vkid:
            base += '?friend=' + str(vkid)

        tree, links, names = self._audio_page(base, 0)
        total = self._audio_total(tree)
        # The number of rows on the first page is taken as the page size.
        per_page = len(links)
        if not total or not per_page:
            print('ERROR: No audio tracks found at ' + base)
            return

        pages = (total + per_page - 1) // per_page  # ceiling division
        url_tail = re.compile('(?u),[0-9]+$')
        lst = open('url.lst', 'w')
        try:
            for page in range(pages):
                offset = page * per_page
                if page:
                    tree, links, names = self._audio_page(base, offset)
                    print('=-' * 15 + ' ' + str(offset) + ' ' + '-=' * 15)
                for link, info in zip(links, names):
                    self._download_track(link, info, url_tail, lst)
        finally:
            lst.close()
# Script driver: create the downloader, log in, then fetch the audio list
# for the given id. The credentials and the id look like placeholders --
# replace them before running.
tmp = vk_dwl(email='xxx@mail.ru', password='pass')
tmp.login()
tmp.get_audio(vkid='id')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement