Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python2
- # coding: UTF-8
- import urllib2
- import urllib
- import re
- import cookielib
- import sys
- import time
- import json
- import MySQLdb as db
- import datetime
- import logging
- import getopt
- import traceback
- from HTMLParser import HTMLParser
- # maximum audio duration in seconds
- MAX_AUDIO_DURATION = 600
- AUDIO_REGEX = re.compile(r"[\w0-9\. ,\-\(\)]+$", re.U)  # applied with .match() to author and song name in filterAudio()
- DEBUG = True  # main() logs at logging.DEBUG when true, logging.WARNING otherwise
- LOG_TO_CONSOLE = True  # consoleOut() prints nothing when this is false
def consoleOut(text):
    """Write text to stdout unless console output is switched off.

    Nothing is printed when the module-level LOG_TO_CONSOLE flag is false.
    A unicode value is encoded to ASCII first, silently dropping every
    character that cannot be represented; any other value is printed as is.
    """
    if LOG_TO_CONSOLE:
        printable = text.encode('ascii', 'ignore') if type(text) == unicode else text
        print(printable)
def getNumOfWeekInMonth(dtime):
    """Return the week-of-month index used to pick an entry of an image schedule.

    The index is ``(w + day) // 7`` where ``w`` is the weekday of the first
    day of dtime's month counted from Sunday (Sunday = 0 ... Saturday = 6)
    and ``day`` is dtime's day of the month.

    Args:
        dtime: a datetime.datetime (or datetime.date) value.

    Returns:
        An int between 0 and 5 inclusive.

    NOTE(review): with this formula a Saturday is already counted in the next
    index (the 7th of a month that starts on Sunday yields 1, not 0).  The
    values are kept as they are because existing image schedules are indexed
    by them; confirm the intent before changing the formula.
    """
    firstDayInMonth = dtime.replace(day=1)
    # isoweekday() is 1 (Monday) .. 7 (Sunday); "% 7" maps it onto the
    # Sunday-based numbering that strftime('%w') produces.
    firstDayWeekNumber = firstDayInMonth.isoweekday() % 7
    # Explicit floor division: the result is used as a list index and must
    # stay an integer even where "/" means true division.
    return (firstDayWeekNumber + dtime.day) // 7
- class Vk:
- pageEncoding = 'utf-8'
- connectionTimeout = 15
- def __init__(self, login, password):
- self.login = login
- self.password = password
- self.cj = cookielib.CookieJar()
- self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
- self.opener.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 5.1; rv:29.0) Gecko/20100101 Firefox/29.0'),
- ('Referer', 'http://www.lesstroy.net/')]
- # функция авторизации. Возвращает True в случае успешной авторизации, иначе False
- def auth(self):
- actionUrlReg = re.compile(r"action=\"(https://login\.vk\.com/\?act=login.+?)\"")
- html = self.opener.open("http://m.vk.com/login", timeout=Vk.connectionTimeout).read() # TODO сделать обработчик исключений
- actionUrlMatch = actionUrlReg.search(html)
- if actionUrlMatch is None:
- consoleOut("html doesnt contain url matching actionUrlReg in auth()")
- return False
- actionUrl = actionUrlMatch.group(1)
- postDic = {}
- postDic["email"] = self.login
- postDic["pass"] = self.password
- postStr = urllib.urlencode(postDic)
- html = self.opener.open(actionUrl, postStr, timeout=Vk.connectionTimeout).read() # TODO сделать обработчик исключений
- if self.needToAcceptOwner(html):
- logger = logging.getLogger('Vk.auth')
- logger.critical('The site needs accepting owner. Exit...')
- sys.exit(-1)
- if (html.find(self.toSiteEncode("Не удается войти.")) != -1):
- consoleOut("invalid login/password")
- return False
- else:
- return True
- def getOpener(self):
- return self.opener
- def logout(self):
- self.opener.open("https://login.vk.com/?act=logout_mobile&hash=f4ef82a16b90ea5a78&_origin=http%3A%2F%2Fm.vk.com",
- timeout=Vk.connectionTimeout).read()
- """постит сообщение в группу с адресом groupUrl, сообщением message, идентификатором картинки imageId,
- с музыкой, идентификаторы которой указаны в списке auidioIds, с таймером postTime (время в unix формате)
- """
- def postInGroup(self, groupName, message, imageId, audioIds, postTime):
- groupUrl = "http://m.vk.com/" + groupName
- html = self.opener.open(groupUrl).read()
- if self.needToAcceptOwner(html):
- logger = logging.getLogger('Vk.postInGroup')
- logger.critical('The site needs accepting owner. Exit...')
- sys.exit(-1)
- groupPageParser = GroupPageParser()
- groupPageParser.feed(self.toUnicode(html))
- formActionUrl = groupPageParser.getFormActionUrl(); # TODO добавить проверку на None
- postDic = {}
- postDic['message'] = message
- postDic['postpone'] = postTime
- postDic['attach1'] = imageId
- postDic['attach1_type'] = 'photo'
- c = 1
- # DUMMY UNCOMMENT
- postData = urllib.urlencode(postDic)
- response = self.opener.open(formActionUrl, postData, timeout=Vk.connectionTimeout) # TODO добавить обработчик исключений
- html = response.read()
- if self.needToAcceptOwner(html):
- logger = logging.getLogger('Vk.postInGroup')
- logger.critical('The site needs accepting owner. Exit...')
- sys.exit(-1)
- def findAudios(self, name):
- utf8Name = name.encode('utf-8')
- findAudioUrl = "http://m.vk.com/audio?act=search&q=" + urllib.quote_plus(utf8Name)
- html = self.opener.open(findAudioUrl, timeout=Vk.connectionTimeout).read(); # TODO добавить обработчик исключений
- if self.needToAcceptOwner(html):
- logger = logging.getLogger('Vk.findAudios')
- logger.critical('The site needs accepting owner. Exit...')
- sys.exit(-1)
- audioPageParser = AudioPageParser()
- audioPageParser.feed(self.toUnicode(html))
- return audioPageParser.getAudios()
- def toUnicode(self, text):
- # переводит текст, который в кодировки контакта, в юникод
- return text.decode(Vk.pageEncoding)
- def toSiteEncode(self, text):
- # переводит текст, который в кодировки скрипта(utf-8), в кодировку контакта
- return text.decode('UTF-8').encode(Vk.pageEncoding)
- def needToAcceptOwner(self, html):
- """проверяет html документ на наличие кода, требующего подтверждения владельца страницы"""
- securityCheckText = "act=security_check"
- if self.toSiteEncode(securityCheckText) in html:
- return True
- else:
- return False
- # -----------------------------------------------
class AudioInfo:
    """Plain record describing one audio track.

    All attributes start as None and are assigned by the code that creates
    the record.
    """

    def __init__(self):
        self.author = None
        self.songName = None
        self.duration = None
        self.identifier = None

    def __str__(self):
        """Return ``<author> - <songName>(<duration> s), id : <identifier>``.

        The result is a unicode string; every attribute that is still None
        is shown as ``NONE``.
        """
        def shown(value):
            return u"NONE" if value is None else value

        duration = u"NONE" if self.duration is None else str(self.duration)
        return u"".join([
            shown(self.author), u" - ", shown(self.songName),
            u"(", duration, u" s), id : ", shown(self.identifier),
        ])

    def __repr__(self):
        """Return the __str__() text with non-ASCII characters backslash-escaped.

        repr() must produce a native str.  Returning unicode makes the
        interpreter convert it with the ASCII codec, which fails for any
        non-ASCII author or title (for instance when a list of records is
        formatted into a log message).
        """
        escaped = self.__str__().encode('ascii', 'backslashreplace')
        # encode() already yields the native str type on Python 2; the decode
        # only matters where encode() yields a separate bytes type.
        return escaped if isinstance(escaped, str) else escaped.decode('ascii')
- # -----------------------------------------------
class AttributeUtils:
    """Helpers for the (name, value) attribute lists HTMLParser hands to handle_starttag()."""

    @staticmethod
    def getAttrsDic(attrsList):
        """Turn a list of (name, value) pairs into a dict.

        When a name occurs more than once the last value wins.
        """
        return dict((attr[0], attr[1]) for attr in attrsList)

    @staticmethod
    def hasAttributeEqualingValue(attrsDic, attribute, value):
        """Return True when attrsDic has key ``attribute`` and its value equals ``value``."""
        # "in" replaces the deprecated dict.has_key().
        return attribute in attrsDic and attrsDic[attribute] == value
- # -----------------------------------------------
class GroupPageParser(HTMLParser):
    """Parser for a group's page on the mobile (WAP) site, written for the
    version of the site served to Opera Mini.

    It remembers the ``action`` attribute of the last <form> start tag that
    has one.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        self.formActionUrl = None

    def getFormActionUrl(self):
        """Return the absolute form URL, or None when no form action was seen."""
        if self.formActionUrl is None:
            return None
        return "http://m.vk.com" + self.formActionUrl

    def handle_starttag(self, tag, attrs):
        """Record the action URL of <form> tags; other tags are ignored."""
        if tag != 'form':
            return
        action = dict(attrs).get('action')
        # A form without an action attribute must not abort parsing with a
        # KeyError, nor wipe out a URL found earlier.
        if action is not None:
            self.formActionUrl = action
- # -----------------------------------------------
class AudioPageParser(HTMLParser):
    """Collects AudioInfo records from an audio search results page.

    An item begins at ``<div class="audio_item ai_has_btn" data-id="...">``
    and is completed by the next ``</table>`` end tag.  Inside an item the
    parser reads:

    * ``<div class="ai_dur" data-dur="...">`` - duration, converted to int;
    * ``<span class="ai_artist">`` - text becomes the author;
    * ``<span class="ai_title">`` - text becomes the song name.

    Items whose identifier cannot be extracted are skipped (with a warning
    in the log) instead of aborting the whole feed() call.
    """

    _ID_PATTERN = re.compile(r"(\-?\d+_\d+)", re.U)

    def __init__(self):
        HTMLParser.__init__(self)
        self.audios = []
        self.isAudioSection = False
        self.isAuthorSection = False
        self.isSongNameSection = False
        self.buffer = ''
        self.curAudioDuration = None
        self.curAudioSongName = None
        self.curAudioAuthor = None
        self.curAudioId = None

    def getAudios(self):
        """Return the list of AudioInfo records collected so far."""
        return self.audios

    def handle_starttag(self, tag, attrs):
        attrsDic = dict(attrs)
        cssClass = attrsDic.get('class')
        if tag == 'div' and cssClass == 'audio_item ai_has_btn':
            self.isAudioSection = True
            # None when the attribute is absent; such an item is dropped
            # when it ends.
            self.curAudioId = attrsDic.get('data-id')
        elif self.isAudioSection:
            if tag == 'div' and cssClass == 'ai_dur':
                self.curAudioDuration = self._parseDuration(attrsDic.get('data-dur'))
            elif tag == 'span' and cssClass == 'ai_artist':
                self.isAuthorSection = True
            elif tag == 'span' and cssClass == 'ai_title':
                self.isSongNameSection = True

    def handle_endtag(self, tag):
        if not self.isAudioSection:
            return
        if tag == 'table':
            self._finishCurrentAudio()
        elif tag == 'span' and self.isAuthorSection:
            self.curAudioAuthor = self.buffer
            self.buffer = ''
            self.isAuthorSection = False
        elif tag == 'span' and self.isSongNameSection:
            self.curAudioSongName = self.buffer
            self.buffer = ''
            self.isSongNameSection = False

    def handle_data(self, data):
        # Text is only of interest inside the artist/title span of an item.
        if self.isAudioSection and (self.isAuthorSection or self.isSongNameSection):
            self.buffer += data

    def parseId(self, musicId):
        """Extract the ``<owner>_<audio>`` identifier from a data-id value.

        Returns the first substring of the form ``-?digits_digits``, or None
        when musicId is None or contains no such substring.
        """
        if musicId is None:
            return None
        found = self._ID_PATTERN.search(musicId)
        return found.group(1) if found else None

    def _finishCurrentAudio(self):
        """Store the item gathered so far and reset all per-item state."""
        identifier = self.parseId(self.curAudioId)
        if identifier is None:
            logging.getLogger('AudioPageParser').warning(
                'audio item skipped: no identifier in data-id %r', self.curAudioId)
        else:
            audio = AudioInfo()
            audio.duration = self.curAudioDuration
            audio.songName = self.curAudioSongName
            audio.author = self.curAudioAuthor
            audio.identifier = identifier
            self.audios.append(audio)
        self.curAudioDuration = None
        self.curAudioSongName = None
        self.curAudioAuthor = None
        self.curAudioId = None
        # Also clear the span flags and the text buffer so that an unclosed
        # span cannot leak text into the next item.
        self.isAudioSection = False
        self.isAuthorSection = False
        self.isSongNameSection = False
        self.buffer = ''

    @staticmethod
    def _parseDuration(rawDuration):
        """Return rawDuration as int, or None when it is missing or not a number."""
        try:
            return int(rawDuration)
        except (TypeError, ValueError):
            return None
- # -----------------------------------------------
class DataBaseInfo:
    """Holds the four values needed to open a database connection."""

    def __init__(self, host, login, password, dbName):
        self.host, self.login = host, login
        self.password, self.dbName = password, dbName
- # -----------------------------------------------
class VkGroupInfo:
    """Holds the posting settings of one group: its name, the message text
    and the image schedule."""

    def __init__(self, groupName, message, imageSchedule):
        self.groupName, self.message = groupName, message
        self.imageSchedule = imageSchedule
- # -----------------------------------------------
class ApplicationSettings:
    """Read-only view of the JSON configuration file.

    The whole document is loaded once in the constructor and kept in
    ``self.settings``; each getter returns one value from it.  A missing
    key surfaces as KeyError from the getter that needs it.
    """

    # Format of the 'dateFrom' / 'dateTo' settings, e.g. "31.12.2014".
    _DATE_FORMAT = '%d.%m.%Y'

    def __init__(self, fileName):
        """Load the JSON document stored in fileName."""
        # "with" closes the file even when json.load() raises.
        with open(fileName, 'r') as f:
            self.settings = json.load(f)

    def getGroupsInfo(self):
        """Return one VkGroupInfo per entry of 'VkGroups'.

        Each entry names its image schedule; the schedule itself is looked
        up in 'imageSchedules'.
        """
        schedules = self.settings['imageSchedules']
        return [VkGroupInfo(group['groupName'], group['message'], schedules[group[u'imageSchedule']])
                for group in self.settings['VkGroups']]

    def getVkLogin(self):
        return self.settings['VkCreditianals']['login']

    def getVkPassword(self):
        return self.settings['VkCreditianals']['password']

    def getDataBaseInfo(self):
        """Return the 'DBCreditianals' section as a DataBaseInfo."""
        credentials = self.settings['DBCreditianals']
        return DataBaseInfo(credentials['host'], credentials['login'],
                            credentials['password'], credentials['DBName'])

    def getAudiosNumToPost(self):
        return self.settings['audiosNumber']

    def getDateFrom(self):
        """Return 'dateFrom' parsed as a datetime (day.month.year)."""
        return datetime.datetime.strptime(self.settings['dateFrom'], self._DATE_FORMAT)

    def getDateTo(self):
        """Return 'dateTo' parsed as a datetime (day.month.year)."""
        return datetime.datetime.strptime(self.settings['dateTo'], self._DATE_FORMAT)

    def getPostTimes(self):
        """Return the 'postTimes' entries converted with strToTime()."""
        return [strToTime(timeStr) for timeStr in self.settings['postTimes']]

    def getMinTracksPerPost(self):
        return self.settings['minTracksPerPost']

    def getTimeoutMaxAttemps(self):
        return self.settings['timeoutMaxAttempts']

    def getTimeoutRepeatAt(self):
        return self.settings['timeoutRepeatAt']

    def getSearchMusicDelay(self):
        return self.settings['searchMusicDelay']

    def getPostingDelay(self):
        return self.settings['postingDelay']

    def getJumpToNextGroupDelay(self):
        return self.settings['jumpToNextGroupDelay']

    def getLogFile(self):
        return self.settings['logFile']
- # -----------------------------------------------
class MusicRepository:
    """Source of the track names to search for on a given weekday.

    The database lookup is currently short-circuited by a DUMMY return, so
    neither the cursor nor the table name is used at the moment.
    """

    weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

    def __init__(self, cursor, tableName):
        self.cursor = cursor
        self.tableName = tableName

    def getAudiosForWeekday(self, weekdayNum):
        """Return the list of track names for a weekday.

        weekdayNum is the number of the day, from 0 to 6 inclusive (an index
        into ``weekdays``).
        """
        return ["music"] * 50  # DUMMY
        # Everything below is unreachable until the DUMMY return is removed.
        query = "SELECT track FROM " + db.escape_string(self.tableName) + " WHERE day=%s;"
        self.cursor.execute(query, (MusicRepository.weekdays[weekdayNum],))
        return [row[0] for row in self.cursor.fetchall()]
- # -----------------------------------------------
def strToTime(timeStr):
    """Convert an ``"H:M"`` string to a datetime.time.

    Only the first two colon-separated fields are used; further fields are
    ignored.
    """
    fields = timeStr.split(':')
    hour = int(fields[0])
    minute = int(fields[1])
    return datetime.time(hour, minute)
def datetimeToUnixtime(dtime):
    """Return dtime as a unix timestamp (float).

    The conversion goes through time.mktime(), which interprets the value
    as local time.
    """
    localStruct = dtime.timetuple()
    return time.mktime(localStruct)
def db_connect(dbInfo):
    """Open a database connection described by dbInfo.

    Currently a stub: the DUMMY return hands back None and the real
    connection code after it is never reached.
    """
    return None  # DUMMY
    # Unreachable until the DUMMY return above is removed.
    return db.connect(host=dbInfo.host,
                      user=dbInfo.login,
                      passwd=dbInfo.password,
                      db=dbInfo.dbName,
                      charset='utf8')
def filterAudio(audioList):
    """Return the first acceptable audio of audioList, or None.

    An audio is acceptable when

    * its song name contains neither ``>>>`` nor ``<<<``;
    * its duration does not exceed MAX_AUDIO_DURATION;
    * both its author and its song name match AUDIO_REGEX.

    Records without an author or a song name are skipped.  When nothing is
    acceptable a debug message with the whole list is logged.
    """
    skipStrings = ('>>>', '<<<')
    for audio in audioList:
        # The substring test and the regex both raise TypeError on None, so
        # incomplete records have to be skipped first.
        if audio.author is None or audio.songName is None:
            continue
        if any(skipString in audio.songName for skipString in skipStrings):
            continue
        # An unknown duration (None) is let through; this is what the
        # Python 2 comparison "None <= number" did implicitly.
        shortEnough = audio.duration is None or audio.duration <= MAX_AUDIO_DURATION
        if shortEnough and AUDIO_REGEX.match(audio.author) and AUDIO_REGEX.match(audio.songName):
            return audio
    logging.debug(u"can't find audio in follow audio list: %s", audioList)
    return None
def usage():
    """Print the command-line help text through consoleOut()."""
    helpLines = [
        "usage: python vk.py [--config jsonConfigPath]",
        "Starts autoposting to groups with settings placed in jsonConfigPath (default config.json)",
    ]
    consoleOut("\n".join(helpLines))
def _callWithRetries(action, maxAttempts, retryDelay, logger):
    """Call action() until it returns, giving up after maxAttempts failures.

    Every failure (any exception derived from Exception) is reported on the
    console and in the log, then the call is repeated after retryDelay
    seconds.  SystemExit raised inside action() is not intercepted.

    Returns:
        (True, result of action()) on success, (False, None) when every
        attempt failed or maxAttempts is not positive.
    """
    failedAttempts = 0
    while failedAttempts < maxAttempts:
        try:
            return True, action()
        except Exception as e:
            consoleOut(str(e))
            logger.warning("Excepton. %s", e)
            failedAttempts += 1
            time.sleep(retryDelay)
    return False, None


def main():
    """Run the autoposter.

    Reads the configuration (``--config path``, default ``config.json``),
    logs in to the site and then, for every configured group, every date
    from dateFrom to dateTo and every configured post time, searches for up
    to ``audiosNumber`` tracks and submits a postponed post.

    The process exits with status -1 when authorization fails or when a
    network operation keeps failing after the configured number of attempts.
    getopt.GetoptError from argument parsing propagates to the caller.
    """
    cfg = 'config.json'
    opts, args = getopt.getopt(sys.argv[1:], "h", ['config='])
    for opt, arg in opts:
        # Routed through consoleOut() so that LOG_TO_CONSOLE is respected.
        consoleOut("%s %s" % (opt, arg))
        if opt == '--config':
            cfg = arg
        elif opt == '-h':
            usage()
            sys.exit(0)
    applicationSettings = ApplicationSettings(cfg)
    logLevel = logging.DEBUG if DEBUG else logging.WARNING
    logging.basicConfig(filename=applicationSettings.getLogFile(), level=logLevel)
    logger = logging.getLogger('main')
    logger.info('start working')
    vk_login = applicationSettings.getVkLogin()
    vk_password = applicationSettings.getVkPassword()
    dbInfo = applicationSettings.getDataBaseInfo()
    logger.debug('connection to DB')
    try:
        dbCon = db_connect(dbInfo)
    except db.Error as e:
        # e.message is an attribute, not a method; str(e) is what is wanted.
        consoleOut(str(e))
        logger.critical('Excepton. %s', e)
        logger.critical('can\'t connect to DB. Exit...')
        sys.exit(-1)
    dateFrom = applicationSettings.getDateFrom()
    dateTo = applicationSettings.getDateTo()
    postTimes = applicationSettings.getPostTimes()
    vkGroupsList = applicationSettings.getGroupsInfo()
    audiosNumber = applicationSettings.getAudiosNumToPost()
    minTracksPerPost = applicationSettings.getMinTracksPerPost()
    timeoutMaxAttempts = applicationSettings.getTimeoutMaxAttemps()
    timeoutRepeatAt = applicationSettings.getTimeoutRepeatAt()
    jumpToNextGroupDelay = applicationSettings.getJumpToNextGroupDelay()
    searchMusicDelay = applicationSettings.getSearchMusicDelay()
    vk = Vk(vk_login, vk_password)

    logger.info('authorization')
    authCompleted, authorized = _callWithRetries(vk.auth, timeoutMaxAttempts, timeoutRepeatAt, logger)
    if not authCompleted:
        logger.critical("max connection timeouts number is reached while auth. Exit...")
        sys.exit(-1)
    if not authorized:
        logger.critical("can't auth. Exit...")
        sys.exit(-1)

    logger.debug('start groupList iteration(Num groups: %d)', len(vkGroupsList))
    for vkGroup in vkGroupsList:
        logger.debug('start processing of group %s', vkGroup.groupName)
        musicRepository = MusicRepository(dbCon, vkGroup.groupName)  # DUMMY replace dbCon with dbCon.cursor()
        dateDelta = datetime.timedelta(1)  # used to advance dateIt by one day
        dateIt = dateFrom  # walks the dates from dateFrom to dateTo
        while dateIt <= dateTo:
            logger.debug('start processing date %s', dateIt.isoformat())
            try:
                dayMusicList = musicRepository.getAudiosForWeekday(dateIt.weekday())
            except db.Error as e:
                logger.error("can't get music for day %s. Go to next day...", dateIt.isoformat())
                consoleOut(str(e))
                consoleOut("can't get music for day %s. Go to next day..." % (dateIt.isoformat(),))
                dateIt += dateDelta
                continue
            consoleOut("post today: " + dayMusicList.__str__())
            # Index of the next unused track of the day; shared by all post
            # times so that no track is searched for twice.
            musicCounter = 0
            for t in postTimes:
                consoleOut("timebegin " + t.isoformat())
                logger.debug('start processing time %s', t.isoformat())
                dayAudiosToPost = []
                dtime = datetime.datetime.combine(dateIt, t)
                numOfWeekInMonth = getNumOfWeekInMonth(dtime)
                # identifier of the picture to post
                imageId = vkGroup.imageSchedule[dtime.strftime('%w')][numOfWeekInMonth]
                for m in xrange(audiosNumber):
                    searchSoundProcess = True
                    while searchSoundProcess and musicCounter < len(dayMusicList):
                        trackName = dayMusicList[musicCounter]
                        consoleOut(u'search for ' + trackName)
                        logger.debug(u'search for %s', trackName)
                        searchDone, audios = _callWithRetries(
                            lambda: vk.findAudios(trackName),
                            timeoutMaxAttempts, timeoutRepeatAt, logger)
                        if not searchDone:
                            logger.critical("max connection timeouts number is reached while search audio. Exit...")
                            logger.critical(u"Rest audios: %s\nDate: %s\nTime: %s", dayMusicList[musicCounter:],
                                            dateIt.isoformat(), t.isoformat())
                            sys.exit(-1)
                        time.sleep(searchMusicDelay)
                        audio = filterAudio(audios)
                        musicCounter += 1
                        if audio is not None:
                            consoleOut(u'audio found: ' + audio.__str__() + u'm =' + str(m))
                            dayAudiosToPost.append(audio.identifier)
                            searchSoundProcess = False
                        else:
                            consoleOut('audio not found! ' + str(len(audios)))
                if len(dayAudiosToPost) < minTracksPerPost:
                    # The day's track list is exhausted; later post times of
                    # this date cannot be filled either.
                    logger.debug('not enough audios to post')
                    break
                logger.debug("POSTING!")
                consoleOut("POSTING!")
                # The attempt counter lives inside _callWithRetries(); the
                # attempt limit itself is never modified, so a failing post
                # can no longer be retried forever.
                posted, _ = _callWithRetries(
                    lambda: vk.postInGroup(vkGroup.groupName, vkGroup.message, imageId,
                                           dayAudiosToPost, datetimeToUnixtime(dtime)),
                    timeoutMaxAttempts, timeoutRepeatAt, logger)
                if not posted:
                    logger.critical('max connection timeouts number is reached while posting message. Exit...')
                    logger.critical(u"Rest audios: %s\nDate: %s\nTime: %s",
                                    dayMusicList[musicCounter:],
                                    dateIt.isoformat(),
                                    t.isoformat())
                    sys.exit(-1)
            dateIt += dateDelta
        time.sleep(jumpToNextGroupDelay)
    logger.info("programm is exited normally")
if __name__ == '__main__':
    # Script entry point: run main() and report anything that escapes it.
    consoleOut("vkposter starting")
    try:
        main()
    except getopt.GetoptError:
        # Bad command-line arguments: show the help text.
        consoleOut("invalid arguments")
        usage()
    except Exception as error:
        consoleOut("Unhandled exception")
        consoleOut(str(error))
        traceback.print_exc()
    consoleOut("end")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement