Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """
For this to work, create a bot and add it to the channel as an administrator.

requirements.txt:
    pyTelegramBotAPI==4.4.0
    requests==2.26.0

The name of the parameter file is passed as the first argument when the script is started.
File format:
{
    "token": "",
    "channel": -1,
    "channel_link": "https://t.me/durov/",
    "posts_file": "posts.db",
    "board": "fa",
    "thread_num": "",
    "thread_subject": "^часотред *[N#№НH]* *[0-9]+",
    "sleep_message": 3,
    "sleep_thread": 20,
    "sleep_error": 60,
    "check_thread": 1800
}

posts_file: SQLite database file. It holds a single table, `post` (post_num, message_id,
create_date), which is created on the first run if it does not exist.

thread_num and thread_subject are mutually exclusive:
    If thread_num is set, only that thread is loaded.
    If it is empty, the latest thread on the board whose subject matches thread_subject
    (regular-expression syntax) is used.
- """
- import json
- import requests
- import re
- import sqlite3
- import sys
- import telebot
- from io import BytesIO
- from time import sleep
- from datetime import datetime, timedelta
# Opening/closing pairs of the HTML tags that the board markup is converted to
# and that are kept when all other markup is stripped from a post.
tags = tuple(
    (f'<{tag_name}>', f'</{tag_name}>')
    for tag_name in ('u', 's', 'tg-spoiler', 'code', 'i', 'b')
)
# Matches one HTML tag: group 1 is '/' for a closing tag, group 2 is the tag name.
_TAG_RE = re.compile(r'<(/?)([A-Za-z][A-Za-z0-9-]*)(?:\s[^<>]*)?>')


def _has_text(piece):
    """Return True if `piece` contains anything besides tags and whitespace."""
    return bool(_TAG_RE.sub('', piece).strip())


def _safe_cut(row, limit):
    """Return an index in 1..limit at which `row` can be cut without splitting a tag."""
    last_open = row.rfind('<', 0, limit)
    # A '<' with no '>' before the cut means the cut would land inside a tag;
    # cut just before that tag instead (unless the tag starts the row).
    if last_open > 0 and row.find('>', last_open, limit) == -1:
        return last_open
    return limit


def _rebalance_tags(pieces):
    """Make every piece well-formed on its own: re-open tags carried over from
    the previous piece and close the ones still open at its end.  Pieces that
    hold no visible text are dropped (their tags are still tracked)."""
    open_tags = []  # (name, full opener text) of tags opened and not yet closed
    balanced = []
    for piece in pieces:
        prefix = ''.join(opener for _, opener in open_tags)
        for found in _TAG_RE.finditer(piece):
            name = found.group(2).lower()
            if not found.group(1):
                open_tags.append((name, found.group(0)))
                continue
            # Closing tag: drop the most recent matching opener.
            for pos in range(len(open_tags) - 1, -1, -1):
                if open_tags[pos][0] == name:
                    del open_tags[pos]
                    break
            else:
                # Closer without any opener: open it at the start of the piece.
                prefix = f'<{name}>' + prefix
        if _has_text(piece):
            suffix = ''.join(f'</{name}>' for name, _ in reversed(open_tags))
            balanced.append(prefix + piece + suffix)
    return balanced


def smart_split(comment, first, others):
    """
    Split a post into message-sized pieces.

    comment: the post text (HTML)
    first: maximum length of the first message
    others: maximum length of every following message

    Returns a tuple (str, list[str]): the first message and the remaining ones.

    The text is split on line breaks and the rows are glued back together into
    pieces no longer than the current limit (`first` until a piece with visible
    text has been produced, `others` afterwards).  A single row longer than the
    limit is cut hard, avoiding cuts inside a tag.  When a piece boundary falls
    inside a tag, the tag is closed at the end of the piece and re-opened at the
    start of the next one; these added tags are not counted against the limits.
    NOTE(review): this relies on Telegram measuring length after entity
    parsing -- confirm against the Bot API documentation.
    """
    if len(comment) <= first:
        return comment, []

    # Non-positive limits would stall the hard-split loop below.
    first = max(first, 1)
    others = max(others, 1)

    pieces = []
    limit = first
    current = None  # rows gathered for the piece being built; None = nothing yet
    for row in comment.split('\n'):
        candidate = row if current is None else current + '\n' + row
        if len(candidate) <= limit:
            current = candidate
            continue
        if current is not None:
            pieces.append(current)
            if _has_text(current):
                limit = others
        # The row itself may still be too long for one message.
        while len(row) > limit:
            cut = _safe_cut(row, limit)
            head = row[:cut]
            pieces.append(head)
            if _has_text(head):
                limit = others
            row = row[cut:]
        current = row
    if current is not None:
        pieces.append(current)

    messages = _rebalance_tags(pieces)
    if not messages:
        return '', []
    return messages[0], messages[1:]
# Read the run parameters from the JSON file named by the first command-line
# argument; the context manager closes the file once it has been parsed.
with open(sys.argv[1], encoding='utf-8') as params_file:
    params = json.load(params_file)

token = params['token']
channel = params['channel']
channel_link = params['channel_link']
posts_file = params['posts_file']
board = params['board']
thread_num = params['thread_num']
thread_subject = params['thread_subject']
m = params['sleep_message']  # pause after each forwarded post
t = params['sleep_thread']   # pause between two polls of the thread
e = params['sleep_error']    # pause after a failed request
check_interval = timedelta(seconds=params['check_thread'])

bot = telebot.TeleBot(token)
thread_curr = thread_num
# A date far in the past, so the first loop iteration looks the thread up at once.
last_check = datetime(2021, 1, 1)
error_cnt = 0

# Maps each forwarded board post to the Telegram message that carries it.
con = sqlite3.connect(posts_file)
cur = con.cursor()
cur.execute("""
create table if not exists post(post_num integer primary key,
message_id text,
create_date date default(datetime('now', 'localtime')))
""")
# Seconds to wait for any single HTTP response, so a stalled connection
# cannot block the polling loop forever.
_HTTP_TIMEOUT = 30


def _to_telegram_html(comment):
    """Convert a board post's HTML to the small tag set listed in `tags`."""
    # NOTE(review): the entity spellings below are assumed to be what the board
    # API emits; the regexes further down need literal '>>' to match.
    comment = (comment.replace('&quot;', '"').replace('<br>', chr(10))
               .replace('&#47;', '/').replace('&gt;', '>'))
    comment = re.sub(r'<a href=.+?(>>[0-9]+)</a>', r'\1', comment)  # reply to a post
    comment = re.sub(r'<a href=.+?(>>[0-9]+ →)</a>', r'\1', comment)  # reply to a post in another thread
    comment = re.sub(r'<a href=.+?(>>[0-9]+ \(OP\))</a>', r'\1', comment)  # reply to the OP post
    comment = re.sub(r'<a href=.+?>(.+?)\[РАСКРЫТЬ\]</a>', r'\1', comment)  # YouTube link
    comment = re.sub(r'<a href=.+?>(.+?)</a>', r'\1', comment)  # plain link
    comment = re.sub(r'<span class="u">(.+?)</span>', r'<u>\1</u>', comment, flags=re.DOTALL)  # underline
    comment = re.sub(r'<span class="s">(.+?)</span>', r'<s>\1</s>', comment, flags=re.DOTALL)  # strikethrough
    comment = re.sub(r'<span class="spoiler">(.+?)</span>', r'<tg-spoiler>\1</tg-spoiler>', comment,
                     flags=re.DOTALL)  # spoiler
    comment = re.sub(r'<span class="unkfunc">(.+?)</span>', r'<code>\1</code>', comment)  # greentext
    comment = re.sub(r'<em>(.+?)</em>', r'<i>\1</i>', comment, flags=re.DOTALL)  # italic
    comment = re.sub(r'<strong>(.+?)</strong>', r'<b>\1</b>', comment, flags=re.DOTALL)  # bold
    comment = re.sub(r'<span style.+?>(.+?)</span>', r'\1', comment)  # coloured (no Telegram equivalent)
    comment = re.sub(r'<sup>(.+?)</sup>', r'\1', comment, flags=re.DOTALL)  # superscript (no Telegram equivalent)
    comment = re.sub(r'<sub>(.+?)</sub>', r'\1', comment, flags=re.DOTALL)  # subscript (no Telegram equivalent)
    # Strip every remaining tag that is not in `tags`.
    comment = re.sub(r'<span class=".*?">(.+?)</span>', r'\1', comment, flags=re.DOTALL)
    allowed = {tag for pair in tags for tag in pair}
    for leftover in set(re.findall(r'<.+?>', comment)) - allowed:
        comment = comment.replace(leftover, '')
    return comment


def _attachment_urls(post):
    """Return the URLs of the post's attachments that pass the size/shape limits."""
    urls = []
    for item in post['files'] or []:
        path = item['path'].lower()
        # NOTE(review): 'size' is presumably in KB -- confirm against the board API.
        is_image = (0 < item['size'] <= 10240
                    and 0 < item['height'] <= 6000 and 0 < item['width'] <= 6000
                    and item['height'] / item['width'] <= 20
                    and item['width'] / item['height'] <= 20
                    and re.search(r'\.(jpg|jpeg|png|gif)$', path))
        is_video = item['size'] <= 20480 and re.search(r'(\.mp4)$', path)
        if is_image or is_video:
            urls.append('http://2ch.hk' + item['path'])
    return urls


while True:
    # No fixed thread configured: periodically look up the thread by subject.
    if not thread_num and datetime.now() - last_check > check_interval:
        try:
            resp = requests.get(f'http://2ch.hk/{board}/catalog_num.json', timeout=_HTTP_TIMEOUT)
            catalog = resp.json()
            matching = [thread['num'] for thread in catalog['threads']
                        if re.search(thread_subject, thread['subject'], flags=re.I)]
        except Exception as ex:
            print(datetime.now(), 'catalog_num', ex)
            sleep(e)
            continue
        if matching:
            thread_curr = matching[0]
        else:
            print(datetime.now(), 'catalog_num', 'no thread matches thread_subject')
            if not thread_curr:
                # Nothing to poll yet; try the catalog again later.
                sleep(e)
                continue
        last_check = datetime.now()

    # If the thread's OP post is already stored, resume after the newest stored
    # post; otherwise this is a new thread: start from its OP post and announce it.
    if cur.execute("select 1 from post where post_num = ?", (thread_curr,)).fetchone():
        last_post = cur.execute("select max(post_num) + 1 from post").fetchone()[0]
    else:
        last_post = thread_curr
        try:
            bot.unpin_all_chat_messages(channel)
            message = bot.send_message(channel,
                                       f'Тред: https://2ch.hk/{board}/res/{thread_curr}.html',
                                       disable_web_page_preview=True,
                                       disable_notification=True)
            bot.pin_chat_message(channel, message.id)
        except Exception as ex:
            print(datetime.now(), 'new_thread', ex)
            sleep(e)
            continue

    try:
        # Previous endpoint:
        # f'http://2ch.hk/makaba/mobile.fcgi?task=get_thread&board={board}&thread={thread_curr}&num={last_post}'
        url = f'http://2ch.hk//api/mobile/v2/after/{board}/{thread_curr}/{last_post}'
        resp = requests.get(url, timeout=_HTTP_TIMEOUT)
        # Read 'posts' inside the try so an error payload is logged, not fatal.
        posts = resp.json()['posts'] or []
    except Exception as ex:
        print(datetime.now(), 'get_thread', ex)
        sleep(e)
        continue

    for post in posts:
        comment = _to_telegram_html(post['comment'])
        if post['subject']:
            comment = '<b>' + post['subject'] + '</b>\n\n' + comment

        # Turn references to already forwarded posts into links to their messages.
        for reply in set(re.findall(r'(>>[0-9]+ \(OP\)|>>[0-9]+ →|>>[0-9]+)', comment)):
            replied = re.search(r'[0-9]+', reply)[0]
            replied_msg = cur.execute("select max(message_id) from post where post_num = ?",
                                      (replied,)).fetchone()[0]
            if replied_msg:
                comment = comment.replace(reply, '<a href="' + channel_link + replied_msg + '">' + reply + '</a>')

        files = _attachment_urls(post)
        tail = []  # reset so a failure below never reuses the previous post's tail
        try:
            if files:
                comment, tail = smart_split(comment, 1024, 4096)
                grp = []
                for i, file_url in enumerate(files):
                    caption = comment if i == 0 else ''
                    if file_url.lower().endswith('.mp4'):  # 'gif' temporarily excluded
                        grp.append(telebot.types.InputMediaVideo(file_url,
                                                                 caption=caption,
                                                                 parse_mode='HTML'))
                    else:
                        image_resp = requests.get(file_url, timeout=_HTTP_TIMEOUT)
                        grp.append(telebot.types.InputMediaPhoto(media=BytesIO(image_resp.content),
                                                                 caption=caption,
                                                                 parse_mode='HTML'))
                try:
                    message = bot.send_media_group(channel, grp, disable_notification=True)
                    message_id = str(message[0].id)
                except Exception as ex:
                    # The media could not be sent: fall back to the text alone.
                    print(datetime.now(), 'send_media', ex)
                    message = bot.send_message(channel,
                                               comment or chr(10060),
                                               disable_web_page_preview=True,
                                               disable_notification=True,
                                               parse_mode='HTML')
                    message_id = str(message.id)
            else:
                comment, tail = smart_split(comment, 4096, 4096)
                message = bot.send_message(channel,
                                           comment or chr(10060),
                                           disable_web_page_preview=True,
                                           disable_notification=True,
                                           parse_mode='HTML')
                message_id = str(message.id)
        except Exception as ex:
            print(datetime.now(), 'send_message', ex)
            if error_cnt < 3:
                # Leave the post unrecorded so the next poll fetches it again.
                error_cnt += 1
                break
            # Repeated failures: post a placeholder so the thread can move on.
            try:
                message = bot.send_message(channel,
                                           chr(10060),
                                           disable_web_page_preview=True,
                                           disable_notification=True,
                                           parse_mode='HTML')
            except Exception as placeholder_ex:
                print(datetime.now(), 'send_message', placeholder_ex)
                break
            message_id = str(message.id)

        cur.execute("insert into post(post_num, message_id) values(?, ?)", (post['num'], message_id))
        con.commit()
        error_cnt = 0

        # The rest of a long post goes out as a chain of replies.
        try:
            for part in tail:
                message = bot.send_message(channel, part,
                                           reply_to_message_id=message_id,
                                           disable_web_page_preview=True,
                                           disable_notification=True,
                                           parse_mode='HTML')
                message_id = message.id
        except Exception as ex:
            # The post is already recorded; losing part of its tail must not stop the bot.
            print(datetime.now(), 'send_tail', ex)
        sleep(m)
    sleep(t)
Add Comment
Please, Sign In to add comment