joelmaxuel

URL parser for https://github.com/gehaxelt/python-rss2irc

May 5th, 2021
637
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. From e28ac7ffe5d7fe80d7dbf2ed8432c84d9d3f5892 Mon Sep 17 00:00:00 2001
  2. From: Joel Maxuel <joel@shell.skynet>
  3. Date: Wed, 5 May 2021 19:25:39 -0300
  4. Subject: [PATCH] Include the URL parser from EEVbot
  5.  
  6. ---
  7.  bot.py           | 76 ++++++++++++++++++++++++++++++++++++++++++++++++
  8.  requirements.txt |  3 ++
  9.  2 files changed, 79 insertions(+)
  10.  
  11. diff --git a/bot.py b/bot.py
  12. index 86956a3..143018a 100644
  13. --- a/bot.py
  14. +++ b/bot.py
  15. @@ -3,18 +3,32 @@ import threading
  16.  import irc.bot
  17.  import irc.client
  18.  import irc.connection
  19. +import json
  20.  import time
  21.  import re
  22. +import string
  23.  import sys
  24.  import feedparser
  25.  import datetime
  26.  import dateutil.parser
  27.  import requests
  28. +import urllib
  29.  from colour import Colours
  30.  from db import FeedDB
  31.  from config import Config
  32.  from feedupdater import FeedUpdater
  33. +from bs4 import BeautifulSoup
  34. +from urlextract import URLExtract
  35.  
  36. +fcount = 1.0
  37. +pubfltime = 0
  38. +tformat = "%Y/%m/%d %H:%M:%S"
  39. +ccstrip = re.compile('[\x02\x0F\x16\x1D\x1F]|\x03(\d{,2}(,\d{,2})?)?')
  40. +
  41. +IRC_BOLD = "\x02"
  42. +IRC_ITALIC = "\x1D"
  43. +IRC_UNDERLINE = "\x1F"
  44. +IRC_REGULAR = "\x0F"
  45.  
  46.  class IRCBot(irc.bot.SingleServerIRCBot):
  47.      def __init__(self, config, db, on_connect_cb):
  48. @@ -129,6 +143,49 @@ class IRCBot(irc.bot.SingleServerIRCBot):
  49.              answer = "Something was wrong."
  50.          return answer
  51.  
  52. +    ## EEV Bot hacks ... section inserted from:
  53. +    ## eevBot - AustNet IRC bot
  54. +
  55. +    ## A Twitter IRC bot, modified for AustNet #eevblog.
  56. +    ## Original creator - https://mike.verdone.ca/twitter
  57. +    ## Original source code - https://github.com/sixohsix/twitter/blob/master/twitter/ircbot.py
  58. +    ## Modified / maintained by - electrohead / corp[at]hush[dot]ai
  59. +
  60. +    ## A big thanks to "the internet" for the ideas and help with bugs, and whatnot.
  61. +
  62. +    def ytlinkparse(self, yturl):
  63. +        q = urllib.parse.urlparse(yturl)
  64. +        if q.hostname == 'youtu.be': return q.path[1:]
  65. +        if q.hostname in {'www.youtube.com', 'youtube.com'}:
  66. +            if q.path == '/watch': return urllib.parse.parse_qs(q.query)['v'][0]
  67. +            if q.path[:7] == '/embed/': return q.path.split('/')[2]
  68. +            if q.path[:3] == '/v/': return q.path.split('/')[2]
  69. +        return None
  70. +
  71. +    def url_parse(self, stext):
  72. +        ext1 = URLExtract()
  73. +        urls = ext1.find_urls(stext)
  74. +        if urls:
  75. +            for url in urls:
  76. +                if url.find("youtube") != -1 or url.find("youtu.be") != -1:
  77. +                    xparams = {"format": "json", "url": "https://www.youtube.com/watch?v=%s" % self.ytlinkparse(url)}
  78. +                    xurl = "https://www.youtube.com/oembed"
  79. +                    qstring = urllib.parse.urlencode(xparams)
  80. +                    xurl = xurl + "?" + qstring
  81. +                    with urllib.request.urlopen(xurl) as response:
  82. +                        response_text = response.read()
  83. +                        data = json.loads(response_text.decode())
  84. +                        self.send_msg(self.__config.CHANNEL, IRC_BOLD + "[Title] - Youtube: " + data['title'] + IRC_REGULAR)
  85. +                else:
  86. +                    r = requests.get(url, allow_redirects=False)
  87. +                    tparse = BeautifulSoup(r.text, features="html5lib")
  88. +                    tfind = tparse.find_all('title')
  89. +                    self.send_msg(self.__config.CHANNEL, IRC_BOLD + "[Title] - " + tfind[0].get_text() + IRC_REGULAR)
  90. +                print("** URL_PARSE - " + url)
  91. +                break
  92. +
  93. +    ## /EEV
  94. +
  95.      def on_privmsg(self, connection, event):
  96.          """Handles the bot's private messages"""
  97.          if len(event.arguments) < 1:
  98. @@ -153,9 +210,28 @@ class IRCBot(irc.bot.SingleServerIRCBot):
  99.          # Send the answer as a private message
  100.          if msg == "!help":
  101.              self.send_msg(event.source.nick, self.__help_msg())
  102. +        # Suggest a paste platform
  103. +        if msg == "!paste":
  104. +            self.send_msg(self.__config.CHANNEL, "Paste services: http://ix.io/  -  http://sprunge.us/  -  https://www.pastebin.com/")
  105.          # Send the answer as a public message
  106.          if botnick.lower() in msg:
  107.              self.send_msg(self.__config.CHANNEL, self.welcome_msg())
  108. +        global pubfltime
  109. +        if Config.lastpubmsg - pubfltime < fcount:
  110. +            return
  111. +        try:
  112. +            """Handles the bot's public (channel) messages"""
  113. +            tsrc = event.source.split('!')[0]
  114. +            sc1 = re.sub(r'[^\x00-\x7f]', r'', event.arguments[0])
  115. +            sc2 = ccstrip.sub('', sc1)
  116. +            args = [i for i in sc2.split(' ') if i]
  117. +            if (not args):
  118. +                return
  119. +            tfirst = args[0].lower().strip()
  120. +            tfirst = re.sub(":|-|,", "", tfirst)
  121. +            self.url_parse(event.arguments[0])
  122. +        except Exception as e:
  123. +            print(datetime.datetime.now(), e)
  124.  
  125.      def on_nicknameinuse(self, connection, event):
  126.          """Changes the nickname if necessary"""
  127. diff --git a/requirements.txt b/requirements.txt
  128. index 17a5bfe..b17a57f 100644
  129. --- a/requirements.txt
  130. +++ b/requirements.txt
  131. @@ -3,3 +3,6 @@ irc==19.0.1
  132.  python_dateutil==2.8.1
  133.  requests==2.25.1
  134.  sqlite3worker==1.1.7
  135. +bs4==0.0.1
  136. +urlextract>=1.1.0
  137. +html5lib>=1.1
  138. --
  139. 2.20.1
  140.  
  141.  
RAW Paste Data