Advertisement
Guest User

Untitled

a guest
Apr 5th, 2019
151
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.20 KB | None | 0 0
  1. import mysql.connector
  2. from mysql.connector import Error
  3. from requests import get
  4. from profanity_check import predict, predict_prob
  5. from random import choice
  6. from nltk.corpus import stopwords
  7. from nltk.tokenize import word_tokenize
  8. import re
  9. import uuid
  10. import mistune
  11.  
# Pixabay API key; get_image() interpolates it into the Pixabay search URL.
# NOTE(review): this credential is hard-coded in source — consider loading it
# from configuration or the environment instead.
api_key = '9681111-253a05102d6de921a567af3e6'
  13.  
  14.  
  15. def insert_sql(connection, title, text, nsfw, score, f_image):
  16.     exists = select_sql(connection, title)
  17.     if not exists:
  18.         text = mistune.markdown(text)
  19.         cursor = connection.cursor()
  20.         sql_insert_query = """ INSERT INTO `dreams`
  21.                              (`title`, `text`, `nsfw`, `score`, `image`) VALUES (%s,%s,%s,%s,%s)"""
  22.         insert_tuple = (title, text, nsfw, score, f_image)
  23.         result = cursor.execute(sql_insert_query, insert_tuple)
  24.         connection.commit()
  25.         print("Added to db")
  26.     else:
  27.         print('Exists in db')
  28.  
  29.  
  30. def select_sql(connection, title):
  31.     sql_select_query = "SELECT title FROM dreams WHERE title = %s"
  32.     cursor = connection.cursor()
  33.     cursor.execute(sql_select_query, (title,))
  34.     records = cursor.fetchall()
  35.     return cursor.rowcount
  36.  
  37.  
  38. def get_unsplash_img():
  39.     url = 'https://source.unsplash.com/random/1200x800'
  40.     img = get(url).content
  41.     wee = str(uuid.uuid4())
  42.     file = 'images/{}.jpg'.format(wee)
  43.     with open(file, 'wb') as f:
  44.         f.write(img)
  45.     return file
  46.  
  47.  
  48. def get_pixabay_img(url):
  49.     got = get(url)
  50.     img = got.content
  51.     img_url = got.url
  52.     file = img_url.split('/')[-1]
  53.     file = 'images/{}'.format(file)
  54.     with open(file, 'wb') as f:
  55.         f.write(img)
  56.     return file
  57.  
  58.  
  59. def get_image(string):
  60.     string = re.sub(r'[^\w\s]', '', string)
  61.     stop_words = set(stopwords.words('english'))
  62.     word_tokens = word_tokenize(string)
  63.     filtered_sentence = [w for w in word_tokens if not w in stop_words]
  64.     filtered_sentence = []
  65.  
  66.     for w in word_tokens:
  67.         if w not in stop_words:
  68.             filtered_sentence.append(w)
  69.     if filtered_sentence:
  70.         if len(filtered_sentence) == 1:
  71.             query = filtered_sentence[0]
  72.         else:
  73.             query = choice(filtered_sentence)
  74.  
  75.         url = "https://pixabay.com/api/?key={}&q={}&safesearch=true".format(
  76.             api_key, query)
  77.         data = get(url).json()
  78.         images = []
  79.         for hit in data['hits']:
  80.             images.append(hit['largeImageURL'])
  81.         if images:
  82.             image = choice(images)
  83.             print('PIXABAY')
  84.             return get_pixabay_img(image)
  85.         else:
  86.             print('UNSPLASH')
  87.             return get_unsplash_img()
  88.  
  89.  
  90. def dreamcatcher(title):
  91.     for catch in ['had a dream', 'once had a dream' 'dreamt about', 'dreamed about']:
  92.         if catch in title.lower():
  93.             return True
  94.     return False
  95.  
  96.  
  97. def checker(title, selftext):
  98.     nopes = ['reddit', 'pokemon', 'dnd']
  99.     for texts in [title, selftext]:
  100.         for nope in nopes:
  101.             if nope in texts:
  102.                 return True
  103.     return False
  104.  
  105.  
  106. def pushshift(connection, limit):
  107.     end = 1
  108.     start = 0
  109.     c1 = 0
  110.  
  111.     for i in range(10000):
  112.         if 1473496205 < end:
  113.             print('end')
  114.             break
  115.         try:
  116.             api = 'https://api.pushshift.io/reddit/search/submission/?q=dream&sort=desc&after={}h&before={}h'.format(
  117.                 end, start)
  118.             r = get(api)
  119.             data = r.json()
  120.         except Exception as e:
  121.             print(e)
  122.  
  123.         for post in data['data']:
  124.             is_self = post['is_self']
  125.             if is_self:
  126.                 go = 0
  127.                 try:
  128.                     title = post['title']
  129.                     selftext = post['selftext']
  130.                     score = post['score']
  131.                     go = 1
  132.                 except Exception as e:
  133.                     print(e)
  134.                     go = 0
  135.                 if go:
  136.                     if not checker(title, selftext):
  137.                         if dreamcatcher(title):
  138.                             if selftext and selftext != '[removed]':
  139.                                 c1 += 1
  140.                                 profane = predict([title])
  141.                                 if profane:
  142.                                     nsfw = 1
  143.                                 else:
  144.                                     nsfw = 0
  145.                                 profane = predict([selftext])
  146.                                 if profane:
  147.                                     nsfw = 1
  148.                                 else:
  149.                                     nsfw = 0
  150.                                 try:
  151.                                     f_image = get_image(title)
  152.                                     #f_image = 'no_image.jpg'
  153.                                 except Exception as e:
  154.                                     print(e)
  155.                                     f_image = 'images/no_image.jpg'
  156.                                 try:
  157.                                     insert_sql(connection, title, selftext,
  158.                                                nsfw, score, f_image)
  159.                                 except Exception as e:
  160.                                     print(e)
  161.                                 if c1 > limit:
  162.                                     break
  163.         if c1 > limit:
  164.             break
  165.         end += 1
  166.         start += 1
  167.  
  168.  
  169. def main():
  170.     try:
  171.         connection = mysql.connector.connect(host='localhost',
  172.                                              database='test',
  173.                                              user='root',
  174.                                              password='root')
  175.         if connection.is_connected():
  176.             db_Info = connection.get_server_info()
  177.             cursor = connection.cursor()
  178.             cursor.execute("select database();")
  179.             record = cursor.fetchone()
  180.             print('Connected to {}'.format(record[0]))
  181.             pushshift(connection, 5000)
  182.             cursor.close()
  183.     except Error as e:
  184.         print("Error while connecting to MySQL", e)
  185.     finally:
  186.         if(connection.is_connected()):
  187.             cursor.close()
  188.             connection.close()
  189.             print("\nConnection closed")
  190.  
  191.  
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement