Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os
import re
import uuid
from random import choice
from urllib.parse import urlsplit

import mistune
import mysql.connector
from mysql.connector import Error
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from profanity_check import predict, predict_prob
from requests import get
# Pixabay API key used by get_image(). Supply it through the PIXABAY_API_KEY
# environment variable; the literal is kept only as a fallback so existing
# setups keep working.
# NOTE(review): the fallback key is stored in plain text in this file and
# should be rotated and removed.
api_key = os.environ.get('PIXABAY_API_KEY') or '9681111-253a05102d6de921a567af3e6'
def insert_sql(connection, title, text, nsfw, score, f_image):
    """Insert one post into the `dreams` table unless its title is already stored.

    Args:
        connection: Open database connection providing ``cursor()`` and
            ``commit()``.
        title: Post title; also used as the duplicate-detection key.
        text: Markdown body; rendered with ``mistune.markdown`` before storing.
        nsfw: Value stored in the `nsfw` column.
        score: Value stored in the `score` column.
        f_image: Value stored in the `image` column.

    Prints 'Exists in db' and does nothing when ``select_sql`` reports a row
    with the same title, otherwise inserts, commits and prints 'Added to db'.
    Database errors propagate to the caller.
    """
    if select_sql(connection, title):
        print('Exists in db')
        return

    rendered = mistune.markdown(text)
    sql_insert_query = """ INSERT INTO `dreams`
    (`title`, `text`, `nsfw`, `score`, `image`) VALUES (%s,%s,%s,%s,%s)"""
    cursor = connection.cursor()
    try:
        cursor.execute(sql_insert_query, (title, rendered, nsfw, score, f_image))
        connection.commit()
    finally:
        # Release the cursor even when execute/commit raises.
        cursor.close()
    print("Added to db")
def select_sql(connection, title):
    """Return the number of rows in `dreams` whose title equals ``title``.

    Args:
        connection: Open database connection providing ``cursor()``.
        title: Title to look up (passed as a query parameter, not interpolated).

    Returns:
        int: Count of matching rows; 0 when the title is not stored.
    """
    sql_select_query = "SELECT title FROM dreams WHERE title = %s"
    cursor = connection.cursor()
    try:
        cursor.execute(sql_select_query, (title,))
        records = cursor.fetchall()
    finally:
        # Release the cursor even when the query fails.
        cursor.close()
    # Count the fetched rows directly instead of relying on cursor.rowcount.
    return len(records)
def get_unsplash_img():
    """Download a random 1200x800 Unsplash image and return its local path.

    The image is saved as ``images/<uuid4>.jpg``; the ``images`` directory is
    created when missing.

    Returns:
        str: Relative path of the written file.

    Raises:
        requests.RequestException: On timeout, connection failure or an HTTP
            error status, so an error page is never saved as an image.
    """
    url = 'https://source.unsplash.com/random/1200x800'
    response = get(url, timeout=30)
    response.raise_for_status()

    os.makedirs('images', exist_ok=True)
    file = 'images/{}.jpg'.format(uuid.uuid4())
    with open(file, 'wb') as f:
        f.write(response.content)
    return file
def get_pixabay_img(url):
    """Download the image at ``url`` into ``images/`` and return its local path.

    The file name is the last path segment of the final response URL (after
    any redirects), with query string and fragment ignored. When that segment
    is empty a random ``<uuid4>.jpg`` name is used instead.

    Args:
        url: Image URL to download.

    Returns:
        str: Relative path of the written file.

    Raises:
        requests.RequestException: On timeout, connection failure or an HTTP
            error status.
    """
    response = get(url, timeout=30)
    response.raise_for_status()

    # Take the name from the URL path only, so "?query" parts never end up in
    # the file name.
    name = os.path.basename(urlsplit(response.url).path)
    if not name:
        name = '{}.jpg'.format(uuid.uuid4())

    os.makedirs('images', exist_ok=True)
    file = 'images/{}'.format(name)
    with open(file, 'wb') as f:
        f.write(response.content)
    return file
def get_image(string):
    """Fetch an illustrative image for ``string`` and return its local path.

    Punctuation is stripped, the text is tokenized and English stop words are
    dropped (case-insensitively). One remaining word is picked at random and
    used as a Pixabay search query with safesearch enabled; a random hit's
    ``largeImageURL`` is downloaded via ``get_pixabay_img``. When no keyword
    remains or Pixabay returns no usable hit, a random Unsplash image is
    downloaded via ``get_unsplash_img`` instead.

    Args:
        string: Free text (for example a post title) to derive the query from.

    Returns:
        The path returned by ``get_pixabay_img`` or ``get_unsplash_img``.
    """
    cleaned = re.sub(r'[^\w\s]', '', string)
    stop_words = set(stopwords.words('english'))
    # The stop-word list is lowercase, so compare lowercased tokens.
    keywords = [w for w in word_tokenize(cleaned) if w.lower() not in stop_words]

    if not keywords:
        # Nothing to search for; go straight to the generic fallback.
        print('UNSPLASH')
        return get_unsplash_img()

    query = choice(keywords)
    # Let requests build the query string so the keyword is URL-encoded.
    response = get(
        'https://pixabay.com/api/',
        params={'key': api_key, 'q': query, 'safesearch': 'true'},
        timeout=30,
    )
    data = response.json()
    images = [
        hit['largeImageURL']
        for hit in data.get('hits', [])
        if 'largeImageURL' in hit
    ]

    if images:
        print('PIXABAY')
        return get_pixabay_img(choice(images))

    print('UNSPLASH')
    return get_unsplash_img()
def dreamcatcher(title):
    """Return True when ``title`` contains a dream-report phrase.

    Matching is a case-insensitive substring test against a fixed phrase list.

    Args:
        title: Post title to inspect.

    Returns:
        bool: True if any phrase occurs in the title, otherwise False.
    """
    # Each phrase must be its own element: adjacent string literals without a
    # comma are silently concatenated into one phrase.
    phrases = (
        'had a dream',
        'once had a dream',
        'dreamt about',
        'dreamed about',
    )
    lowered = title.lower()
    return any(phrase in lowered for phrase in phrases)
def checker(title, selftext):
    """Return True when the title or body mentions a blocked topic.

    The check is a case-insensitive substring test for 'reddit', 'pokemon'
    and 'dnd'. A missing (None) text is treated as empty.

    Args:
        title: Post title.
        selftext: Post body.

    Returns:
        bool: True if any blocked word occurs in either text, otherwise False.
    """
    nopes = ('reddit', 'pokemon', 'dnd')
    for text in (title, selftext):
        lowered = (text or '').lower()
        if any(nope in lowered for nope in nopes):
            return True
    return False
def _store_dream_post(connection, post):
    """Store one Pushshift submission if it qualifies; return True if it did.

    A post qualifies when it is a self post with a non-empty, non-removed
    body, is not rejected by ``checker`` and is accepted by ``dreamcatcher``.
    A qualifying post counts even if the image download or the insert fails;
    those failures are printed and skipped.
    """
    if not post.get('is_self'):
        return False
    try:
        title = post['title']
        selftext = post['selftext']
        score = post['score']
    except KeyError as e:
        print(e)
        return False

    if not selftext or selftext == '[removed]':
        return False
    if checker(title, selftext) or not dreamcatcher(title):
        return False

    # Flag the post when either the title or the body is predicted profane.
    nsfw = int(any(predict([title, selftext])))

    try:
        f_image = get_image(title) or 'images/no_image.jpg'
        #f_image = 'no_image.jpg'
    except Exception as e:
        print(e)
        f_image = 'images/no_image.jpg'

    try:
        insert_sql(connection, title, selftext, nsfw, score, f_image)
    except Exception as e:
        print(e)
    return True


def pushshift(connection, limit):
    """Import dream posts from the Pushshift submission search into the database.

    Queries the API up to 10000 times, each time with
    ``after={end}h&before={start}h`` where both offsets grow by one per
    request (presumably one-hour windows moving further into the past —
    confirm against the Pushshift API). Every returned post is passed to
    ``_store_dream_post``; the import stops once ``limit`` posts qualified.

    Args:
        connection: Open database connection handed on to ``insert_sql``.
        limit: Maximum number of qualifying posts to process.
    """
    if limit <= 0:
        return

    stored = 0
    for start in range(10000):
        end = start + 1
        try:
            api = 'https://api.pushshift.io/reddit/search/submission/?q=dream&sort=desc&after={}h&before={}h'.format(
                end, start)
            posts = get(api, timeout=30).json().get('data', [])
        except Exception as e:
            # Skip this window instead of re-processing the previous response.
            print(e)
            continue

        for post in posts:
            if _store_dream_post(connection, post):
                stored += 1
                if stored >= limit:
                    return
def main():
    """Connect to the local MySQL `test` database and run the Pushshift import.

    Prints the name of the connected database, imports up to 5000 posts via
    ``pushshift`` and always closes the connection afterwards. MySQL errors
    are printed rather than raised.

    NOTE(review): host and credentials are hard-coded here.
    """
    # Bound before the try block so the cleanup below is safe even when
    # connect() itself raises.
    connection = None
    try:
        connection = mysql.connector.connect(host='localhost',
                                             database='test',
                                             user='root',
                                             password='root')
        if connection.is_connected():
            cursor = connection.cursor()
            try:
                cursor.execute("select database();")
                record = cursor.fetchone()
            finally:
                cursor.close()
            print('Connected to {}'.format(record[0]))
            pushshift(connection, 5000)
    except Error as e:
        print("Error while connecting to MySQL", e)
    finally:
        if connection is not None and connection.is_connected():
            connection.close()
            print("\nConnection closed")


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement