Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- id || title || comment
- 1 || Title 1 || Sentence 1 of Comment 1. Sentence 2 of Comment 1.
- 2 || Title 1 || Sentence 1 of Comment 2. Sentence 2 of Comment 2.
- id || title || root_comment || sub_comment
- 1 || Title 1 || Comment 1 || Sentence 1
- 2 || Title 1 || Comment 1 || Sentence 2
- 3 || Title 1 || Comment 2 || Sentence 1
- 4 || Title 1 || Comment 2 || Sentence 2
- import pymysql
- import pymysql.cursors
- import random
- from bs4 import BeautifulSoup
- import nltk
- from nltk import tokenize
- import pdb
- conn = pymysql.connect(host='localhost', user='root', password='password', db='master_thesis', autocommit=True)
- cursor = conn.cursor()
- cursor.execute("SELECT * FROM lucene_counter WHERE count > 5 AND count <= 30")
- lucene_rs_list = list()
- for row in cursor:
- lucene_rs_list.append(row[1])
- random.shuffle(lucene_rs_list)
- final_list = lucene_rs_list[:1]
- for i in range(len(final_list)):
- current_title = final_list[i]
- query = "SELECT title, comment FROM lucene_try WHERE title = %s"
- cursor.execute(query, final_list[i])
- for row in cursor:
- root_comment = BeautifulSoup(row[1], "lxml").text
- print("Root Title: ", current_title)
- print("Root Comment: ", root_comment)
- cleancomment = tokenize.sent_tokenize(root_comment)
- for j in range(len(cleancomment)):
- # THIS LINE PRINTS EVERYTHING PROPERLY WITH ALL THE COMMENTS AND SUBCOMMENTS IF CURSOR.EXECUTE IS COMMENTED OUT
- print("Sub Comment: ", cleancomment[j])
- # IF THE CURSOR.EXECUTE IS UNCOMMENTED, IT ONLY DISPLAYS RESULT OF THE FIRST ROOT_COMMENT AND NOT ALL
- cursor.execute("""INSERT INTO lucene_rs (title, root_comment, comment) VALUES ("%s", "%s", "%s")""" % (current_title, root_comment, cleancomment[j]))
- print("n")
- conn.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement