Advertisement
Guest User

Untitled

a guest
Sep 15th, 2017
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.80 KB | None | 0 0
  1. id || title || comment
  2. 1 || Title 1 || Sentence 1 of Comment 1. Sentence 2 of Comment 1.
  3. 2 || Title 1 || Sentence 1 of Comment 2. Sentence 2 of Comment 2.
  4.  
  5. id || title || root_comment || sub_comment
  6. 1 || Title 1 || Comment 1 || Sentence 1
  7. 2 || Title 1 || Comment 1 || Sentence 2
  8. 3 || Title 1 || Comment 2 || Sentence 1
  9. 4 || Title 1 || Comment 2 || Sentence 2
  10.  
  11. import pymysql
  12. import pymysql.cursors
  13. import random
  14. from bs4 import BeautifulSoup
  15. import nltk
  16. from nltk import tokenize
  17. import pdb
  18.  
  19. conn = pymysql.connect(host='localhost', user='root', password='password', db='master_thesis', autocommit=True)
  20. cursor = conn.cursor()
  21. cursor.execute("SELECT * FROM lucene_counter WHERE count > 5 AND count <= 30")
  22.  
  23. lucene_rs_list = list()
  24.  
  25. for row in cursor:
  26. lucene_rs_list.append(row[1])
  27.  
  28. random.shuffle(lucene_rs_list)
  29. final_list = lucene_rs_list[:1]
  30.  
  31. for i in range(len(final_list)):
  32. current_title = final_list[i]
  33. query = "SELECT title, comment FROM lucene_try WHERE title = %s"
  34. cursor.execute(query, final_list[i])
  35. for row in cursor:
  36. root_comment = BeautifulSoup(row[1], "lxml").text
  37. print("Root Title: ", current_title)
  38. print("Root Comment: ", root_comment)
  39. cleancomment = tokenize.sent_tokenize(root_comment)
  40. for j in range(len(cleancomment)):
  41. # THIS LINE PRINTS EVERYTHING PROPERLY WITH ALL THE COMMENTS AND SUBCOMMENTS IF CURSOR.EXECUTE IS COMMENTED OUT
  42. print("Sub Comment: ", cleancomment[j])
  43. # IF THE CURSOR.EXECUTE IS UNCOMMENTED, IT ONLY DISPLAYS RESULT OF THE FIRST ROOT_COMMENT AND NOT ALL
  44. cursor.execute("""INSERT INTO lucene_rs (title, root_comment, comment) VALUES ("%s", "%s", "%s")""" % (current_title, root_comment, cleancomment[j]))
  45. print("n")
  46.  
  47. conn.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement