Advertisement
Guest User

Untitled

a guest
Aug 3rd, 2016
84
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.37 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. import pymysql.cursors
  3. import MeCab
  4.  
  5. connection = pymysql.connect(host='localhost',
  6. user='root',
  7. password='',
  8. db='',
  9. charset='utf8',
  10. cursorclass=pymysql.cursors.DictCursor)
  11.  
  12. m = MeCab.Tagger('-Ochasen')
  13. m.parse(' ')#バグ対策
  14.  
  15. w_list = []
  16. with connection.cursor() as cursor:
  17. sql = "select qid, aid, concat(title, ' ', text) as qtxt, atext \
  18. from db \
  19. where char_length(title) + char_length(text) < 120 \
  20. and char_length(title) + char_length(text) > 100 \
  21. and char_length(atext) < 120 \
  22. and char_length(atext) > 100 \
  23. order by rand() limit 1000"
  24. cursor.execute(sql)
  25. results = cursor.fetchall()
  26. connection.close()
  27.  
  28. fq = open('qst.txt', 'w')
  29. fa = open('ans.txt', 'w')
  30. for r in results:
  31. qnode = m.parseToNode(r['qtxt'])
  32. qw = ''
  33. while qnode:
  34. word = qnode.surface
  35. if word not in ('\n', '\r', '、', '。'):
  36. qw += word + ' '
  37. qnode = qnode.next
  38. fq.write(qw + '\n')
  39.  
  40. anode = m.parseToNode(r['atxt'])
  41. aw = ''
  42. while anode:
  43. word = anode.surface
  44. if word not in ('\n', '\r', '、', '。'):
  45. aw += word + ' '
  46. anode = anode.next
  47. fa.write(aw + '\n')
  48.  
  49. fq.close()
  50. fa.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement