Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# -*- coding: utf-8 -*-
"""Dump a random sample of question/answer pairs as space-separated tokens.

The script selects up to 1000 random rows whose question (title + text) and
answer are each strictly between 100 and 120 characters long, splits both
texts into words with MeCab, and writes them line-aligned to ``qst.txt``
(questions) and ``ans.txt`` (answers): line N of one file corresponds to
line N of the other.
"""
import pymysql.cursors
import MeCab

# Surfaces that are dropped from the tokenized output.
SKIPPED_SURFACES = ('\n', '\r', '、', '。')

# The answer column is selected as ``atext``; with DictCursor that is also
# the key under which it appears in every result row.
SAMPLE_SQL = (
    "select qid, aid, concat(title, ' ', text) as qtxt, atext "
    "from db "
    "where char_length(title) + char_length(text) < 120 "
    "and char_length(title) + char_length(text) > 100 "
    "and char_length(atext) < 120 "
    "and char_length(atext) > 100 "
    "order by rand() limit 1000"
)


def tokenize(tagger, text):
    """Return *text* split into MeCab surfaces, each followed by one space.

    Every node produced by ``tagger.parseToNode(text)`` is visited in order;
    surfaces listed in ``SKIPPED_SURFACES`` are omitted. The trailing space
    after the last token is kept so the output format matches what the
    original script produced.
    """
    pieces = []
    node = tagger.parseToNode(text)
    while node:
        surface = node.surface
        if surface not in SKIPPED_SURFACES:
            pieces.append(surface + ' ')
        node = node.next
    return ''.join(pieces)


connection = pymysql.connect(host='localhost',
                             user='root',
                             password='',
                             db='',
                             charset='utf8',
                             cursorclass=pymysql.cursors.DictCursor)
try:
    with connection.cursor() as cursor:
        cursor.execute(SAMPLE_SQL)
        results = cursor.fetchall()
finally:
    # Release the connection even when the query fails.
    connection.close()

m = MeCab.Tagger('-Ochasen')
# Bug workaround kept from the original: one throwaway parse before using
# parseToNode. NOTE(review): presumably avoids broken node.surface values in
# the Python binding - confirm against the installed MeCab version.
m.parse(' ')

# Explicit UTF-8 so the Japanese output does not depend on the OS locale;
# the context manager closes both files even if tokenizing raises.
with open('qst.txt', 'w', encoding='utf-8') as fq, \
        open('ans.txt', 'w', encoding='utf-8') as fa:
    for r in results:
        fq.write(tokenize(m, r['qtxt']) + '\n')
        # Was r['atxt'], a key the query never produces (KeyError).
        fa.write(tokenize(m, r['atext']) + '\n')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement