Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# -*- coding: utf-8 -*-
# Count how often each "<<...>>" tag occurs in a text file and print the
# tags from most to least frequent.
import os
import re
import sys
from collections import Counter

# Non-greedy so the capture stops at the first ">>" after "<<".
pattern = re.compile(r"<<(.+?)>>")


def main():
    """Print the frequency of the first ``<<...>>`` tag on each line.

    Reads the file named by ``sys.argv[1]``.  For every line that contains
    a ``<<...>>`` tag, the text between the brackets of the first such tag
    is counted.  Lines without a tag are ignored.  The result is printed
    as ``tag count`` lines, highest count first; tags with equal counts
    appear in the order they were first seen.

    Raises:
        IndexError: if no file path was given on the command line.
        OSError: if the file cannot be opened.
    """
    frequency = Counter()
    with open(sys.argv[1]) as f:
        for line in f:
            match = pattern.search(line.strip())
            if match is not None:
                frequency[match.group(1)] += 1
    # most_common() orders by count (descending) and keeps first-seen
    # order among ties, same as a stable sort on the count.
    for k, v in frequency.most_common():
        print(k, v)


if __name__ == "__main__":
    main()
# Replace text using the portion matched by a Python regular expression.
# A lambda is used as the replacement callback.
import re

# NOTE(review): the sample text and the expected result below were
# reconstructed -- the pasted version had apparently lost its punctuation,
# so nothing in it could match.  Confirm against the author's intent.
text = 'あ。いうえ？お'
# Sentence delimiters: ideographic full stop, full-width question mark,
# ASCII question mark, and newline.  A bare "?" alternative is not valid
# regex syntax ("nothing to repeat"), so the full-width form is spelled out.
regex_pattern = re.compile(r'(。|？|\?|\n)')
# The callback receives the match object; appending to m.group() keeps the
# matched delimiter and adds the marker right after it.
sentence = regex_pattern.sub(lambda m: m.group() + '###', text)
# 'あ。###いうえ？###お'
# Read text whose elements are separated by half-width (ASCII) spaces and
# print the frequency of each element.
import sys
from collections import Counter, defaultdict


def count_tokens(lines):
    """Count the space-separated tokens found in *lines*.

    Each line is stripped of surrounding whitespace and split on the ASCII
    space character only, so tabs or other whitespace inside a line stay
    part of a token.  Empty tokens (from blank lines or runs of spaces)
    are not counted.

    Args:
        lines: an iterable of strings, e.g. an open text file.

    Returns:
        A ``collections.Counter`` mapping token -> number of occurrences.
    """
    frequency = Counter()
    for line in lines:
        frequency.update(
            token for token in line.strip().split(" ") if token
        )
    return frequency


def print_token_frequencies(path):
    """Print ``token count`` for every token in the file at *path*.

    Output is ordered by count, highest first; tokens with equal counts
    appear in the order they were first seen.
    """
    # Iterate the file lazily instead of loading every line up front.
    with open(path) as f:
        frequency = count_tokens(f)
    for k, v in frequency.most_common():
        print(k, v)


if __name__ == "__main__":
    print_token_frequencies(sys.argv[1])
# Capturing the matched groups of a Python regular expression.
import re
import sys

# pn_ja.dic entries look like:
#   優れる:すぐれる:動詞:1
# i.e. four colon-separated fields containing no whitespace.  The first
# three groups are non-greedy, so any extra colons end up in the last field.
pattern = re.compile(r"^(\S+?)\:(\S+?)\:(\S+?)\:(\S+)$")


def parse_entry(line):
    """Split one dictionary line into its four fields.

    Args:
        line: a single line of text; surrounding whitespace is ignored.

    Returns:
        A tuple ``(term, yomi, klass, pn_score)`` of strings, or ``None``
        when the line does not have the expected four-field shape.
    """
    match = pattern.search(line.strip())
    if match is None:
        return None
    return match.groups()


def print_entries(path):
    """Print the parsed fields of every non-blank line in the file at *path*.

    Lines that do not match the expected format are reported as
    ``Not match``.
    """
    with open(path) as f:
        for line in f:
            line = line.strip()
            # strip() never returns None, so test for emptiness to skip
            # blank lines instead of reporting them as mismatches.
            if not line:
                continue
            entry = parse_entry(line)
            if entry is None:
                print('Not match')
            else:
                term, yomi, klass, pn_score = entry
                print(term, yomi, klass, pn_score)


if __name__ == "__main__":
    print_entries(sys.argv[1])
Add Comment
Please, Sign In to add comment