SHARE
TWEET

Untitled

a guest Apr 24th, 2019 58 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #! /usr/bin/env python                                                                                                                                                                                      
  2. # -*- coding: utf-8 -*-                                                                                                                                                                                    
  3.  
  4. from sys import stdin, stderr
  5.  
  6. for line in stdin:
  7.     line = line.strip()
  8.  
  9.     if line == '':
  10.         print()
  11.     else:
  12.         wf, feats, lemma, label, ann = line.split('\t')
  13.         lemma_list = eval(ann)
  14.  
  15.         l = [lemma_ for label_, lemma_ in lemma_list if label_==label]
  16.         max_hash = max([lemma_.count('#') for lemma_ in l])
  17.         l = [lemma_ for lemma_ in l if lemma_.count('#') == max_hash][0]
  18.  
  19.         if l != '':
  20.             lemma = l
  21.  
  22.         lemma = lemma.lower()
  23.         lemma = lemma.replace('Å', 'å')
  24.         lemma = lemma.replace('Ä', 'ä')
  25.         lemma = lemma.replace('Ö', 'ö')
  26.  
  27.         print ("%s\t%s\t%s\t%s\t_" % (wf, feats, lemma, label))
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top