Advertisement
Guest User

Exercice Python Sam & Max - 16/12/2013

a guest
Dec 16th, 2013
157
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.88 KB | None | 0 0
  1. #!/usr/bin/python
  2. # -*- coding:utf-8 -*-
  3.  
  4. from __future__ import with_statement
  5. import sys
  6. import re
  7. import unicodedata
  8. from collections import defaultdict
  9.  
  10. def proceed(filename):
  11.   with open(filename) as f:
  12.     texte = f.read()
  13.     pass
  14.  
  15.   texte = texte.strip()
  16.   texte = texte.replace("’", " ")
  17.   texte = texte.replace("œ", "oe")
  18.   texte = texte.decode('utf-8')
  19.   texte = unicodedata.normalize('NFKD', texte)
  20.   texte = texte.encode('ASCII', 'ignore')
  21.   texte = " ".join(re.split(r'\W+',texte))
  22.   texte = texte.strip().lower()
  23.  
  24.   dictword = defaultdict(list)
  25.   pos = 0
  26.   for m in re.finditer(r'\w+', texte):
  27.     dictword[m.group(0)].append(pos)
  28.     pos += 1
  29.     pass
  30.  
  31.   for key,value in sorted(dictword.iteritems(), key = lambda (k,v):(len(v),v)):
  32.     print "%s: %s" % (key, ', '.join([str(x) for x in value]))
  33.  
  34.  
  35. if __name__ == "__main__":
  36.    proceed(sys.argv[1])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement