Advertisement
Guest User

ilius

a guest
Sep 13th, 2010
289
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.54 KB | None | 0 0
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Recode a file from Arabic Windows (windows-1256) to UTF-8.
  4.  
  5. import sys, os
  6.  
  7. def winArabicToUtf8(s, ar2fa=True):
  8.  u = s.decode('windows-1256')
  9.  if ar2fa:
  10.   for item in [
  11.                 (u'ي',u'ی'),
  12.                 (u'ك',u'ک'),
  13.                 (u'ۀ',u'هٔ')]:
  14.    u = u.replace(item[0], item[1])
  15.  return u.encode('utf8')
  16.  
  17. path = sys.argv[1]
  18. s = file(path).read()
  19. ws = winArabicToUtf8(s)
  20.  
  21. (name, ext) = os.path.splitext(path)
  22. path2 = name + '-utf8' + ext
  23. file(path2, 'w').write(ws)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement