Advertisement
Guest User

Untitled

a guest
Feb 22nd, 2012
28
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.66 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. Name        check-compose.py
  5. Description Check compose sequences
  6. Author      Pander <pander@users.sourceforge.net>
  7. License     MIT License
  8.  
  9. 0.1 2012-01-06  Pander <pander@users.sourceforge.net>
  10. Initial release
  11. """
  12.  
  13. import binascii
  14. import sys
  15.  
  16. silent = False
  17.  
  18. def isUnicodeUpper(s):
  19.     hex = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', )
  20.     if (
  21.         len(s) == 5 and s[0] == 'U' and s[1] in hex and s[2] in hex and s[3] in hex and s[4] in hex
  22.     ) or (
  23.         len(s) == 6 and s[0] == 'U' and s[1] in hex and s[2] in hex and s[3] in hex and s[4] in hex and s[5] in hex
  24.     ):
  25.         return True
  26.     return False
  27.  
  28. def isUnicodeLower(s):
  29.     hex = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', )
  30.     if (
  31.         len(s) == 5 and s[0] == 'U' and s[1] in hex and s[2] in hex and s[3] in hex and s[4] in hex
  32.     ) or (
  33.         len(s) == 6 and s[0] == 'U' and s[1] in hex and s[2] in hex and s[3] in hex and s[4] in hex and s[5] in hex
  34.     ):
  35.         return True
  36.     return False
  37.  
  38. # Load Unicode information
  39. unicode_info = {}
  40. unicode_file = file('UnicodeData.txt', 'r')
  41. for line in unicode_file.readlines():
  42.     data = line.split(';')
  43.     unicode_info[data[0]] = data[1]
  44.  
  45. # Load compose sequences
  46. compose_sequences = {}
  47. codes = []
  48. chars = []
  49. names = []
  50. compose_file = file('proposal', 'r')
  51. lines = 0
  52. for line in compose_file.readlines():
  53.     lines = lines + 1
  54.     if line[0] != '<':
  55.         continue
  56.     if '"\t\t# ' in line:
  57.         line = line .replace('"\t\t# ', '"    # ')
  58.     if ':   "' in line and '<U17f' not in line:
  59.         line = line.replace(':   "', ': "')
  60.     seq = None
  61.     char = None
  62.     code = None
  63.     name = None
  64.     data = line.split(': "')
  65.     seq = data[0].strip()
  66.     try:
  67.  
  68.         charcodename = data[1].split('" ')
  69.     except IndexError:
  70.         if '<U17f' in seq:
  71.             seq = seq.split(' :')[0].strip()
  72.             if not silent:
  73.                 print 'WARNING line %s: missing second double quote and comment with name' %lines
  74.                 print ' ', line[:-1]
  75.             char = ''
  76.             code = ''
  77.             name = ''
  78.         else:
  79.             if not silent:
  80.                 print 'ERROR lines %s: unknown malformation' %lines
  81.                 print ' ', line[:-1]
  82.     if not (char == '' and code == '' and name == ''):
  83.         char = charcodename[0].strip()
  84.         charname = charcodename[1].strip().split(' # ')
  85.         if len(charname) == 1:
  86.             if False:
  87.                 if not silent:
  88.                     print 'WARNING line %s: missing code (alias or Unicode code point)' %lines
  89.                     print ' ', line[:-1]
  90.             code = ''
  91.             name = charname[0].replace('# ', '').strip()
  92.         else:
  93.             code = charname[0].strip()
  94.             name = charname[1].strip()
  95.     if False:#TODO for generating documentation
  96.         print 'seq:', seq
  97.         print '  char:', char
  98.         print '  code:', code
  99.         print '  name:', name
  100.  
  101.     for se in compose_sequences.keys():
  102.         (ch, co, na) = compose_sequences[se]
  103.         if se == seq:
  104.             if not silent:
  105.                 print 'ERROR line %s: at least duplicate compose sequence'
  106.                 print ' ', line[:-1]
  107.             break
  108.         elif seq == se[:len(seq)]:
  109.             if not silent:
  110.                 print 'ERROR line %s: compose sequence is blocking at least %s : "%s" %s # %s' %(lines, se, ch, co, na)
  111.                 print ' ', line[:-1]
  112.             break
  113.         elif seq[:len(se)] == se:
  114.             if not silent:
  115.                 print 'ERROR line %s: compose sequence is at least blocked by %s : "%s" %s # %s' %(lines, se, ch, co, na)
  116.                 print ' ', line[:-1]
  117.             break
  118.         elif code != '' and code == co and name != na:
  119.             if not silent:
  120.                 print 'WARNING line %s: conflicting names for same code for %s : "%s" %s # %s' %(lines, se, ch, co, na)
  121.                 print ' ', line[:-1]
  122.             break
  123.         elif code != '' and code != co and name == na:
  124.             if not silent:
  125.                 print 'WARNING line %s: conflicting codes for same name for %s : "%s" %s # %s' %(lines, se, ch, co, na)
  126.                 print ' ', line[:-1]
  127.             break
  128.         #TODO etc.
  129.  
  130.     if code != '' and (isUnicodeUpper(code) or isUnicodeLower(code)):
  131.         CODE = code[1:].upper()
  132.         if CODE in unicode_info:
  133.             info = unicode_info[CODE]
  134.             if info != name:
  135.                 if not silent:
  136.                     print 'WARNING line %s: incorrect comment with name, should be %s from UnicodeData.txt' %(lines, info)
  137.                     print ' ', line[:-1]
  138.         else:
  139.             if not silent:
  140.                 print 'WARNING line %s: unknown Unicode code point %s according to UnicodeData.txt' %(lines, code)
  141.                 print ' ', line[:-1]
  142.  
  143.     compose_sequences[seq] = (char, code, name)
  144.     if code != '' and code not in codes:
  145.         codes.append(code)
  146.     if char != '' and char not in chars:
  147.         chars.append(char)
  148.     if name != '' and name not in names:
  149.         names.append(name)
  150.  
  151. if not silent:
  152.     print 'INFO: checked %s compose sequences' %len(compose_sequences)
  153.     print 'INFO: resulting in %s different chars' %len(chars)
  154.     print 'INFO: related to %s different codes' %len(codes)
  155.     print 'INFO: with %s different names in comment' %len(names)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement