Advertisement
Guest User

Eike Welk

a guest
Oct 27th, 2008
255
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.07 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. # Check whether pyparsing can parse Unicode text
  3. #Tested on x86_64 GNU/Linux (Suse Linux 11.0)
  4.  
  5. from pyparsing import *
  6.  
  7. #creating the parser
  8. start_kw = Keyword('ABC')
  9. end_kw = Keyword('XYZ')
  10. the_parser = (start_kw + '|' + CharsNotIn('|') + '|' + end_kw)
  11.  
  12. #some test texts
  13. text1 = 'ABC | マルチ | XYZ'
  14. text2 = 'ABC | チディ | XYZ'
  15. text3 = 'ABC | 图形等 | XYZ'
  16. text4 = 'ABC | 应用程 | XYZ'
  17. text5 = 'ABC | Äöü | XYZ'
  18. text6 = 'ABC | iöü | XYZ'
  19. bad_1 = 'ABC  应用程 | XYZ'
  20. bad_2 = 'ABC | 应用程  XYZ'
  21.  
  22. #function to present input output and errors in a nice way
  23. def parse_text(text):
  24.     print
  25.     print 'Trying to parse: ', text
  26.     try:
  27.         print 'Result: ', the_parser.parseString(text)
  28.     except ParseException, error:
  29.         print '\nParse error!'
  30.         print error
  31.  
  32. #do some parsing
  33. parse_text(text1)
  34. parse_text(text2)
  35. parse_text(text3)
  36. parse_text(text4)
  37. parse_text(text5)
  38. parse_text(text6)
  39. parse_text(bad_1)
  40. parse_text(bad_2)
  41.  
  42.  
  43.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement