# -*- coding: utf-8 -*-
"""Check whether pyparsing copes with non-ASCII text.

Every sample has the shape ``ABC | <payload> | XYZ``.  Six well-formed
samples and two malformed ones (each missing one ``|``) are run through
the parser, and the parse result or the parse error is printed for each.

Originally tested on x86_64 GNU/Linux (Suse Linux 11.0).

NOTE: under Python 2 the plain string literals below are UTF-8 encoded
byte strings, so the parser operates on bytes there; under Python 3 they
are Unicode text.
"""
from __future__ import print_function

from pyparsing import *

# Creating the parser: start keyword, bar, any run of non-bar characters,
# bar, end keyword.
start_kw = Keyword('ABC')
end_kw = Keyword('XYZ')
the_parser = start_kw + '|' + CharsNotIn('|') + '|' + end_kw

# Some test texts that are expected to parse.
text1 = 'ABC | マルチ | XYZ'
text2 = 'ABC | チディ | XYZ'
text3 = 'ABC | 图形等 | XYZ'
text4 = 'ABC | 应用程 | XYZ'
text5 = 'ABC | Äöü | XYZ'
text6 = 'ABC | iöü | XYZ'

# Malformed texts: the first lacks the opening bar, the second the closing one.
bad_1 = 'ABC 应用程 | XYZ'
bad_2 = 'ABC | 应用程 XYZ'


def parse_text(text):
    """Parse *text* with ``the_parser`` and print the outcome.

    Prints a blank line, the input, and then either the parse result or,
    when a ``ParseException`` is raised, the error. Nothing is returned and
    the ``ParseException`` is not propagated.
    """
    print()
    print('Trying to parse: ', text)
    # Parse first and print afterwards, so a failing parse never leaves a
    # dangling 'Result:' label in front of the error report.
    try:
        result = the_parser.parseString(text)
    except ParseException as error:
        print('\nParse error!')
        print(error)
    else:
        print('Result: ', result)


# Do some parsing: good samples first, then the malformed ones.
for sample in (text1, text2, text3, text4, text5, text6, bad_1, bad_2):
    parse_text(sample)