Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# -*- coding: utf-8 -*-
# Check whether pyparsing can parse text containing non-ASCII characters.
# Tested on x86_64 GNU/Linux (Suse Linux 11.0)
from pyparsing import *

# Creating the parser.
# Grammar:  ABC | <one or more characters other than '|'> | XYZ
# CharsNotIn('|') only excludes the pipe itself, so the captured middle
# field keeps any spaces that surround it in the input.
start_kw = Keyword('ABC')
end_kw = Keyword('XYZ')
the_parser = start_kw + '|' + CharsNotIn('|') + '|' + end_kw
# Some test texts.
# NOTE(review): under Python 2 these plain literals are UTF-8 byte strings,
# not unicode objects; if the intent is to feed unicode objects to the
# parser they would need a u'' prefix -- confirm before changing.

# Well-formed samples: both '|' separators are present.
text1 = 'ABC | マルチ | XYZ'
text2 = 'ABC | チディ | XYZ'
text3 = 'ABC | 图形等 | XYZ'
text4 = 'ABC | 应用程 | XYZ'
text5 = 'ABC | Äöü | XYZ'
text6 = 'ABC | iöü | XYZ'

# Malformed samples: one of the two '|' separators is missing.
bad_1 = 'ABC 应用程 | XYZ'
bad_2 = 'ABC | 应用程 XYZ'
# Function to present input, output and errors in a nice way
def parse_text(text):
    """Parse *text* with the module-level ``the_parser`` and print the outcome.

    Prints a blank line and the input, then either the parse result or,
    when pyparsing raises ``ParseException``, an error banner followed by
    the exception itself.  Returns ``None``.
    """
    # Every print below takes exactly one argument, so it behaves the same
    # as a print statement (Python 2) and as the print function (Python 3).
    print('')
    print('Trying to parse:  %s' % (text,))
    try:
        # Parse before printing the label so that a failure never leaves a
        # dangling 'Result:' on screen.
        result = the_parser.parseString(text)
    except ParseException as error:
        print('\nParse error!')
        print(error)
    else:
        print('Result:  %s' % (result,))
# Do some parsing: the six well-formed samples first, then the two
# malformed ones, in the same order as they are defined.
for sample in (text1, text2, text3, text4, text5, text6, bad_1, bad_2):
    parse_text(sample)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement