Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# devplayer@gmail.com
# 2011-Nov-04
# This is my first text parsing program.
# This is my first implementation of a simple
# string parser and transform (which I call recompile)
# of a given string into a new string based on
# a list of given (valid) keyword strings.
# From within Python, instead of an MS-DOS script,
# I wanted to compile/transform/convert
# the value of the MS-DOS PROMPT environment variable,
# usually PROMPT = "$P$G",
# to what would be displayed by MS-DOS.
# But it can be used for a lot more.
# globals()['__doc__'] =
'''Parse (partition) a string into a list of valid and
invalid words (strings), while retaining the same order of
both.
Valid words are supplied by the caller in a list of strings.
An invalid word is anything between valid words, before any
valid words or after any valid words.
Then recompile the string with values from a dict whose keys
are the valid words, yet keep the invalid words in place.
Example:
"invalid00;Valid00Invalid02Valid01I.n'valid03"
where "Valid00" remaps to " HELLO " and "Valid01" remaps to " WORLD! "
you should get:
"invalid00; HELLO Invalid02 WORLD! I.n'valid03"
'''
- # These are the main functions
def getValidWord(keywords, string):
    '''Return the first keyword that `string` starts with, or '' if none does.

    keywords -- iterable of candidate keyword strings; they are tried in the
                order given, so an earlier keyword wins over a later one
                that also matches.
    string   -- the text whose beginning is tested.

    Empty keywords are skipped: '' is a prefix of every string, so accepting
    it would end the search early and hide any real keyword listed after it.
    '''
    for word in keywords:
        if word and string.startswith(word):
            return word
    return ''
def getInvalidWord(keywords, string):
    '''Return the leading part of `string` that precedes the first keyword.

    keywords -- iterable of keyword strings.
    string   -- the text to scan from left to right.

    Returns string[:i] for the smallest index i at which any keyword begins
    ('' when the string itself starts with a keyword), or the whole string
    when no keyword occurs in it.

    Empty keywords are ignored: '' matches at index 0 of every string, which
    would make this function always return '' and leave callers such as
    parse() unable to make progress.
    '''
    # str.startswith accepts a tuple of prefixes and a start offset, so every
    # keyword is tested at position i without re-slicing the string.
    prefixes = tuple(word for word in keywords if word)
    if not prefixes:
        return string
    for i in range(len(string)):
        if string.startswith(prefixes, i):
            return string[:i]
    return string
def parse(string, keywords):
    '''Split `string` into an ordered list of keyword and non-keyword pieces.

    string   -- the text to partition; it is not modified.
    keywords -- iterable of keyword ("valid word") strings.

    Returns a list of non-empty strings which, joined together, reproduce
    `string`. Each element is either one of the keywords or a run of text
    lying between, before or after keywords.

    Empty keywords are discarded up front. They can never consume any text,
    and if they reach the helpers unfiltered nothing is ever removed from
    the remaining text, so the loop below would not terminate.
    '''
    keywords = [word for word in keywords if word]
    remaining = string
    words = []
    while remaining:
        # try and get a keyword at the front of the remaining text
        word = getValidWord(keywords, remaining)
        if word:
            words.append(word)
            remaining = remaining[len(word):]
        # try and get whatever is not a keyword
        word = getInvalidWord(keywords, remaining)
        if word:
            words.append(word)
            remaining = remaining[len(word):]
    return words
- # similar to map() i guess
def recompile(string, mapping):
    '''Rebuild `string` with every keyword replaced by its mapped value.

    string  -- the text to transform.
    mapping -- dict-like object whose keys are the keywords. A callable
               value is called with no arguments and its result is used;
               any other value is used directly. Either way the result is
               converted with str().

    Text that is not a keyword is copied through unchanged. Returns the
    new string.
    '''
    keywords = mapping.keys()
    pieces = []
    for token in parse(string, keywords):
        if token not in keywords:
            pieces.append(str(token))
            continue
        value = mapping[token]
        if callable(value):
            value = value()
        pieces.append(str(value))
    return ''.join(pieces)
# These functions support DOS prompt conversion/transform/recompile
def getcwdrive():
    '''Return the drive part of the current working directory.

    This is the first element of os.path.splitdrive(os.getcwd()).
    '''
    import os
    drive, _rest = os.path.splitdrive(os.getcwd())
    return drive
def msdosdate():
    '''Return today's date formatted as '%a %m/%d/%Y'
    (abbreviated weekday name, then month/day/four-digit year).'''
    import datetime
    return datetime.datetime.today().strftime('%a %m/%d/%Y')
def msdostime():
    '''Return the current time as strftime('%X') followed by a literal
    '.00' (the hundredths-of-a-second field is always zero).'''
    import datetime
    current = datetime.datetime.today()
    return current.strftime('%X') + '.00'
def msdosprompt():
    '''Expand the "prompt" environment variable and return the resulting
    string.

    When the variable is not set, the template '$P$G' is used. The
    template is expanded by recompile() using the table returned by
    msdosprompt_mapper().
    '''
    import os
    template = os.getenv('prompt', '$P$G')
    return recompile(template, msdosprompt_mapper())
def msdosprompt_mapper():
    '''Return a new OrderedDict mapping PROMPT special codes to replacements.

    Keys are the two-character codes ('$A' ... '$Z', '$_', '$$', '$+').
    A value is either the replacement string or a zero-argument callable
    that produces it when called (recompile() calls such values).

    Codes mapped to themselves are not translated and pass through as
    literal text. '$V' is mapped to sys.version, i.e. the Python version
    string. '$M' is mapped to the empty string.
    '''
    import os
    import sys
    import collections
    return collections.OrderedDict([
        ('$A', '&'),
        ('$B', '|'),
        ('$C', '('),
        ('$D', msdosdate),
        ('$E', chr(27)),
        ('$F', ')'),
        ('$G', '>'),
        ('$H', '\b'),
        ('$I', '$I'),
        ('$J', '$J'),
        ('$K', '$K'),
        # $L is the less-than sign; it used to be mapped to '>' by mistake.
        ('$L', '<'),
        # This was assigned twice ('$M', then ''); '' was the value that
        # took effect, so only that one is kept.
        ('$M', ''),
        ('$N', getcwdrive),
        ('$O', '$O'),
        ('$P', os.getcwd),
        ('$Q', '='),
        ('$R', '$R'),
        ('$S', ' '),
        ('$T', msdostime),
        ('$U', '$U'),
        ('$V', sys.version),
        ('$W', '$W'),
        ('$X', '$X'),
        ('$Y', '$Y'),
        ('$Z', '$Z'),
        ('$_', '\r\n'),
        ('$$', '$'),
        ('$+', '$+'),
    ])
def mdosprompthelp():
    '''Print the help text of the cmd.exe PROMPT command.'''
    # Warning: text copied directly from Microsoft Windows XP without permission.
    help_text = '''
Changes the cmd.exe command prompt.
PROMPT [text]
text Specifies a new command prompt.
Prompt can be made up of normal characters and the following special codes:
$A & (Ampersand)
$B | (pipe)
$C ( (Left parenthesis)
$D Current date
$E Escape code (ASCII code 27)
$F ) (Right parenthesis)
$G > (greater-than sign)
$H Backspace (erases previous character)
$L < (less-than sign)
$N Current drive
$P Current drive and path
$Q = (equal sign)
$S (space)
$T Current time
$V Windows XP version number
$_ Carriage return and linefeed
$$ $ (dollar sign)
If Command Extensions are enabled the PROMPT command supports
the following additional formatting characters:
$+ zero or more plus sign (+) characters depending upon the
depth of the PUSHD directory stack, one character for each
level pushed.
$M Displays the remote name associated with the current drive
letter or the empty string if current drive is not a network
drive.
'''
    print(help_text)
def usage():
    '''Print package usage (the module docstring).'''
    module_doc = globals()['__doc__']
    print(module_doc)
if __name__ == '__main__':
    # Command line driver. Pass any of 'test1', 'test2', 'test3' as
    # arguments to run the matching demonstration, or a help flag to print
    # the module documentation and exit.
    import sys

    # ///////////////////////////////////
    def is_argv_help():
        '''See if user supplied a command line help argument,
        if so print usage and exit.'''
        help_flags = ('-h', '--h', '-help', '--help', '/h', '/help')
        for arg in sys.argv[1:]:
            if arg.lower() in help_flags:
                usage()
                sys.exit(0)

    is_argv_help()

    # ///////////////////////////////////
    def test_parse(string, keywords):
        '''copy of parse() with interactive prompt and debug messages

        Waits for a line of input before each step; entering 'q' stops
        early. Returns the list of words collected so far.
        '''
        # raw_input only exists on Python 2; input is its Python 3 equivalent.
        try:
            read_line = raw_input
        except NameError:
            read_line = input
        s = string
        words = []
        iterations = 0
        while read_line('%d: >' % iterations) != 'q' and len(s) > 0:
            print(s)
            word = getValidWord(keywords, s)
            print(' valid return:  %r' % (word,))
            if word and s.startswith(word):
                words.append(word)
                # Strip the keyword from the REMAINING text. This used to
                # restart from the original string, which brought back text
                # that had already been consumed.
                s = s.replace(word, '', 1)
            word = getInvalidWord(keywords, s)
            print('invalid return:  %r' % (word,))
            if word and s.startswith(word):
                words.append(word)
                s = s.replace(word, '', 1)
            print('words: %s' % (words,))
            print('')
            iterations += 1
        return words

    # ///////////////////////////////////
    def test1(test_string, test_keywords):
        '''Parse test_string with test_keywords and print the pieces.'''
        print('')
        print('Parse: %r' % test_string)
        print('')
        print(' into valid and invalid keywords: %r' % test_keywords)
        words = parse(test_string, test_keywords)
        print('')
        print('String was broken down (parsed) into:')
        print('')
        print(' %r' % words)
        print('')

    # ///////////////////////////////////
    def test2(test_string, test_mapping):
        '''Recompile test_string with test_mapping and print the result.'''
        print('-'*70)
        print('')
        print('Reformat test string: %r' % test_string)
        print('')
        print(' using mapping: %r' % test_mapping)
        string = recompile(test_string, test_mapping)
        print('')
        print('String was reformatted into:')
        print('')
        print(' %r' % string)
        print(string)
        print('')

    # ///////////////////////////////////
    # test one
    if 'test1' in sys.argv:
        # pay very close attention to spaces and spelling
        # there's a few twists in this tests
        test_keywords = ['one','two','three ','five','size']
        test_string = 'one potatoe, two tomatoe, three oregano, more,fivesix'
        test1(test_string, test_keywords)

    # ///////////////////////////////////
    # test two
    if 'test2' in sys.argv:
        special_codes = msdosprompt_mapper()
        prompt = '$D $T$P$G$EmOLLy $H $H$H$X:$S$N$.$G$G$G'
        test2(prompt, special_codes)

    # ///////////////////////////////////
    # test three
    if 'test3' in sys.argv:
        import collections
        keywords = collections.OrderedDict()
        keywords['Valid00'] = ' HELLO '
        keywords['Valid01'] = ' WORLD! '
        string = "invalid00;Valid00Invalid02Valid01I.n'valid03"
        print('Keywords:')
        # list(...) so the keys print as a plain list on Python 2 and 3 alike
        print(' %r' % (list(keywords.keys()),))
        print('Original string:')
        print(' %r' % string)
        print('Recompiled/transformed string:')
        print(' %r' % recompile(string, keywords))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement