Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- # Each stage of the process is numbered and printed, so you can see the conversion at each step.
- # Each print call prefixes its output with the step number as a string, so the console output matches the numbering in the comments,
- # i.e. print('Number. ' + printedVariable)
- #imports
- import re
- import sys
- from random import random
- from math import log
- from collections import defaultdict
# 0. This is the original string; it is printed in the main body of the code below.
input_string: str = "ASd1234567890 . @$$$$£ rupaulisbae %"  # Changed the input text to test the number function

# Creates a dictionary for counting the trigrams.
# defaultdict(int) yields 0 for a trigram that has not been seen yet, so the
# counting loop can use `+= 1` without checking for membership first.
tri_counts = defaultdict(int)  # counts of all trigrams in input
- ## Checks that there is an input file to be read
- #if len(sys.argv) != 2:
- # print("Usage: ", sys.argv[0], "<training_file>")
- # sys.exit(1)
- #infile = sys.argv[1] #get input argument: the training file
- ##
def preprocess_line(inputText: str) -> str:  # named inputText because `input` is a Python builtin
    """Normalise a line of text for trigram counting.

    The text is lower-cased, every digit ``1``-``9`` is replaced by ``0``,
    and every character other than ``a``-``z``, ``0``, space and ``.`` is
    dropped. Each intermediate stage is printed to the console prefixed with
    its step number (``1.`` to ``4.``) so the conversion can be followed.

    Args:
        inputText: The raw string to normalise.

    Returns:
        The normalised string, containing only ``a``-``z``, ``0``, spaces
        and full stops.
    """
    # 1. Make it lower case.
    lower_input = inputText.lower()
    print('1. ' + lower_input)

    # 2. Demonstration step: show the text after only '1' has become '0'.
    print('2. ' + lower_input.replace('1', '0'))

    # 3. Replace every digit 1-9 with 0 in a single pass over the string.
    digits_to_zero = str.maketrans('123456789', '000000000')
    numConversionDone = lower_input.translate(digits_to_zero)
    print('3. ' + numConversionDone)  # here for demonstration of num conversion

    # 4. Keep only the characters a-z, 0, space and '.'.
    #    A raw string is used so the pattern contains no invalid escapes;
    #    inside a character class '.' and ' ' need no escaping.
    letters_numbers = re.findall(r'[a-z0. ]', numConversionDone)
    print('4. ' + str(letters_numbers))

    # Join the list of single characters back into one string and return it.
    outputText = "".join(letters_numbers)
    return outputText
#________MAIN BODY OF THE CODE________________________________________________________________
print('0. ' + input_string)

# 5. The string returned by preprocess_line is stored in pre_processed_text
#    and printed in the console.
pre_processed_text = preprocess_line(input_string)
print('5. ' + pre_processed_text)

# The counting code below follows the usual "extract trigram counts line by
# line" pattern. Make sure you understand what it does at the beginning and
# end of each line before reusing it. A file-based version would look like:
# with open(infile) as file:
#     list_lines = preprocess_file(file)  # split the file into a list of lines

test_string_tri_counts = input_string
print('6. ' + input_string)

# The sample string is treated as ONE line. Looping over input_string directly
# would visit it character by character and count the same trigrams once per
# character, so the already-preprocessed text is counted exactly once here.
line = pre_processed_text
print(line)

# A string of length n contains n - 2 trigrams; range(len(line) - 2) therefore
# includes the final one. For fewer than three characters the range is empty.
for j in range(len(line) - 2):
    trigram = line[j:j + 3]
    tri_counts[trigram] += 1
print(tri_counts)

print("Trigram counts in ", test_string_tri_counts, ", sorted alphabetically:")
for trigram in sorted(tri_counts):
    print(trigram, ": ", tri_counts[trigram])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement