Advertisement
Guest User

Untitled

a guest
Oct 16th, 2019
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.41 KB | None | 0 0
  1. import io
  2. import codecs
  3.  
  4.  
  5.  
  6. class preprocessor():
  7.   def __init__(self):
  8.     #init empty string
  9.     self.bookString = ""
  10.  
  11.  
  12.   def __str__(self):
  13.     #return a string / present contents
  14.     return self.bookString
  15.    
  16.   def clean(self):
  17.     #TODO check if empty, else replace
  18.     self.bookString = self.bookString.replace('-',' ')
  19.     self.bookString = self.bookString.replace("'",'')
  20.     self.bookString = self.bookString.replace('_',' ')
  21.     self.bookString = self.bookString.replace('"',' ')
  22.     self.bookString = self.bookString.replace('“', '')
  23.     self.bookString = self.bookString.replace('”', '')
  24.     self.bookString = self.bookString.replace("'",'')
  25.     self.bookString = self.bookString.replace("’", '')
  26.  
  27.   def read_text(self, text_name):
  28.     #add .txt format to the string of the filename
  29.     fileName = text_name + ".txt"
  30.     #read the file
  31.     readd = io.open(fileName,mode= "r", encoding="utf-8")
  32.     #add the string to the self.bookString variable which will
  33.     #be used in all the other methods
  34.     self.bookString = readd.read()
  35.    
  36.  
  37.  
  38. #initialise object of class preprocessor
  39. process = preprocessor()
  40. #using read_text method of preprocessor
  41. process.read_text("1952-0")
  42. #print to terminal
  43. print(process.__str__())
  44. #an output file for debugging is created, written into and saved.
  45. text_file = open("Output.txt", "w")
  46. text_file.write(process.__str__())
  47. text_file.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement