# Untitled

try:
    from PIL import Image
except ImportError:
    import Image
import textract
import os

class PdfMiner:
    '''Scan the documents in ``path`` and count keyword occurrences in each.

    The keywords are read from ``keywords.txt`` (relative to the current
    working directory) and the text extraction is delegated to ``textract``.
    '''

    # Both directories are resolved against the working directory at the
    # moment the class body is executed, not when main() is called.
    path = os.getcwd() + '/folderForPdf/'
    output_path = os.getcwd() + '/output_results/'

    # Name of the file currently being processed; assigned by main().
    filename = None

    def __init__(self):
        pass

    def main(self):
        '''Run the keyword count on every entry of ``path``.

        Every directory entry is handed to extract_text_from_pdf() on this
        same instance, so ``self.filename`` and ``self.keyword_strike_dict``
        always describe the file that was processed last.

        Returns:
            dict mapping each file name to its own ``{keyword: count}``
            dict (empty when none of the keywords occurs in that file).

        Raises:
            OSError: if ``path`` cannot be listed (e.g. it does not exist).
        '''
        results = {}
        for filename in os.listdir(self.path):
            self.filename = filename
            self.extract_text_from_pdf(os.path.join(self.path, filename))
            # Copy, because keyword_strike() rebinds the attribute per file.
            results[filename] = dict(self.keyword_strike_dict)
        return results

    def keyword_strike(self, text, keyword_strike_dict=None):
        '''Count how many times each keyword occurs in ``text``.

        Matching is a plain, case-sensitive substring count (``str.count``).

        Args:
            text: the document text, either UTF-8 encoded bytes (which is
                what extract_text_from_pdf() passes in) or a decoded str.
            keyword_strike_dict: ignored; accepted only so that existing
                callers which pass it keep working.

        Returns:
            dict mapping every keyword that occurs at least once to its
            number of occurrences. The same dict is stored on
            ``self.keyword_strike_dict``.
        '''
        self.text = text
        self.keywords_list = self.extract_keywords()
        # Decode once up front instead of once per keyword lookup.
        if isinstance(text, (bytes, bytearray)):
            decoded = text.decode('utf-8')
        else:
            decoded = text
        self.keyword_strike_dict = {}
        for keyword in self.keywords_list:
            occurrences = decoded.count(keyword)
            if occurrences:
                print("Keyword {} occured {} times".format(keyword, occurrences))
                self.keyword_strike_dict[keyword] = occurrences
        if self.keyword_strike_dict:
            print('Not empty')
            # Report the file being scanned. Reading the attribute (instead
            # of calling main() again) avoids re-running the whole scan
            # recursively from inside the scan.
            print(self.filename)
        return self.keyword_strike_dict

    def extract_keywords(self, keywords_list=None):
        '''Load the keywords to search for from ``keywords.txt``.

        The file is opened relative to the current working directory and
        is expected to hold one keyword per line.

        Args:
            keywords_list: ignored; a fresh list is always built.

        Returns:
            list of keywords in file order. Blank lines are skipped: an
            empty keyword would "match" every document.

        Raises:
            OSError: if ``keywords.txt`` cannot be opened.
        '''
        with open('keywords.txt', 'r', encoding='utf8') as keywords_file:
            stripped_lines = (line.strip('\n') for line in keywords_file)
            return [keyword for keyword in stripped_lines if keyword]

    def extract_text_from_pdf(self, file_destination, text=None):
        '''Extract the text of one document and run the keyword count on it.

        Args:
            file_destination: path of the document handed to
                ``textract.process``.
            text: ignored; always replaced by the extracted text.

        Returns:
            Whatever ``textract.process`` returns for the document
            (keyword_strike() treats it as UTF-8 encoded bytes).
        '''
        self.file_destination = file_destination
        text = textract.process(file_destination, language='eng', encoding='utf-8')
        # Same instance on purpose: the counts end up on self.
        self.keyword_strike(text)
        return text

    def output_to_csv(self):
        '''Write the keyword counts of a positive PDF to a CSV file.

        Intended result: one CSV file named after the PDF, with two
        columns - the keyword and how many times it appeared.

        NOTE: not implemented yet; calling this method does nothing.
        '''


# Script entry point: scan the input folder only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    miner = PdfMiner()
    miner.main()