# Untitled

import re
import xlsxwriter
from datetime import datetime
import numpy as np
import time
import matplotlib.pyplot as plt
import os

def loop_statements(dir, reader_f, act_type, debug=0):
    """Parse every PDF statement in a directory and export the rows to XLSX.

    Each file whose name ends in ``.pdf`` (case-insensitive) is handed to
    ``reader_f``; the collected results are labelled by ``filter_list`` and
    written to ``<act_type>.xlsx`` by ``make_xlsx``.

    Args:
        dir: directory containing the statement PDFs.
        reader_f: callable invoked as ``reader_f(path, debug, year)`` where
            ``year`` is the first four characters of the lowercased file name.
        act_type: account type tag; also used as the output file stem.
        debug: when greater than 0, only the first four PDFs are parsed.
    """
    started_ms = round(time.time() * 1000)
    everything = []
    seen = 0
    parsed = 0
    # Sorted so the output row order does not depend on the arbitrary order
    # os.listdir happens to return.
    for name in sorted(os.listdir(dir)):
        lowered = name.lower()
        if not lowered.endswith(".pdf"):
            continue
        seen += 1
        if debug > 0 and seen >= 5:
            continue
        # Keep the on-disk spelling for the path: lowercasing it would point
        # at a non-existent file on case-sensitive filesystems. os.path.join
        # also works when `dir` has no trailing separator.
        path = os.path.join(dir, name)
        everything.append(reader_f(path, debug, lowered[0:4]))
        parsed += 1

    everything = filter_list(everything, act_type)
    make_xlsx(everything, act_type + ".xlsx")
    elapsed_ms = round(time.time() * 1000) - started_ms
    print("took", elapsed_ms, "ms to parse", parsed, "files")

# Ordered description rewrites: each rule is tried against the description as
# left by the previous rule, and a match replaces the whole description.
_DESCRIPTION_REWRITES = tuple(
    (re.compile(pattern, re.IGNORECASE), canonical)
    for pattern, canonical in (
        (r".*(amz\*amazon|amz\*ware|amazon.ca\*|amazon.*downtown).*", "Amazon.ca"),
        (r".*(AMZN Mktp CA|Amazon \*Mark).*", "AMZN Mktp CA"),
        (r".*(AMZN Mktp US).*", "AMZN Mktp US"),
        (r".*(audible).*", "Audible CA"),
        (r".*(MEC|Mountain Equipment).*", "MEC - Mountain Equipment"),
        (r".*(Steamgames|steam.*seattle).*", "Steam Games"),
    )
)

# Category detectors, all case-insensitive and anchored with re.match.
# NOTE(review): the original also built a dental/optometric/audio pattern that
# was never tied to any label; it is omitted here.
_CATEGORY_PATTERNS = {
    key: re.compile(pattern, re.IGNORECASE)
    for key, pattern in (
        ("grocery", r".*(foods|super a|IGA|Safeway|Buy-low|buy low|freson|nofrills|pc express|superstore|save on|no frills).*"),
        ("fast_food", r".*(restaur|kura|pastry|red beard|pho a pho|mr mikes|tandoori|sushi|the shack|bakery|pizza|spicy gre|A&W|subway|dairy queen|wendys|taco de|tim horton|pizzeria|gus' pizza|panago|DQ Grill).*"),
        ("gas", r".*(chv[0-9]{5}|chevron|mohawk|canco|gti pet|co-op|7-eleven|Shell|esso|husky|petrocan|macs|7 eleven|petro canada).*"),
        ("internet", r".*(Telus comm|tsi).*"),
        ("fuel_prepay", r".*(Shell easypay|shell ep).*"),
        ("phone", r".*(fido mobile|freedom|koodo|virgin|public mob|telus mob).*"),
        ("amazon", r".*(audible|amazon|amzn).*"),
        ("rent", r".*(cheque withdrawal).*"),
        ("paypal", r".*(paypal).*"),
        ("power", r".*(BC HYDRO).*"),
        ("income", r".*(from canada|province of b\.c).*"),
        ("etransfer", r".*(INTERAC e-Transfer).*"),
    )
}


def _label_for(description, amount, act_type, cc_regex):
    """Return the spending label for one normalised description and amount."""

    def hit(key):
        return _CATEGORY_PATTERNS[key].match(description) is not None

    if hit("grocery"):
        return "grocery"
    if hit("fast_food"):
        return "fast food"
    if hit("fuel_prepay"):
        return "fuel"
    if hit("gas"):
        # Purchases under 20 at a gas station are labelled separately from fuel.
        return "gas station" if amount < 20 else "fuel"
    if hit("internet"):
        return "internet"
    if hit("phone"):
        return "cell phone"
    if hit("amazon"):
        return "amazon"
    if hit("paypal"):
        return "paypal"
    if hit("power"):
        return "power"
    if hit("rent") and 470 < amount < 600:
        return "rent"
    if hit("income"):
        if amount > 1000:
            return "payroll"
        if amount < 1000:
            return "expenses"
        # An amount of exactly 1000 matches neither bound and falls through.
    if cc_regex is not None and cc_regex.search(description) is not None:
        return "credit cards"
    if hit("etransfer") and 2800 < amount < 3000:
        return "investing"
    return "junk" if act_type == "tngch" else "misc"


def filter_list(everything, act_type, cc_pattern=None):
    """Normalise transaction descriptions and append a label and account type.

    ``everything`` is a list of statements, each a list of mutable rows.
    Index 3 of a row is the description and index 4 the amount. Rows are
    modified in place: the description may be replaced by a canonical
    merchant name, and two items are appended (label, then ``act_type``).

    Args:
        everything: list of statements (lists of row lists).
        act_type: account type tag; ``"tngch"`` turns the fallback label
            into ``"junk"`` instead of ``"misc"``.
        cc_pattern: optional regex (string or compiled pattern) identifying
            credit-card payment descriptions. Strings are compiled
            case-insensitively and matched with ``search``. Matching rows are
            labelled ``"credit cards"`` and their amount is negated. The file
            never defined this pattern (the old code hit a NameError on the
            missing ``check_cc``), so the rule is off unless one is supplied.

    Returns:
        The same ``everything`` list, for convenience.
    """
    cc_regex = None
    if cc_pattern is not None:
        if isinstance(cc_pattern, str):
            cc_regex = re.compile(cc_pattern, re.IGNORECASE)
        else:
            cc_regex = cc_pattern

    for statement in everything:
        for row in statement:
            try:
                amount = float(row[4])
            except (ValueError, TypeError, IndexError):
                # Unparseable or missing amounts are treated as zero.
                amount = 0

            description = row[3]
            if description == "NET" and amount == 120:
                description = "ANNUAL FEE"
            for pattern, canonical in _DESCRIPTION_REWRITES:
                if pattern.match(description) is not None:
                    description = canonical
            row[3] = description

            label = _label_for(description, amount, act_type, cc_regex)
            if label == "credit cards":
                # Flip the sign of credit-card payments.
                row[4] = str(amount * -1)

            row.append(label)
            row.append(act_type)
    return everything

def make_xlsx(everything, fName):
    """Write all statement rows to an XLSX workbook.

    Args:
        everything: list of statements, each a list of rows laid out as
            ``[year, t_date, p_date, description, amount, label, act type]``.
        fName: output file name. ``"tngch.xlsx"`` and ``"td.xlsx"`` also
            select how the ``t_date`` text is parsed into a real date; for
            any other name the date is written as plain text.

    Rows whose amount cannot be converted to a float are reported with
    ``print`` and skipped. One empty spreadsheet row is left after each
    statement.
    """
    # strptime layout of the t_date column, keyed by output file name.
    date_layouts = {"tngch.xlsx": '%d %b %Y', "td.xlsx": '%b %d %Y'}
    layout = date_layouts.get(fName)
    headers = ("year", "t_date", "p_date", "description", "amount", "label", "act type")

    # The context manager closes the workbook even if a row raises midway.
    with xlsxwriter.Workbook(fName) as wb:
        ws = wb.add_worksheet("statements")
        date_format = wb.add_format({'num_format': 'd mmm yyyy'})
        for col, header in enumerate(headers):
            ws.write(0, col, header)

        row = 1
        for statement in everything:
            for entry in statement:
                try:
                    amount = float(entry[4])
                except (ValueError, TypeError, IndexError) as e:
                    print(e)
                    continue

                ws.write_number(row, 0, int(entry[0]))

                parsed_date = None
                if layout is not None:
                    try:
                        parsed_date = datetime.strptime(entry[1], layout).date()
                    except (ValueError, TypeError) as e:
                        print(e)
                if parsed_date is not None:
                    ws.write_datetime(row, 1, parsed_date, date_format)
                else:
                    # Fall back to the raw text when the date cannot be parsed.
                    ws.write(row, 1, str(entry[1]))

                ws.write(row, 2, str(entry[2]))
                ws.write(row, 3, str(entry[3]))
                ws.write_number(row, 4, amount)
                ws.write(row, 5, str(entry[5]))
                ws.write(row, 6, str(entry[6]))
                row += 1
            # Blank separator row between statements.
            row += 1

        ws.set_column(0, 0, 4.3)
        ws.set_column(1, 1, 11.45)
        ws.set_column(2, 2, 11.45)
        ws.set_column(3, 3, 71.5)
        ws.set_column(4, 4, 9.7)


def show_image(item, title=""):
    """Render ``item`` to a pixmap and draw it with matplotlib.

    Args:
        item: any object exposing a ``get_pixmap(dpi=...)`` method (for
            example a PyMuPDF page).
        title: text used as the figure title.

    The pixmap is produced at a fixed DPI, wrapped as an RGB array without
    copying, and drawn on a new figure whose extent is expressed in points
    (72 per inch).
    """
    DPI = 220  # rendering resolution
    pixmap = item.get_pixmap(dpi=DPI)

    # View the pixmap's sample buffer as a height x width x 3 byte array.
    rgb = np.ndarray(
        shape=(pixmap.h, pixmap.w, 3),
        dtype=np.uint8,
        buffer=pixmap.samples_mv,
    )

    # Convert pixel dimensions to points for the axes extent.
    width_pt = pixmap.w * 72 / DPI
    height_pt = pixmap.h * 72 / DPI

    plt.figure(dpi=DPI)
    plt.title(title)
    _ = plt.imshow(rgb, extent=(0, width_pt, height_pt, 0))

def line_by_line(stuff):
    """Print each element of ``stuff`` prefixed by its zero-based position."""
    for position, entry in enumerate(stuff):
        print(position, entry)