Guest User

Untitled

a guest
Jan 4th, 2025
40
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 7.57 KB | None | 0 0
  1. import re
  2. import xlsxwriter
  3. from datetime import datetime
  4. import numpy as np
  5. import time
  6. import matplotlib.pyplot as plt
  7. import os
  8.  
  9. def loop_statements(dir,reader_f,act_type,debug=0):
  10.     everything = []
  11.     now_and_then = round(time.time() * 1000)
  12.     count=0
  13.     for f in os.listdir(dir):
  14.         f = f.lower()
  15.         if f.endswith(".pdf"):
  16.             count+=1
  17.             if count >=5 and debug > 0:
  18.                 continue
  19.             y = f[0:4]
  20.             f = dir + f
  21.             everything.append(reader_f(f, debug, y))
  22.  
  23.     everything = filter_list(everything,act_type)
  24.     make_xlsx(everything, act_type+".xlsx")
  25.     print("took",round(time.time() * 1000)-now_and_then, "ms to parse",count,"files")
  26.  
  27. def filter_list(everything, act_type):
  28.     for i in range(len(everything)):
  29.         for j in range(len(everything[i])):
  30.             try:
  31.                 new_amt = float(everything[i][j][4])
  32.             except:
  33.                 new_amt = 0
  34.             if everything[i][j][3] == "NET" and new_amt == 120:
  35.                 everything[i][j][3] = "ANNUAL FEE"
  36.             if re.match(r".*(amz\*amazon|amz\*ware|amazon.ca\*|amazon.*downtown).*", everything[i][j][3], re.IGNORECASE) != None:
  37.                 everything[i][j][3] = "Amazon.ca"
  38.             if re.match(r".*(AMZN Mktp CA|Amazon \*Mark).*", everything[i][j][3], re.IGNORECASE) != None:
  39.                 everything[i][j][3] = "AMZN Mktp CA"
  40.             if re.match(r".*(AMZN Mktp US).*", everything[i][j][3], re.IGNORECASE) != None:
  41.                 everything[i][j][3] = "AMZN Mktp US"    
  42.             if re.match(r".*(audible).*", everything[i][j][3], re.IGNORECASE) != None:
  43.                 everything[i][j][3] = "Audible CA"
  44.             if re.match(r".*(MEC|Mountain Equipment).*", everything[i][j][3], re.IGNORECASE) != None:
  45.                 everything[i][j][3] = "MEC - Mountain Equipment"
  46.             if re.match(r".*(Steamgames|steam.*seattle).*", everything[i][j][3], re.IGNORECASE) != None:
  47.                 everything[i][j][3] = "Steam Games"
  48.             check_groc = re.match(r".*(foods|super a|IGA|Safeway|Buy-low|buy low|freson|nofrills|pc express|superstore|save on|no frills).*", everything[i][j][3], re.IGNORECASE)
  49.             check_ff = re.match(r".*(restaur|kura|pastry|red beard|pho a pho|mr mikes|tandoori|sushi|the shack|bakery|pizza|spicy gre|A&W|subway|dairy queen|wendys|taco de|tim horton|pizzeria|gus' pizza|panago|DQ Grill).*", everything[i][j][3], re.IGNORECASE)
  50.             check_gas = re.match(r".*(chv[0-9]{5}|chevron|mohawk|canco|gti pet|co-op|7-eleven|Shell|esso|husky|petrocan|macs|7 eleven|petro canada).*", everything[i][j][3], re.IGNORECASE)
  51.             check_int = re.match(r".*(Telus comm|tsi).*", everything[i][j][3], re.IGNORECASE)
  52.             check_ful = re.match(r".*(Shell easypay|shell ep).*", everything[i][j][3], re.IGNORECASE)
  53.             check_phn = re.match(r".*(fido mobile|freedom|koodo|virgin|public mob|telus mob).*", everything[i][j][3], re.IGNORECASE)
  54.             check_amz = re.match(r".*(audible|amazon|amzn).*", everything[i][j][3], re.IGNORECASE)
  55.             check_rent = re.match(r".*(cheque withdrawal).*", everything[i][j][3], re.IGNORECASE)
  56.             check_pp = re.match(r".*(paypal).*", everything[i][j][3], re.IGNORECASE)
  57.             check_pwr = re.match(r".*(BC HYDRO).*", everything[i][j][3], re.IGNORECASE)
  58.             check_pay = re.match(r".*(from canada|province of b\.c).*", everything[i][j][3], re.IGNORECASE)
  59.             check_inv2 = re.match(r".*(INTERAC e-Transfer).*", everything[i][j][3], re.IGNORECASE)            
  60.             check_med = re.match(r".*(dental|optometric|audio).*", everything[i][j][3], re.IGNORECASE)
  61.             if check_groc != None:
  62.                 l="grocery"
  63.             elif check_ff != None:
  64.                 l="fast food"
  65.             elif check_ful != None:
  66.                 l="fuel"
  67.             elif check_gas != None and new_amt < 20:
  68.                 l="gas station"
  69.             elif check_gas != None:
  70.                 l="fuel"
  71.             elif check_int != None:
  72.                 l="internet"
  73.             elif check_phn != None:
  74.                 l="cell phone"
  75.             elif check_amz != None:
  76.                 l="amazon"
  77.             elif check_pp != None:
  78.                 l="paypal"
  79.             elif check_pwr != None:
  80.                 l="power"
  81.             elif check_rent != None and new_amt > 470 and new_amt < 600:
  82.                 l="rent"
  83.             elif check_pay != None and new_amt > 1000:
  84.                 l="payroll"
  85.             elif check_pay != None and new_amt < 1000:
  86.                 l="expenses"
  87.             elif check_cc != None:
  88.                 l="credit cards"
  89.                 everything[i][j][4] = str(new_amt * -1)
  90.             elif check_inv2 != None and new_amt > 2800 and new_amt < 3000:
  91.                 l="investing"
  92.             elif act_type == "tngch":
  93.                 l="junk"
  94.             else:
  95.                 l="misc"
  96.             everything[i][j].append(l)
  97.             everything[i][j].append(act_type)
  98.     return everything
  99.    
  100. def make_xlsx(everything, fName): #list of all statements and filename of xlsx
  101.     wb = xlsxwriter.Workbook(fName)
  102.     ws = wb.add_worksheet("statements")
  103.     date_format = wb.add_format({'num_format': 'd mmm yyyy'})
  104.     ws.write(0,0,"year")
  105.     ws.write(0,1,"t_date")
  106.     ws.write(0,2,"p_date")
  107.     ws.write(0,3,"description")
  108.     ws.write(0,4,"amount")
  109.     ws.write(0,5,"label")
  110.     ws.write(0,6,"act type")
  111.     s_row = 1
  112.     s_col = 0
  113.     row=s_row
  114.     col=s_col
  115.     for s in range(0 ,len(everything)):
  116.         for t in range(0, len(everything[s])):
  117.             try:
  118.                 float(everything[s][t][4])
  119.             except Exception as e:
  120.                 print(e)
  121.                 continue
  122.             col=0
  123.             ws.write_number(row,col,int(everything[s][t][0]))
  124.  
  125.             try:
  126.                 if fName == "tngch.xlsx":
  127.                     dte=datetime.strptime(everything[s][t][1], '%d %b %Y').date()
  128.                 elif fName == "td.xlsx":
  129.                     dte=datetime.strptime(everything[s][t][1], '%b %d %Y').date()
  130.                 ws.write_datetime(row,col+1,dte,date_format)
  131.             except Exception as e:
  132.                 print(e)        
  133.                 ws.write(row,col+1,str(everything[s][t][1]))
  134.  
  135.             ws.write(row,col+2,str(everything[s][t][2]))
  136.             ws.write(row,col+3,str(everything[s][t][3]))
  137.             ws.write_number(row,col+4,float(everything[s][t][4]))        
  138.             ws.write(row,col+5,str(everything[s][t][5]))
  139.             ws.write(row,col+6,str(everything[s][t][6]))
  140.             row+=1
  141.         row+=1
  142.  
  143.     ws.set_column(0,0,4.3)
  144.     ws.set_column(1,1,11.45)
  145.     ws.set_column(2,2,11.45)
  146.     ws.set_column(3,3,71.5)
  147.     ws.set_column(4,4,9.7)    
  148.     wb.close()
  149.  
  150.  
  151. def show_image(item, title=""):
  152.     """Display a pixmap.
  153.  
  154.    Just to display Pixmap image of "item" - ignore the man behind the curtain.
  155.  
  156.    Args:
  157.        item: any PyMuPDF object having a "get_pixmap" method.
  158.        title: a string to be used as image title
  159.  
  160.    Generates an RGB Pixmap from item using a constant DPI and using matplotlib
  161.    to show it inline of the notebook.
  162.    """
  163.     DPI = 220  # use this resolution
  164.     pix = item.get_pixmap(dpi=DPI)
  165.     img = np.ndarray([pix.h, pix.w, 3], dtype=np.uint8, buffer=pix.samples_mv)
  166.     plt.figure(dpi=DPI)  # set the figure's DPI
  167.     plt.title(title)  # set title of image
  168.     _ = plt.imshow(img, extent=(0, pix.w * 72 / DPI, pix.h * 72 / DPI, 0))
  169.  
  170. def line_by_line(stuff):
  171.     count = 0
  172.     for line in stuff:
  173.         print(count, line)
  174.         count+=1
Advertisement
Add Comment
Please, Sign In to add comment