Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- (use-modules (srfi srfi-1))
- (define FITID-WEIGHT 10000) ;; fitid match carry highest wt
- (define DATE-WEIGHT 500) ;; a perfect date match score
- (define DATE-TOLERANCE (* 60 60 24 7)) ;; higher = more tolerant
- (define AMOUNT-WEIGHT 500) ;; perfect amount match score
- (define AMOUNT-TOLERANCE 1)
- (define WORDS-WEIGHT 100) ;; each word match score
- (define MATCH-THRESHOLD 800) ;; below threshold --> unmatched
- ;; fake-gnucash API follows
- (define (dmy d m y)
- (let ((zdate (localtime (current-time))))
- (set-tm:year zdate (- y 1900))
- (set-tm:mon zdate (1- m))
- (set-tm:mday zdate d)
- (car (mktime zdate))))
- ;; fake-book. list-items are:
- ;; date, desc, memo, amount, online_id
- (define register-txns
- (list
- (list (dmy 31 12 2018) "EOY Bonus" "" 400 "001")
- (list (dmy 01 01 2019) "Bought $100 of groceries ZMART" "" -100 "")
- (list (dmy 04 01 2019) "Send $120 to wife" "" -120 "")
- (list (dmy 20 01 2019) "Mortgage Payment" "" -3000 "")
- (list (dmy 02 01 2019) "Income of $200" "" 200 "")))
- ;; fake-ofx. list items are:
- ;; date, desc, memo, amount, fitid
- ;; note the first item has already been imported; see FitID matches above
- (define ofx-txns
- (list
- (list (dmy 01 01 2019) "BONUS" "" 400 "001")
- (list (dmy 01 01 2019) "BANK FEE" "" -5 "002")
- (list (dmy 02 01 2019) "EMPLOYER" "" 200 "003")
- (list (dmy 03 01 2019) "ZMART" "*VISADEBIT" -100 "004")
- (list (dmy 07 01 2019) "ZMART" "*VISADEBIT" -150 "005")
- (list (dmy 18 01 2019) "Mortgage" "Regular" -3000 "006")))
- (define xaccTransGetDate car)
- (define xaccTransGetDescription cadr)
- (define xaccSplitGetMemo caddr)
- (define xaccSplitGetAmount cadddr)
- (define xaccTransGetOnlineID (compose cadddr cdr))
- (define xaccSplitGetParent identity)
- (define ofxDate car)
- (define ofxDesc cadr)
- (define ofxMemo caddr)
- (define ofxAmount cadddr)
- (define ofxFitID (compose cadddr cdr))
- ;; gaussian formula y(x)=exp(−(x/a)^2), returns 0..1
- ;; a should be in similar magnitude as x i.e. spread
- (define (gaussian x a)
- (exp (- (/ (* x x) (* a a)))))
- ;; counts how many tokens are same; duplicates are removed.
- ;; (count-equal? '("A" "B" "C" "B") '("B" "A" "Z")) should output 2.
- (define (count-equal A B)
- (define lst1 (delete-duplicates A))
- (define lst2 (delete-duplicates B))
- (let lp ((lst1 lst1) (count 0))
- (cond
- ((null? lst1) count)
- ((member (car lst1) lst2) (lp (cdr lst1) (1+ count)))
- (else (lp (cdr lst1) count)))))
- ;; "A Long Str!ing2Parsing" -> '("A" "LONG" "STR" "ING" "PARSING")
- (define (str->tokens str)
- (filter (negate string-null?)
- (string-split (string-upcase str) (negate char-alphabetic?))))
- (define (match-score ofx-txn reg-txn)
- (let* ((fitid-match? (equal? (xaccTransGetOnlineID reg-txn) (ofxFitID ofx-txn)))
- (amt-diff (- (xaccSplitGetAmount (xaccSplitGetParent reg-txn))
- (ofxAmount ofx-txn)))
- (words-match (count-equal (str->tokens (xaccTransGetDescription reg-txn))
- (str->tokens (ofxDesc ofx-txn))))
- (date-diff (- (xaccTransGetDate reg-txn) (ofxDate ofx-txn))))
- (+ (if fitid-match? FITID-WEIGHT 0)
- (* (gaussian amt-diff AMOUNT-TOLERANCE) AMOUNT-WEIGHT)
- (* words-match WORDS-WEIGHT)
- (* (gaussian date-diff DATE-TOLERANCE) DATE-WEIGHT))))
- ;; algorithm follows:
- (define scores-list '())
- ;; populate scores-list with num(ofx-txn) * num(reg-txn) scores
- (for-each
- (lambda (A)
- (for-each
- (lambda (B)
- (set! scores-list
- (cons (list A B (match-score A B)) scores-list)))
- register-txns))
- ofx-txns)
- ;; scores-list = (list ofx-split reg-split score), just generated
- ;; outputs = unique match-list with highest scores, and unmatched ofx
- ;; and reg txns
- (let loop ((scores-list (stable-sort! scores-list
- (lambda (a b) (> (caddr a) (caddr b)))))
- (lone-ofx '())
- (lone-reg '())
- (matched-list '()))
- (cond
- ((null? scores-list)
- (pk "completed. matches are")
- (for-each pk (stable-sort matched-list (lambda (a b) (< (caar a) (caar b)))))
- (pk "lone ofx, must be added new:")
- (for-each pk lone-ofx)
- (pk "lone reg, may be mistakes in register:")
- (for-each pk lone-reg))
- ((< (caddar scores-list) MATCH-THRESHOLD)
- (loop '()
- (delete-duplicates! (map car scores-list))
- (delete-duplicates! (map cadr scores-list))
- matched-list))
- (else
- (let ((m (car scores-list)))
- (loop (filter
- (lambda (elt)
- (not (or (equal? (car elt) (car m))
- (equal? (cadr elt) (cadr m)))))
- scores-list)
- lone-ofx
- lone-reg
- (cons m matched-list))))))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement