J2897

PayPal Activity CSV to GnuCash-ready splits (GBP, USD, FX reference)

Jan 3rd, 2026
3,019
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.26 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. r"""
  3. Split a PayPal "Activity Download" CSV into:
  4.  1) GBP purchases/refunds (excluding funding and FX rows)
  5.  2) USD purchases/refunds (excluding FX rows)
  6.  3) FX reference table (GBP->USD conversions for USD purchases)
  7.  
  8. Works with the PayPal CSV format that has columns like:
  9. Date, Time, Time Zone, Description, Currency, Gross, Fee, Net, Balance, Transaction ID, Reference Txn ID, Name, ...
  10.  
  11. Usage (Windows):
  12.  py -3 paypal_split_by_currency.py "C:\path\PayPal.CSV"
  13.  
  14. Outputs files next to the input CSV by default.
  15. """
  16. import argparse
  17. import csv
  18. from collections import Counter
  19. from pathlib import Path
  20.  
  21. EXCLUDE_GBP_DESCRIPTIONS = {
  22.     "Bank Deposit to PP Account",
  23.     "User Initiated Withdrawal",
  24.     "General Currency Conversion",
  25. }
  26.  
  27. EXCLUDE_USD_DESCRIPTIONS = {
  28.     "General Currency Conversion",
  29. }
  30.  
  31. USD_PAYMENT_DESCRIPTIONS = {
  32.     "Pre-approved Payment Bill User Payment",
  33.     "Express Checkout Payment",
  34.     "General Payment",
  35. }
  36.  
  37. def parse_float(s: str) -> float:
  38.     s = (s or "").strip()
  39.     if s == "":
  40.         return 0.0
  41.     return float(s)
  42.  
  43. def read_rows(csv_path: Path):
  44.     with csv_path.open("r", encoding="utf-8-sig", newline="") as f:
  45.         reader = csv.DictReader(f)
  46.         rows = list(reader)
  47.         if not rows:
  48.             raise SystemExit("CSV appears to be empty.")
  49.         return reader.fieldnames, rows
  50.  
  51. def write_rows(out_path: Path, fieldnames, rows):
  52.     out_path.parent.mkdir(parents=True, exist_ok=True)
  53.     with out_path.open("w", encoding="utf-8", newline="") as f:
  54.         w = csv.DictWriter(f, fieldnames=fieldnames)
  55.         w.writeheader()
  56.         w.writerows(rows)
  57.  
  58. def main():
  59.     ap = argparse.ArgumentParser()
  60.     ap.add_argument("csv", type=Path, help="PayPal activity CSV file")
  61.     ap.add_argument("--outdir", type=Path, default=None, help="Output directory (default: same folder as input)")
  62.     ap.add_argument("--prefix", type=str, default=None, help="Filename prefix (default: input filename stem)")
  63.     args = ap.parse_args()
  64.  
  65.     csv_path: Path = args.csv
  66.     outdir: Path = args.outdir if args.outdir else csv_path.parent
  67.     prefix: str = args.prefix if args.prefix else csv_path.stem
  68.  
  69.     fieldnames, rows = read_rows(csv_path)
  70.  
  71.     # Basic sanity check for columns we rely on
  72.     required = {"Currency", "Description", "Net", "Transaction ID", "Reference Txn ID", "Date", "Time"}
  73.     missing_cols = required - set(fieldnames or [])
  74.     if missing_cols:
  75.         raise SystemExit(f"Missing expected columns: {', '.join(sorted(missing_cols))}")
  76.  
  77.     # Split for import
  78.     gbp_rows = [
  79.         r for r in rows
  80.         if (r.get("Currency") == "GBP") and (r.get("Description") not in EXCLUDE_GBP_DESCRIPTIONS)
  81.     ]
  82.     usd_rows = [
  83.         r for r in rows
  84.         if (r.get("Currency") == "USD") and (r.get("Description") not in EXCLUDE_USD_DESCRIPTIONS)
  85.     ]
  86.  
  87.     # Build quick lookup for FX components keyed by purchase transaction id
  88.     # For a USD purchase with Transaction ID = PID, PayPal records:
  89.     #   GBP "Bank Deposit to PP Account"      Reference Txn ID == PID
  90.     #   GBP "General Currency Conversion"     Reference Txn ID == PID (Net negative)
  91.     #   USD "General Currency Conversion"     Reference Txn ID == PID (Net positive)
  92.     fx_lookup = {}
  93.     for r in rows:
  94.         ref = (r.get("Reference Txn ID") or "").strip()
  95.         if not ref:
  96.             continue
  97.         fx_lookup.setdefault(ref, []).append(r)
  98.  
  99.     fx_ref_rows = []
  100.     fx_missing = 0
  101.     for r in rows:
  102.         if r.get("Currency") != "USD":
  103.             continue
  104.         if r.get("Description") not in USD_PAYMENT_DESCRIPTIONS:
  105.             continue
  106.  
  107.         pid = (r.get("Transaction ID") or "").strip()
  108.         if not pid:
  109.             continue
  110.  
  111.         bundle = fx_lookup.get(pid, [])
  112.         gbp_deposit = next((x for x in bundle if x.get("Currency") == "GBP" and x.get("Description") == "Bank Deposit to PP Account"), None)
  113.         gbp_conv    = next((x for x in bundle if x.get("Currency") == "GBP" and x.get("Description") == "General Currency Conversion"), None)
  114.         usd_conv    = next((x for x in bundle if x.get("Currency") == "USD" and x.get("Description") == "General Currency Conversion"), None)
  115.  
  116.         if not (gbp_deposit and gbp_conv and usd_conv):
  117.             fx_missing += 1
  118.             continue
  119.  
  120.         gbp_topup = parse_float(gbp_deposit.get("Net"))
  121.         gbp_converted = abs(parse_float(gbp_conv.get("Net")))
  122.         usd_received = parse_float(usd_conv.get("Net"))
  123.         usd_spent = abs(parse_float(r.get("Net")))
  124.         rate = (usd_received / gbp_converted) if gbp_converted else 0.0
  125.  
  126.         merchant = (r.get("Name") or "").strip() or "(blank name)"
  127.         fx_ref_rows.append({
  128.             "Date": r.get("Date"),
  129.             "Time": r.get("Time"),
  130.             "Merchant": merchant,
  131.             "GBP topup (bank->PayPal GBP)": f"{gbp_topup:.2f}",
  132.             "GBP converted (PayPal GBP->USD)": f"{gbp_converted:.2f}",
  133.             "USD received (PayPal USD)": f"{usd_received:.2f}",
  134.             "USD spent at merchant": f"{usd_spent:.2f}",
  135.             "Implied rate (USD per GBP)": f"{rate:.6f}",
  136.             "Purchase TxID": pid,
  137.         })
  138.  
  139.     # Output paths
  140.     gbp_out = outdir / f"{prefix}_GBP_purchases.csv"
  141.     usd_out = outdir / f"{prefix}_USD_purchases.csv"
  142.     fx_out  = outdir / f"{prefix}_FX_reference.csv"
  143.  
  144.     write_rows(gbp_out, fieldnames, gbp_rows)
  145.     write_rows(usd_out, fieldnames, usd_rows)
  146.  
  147.     fx_fields = ["Date","Time","Merchant","GBP topup (bank->PayPal GBP)","GBP converted (PayPal GBP->USD)","USD received (PayPal USD)","USD spent at merchant","Implied rate (USD per GBP)","Purchase TxID"]
  148.     write_rows(fx_out, fx_fields, fx_ref_rows)
  149.  
  150.     # Summary
  151.     ccy_counts = Counter(r.get("Currency") for r in rows)
  152.     desc_counts = Counter(r.get("Description") for r in rows)
  153.     print("Input:", csv_path)
  154.     print("Rows:", len(rows), "| Currencies:", dict(ccy_counts))
  155.     print("Wrote:")
  156.     print("  ", gbp_out, f"({len(gbp_rows)} rows)")
  157.     print("  ", usd_out, f"({len(usd_rows)} rows)")
  158.     print("  ", fx_out,  f"({len(fx_ref_rows)} FX rows; {fx_missing} missing bundles skipped)")
  159.     print("Top descriptions:", ", ".join([f"{k}={v}" for k,v in desc_counts.most_common(6)]))
  160.  
  161. if __name__ == "__main__":
  162.     main()
  163.  
Advertisement