Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- r"""
- Split a PayPal "Activity Download" CSV into:
- 1) GBP purchases/refunds (excluding funding and FX rows)
- 2) USD purchases/refunds (excluding FX rows)
- 3) FX reference table (GBP->USD conversions for USD purchases)
- Works with the PayPal CSV format that has columns like:
- Date, Time, Time Zone, Description, Currency, Gross, Fee, Net, Balance, Transaction ID, Reference Txn ID, Name, ...
- Usage (Windows):
- py -3 paypal_split_by_currency.py "C:\path\PayPal.CSV"
- Outputs files next to the input CSV by default.
- """
- import argparse
- import csv
- from collections import Counter
- from pathlib import Path
- EXCLUDE_GBP_DESCRIPTIONS = {
- "Bank Deposit to PP Account",
- "User Initiated Withdrawal",
- "General Currency Conversion",
- }
- EXCLUDE_USD_DESCRIPTIONS = {
- "General Currency Conversion",
- }
- USD_PAYMENT_DESCRIPTIONS = {
- "Pre-approved Payment Bill User Payment",
- "Express Checkout Payment",
- "General Payment",
- }
- def parse_float(s: str) -> float:
- s = (s or "").strip()
- if s == "":
- return 0.0
- return float(s)
- def read_rows(csv_path: Path):
- with csv_path.open("r", encoding="utf-8-sig", newline="") as f:
- reader = csv.DictReader(f)
- rows = list(reader)
- if not rows:
- raise SystemExit("CSV appears to be empty.")
- return reader.fieldnames, rows
- def write_rows(out_path: Path, fieldnames, rows):
- out_path.parent.mkdir(parents=True, exist_ok=True)
- with out_path.open("w", encoding="utf-8", newline="") as f:
- w = csv.DictWriter(f, fieldnames=fieldnames)
- w.writeheader()
- w.writerows(rows)
- def main():
- ap = argparse.ArgumentParser()
- ap.add_argument("csv", type=Path, help="PayPal activity CSV file")
- ap.add_argument("--outdir", type=Path, default=None, help="Output directory (default: same folder as input)")
- ap.add_argument("--prefix", type=str, default=None, help="Filename prefix (default: input filename stem)")
- args = ap.parse_args()
- csv_path: Path = args.csv
- outdir: Path = args.outdir if args.outdir else csv_path.parent
- prefix: str = args.prefix if args.prefix else csv_path.stem
- fieldnames, rows = read_rows(csv_path)
- # Basic sanity check for columns we rely on
- required = {"Currency", "Description", "Net", "Transaction ID", "Reference Txn ID", "Date", "Time"}
- missing_cols = required - set(fieldnames or [])
- if missing_cols:
- raise SystemExit(f"Missing expected columns: {', '.join(sorted(missing_cols))}")
- # Split for import
- gbp_rows = [
- r for r in rows
- if (r.get("Currency") == "GBP") and (r.get("Description") not in EXCLUDE_GBP_DESCRIPTIONS)
- ]
- usd_rows = [
- r for r in rows
- if (r.get("Currency") == "USD") and (r.get("Description") not in EXCLUDE_USD_DESCRIPTIONS)
- ]
- # Build quick lookup for FX components keyed by purchase transaction id
- # For a USD purchase with Transaction ID = PID, PayPal records:
- # GBP "Bank Deposit to PP Account" Reference Txn ID == PID
- # GBP "General Currency Conversion" Reference Txn ID == PID (Net negative)
- # USD "General Currency Conversion" Reference Txn ID == PID (Net positive)
- fx_lookup = {}
- for r in rows:
- ref = (r.get("Reference Txn ID") or "").strip()
- if not ref:
- continue
- fx_lookup.setdefault(ref, []).append(r)
- fx_ref_rows = []
- fx_missing = 0
- for r in rows:
- if r.get("Currency") != "USD":
- continue
- if r.get("Description") not in USD_PAYMENT_DESCRIPTIONS:
- continue
- pid = (r.get("Transaction ID") or "").strip()
- if not pid:
- continue
- bundle = fx_lookup.get(pid, [])
- gbp_deposit = next((x for x in bundle if x.get("Currency") == "GBP" and x.get("Description") == "Bank Deposit to PP Account"), None)
- gbp_conv = next((x for x in bundle if x.get("Currency") == "GBP" and x.get("Description") == "General Currency Conversion"), None)
- usd_conv = next((x for x in bundle if x.get("Currency") == "USD" and x.get("Description") == "General Currency Conversion"), None)
- if not (gbp_deposit and gbp_conv and usd_conv):
- fx_missing += 1
- continue
- gbp_topup = parse_float(gbp_deposit.get("Net"))
- gbp_converted = abs(parse_float(gbp_conv.get("Net")))
- usd_received = parse_float(usd_conv.get("Net"))
- usd_spent = abs(parse_float(r.get("Net")))
- rate = (usd_received / gbp_converted) if gbp_converted else 0.0
- merchant = (r.get("Name") or "").strip() or "(blank name)"
- fx_ref_rows.append({
- "Date": r.get("Date"),
- "Time": r.get("Time"),
- "Merchant": merchant,
- "GBP topup (bank->PayPal GBP)": f"{gbp_topup:.2f}",
- "GBP converted (PayPal GBP->USD)": f"{gbp_converted:.2f}",
- "USD received (PayPal USD)": f"{usd_received:.2f}",
- "USD spent at merchant": f"{usd_spent:.2f}",
- "Implied rate (USD per GBP)": f"{rate:.6f}",
- "Purchase TxID": pid,
- })
- # Output paths
- gbp_out = outdir / f"{prefix}_GBP_purchases.csv"
- usd_out = outdir / f"{prefix}_USD_purchases.csv"
- fx_out = outdir / f"{prefix}_FX_reference.csv"
- write_rows(gbp_out, fieldnames, gbp_rows)
- write_rows(usd_out, fieldnames, usd_rows)
- fx_fields = ["Date","Time","Merchant","GBP topup (bank->PayPal GBP)","GBP converted (PayPal GBP->USD)","USD received (PayPal USD)","USD spent at merchant","Implied rate (USD per GBP)","Purchase TxID"]
- write_rows(fx_out, fx_fields, fx_ref_rows)
- # Summary
- ccy_counts = Counter(r.get("Currency") for r in rows)
- desc_counts = Counter(r.get("Description") for r in rows)
- print("Input:", csv_path)
- print("Rows:", len(rows), "| Currencies:", dict(ccy_counts))
- print("Wrote:")
- print(" ", gbp_out, f"({len(gbp_rows)} rows)")
- print(" ", usd_out, f"({len(usd_rows)} rows)")
- print(" ", fx_out, f"({len(fx_ref_rows)} FX rows; {fx_missing} missing bundles skipped)")
- print("Top descriptions:", ", ".join([f"{k}={v}" for k,v in desc_counts.most_common(6)]))
- if __name__ == "__main__":
- main()
Advertisement