Guest User

Pearson R, GDP PPP and Disposable Income PPP

a guest
Nov 28th, 2025
42
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.16 KB | None | 0 0
  1.  
"""
Pearson correlation between GDP per capita (PPP, World Bank) and gross
disposable income per capita (USD PPP, OECD).

Sample output from one run (the figures depend on the data available at
fetch time):

Fetching World Bank GDP PPP data...
  Got 206 countries
Fetching OECD Disposable Income data...
  Got 43 countries

Countries with both datasets: 41
Total observations: 1129, years 1990-2024

==================================================
CORRELATION ANALYSIS
==================================================

1. Cross-section (2024): r = 0.7531, n = 25
2. Pooled panel:            r = 0.8885, n = 1129
3. Within-country mean:     r = 0.9891, n = 40 countries
4. Country averages:        r = 0.8748, n = 41
"""
  20.  
  21. import urllib.request
  22. import json
  23. import csv
  24. import io
  25. from collections import defaultdict
  26. from math import sqrt
  27.  
  28.  
  29. def fetch_worldbank_gdp_ppp():
  30.     """Fetch GDP per capita PPP from World Bank API"""
  31.     url = "https://api.worldbank.org/v2/country/all/indicator/NY.GDP.PCAP.PP.CD?format=json&per_page=20000&date=1990:2024"
  32.     print("Fetching World Bank GDP PPP data...")
  33.    
  34.     req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
  35.     with urllib.request.urlopen(req, timeout=30) as response:
  36.         data = json.loads(response.read().decode())
  37.    
  38.     if not data or len(data) < 2 or data[1] is None:
  39.         raise RuntimeError("World Bank API returned unexpected structure")
  40.    
  41.     aggregates = {
  42.         'EU', 'XC', 'OE', 'ZJ', 'XD', 'XO', 'XM', 'XN', 'XP', 'XT',
  43.         'ZG', 'ZF', 'Z4', 'Z7', '4E', '7E', 'S1', 'S2', 'S3', 'S4',
  44.         '8S', 'B8', 'F1', 'XE', 'XL', 'ZQ', 'XQ', 'XU', '1W', '1A',
  45.         'ARB', 'CSS', 'CEB', 'EAP', 'EAS', 'ECA', 'ECS', 'EMU', 'EUU',
  46.         'FCS', 'HIC', 'HPC', 'IBD', 'IBT', 'IDA', 'IDB', 'IDX', 'LAC',
  47.         'LCN', 'LDC', 'LIC', 'LMC', 'LMY', 'LTE', 'MEA', 'MIC', 'MNA',
  48.         'NAC', 'OED', 'OSS', 'PRE', 'PSS', 'PST', 'SAS', 'SSA', 'SSF',
  49.         'SST', 'TEA', 'TEC', 'TLA', 'TMN', 'TSA', 'TSS', 'UMC', 'WLD'
  50.     }
  51.    
  52.     gdp = defaultdict(dict)
  53.     for record in data[1]:
  54.         if record['value'] is not None:
  55.             country = record['countryiso3code']
  56.             if country and country not in aggregates:
  57.                 gdp[country][int(record['date'])] = record['value']
  58.    
  59.     print(f"  Got {len(gdp)} countries")
  60.     return dict(gdp)
  61.  
  62.  
  63. def fetch_oecd_disposable_income():
  64.     """Fetch Gross Disposable Income per capita (USD PPP) from OECD"""
  65.     url = "https://sdmx.oecd.org/public/rest/data/OECD.SDD.NAD,DSD_NAAG@DF_NAAG_V,1.0/A..B6GS1M_POP.USD_PPP_PS.?dimensionAtObservation=AllDimensions"
  66.     headers = {"Accept": "application/vnd.sdmx.data+csv;version=2.0.0"}
  67.    
  68.     print("Fetching OECD Disposable Income data...")
  69.     req = urllib.request.Request(url, headers=headers)
  70.    
  71.     with urllib.request.urlopen(req, timeout=30) as response:
  72.         content = response.read().decode('utf-8')
  73.    
  74.     reader = csv.DictReader(io.StringIO(content))
  75.     income = defaultdict(dict)
  76.     for row in reader:
  77.         country = row.get('REF_AREA', '')
  78.         year = row.get('TIME_PERIOD', '')
  79.         value = row.get('OBS_VALUE', '')
  80.         if country and year and value:
  81.             try:
  82.                 income[country][int(year)] = float(value)
  83.             except (ValueError, TypeError):
  84.                 pass
  85.    
  86.     print(f"  Got {len(income)} countries")
  87.     return dict(income)
  88.  
  89.  
  90. def pearsonr(x, y):
  91.     n = len(x)
  92.     if n < 3:
  93.         return None
  94.     mx, my = sum(x)/n, sum(y)/n
  95.     sx = sqrt(sum((xi - mx)**2 for xi in x) / (n-1))
  96.     sy = sqrt(sum((yi - my)**2 for yi in y) / (n-1))
  97.     if sx == 0 or sy == 0:
  98.         return None
  99.     return sum((xi - mx) * (yi - my) for xi, yi in zip(x, y)) / ((n-1) * sx * sy)
  100.  
  101.  
  102. def analyze(gdp_data, income_data):
  103.     common_countries = set(gdp_data.keys()) & set(income_data.keys())
  104.     print(f"\nCountries with both datasets: {len(common_countries)}")
  105.    
  106.     if not common_countries:
  107.         print("ERROR: No matching countries.")
  108.         return
  109.    
  110.     rows = []
  111.     for country in common_countries:
  112.         common_years = set(gdp_data[country].keys()) & set(income_data[country].keys())
  113.         for year in common_years:
  114.             rows.append({
  115.                 'country': country,
  116.                 'year': year,
  117.                 'gdp': gdp_data[country][year],
  118.                 'income': income_data[country][year]
  119.             })
  120.    
  121.     years = sorted(set(r['year'] for r in rows))
  122.     print(f"Total observations: {len(rows)}, years {min(years)}-{max(years)}")
  123.    
  124.     print("\n" + "=" * 50)
  125.     print("CORRELATION ANALYSIS")
  126.     print("=" * 50)
  127.    
  128.     # 1. Cross-section (latest year)
  129.     latest = max(years)
  130.     latest_rows = [r for r in rows if r['year'] == latest]
  131.     r1 = pearsonr([r['gdp'] for r in latest_rows], [r['income'] for r in latest_rows])
  132.     print(f"\n1. Cross-section ({latest}): r = {r1:.4f}, n = {len(latest_rows)}")
  133.    
  134.     # 2. Pooled panel
  135.     r2 = pearsonr([r['gdp'] for r in rows], [r['income'] for r in rows])
  136.     print(f"2. Pooled panel:            r = {r2:.4f}, n = {len(rows)}")
  137.    
  138.     # 3. Within-country
  139.     country_rs = []
  140.     for country in common_countries:
  141.         c_rows = [r for r in rows if r['country'] == country]
  142.         if len(c_rows) >= 5:
  143.             rc = pearsonr([r['gdp'] for r in c_rows], [r['income'] for r in c_rows])
  144.             if rc is not None:
  145.                 country_rs.append((country, rc))
  146.     country_rs.sort(key=lambda x: x[1], reverse=True)
  147.     mean_r = sum(r for _, r in country_rs) / len(country_rs)
  148.     print(f"3. Within-country mean:     r = {mean_r:.4f}, n = {len(country_rs)} countries")
  149.    
  150.     # 4. Country averages
  151.     country_avgs = {}
  152.     for country in common_countries:
  153.         c_rows = [r for r in rows if r['country'] == country]
  154.         if c_rows:
  155.             country_avgs[country] = {
  156.                 'gdp': sum(r['gdp'] for r in c_rows) / len(c_rows),
  157.                 'income': sum(r['income'] for r in c_rows) / len(c_rows)
  158.             }
  159.     r4 = pearsonr([v['gdp'] for v in country_avgs.values()],
  160.                    [v['income'] for v in country_avgs.values()])
  161.     print(f"4. Country averages:        r = {r4:.4f}, n = {len(country_avgs)}")
  162.  
  163.  
  164. if __name__ == "__main__":
  165.     gdp = fetch_worldbank_gdp_ppp()
  166.     income = fetch_oecd_disposable_income()
  167.     analyze(gdp, income)
  168.  
  169.  
Advertisement
Add Comment
Please, Sign In to add comment