Advertisement
Guest User

python#1ZGN3l8A

a guest
Oct 9th, 2022
308
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.08 KB | None | 0 0
  1. import pandas as pd
  2.  
  3.  
  4. url = "http://staff.stat.sinica.edu.tw/fredphoa/HW/HW1/CAmaxTemp.txt"
  5.  
  6.  
  7. raw = pd.read_csv(
  8.     url,
  9.     skiprows=1,
  10.     header=None,
  11.     sep="\t+",
  12.     engine="python",
  13. )
  14. columns = {
  15.     "WBAN": "category",
  16.     "Station": "category",
  17.     "State": "category",
  18.     "Period": str,
  19.     "JAN": int,
  20.     "FEB": int,
  21.     "MAR": int,
  22.     "APR": int,
  23.     "MAY": int,
  24.     "JUN": int,
  25.     "JUL": int,
  26.     "AUG": int,
  27.     "SEP": int,
  28.     "OCT": int,
  29.     "NOV": int,
  30.     "DEC": int,
  31.     "MAX": int,
  32. }
  33.  
  34.  
  35. def extract_period(df):
  36.     time = df["Period"].str.extract("(\d+)-(\d+)").agg(pd.to_datetime, format="%Y%m")
  37.     return df.assign(Start=time[0], End=time[1])
  38.  
  39.  
  40. df = (
  41.     pd.concat(
  42.         (
  43.             raw[0].str.extract(
  44.                 "(\d+)([A-Z ]+),(\D+)",
  45.                 expand=True,
  46.             ),
  47.             raw[1].str.split(
  48.                 "\s+",
  49.                 expand=True,
  50.             ),
  51.         ),
  52.         axis=1,
  53.     )
  54.     .set_axis(columns.keys(), axis=1)
  55.     .astype(columns)
  56.     .pipe(extract_period)
  57. )
  58.  
  59. df.head()
  60.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement