Advertisement
dsuveges

gy stats2.py

Aug 26th, 2022 (edited)
739
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.94 KB | Source Code | 0 0
  1. import pandas as pd
  2.  
  3. def generate_statistics(df: pd.DataFrame, window: int) -> None:
  4.     cuts = pd.interval_range(start=0, end = df.szamok.max(), freq=window)
  5.     (
  6.         df
  7.         .groupby(pd.cut(df.szamok, cuts))
  8.         .agg(
  9.             osszeg = ('szamok', sum),
  10.             max_ertek = ('szamok', max),
  11.             min_ertek = ('szamok', min),
  12.             darab = ('szamok', 'count')
  13.         )
  14.         .reset_index()
  15.         .assign(
  16.             start = lambda df: df.szamok.apply(lambda x: x.left),
  17.             end = lambda df: df.szamok.apply(lambda x: x.right),
  18.         )
  19.         .drop('szamok', axis=1)
  20.         .to_csv(f'bontas-{window}.tsv', sep='\t', index=False)
  21.     )
  22.  
  23. # A szamsort tartalmazo file:
  24. input_file = 'input.txt'
  25.  
  26. # Az osszes bontas:
  27. windows = [100, 500, 1000, 5000]
  28.  
  29. # Szamsor beolvasa:
  30. df = pd.read_csv(input_file, header=None, names=['szamok'])
  31.  
  32. for window in windows:
  33.     generate_statistics(df, window)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement