Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
def generate_statistics(df: pd.DataFrame, window: int) -> None:
    """Write per-window summary statistics of ``df.szamok`` to a TSV file.

    The values are bucketed into consecutive, right-closed intervals of
    width ``window`` starting at 0: ``(0, window]``, ``(window, 2*window]``
    and so on, up to and including the interval that contains the column
    maximum.  For every interval one row is written with the columns
    ``osszeg`` (sum), ``max_ertek`` (maximum), ``min_ertek`` (minimum),
    ``darab`` (count), ``start`` and ``end`` (interval bounds).

    Values less than or equal to 0 fall outside every interval and are
    therefore not counted.

    Args:
        df: Frame with a numeric ``szamok`` column.
        window: Width of each interval; must be positive.

    Raises:
        ValueError: If ``window`` is not positive.

    Side effects:
        Creates or overwrites ``bontas-{window}.tsv`` in the current
        working directory (tab separated, without the index).
    """
    if window <= 0:
        raise ValueError(f"window must be positive, got {window!r}")

    # Ceiling division: number of windows needed to reach the maximum.
    # Building the bins from ``periods`` instead of ``end`` matters because
    # interval_range(start, end, freq) lowers ``end`` to the last multiple
    # of ``freq``, which would drop the values in the top partial window.
    highest = df.szamok.max()
    n_bins = int(-(-highest // window))
    bins = pd.interval_range(start=0, periods=n_bins, freq=window)

    stats = (
        df
        # observed=False keeps empty windows in the output; it is spelled
        # out because the pandas default for categorical keys is changing.
        .groupby(pd.cut(df.szamok, bins), observed=False)
        # String names select the groupby implementations directly; passing
        # the builtins sum/max/min is deprecated in recent pandas.
        .agg(
            osszeg=('szamok', 'sum'),
            max_ertek=('szamok', 'max'),
            min_ertek=('szamok', 'min'),
            darab=('szamok', 'count'),
        )
    )

    # The result index holds one Interval per window; expose its bounds as
    # plain columns, since the index itself is not written out.
    stats['start'] = [interval.left for interval in stats.index]
    stats['end'] = [interval.right for interval in stats.index]

    stats.to_csv(f'bontas-{window}.tsv', sep='\t', index=False)
# Window widths for which a separate breakdown file is produced.
windows = [100, 500, 1000, 5000]

# Name of the file holding the series of numbers.
input_file = 'input.txt'

# Load the numbers into a single column named 'szamok'; the file has no
# header row.
df = pd.read_csv(
    input_file,
    header=None,
    names=['szamok'],
)

# Produce one statistics file per window width.
for window in windows:
    generate_statistics(df, window)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement