Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- from rcounting import thread_navigation as tn, plots
- from rcounting.counters import apply_alias
- from pathlib import Path
- import click
# Passed to click.command below so that both -h and --help show the usage text.
CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]}
@click.command(context_settings=CONTEXT_SETTINGS)
@click.argument("comment_id")
@click.option(
    "-l", "--length", default=1000,
    help="An upper bound for the run length. Used to avoid fetching more comments than necessary",
)
@click.option(
    "-f", "--filename", type=click.Path(path_type=Path),
    help="What file to save the histogram to. If none is specified, `histogram.png` is used",
)
@click.option(
    "--seaborn/--matplotlib", default=False,
    help="Use the default matplotlib style for the plot, or seaborn.",
)
def analyse_run(comment_id, length, seaborn, filename):
    """Generate a histogram of the run of counts ending in COMMENT_ID.
    Load at least LENGTH comments, and then restrict the comments to only the
    latest volley between the two most active counters in the loaded comments.
    """
    # NOTE: click renders the docstring above as the command's --help text, so
    # its wording is user-facing output rather than internal documentation.

    # seaborn is imported lazily so it is only needed when --seaborn is given.
    if seaborn:
        import seaborn as sns

        sns.set_theme()

    # Lengths of 1000 and above are forwarded as ``limit=None``.
    # NOTE(review): presumably None means "no cap" in fetch_comments -- confirm.
    if length < 1000:
        fetch_limit = length
    else:
        fetch_limit = None
    comments = pd.DataFrame(tn.fetch_comments(comment_id, limit=fetch_limit))

    # Difference between each timestamp and the one in the previous row.
    comments["dt"] = comments["timestamp"].diff()
    comments["username"] = comments["username"].apply(apply_alias)

    if comments["username"].nunique() > 2:
        # The two usernames with the most rows in the loaded comments.
        counters = comments["username"].value_counts().head(2).index
        # Drop everything up to and including the last row written by anyone
        # else. The index label is used as a position here, which assumes the
        # frame still has its default 0..n-1 index -- TODO confirm.
        outsiders = comments.query("username not in @counters")
        first_kept = outsiders.index[-1] + 1
        comments = comments.iloc[first_kept:].copy()

    figure = plots.speedrun_histogram(comments, n=2)
    destination = "histogram.png" if filename is None else filename
    figure.savefig(destination, bbox_inches="tight")

    # Per-user summary statistics of dt, ignoring rows whose dt exceeds 10
    # (the first row's NaN dt fails the comparison and is excluded as well).
    short_gaps = comments.loc[comments["dt"] <= 10]
    summary = short_gaps.groupby("username")["dt"].describe()
    print(summary.to_markdown())
# Invoke the click command only when this file is run as a script; click
# parses the command-line arguments itself, so none are passed here.
if __name__ == "__main__":
    analyse_run()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement