Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import numpy as np
- from pandas import DataFrame
- from datetime import datetime
- import os
class Analyze:
    """Relate fails-to-deliver (FTD) quantities to each ticker's share float.

    The full pipeline is ``read_data()`` -> ``transform_data()``; ``run()``
    instead loads a previously aggregated CSV and prints summary statistics.

    Attributes:
        ftd_data: DataFrame of FTD rows (``None`` until loaded).
        file_dir: Directory holding the pipe-delimited FTD files and the
            ``tickers_floats.csv`` lookup file.
        tickers_floats: DataFrame written out by ``write_data()``. Nothing in
            this class populates it; the caller must assign it first.
    """

    ONE_MILLION = 1_000_000
    ONE_BILLION = 1_000_000_000

    # Lookup file (SYMBOL, float) stored next to the FTD files; it must be
    # skipped when reading the FTD files themselves.
    FLOATS_FILENAME = 'tickers_floats.csv'

    # Sentinel stored in the 'float' column when the share float is missing or
    # not expressed with an M/B suffix. transform_data() filters it out.
    UNKNOWN_FLOAT = -1.0

    def __init__(self):
        self.ftd_data = None
        self.file_dir = 'files'
        self.tickers_floats = None

    def _floats_path(self):
        """Return the path of the ticker-float lookup file inside ``file_dir``."""
        return os.path.join(self.file_dir, Analyze.FLOATS_FILENAME)

    def read_data(self):
        """Load every FTD file in ``self.file_dir`` into ``self.ftd_data``.

        Each file is read as pipe-delimited text and its last two rows are
        dropped because they hold summary data rather than FTD records.
        The float lookup file and anything that is not a regular file are
        skipped. An empty directory yields an empty DataFrame.
        """
        frames = []
        # Sorted so the concatenated row order does not depend on the
        # arbitrary order returned by os.listdir().
        for name in sorted(os.listdir(self.file_dir)):
            if name == Analyze.FLOATS_FILENAME:
                continue
            path = os.path.join(self.file_dir, name)
            if not os.path.isfile(path):
                continue
            data = pd.read_csv(path, delimiter='|')
            # last two rows are summary data
            frames.append(data.iloc[:-2])
        # Concatenate once: growing a frame inside the loop copies all prior
        # rows on every iteration.
        self.ftd_data = pd.concat(frames) if frames else pd.DataFrame()

    def transform_data(self):
        """Enrich ``self.ftd_data`` with float-relative and dollar metrics.

        Steps, in order:
          1. Rename ``'QUANTITY (FAILS)'`` to ``'Quantity'``.
          2. Left-merge the per-ticker float on ``'SYMBOL'``.
          3. Add ``'Percentage of Float'`` = Quantity / float.
          4. Drop rows whose ``'PRICE'`` is the placeholder ``"."``.
          5. Add ``'Total Amount'`` = Quantity * PRICE.
          6. Drop rows whose float is the unknown sentinel (-1).
          7. Sort by ``'Percentage of Float'``, largest first.

        Rows whose symbol has no entry in the float file keep a NaN float and
        are NOT removed by step 6.
        """
        # TODO: Clean up
        merged = self.ftd_data.rename(columns={
            'QUANTITY (FAILS)': 'Quantity'
        }).merge(self.read_float(), on='SYMBOL', how='left')

        # Column arithmetic instead of row-wise apply(): same values, but it
        # also works on an empty frame and avoids a Python call per row.
        merged['Percentage of Float'] = merged['Quantity'] / merged['float']

        # .copy() so the column assignment below targets an independent frame
        # rather than a slice of `merged`.
        priced = merged[merged['PRICE'] != "."].copy()
        priced['Total Amount'] = priced['Quantity'] * priced['PRICE'].astype(float)

        # custom filters
        priced = priced[priced['float'] != Analyze.UNKNOWN_FLOAT]
        self.ftd_data = priced.sort_values(by='Percentage of Float', ascending=False)

    def read_float(self):
        """Read the ticker-float lookup file and return it with numeric floats."""
        return Analyze.transform_float(pd.read_csv(self._floats_path()))

    @staticmethod
    def _parse_share_count(amt):
        """Convert one float cell such as ``'12.5M'`` or ``'2B'`` to a number.

        Args:
            amt: Raw cell value; usually a string, possibly NaN.

        Returns:
            The share count as a float, or ``Analyze.UNKNOWN_FLOAT`` when the
            value is missing or does not end in ``M`` (million) or ``B``
            (billion), e.g. ``'UNKNOWN'``.

        Raises:
            ValueError: If the text before the suffix is not a number.
        """
        if pd.isna(amt):
            return Analyze.UNKNOWN_FLOAT
        # str() tolerates numeric cells; strip() tolerates padded CSV fields.
        text = str(amt).strip()
        multipliers = {'M': Analyze.ONE_MILLION, 'B': Analyze.ONE_BILLION}
        scale = multipliers.get(text[-1:])
        if scale is None:
            return Analyze.UNKNOWN_FLOAT
        return float(text[:-1]) * scale

    @staticmethod
    def transform_float(df: DataFrame):
        """Return a copy of *df* whose ``'float'`` column is numeric.

        Every row is kept: missing values and values without an M/B suffix
        (such as ``'UNKNOWN'``) become ``Analyze.UNKNOWN_FLOAT`` so that the
        caller can filter them after merging. The input frame is not modified.

        Args:
            df: Frame with a ``'float'`` column of strings like ``'12.5M'``.

        Returns:
            A new DataFrame with the same rows and a numeric ``'float'`` column.
        """
        # The former `(!= 'UNKNOWN') | notna()` mask was true for every row, so
        # it is dropped; copying keeps the caller's frame untouched.
        df = df.copy()
        df['float'] = df['float'].apply(Analyze._parse_share_count)
        return df

    def write_data(self):
        """Write ``self.tickers_floats`` to the float lookup file (no index).

        Raises:
            ValueError: If ``self.tickers_floats`` has not been assigned.
        """
        if self.tickers_floats is None:
            raise ValueError(
                "tickers_floats is not set; assign a DataFrame before calling write_data()"
            )
        self.tickers_floats.to_csv(self._floats_path(), index=False)

    @staticmethod
    def do_print(title, content):
        """Print ``title`` padded/truncated to exactly 40 characters, then ``content``."""
        # '<40.40' left-aligns to width 40 and truncates to 40 characters.
        print(f"{title:<40.40}: {content}")

    def run(self, aggregate_path='aggregate_ftds.csv'):
        """Load the aggregated FTD CSV and print summary statistics.

        The aggregate file is the saved result of ``read_data()`` followed by
        ``transform_data()``; call those two methods instead of reading the
        file to rebuild it from the raw SEC split data.

        Args:
            aggregate_path: CSV to load. It must contain the columns
                ``SYMBOL``, ``Percentage of Float``, ``Total Amount`` and
                ``SETTLEMENT DATE``.
        """
        self.ftd_data = pd.read_csv(aggregate_path)
        all_rows = self.ftd_data
        gme = all_rows[all_rows['SYMBOL'] == 'GME']

        Analyze.do_print("Number of tickers (that also have price)", all_rows['SYMBOL'].unique().shape[0])

        # divide the two values below for the ratio of GME to all tickers
        Analyze.do_print("Avg % of float, all tickers", all_rows['Percentage of Float'].mean())
        Analyze.do_print("Avg % of float, GME", gme['Percentage of Float'].mean())

        Analyze.do_print("Avg Net Amount FTD, all tickers", all_rows['Total Amount'].mean())
        Analyze.do_print("Avg Net Amount FTD, GME", gme['Total Amount'].mean())
        print(gme.sort_values(by='Total Amount', ascending=False))

        Analyze.do_print("Number of dates", all_rows['SETTLEMENT DATE'].unique().shape[0])
        Analyze.do_print("GME # of appearances", gme['SETTLEMENT DATE'].unique().shape[0])
# Script entry point: build the analyzer and print the summary report.
if __name__ == "__main__":
    Analyze().run()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement