Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import numpy as np
- import matplotlib.pyplot as plt
- import matplotlib.ticker
- %matplotlib inline
- import requests
- from bs4 import BeautifulSoup
- #By Rami Nasser
def get_13f(symbol, firm_name, xml_file='form13fInfoTable.xml'):
    """Download the most recent 13F-HR holdings table for a filer from SEC EDGAR.

    Parameters
    ----------
    symbol : str
        SEC CIK number of the filer, zero-padded (e.g. '0000102909').
    firm_name : str
        Human-readable firm name. Currently unused by the function body;
        kept so existing callers keep working.
    xml_file : str
        File name of the information-table XML inside the filing directory
        (varies per filer, hence the parameter).

    Returns
    -------
    pandas.DataFrame
        Columns ['cusip', 'value', 'sshPrnamt'] — one row per holding.
        Empty frame when no filing is found or the filing has at most one
        <cusip> entry (the original code raised NameError in both cases).
    """
    # NOTE(review): SEC EDGAR rejects anonymous requests; a descriptive
    # User-Agent with contact info is required by their fair-access policy.
    # Confirm an appropriate contact string before production use.
    headers = {'User-Agent': 'research-script example@example.com'}
    empty = pd.DataFrame(columns=['cusip', 'value', 'sshPrnamt'])

    index_url = ('https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany'
                 '&CIK=' + symbol + '&type=13F-HR&output=xml')
    r = requests.get(index_url, headers=headers)
    soup = BeautifulSoup(r.text, 'lxml')

    # Take only the first (most recent) filing link.
    url = None
    for link in soup.find_all('filinghref'):
        url = link.string
        # EDGAR sometimes lists '.htm' index pages; normalize to '.html'.
        if url.split('.')[-1] == 'htm':
            url += 'l'
        break
    if url is None:
        # No 13F-HR filings listed — original code hit NameError here.
        return empty

    # The information table lives in the same directory as the index page.
    table_url = url[:url.rfind('/')] + '/' + xml_file
    r = requests.get(table_url, headers=headers)
    print(table_url)
    soup = BeautifulSoup(r.text, 'xml')

    cusips = soup.find_all('cusip')
    if len(cusips) <= 1:
        # Original code returned an unbound local here (NameError);
        # an empty frame is the safe equivalent for the callers below.
        return empty

    values = soup.find_all('value')
    share_counts = soup.find_all('sshPrnamt')
    rows = [[c.string, int(v.string), int(s.string)]
            for c, v, s in zip(cusips, values, share_counts)]
    return pd.DataFrame(rows, columns=['cusip', 'value', 'sshPrnamt'])
# --- Build a combined holdings table for three large 13F filers. ---
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# replacement and also avoids quadratic repeated copying.
df = pd.concat(
    [
        get_13f('0000093751', 'State Street', 'copyofcopyof13fworkbook.xml'),
        get_13f('0000102909', 'Vanguard'),
        get_13f('0001364742', 'BlackRock'),
    ],
    ignore_index=True,
)

# Aggregate positions per CUSIP across the three filers, then keep only
# large combined positions (reported 'value' above 1,000,000).
df_g = df.groupby(['cusip']).sum()
df_g['cusip'] = df_g.index
df_g = df_g[df_g['value'] > 1000000]

# Map CUSIP numbers to ticker symbols from a local lookup file.
df2 = pd.read_csv('_data/cusip_nums.csv')
df2 = df2.drop(['FundTicker', 'Shares', 'Weight', 'Date', 'MarketValue'], axis=1)
df2 = df2.rename(columns={'SecurityNum': 'cusip', 'HoldingsTicker': 'Symbol'})
# Ticker column carries padding whitespace in the source CSV.
df2['Symbol'] = df2['Symbol'].str.strip()
df3 = df_g.merge(df2, on='cusip')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement