Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Import the relevant libraries
import matplotlib.pyplot as plt
import pandas as pd
import statsmodels.formula.api as smf
# Input file and the columns that must be treated as categorical factors.
DATA_FILE = "Team1_data_Startups.csv"
CATEGORICAL_COLUMNS = (
    'number_of_employees',
    'State',
    'AI_Type',
    'funding_status_adjusted',
)

# Every regression the script fits, in report order:
# (result name, label printed above the summary, formula handed to smf.ols).
# Labels are kept exactly as the report has always printed them, so they are
# intentionally not derived from the formulas.
MODEL_SPECS = (
    # using the actual funding numbers
    ('linearmodel1', 'Funding_amount ~ AI type',
     'Funding_amount ~ AI_Type'),
    ('linearmodel2', 'Funding_amount ~ Patents',
     'Funding_amount ~ Patents'),
    ('linearmodel3', 'Funding_amount ~ Patents + AI_Type',
     'Funding_amount ~ Patents+AI_Type'),
    ('linearmodel4', 'Funding_amount ~ Patents + Age',
     'Funding_amount ~ Patents+Age'),
    ('linearmodel5', 'Funding_amount ~ Patents + AI_Type + Age',
     'Funding_amount ~ Patents+AI_Type+Age'),
    ('linearmodel6', 'Funding_amount ~ funding_status + Patents + AI_Type + Age',
     'Funding_amount~funding_status_adjusted+Patents+AI_Type+Age'),
    ('linearmodel7',
     'Funding_amount ~ State + funding_status + number_of_employees + Number_of_funding_rounds + Patents + AI_Type + Age',
     'Funding_amount~State+funding_status_adjusted+number_of_employees+Number_of_funding_rounds+Patents+AI_Type+Age'),
    # using the funding amount/1,000,000 (per the original author's note)
    ('linearmodel8', 'Funding_amount_micron ~ AI_Type',
     'Funding_amount_micron ~ AI_Type'),
    ('linearmodel9', 'Funding_amount_micron ~ Patents',
     'Funding_amount_micron ~ Patents'),
    ('linearmodel10', 'Funding_amount_micron ~ Patents + AI_Type',
     'Funding_amount_micron ~ Patents+AI_Type'),
    ('linearmodel11', 'Funding_amount_micron ~ Patents + Age',
     'Funding_amount_micron ~ Patents+Age'),
    ('linearmodel12', 'Funding_amount_micron ~ Patents + AI_Type + Age',
     'Funding_amount_micron ~ Patents+AI_Type+Age'),
    ('linearmodel13', 'Funding_amount_micron ~ funding_status + Patents + AI_Type + Age',
     'Funding_amount_micron~funding_status_adjusted+Patents+AI_Type+Age'),
    ('linearmodel14',
     'Funding_amount_micron ~ State + funding_status + number_of_employees + Number_of_funding_rounds + Patents + AI_Type + Age',
     'Funding_amount_micron~State+funding_status_adjusted+number_of_employees+Number_of_funding_rounds+Patents+AI_Type+Age'),
    # using the log of the funding amount (per the original author's note)
    ('linearmodel15', 'Funding_amount_log ~ Patents',
     'Funding_amount_log ~ Patents'),
    ('linearmodel16', 'Funding_amount_log ~ AI_Type',
     'Funding_amount_log ~ AI_Type'),
    ('linearmodel17', 'Funding_amount_log ~ Patents + AI_Type',
     'Funding_amount_log ~ Patents+AI_Type'),
    ('linearmodel18', 'Funding_amount_log ~ Patents + Age',
     'Funding_amount_log ~ Patents+Age'),
    ('linearmodel19', 'Funding_amount_log ~ Patents + AI_Type + Age',
     'Funding_amount_log ~ Patents+AI_Type+Age'),
    ('linearmodel20',
     'Funding_amount_log ~ State + funding_status + number_of_employees + Number_of_funding_rounds + Patents + AI_Type + Age',
     'Funding_amount_log~State+funding_status_adjusted+number_of_employees+Number_of_funding_rounds+Patents+AI_Type+Age'),
    ('linearmodelfinal',
     'Funding_amount_log ~ funding_status + number_of_employees + Number_of_funding_rounds + Patents + AI_Type + Age',
     'Funding_amount_log~funding_status_adjusted+number_of_employees+Number_of_funding_rounds+Patents+AI_Type+Age'),
)

# Scatter plots shown before the bar chart: (x column, y column, title).
SCATTER_PLOTS = (
    # patents vs. log funding
    ('Patents', 'Funding_amount_log',
     'Graph showing the number of patents and the funding amount'),
    # age vs. patents
    ('Age', 'Patents',
     'Graph showing the age of the companies and the number of patents'),
    # age vs. log funding
    ('Age', 'Funding_amount_log',
     'Graph showing the funding amount and the age of the companies'),
)


def load_startups(path: str = DATA_FILE) -> pd.DataFrame:
    """Read the startup CSV, drop incomplete rows and type the factors.

    The frame is printed once as read and once after the NaN rows have been
    removed, so the effect of the cleaning step is visible in the output.

    Args:
        path: CSV file to read.

    Returns:
        A DataFrame without any row containing NaN, in which every column
        listed in CATEGORICAL_COLUMNS has the ``category`` dtype.

    Raises:
        KeyError: if one of CATEGORICAL_COLUMNS is missing from the file.
    """
    df = pd.read_csv(path)
    print(df)
    # dropna() returns a new frame; the result has to be kept, otherwise the
    # incomplete rows silently stay in the data.
    df = df.dropna()
    print(df)
    # astype() with a column->dtype mapping converts all factors in one step
    # and returns a fresh frame (no chained-assignment warnings).
    return df.astype(dict.fromkeys(CATEGORICAL_COLUMNS, 'category'))


def fit_and_report(df: pd.DataFrame, specs=MODEL_SPECS) -> dict:
    """Fit each OLS model in *specs* on *df* and print its summary.

    Args:
        df: data passed to every model.
        specs: iterable of ``(name, label, formula)`` triples.

    Returns:
        Mapping of result name (e.g. ``'linearmodelfinal'``) to the fitted
        statsmodels result, in the order the models were fitted.
    """
    fitted = {}
    for name, label, formula in specs:
        result = smf.ols(formula=formula, data=df).fit()
        print(label, "\n")
        print(result.summary())
        print("\n\n\n")
        fitted[name] = result
    return fitted


def mean_funding_by_ai_type(df: pd.DataFrame) -> pd.Series:
    """Return the mean of ``Funding_amount_micron`` for each ``AI_Type``."""
    # observed=False is today's default for categorical keys; stating it
    # keeps the result stable across pandas versions and avoids the
    # FutureWarning about the changing default.
    return df.groupby('AI_Type', observed=False)['Funding_amount_micron'].mean()


def show_plots(df: pd.DataFrame) -> None:
    """Show the three scatter plots and the mean-funding bar chart.

    Each figure is shown with its own ``plt.show()`` call, one after the
    other.
    """
    for x_column, y_column, title in SCATTER_PLOTS:
        df.plot.scatter(x=x_column, y=y_column, title=title)
        plt.show()

    # bar graph for AI_Type and mean of funding
    mean_funding_by_ai_type(df).plot(kind='bar')
    plt.xlabel('AI_Type')
    plt.ylabel('Mean of Funding amount micron')
    plt.title('AI_Type Vs Mean of Funding amount micron')
    plt.show()


def main(csv_path: str = DATA_FILE) -> None:
    """Run the whole analysis: load the data, fit the models, draw the plots."""
    # Must be set before the frame is printed, otherwise it has no effect on
    # how many columns those printouts show.
    pd.set_option('display.max_columns', 15)
    df = load_startups(csv_path)
    fit_and_report(df)
    show_plots(df)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement