Advertisement
Guest User

Untitled

a guest
Apr 19th, 2019
94
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.97 KB | None | 0 0
  1. #import the relevant libraries
  2. import pandas as pd
  3. import statsmodels.formula.api as smf
  4. import matplotlib.pyplot as plt
  5.  
  6. df=pd.read_csv("Team1_data_Startups.csv") #read the datafile
  7. print(df)
  8. df.dropna() #drop all rows that have any NaN values
  9. print(df)
  10.  
  11. pd.set_option('display.max_columns', 15)
  12.  
  13. #set the categorical variables with the correct datatype
  14. df['number_of_employees']= df['number_of_employees'].astype('category')
  15. df['State']= df['State'].astype('category')
  16. df['AI_Type']= df['AI_Type'].astype('category')
  17. df['funding_status_adjusted']=df['funding_status_adjusted'].astype('category')
  18.  
  19. #create the linear models
  20.  
  21. #using the actual funding numbers
  22. linearmodel1 = smf.ols(formula = 'Funding_amount ~ AI_Type', data = df).fit()
  23. print('Funding_amount ~ AI type', "\n")
  24. print(linearmodel1.summary())
  25. print("\n\n\n")
  26.  
  27. linearmodel2 = smf.ols(formula = 'Funding_amount ~ Patents', data = df).fit()
  28. print('Funding_amount ~ Patents',"\n")
  29. print(linearmodel2.summary())
  30. print("\n\n\n")
  31.  
  32. linearmodel3 = smf.ols(formula = 'Funding_amount ~ Patents+AI_Type', data = df).fit()
  33. print('Funding_amount ~ Patents + AI_Type',"\n")
  34. print(linearmodel3.summary())
  35. print("\n\n\n")
  36.  
  37. linearmodel4 = smf.ols(formula = 'Funding_amount ~ Patents+Age', data = df).fit()
  38. print('Funding_amount ~ Patents + Age',"\n")
  39. print(linearmodel4.summary())
  40. print("\n\n\n")
  41.  
  42. linearmodel5 = smf.ols(formula = 'Funding_amount ~ Patents+AI_Type+Age', data = df).fit()
  43. print('Funding_amount ~ Patents + AI_Type + Age', "\n")
  44. print(linearmodel5.summary())
  45. print("\n\n\n")
  46.  
  47. linearmodel6 = smf.ols(formula = 'Funding_amount~funding_status_adjusted+Patents+AI_Type+Age', data = df).fit()
  48. print('Funding_amount ~ funding_status + Patents + AI_Type + Age', "\n")
  49. print(linearmodel6.summary())
  50. print("\n\n\n")
  51.  
  52. linearmodel7 = smf.ols(formula = 'Funding_amount~State+funding_status_adjusted+number_of_employees+Number_of_funding_rounds+Patents+AI_Type+Age', data = df).fit()
  53. print('Funding_amount ~ State + funding_status + number_of_employees + Number_of_funding_rounds + Patents + AI_Type + Age', "\n")
  54. print(linearmodel7.summary())
  55. print("\n\n\n")
  56.  
  57. #using the funding amount/1,000,000
  58. linearmodel8 = smf.ols(formula = 'Funding_amount_micron ~ AI_Type', data = df).fit()
  59. print('Funding_amount_micron ~ AI_Type', "\n")
  60. print(linearmodel8.summary())
  61. print("\n\n\n")
  62.  
  63. linearmodel9 = smf.ols(formula = 'Funding_amount_micron ~ Patents', data = df).fit()
  64. print('Funding_amount_micron ~ Patents', "\n")
  65. print(linearmodel9.summary())
  66. print("\n\n\n")
  67.  
  68. linearmodel10 = smf.ols(formula = 'Funding_amount_micron ~ Patents+AI_Type', data = df).fit()
  69. print('Funding_amount_micron ~ Patents + AI_Type', "\n")
  70. print(linearmodel10.summary())
  71. print("\n\n\n")
  72.  
  73. linearmodel11 = smf.ols(formula = 'Funding_amount_micron ~ Patents+Age', data = df).fit()
  74. print('Funding_amount_micron ~ Patents + Age', "\n")
  75. print(linearmodel11.summary())
  76. print("\n\n\n")
  77.  
  78. linearmodel12 = smf.ols(formula = 'Funding_amount_micron ~ Patents+AI_Type+Age', data = df).fit()
  79. print('Funding_amount_micron ~ Patents + AI_Type + Age', "\n")
  80. print(linearmodel12.summary())
  81. print("\n\n\n")
  82.  
  83. linearmodel13 = smf.ols(formula = 'Funding_amount_micron~funding_status_adjusted+Patents+AI_Type+Age', data = df).fit()
  84. print('Funding_amount_micron ~ funding_status + Patents + AI_Type + Age', "\n")
  85. print(linearmodel13.summary())
  86. print("\n\n\n")
  87.  
  88. linearmodel14 = smf.ols(formula = 'Funding_amount_micron~State+funding_status_adjusted+number_of_employees+Number_of_funding_rounds+Patents+AI_Type+Age', data = df).fit()
  89. print('Funding_amount_micron ~ State + funding_status + number_of_employees + Number_of_funding_rounds + Patents + AI_Type + Age', "\n")
  90. print(linearmodel14.summary())
  91. print("\n\n\n")
  92.  
  93. #using the log of the funding amount
  94. linearmodel15 = smf.ols(formula = 'Funding_amount_log ~ Patents', data = df).fit()
  95. print('Funding_amount_log ~ Patents', "\n")
  96. print(linearmodel15.summary())
  97. print("\n\n\n")
  98.  
  99. linearmodel16 = smf.ols(formula = 'Funding_amount_log ~ AI_Type', data = df).fit()
  100. print('Funding_amount_log ~ AI_Type', "\n")
  101. print(linearmodel16.summary())
  102. print("\n\n\n")
  103.  
  104. linearmodel17 = smf.ols(formula = 'Funding_amount_log ~ Patents+AI_Type', data = df).fit()
  105. print('Funding_amount_log ~ Patents + AI_Type', "\n")
  106. print(linearmodel17.summary())
  107. print("\n\n\n")
  108.  
  109. linearmodel18 = smf.ols(formula = 'Funding_amount_log ~ Patents+Age', data = df).fit()
  110. print('Funding_amount_log ~ Patents + Age', "\n")
  111. print(linearmodel18.summary())
  112. print("\n\n\n")
  113.  
  114. linearmodel19 = smf.ols(formula = 'Funding_amount_log ~ Patents+AI_Type+Age', data = df).fit()
  115. print('Funding_amount_log ~ Patents + AI_Type + Age', "\n")
  116. print(linearmodel19.summary())
  117. print("\n\n\n")
  118.  
  119. linearmodel20 = smf.ols(formula = 'Funding_amount_log~State+funding_status_adjusted+number_of_employees+Number_of_funding_rounds+Patents+AI_Type+Age', data = df).fit()
  120. print('Funding_amount_log ~ State + funding_status + number_of_employees + Number_of_funding_rounds + Patents + AI_Type + Age', "\n")
  121. print(linearmodel20.summary())
  122. print("\n\n\n")
  123.  
  124. linearmodelfinal= smf.ols(formula = 'Funding_amount_log~funding_status_adjusted+number_of_employees+Number_of_funding_rounds+Patents+AI_Type+Age', data = df).fit()
  125. print('Funding_amount_log ~ funding_status + number_of_employees + Number_of_funding_rounds + Patents + AI_Type + Age',"\n")
  126. print(linearmodelfinal.summary())
  127. print("\n\n\n")
  128.  
  129. #create the plots
  130. #scatterplot patents on funding
  131. df.plot.scatter(x='Patents', y='Funding_amount_log', title='Graph showing the number of patents and the funding amount')
  132. plt.show()
  133.  
  134. #scatterplot patents on age
  135. df.plot.scatter(x='Age', y='Patents', title='Graph showing the age of the companies and the number of patents')
  136. plt.show()
  137.  
  138. #scatterplot patents on age
  139. df.plot.scatter(x='Age', y='Funding_amount_log', title='Graph showing the funding amount and the age of the companies')
  140. plt.show()
  141.  
  142. #bar graph for AI_Type and mean of funding
  143. var = df.groupby('AI_Type').Funding_amount_micron.mean()
  144. var.plot(kind='bar')
  145. plt.xlabel('AI_Type')
  146. plt.ylabel('Mean of Funding amount micron')
  147. plt.title('AI_Type Vs Mean of Funding amount micron')
  148. plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement