Advertisement
MetricT

US Patent Rate per 100k

Feb 1st, 2024 (edited)
1,064
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 4.27 KB | History | 0 0
  1. ### Compute the ratio of patents granted per 100k US population, as a measure
  2. ### of innovation over time.
  3.  
  4. library(tidyverse)
  5. library(rvest)
  6. library(scales)
  7. library(broom)
  8.  
  9. ### US Population since 1900
  10. us_pop_pres <-
  11.   "https://multpl.com/united-states-population/table/by-year" %>%
  12.   read_html() %>%
  13.   html_nodes("table") %>%
  14.   .[[1]] %>%
  15.   html_table() %>%
  16.   filter(grepl("Jul", Date)) %>%
  17.   rename(Year = Date) %>%
  18.   mutate(
  19.     Year  = gsub("Jul 1, ", "", Year) %>% as.integer(),
  20.     Value = gsub(" million", "", Value) %>% as.numeric(),
  21.     Date  = as.Date(paste(Year, "-01-01", sep = "")),
  22.     Value = Value * 1000000
  23.   ) %>%
  24.   select(Date, Value)
  25.  
  26. # US population before 1900 from Wikipedia
  27. # https://en.wikipedia.org/wiki/Demographic_history_of_the_United_States
  28. us_pop_hist <-
  29.   tribble(
  30.     ~year, ~Value,
  31.     1610,   350,
  32.     1620,   2302,
  33.     1630,   4646,
  34.     1640,   26634,
  35.     1650,   50368,
  36.     1660,   75058,
  37.     1670,   111935,
  38.     1680,   151507,
  39.     1690,   210372,
  40.     1700,   250888,
  41.     1710,   331711,
  42.     1720,   466185,
  43.     1730,   629445,
  44.     1740,   905563,
  45.     1750,   1170760,
  46.     1760,   1593625,
  47.     1770,   2148076,
  48.     1780,   2780369,
  49.     1790,   3929214,
  50.     1800,   5308483,
  51.     1810,   7239881,
  52.     1820,   9638453,
  53.     1830,   12866020,
  54.     1840,   17069453,
  55.     1850,   23191876,
  56.     1860,   31443321,
  57.     1870,   38558371,
  58.     1880,   50189209,
  59.     1890,   62979766
  60.   ) %>%
  61.   mutate(Date = as.Date(paste(year, "-01-01", sep = ""))) %>%
  62.   select(Date, Value)
  63.  
  64. ### Combine past/present history into one dataset
  65. us_pop <-
  66.   us_pop_pres %>%
  67.   bind_rows(us_pop_hist) %>%
  68.   arrange(Date) %>%
  69.   rename(date = Date, us_pop = Value)
  70.  
  71. # Info on number of patents from US Patent & Trademark Office
  72. patents <-
  73.   "https://www.uspto.gov/web/offices/ac/ido/oeip/taf/h_counts.htm" %>%
  74.   read_html() %>%
  75.   html_elements("table") %>%
  76.   .[[3]] %>%
  77.   html_table() %>%
  78.   janitor::clean_names() %>%
  79.   mutate_all(~sub(",", "", .)) %>%
  80.   select(-notes) %>%
  81.   rename(
  82.     year = 1,
  83.     utility_patent_applications = 2,
  84.     design_patent_applications = 3,
  85.     plant_patent_applications = 4,
  86.     utility_patents = 5,
  87.     design_patents = 6,
  88.     plant_patents = 7,
  89.     patents_to_foreign_residents = 8
  90.   ) %>%
  91.   mutate(
  92.     year = ifelse(year == "1836 (c)", "1836", year),
  93.     design_patents = ifelse(design_patents == "(b)", "", design_patents)
  94.     ) %>%
  95.   mutate_all(~sub("n/a", "", .)) %>%
  96.   mutate_all(as.numeric) %>%
  97.   replace(is.na(.), 0) %>%
  98.   mutate(
  99.     date = as.Date(paste(year, "-01-01", sep = "")),
  100.     total_patents = utility_patents + design_patents + plant_patents
  101.     ) %>%
  102.   select(date, total_patents)
  103.  
  104. # Combine patents and population into one table so we can compute the rate of
  105. # patents per 100k population
  106. combo <-
  107.   us_pop %>%
  108.   full_join(patents, by = "date") %>%
  109.   filter(!is.na(total_patents), !is.na(us_pop)) %>%
  110.   mutate(
  111.     year = year(date),
  112.     rate = 100000 * total_patents / us_pop)
  113.  
  114. # Compute a linear fit of the data to detrend
  115. fit <- combo %>% lm(rate ~ year, data = .) %>% tidy()
  116. c0 <- fit %>% filter(term == "(Intercept)") %>% pull(estimate)
  117. c1 <- fit %>% filter(term == "year")        %>% pull(estimate)
  118.  
  119. combo <-
  120.   combo %>%
  121.   mutate(
  122.     fit = c0 + c1 * year,
  123.     detrended_rate = rate - fit
  124.     )
  125.  
  126. # Graph the raw patent per 100k rate, showing the trend line we will subtract
  127. g_patents <-
  128. ggplot(data = combo) +
  129.   theme_bw() +
  130.   geom_line(aes(x = date, y = rate)) +
  131.   geom_line(aes(x = date, y = fit), color = "firebrick2") +
  132.   scale_x_date(breaks = pretty_breaks(10)) +
  133.   scale_y_continuous(breaks = pretty_breaks(10)) +
  134.   labs(x = "", y = "Patent Rate per 100k population", title = "US Patent Rate per 100k population")
  135. print(g_patents)
  136.  
  137. # Detrend and graph the z-score of the patent rate
  138. g_patents_detrend <-
  139. ggplot(data = combo) +
  140.   theme_bw() +
  141.   geom_line(aes(x = date, y = (detrended_rate - mean(detrended_rate))/sd(detrended_rate))) +
  142.   geom_hline(yintercept = 0, linetype = "dashed", color = "steelblue2") +
  143.   scale_x_date(breaks = pretty_breaks(10)) +
  144.   scale_y_continuous(breaks = pretty_breaks(10)) +
  145.   labs(x = "", y = "Z-Score",
  146.        title = "Z-Score of Detrended Patent Rate per 100k population")
  147. print(g_patents_detrend)
  148.  
  149. plot_grid(g_patents, g_patents_detrend, ncol = 1, align = "hv")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement