Advertisement
Guest User

Untitled

a guest
Sep 26th, 2016
58
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.85 KB | None | 0 0
  1.  
  2. library("stringr")
  3.  
  #' Replace ampersands in narrative citations throughout a markdown report
  #'
  #' Reads the file, sets aside the bibliography and lines that are not prose
  #' (tables, images, headers), and passes the remaining lines through
  #' `replace_inline_amper()`.
  #'
  #' @param file_path Path of the markdown file to read.
  #' @param re_bib Regular expression identifying the line where the
  #'   bibliography starts. Lines after the first match are returned unchanged.
  #' @return A character vector with one element per line of the file.
  clean_md_file <- function(file_path, re_bib = "^References$") {
    report <- readr::read_lines(file_path)

    # Everything after the bibliography heading is left alone. If the pattern
    # matches several lines, cut at the first one so seq_len() receives a
    # scalar; if it matches none, treat the whole file as body text.
    bib_hits <- which(stringr::str_detect(report, re_bib))
    line_references_start <- if (length(bib_hits) > 0) {
      bib_hits[1]
    } else {
      length(report)
    }

    body_idx <- seq_len(line_references_start)
    main_lines <- report[body_idx]

    # Ignore lines that aren't prose
    table_lines <- stringr::str_detect(main_lines, "^[|]")
    # Literal "<img" prefix. A bracketed "[<img]" would be a character class
    # and match any line that merely starts with one of <, i, m or g.
    img_lines <- stringr::str_detect(main_lines, "^<img")
    header_lines <- stringr::str_detect(main_lines, "^[-][-]|^[#]")

    do_not_touch <- table_lines | img_lines | header_lines
    main_lines[!do_not_touch] <- replace_inline_amper(main_lines[!do_not_touch])

    report[body_idx] <- main_lines
    report
  }
  34.  
  35.  
  36.  
  #' Swap "&" for "and" in narrative (inline) citations
  #'
  #' A narrative citation is assumed to be author last names followed by a
  #' parenthesized four-digit year, e.g. "Maggie & Lisa (2005)". Only a " & "
  #' that sits directly before such an author-year pair is rewritten; in the
  #' examples below, the parenthetical forms with a comma before the year
  #' (e.g. "(Maggie & Lisa, 2005)") do not match and come back unchanged.
  #'
  #' @param text Character vector to fix.
  #' @return `text` with each matching " & " replaced by " and ".
  #' @examples
  #' replace_inline_amper(c(
  #'   "Maggie & Lisa (2005) found...",
  #'   "...have been found (Maggie & Lisa, 2005)",
  #'   "Jones & Hyphen-Name (2005) found...",
  #'   "...have been found (Jones & Hyphen-Name, 2005)",
  #'   "Marge, Maggie, & Lisa (2005) found...",
  #'   "...have been found (Marge, Maggie, & Lisa, 2005)",
  #'   "Jones & Space Name (2005) found...",
  #'   "...have been found (Jones & Space Name, 2005)"
  #' ))
  replace_inline_amper <- function(text) {
    # Last names: runs of letters, hyphens and spaces
    name_chars <- "[[:alpha:]- ]+"
    # Four-digit year wrapped in parentheses
    year_parens <- "[(]\\d{4}[)]"

    # Match " & " only when a name, a space and a year follow. The lookahead
    # keeps the name and year out of the span that gets replaced.
    amper_pattern <- paste0(" & ", "(?=", name_chars, " ", year_parens, ")")

    stringr::str_replace_all(text, amper_pattern, " and ")
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement