Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Pull a handful of fixed-position rows out of "Test.pdf" and write the
# extracted ownership fields to "MyData.csv".
library(pdftools)

# pdf_text() returns one character string per PDF page.
pages_text <- pdf_text("Test.pdf")

# Split every page into its individual text rows. Base strsplit() is used so
# no package beyond pdftools is required; "\r?\n" accepts both Unix and
# Windows line endings.
page_rows <- strsplit(pages_text, "\r?\n")
# The PDF output is expected to contain a total of 7 pages (list elements).

# Rows 4 and 11 to 13 from page 4.
page4 <- page_rows[[4]]
page4_rows <- c(page4[4], page4[11:13])

# Row 3 from page 3.
page3 <- page_rows[[3]]
page3_row <- page3[3]

# Row 3 from page 5.
page5 <- page_rows[[5]]
page5_row <- page5[3]

# Rows 4 to 18 from page 6.
page6 <- page_rows[[6]]
page6_rows <- page6[4:18]

# Combine all necessary rows into one character vector (1 + 4 + 15 + 1 = 21
# entries), in the order the fields below rely on.
selected_rows <- c(page3_row, page4_rows, page6_rows, page5_row)

# Keep characters 15 to 200 of every row, i.e. drop the first 14 characters.
# NOTE(review): presumably this strips the leading padding/label column of the
# PDF layout -- confirm against the actual document.
z <- substr(selected_rows, start = 15, stop = 200)

Name <- z[1]
InterestedParty <- z[2]
# The value of interest sits in characters 97 to 120 of the 11th trimmed row.
TotalOwnBefore <- substr(z[11], start = 97, stop = 120)

Ownership <- list(
  NM = Name,
  Party = InterestedParty,
  OwnBefore = TotalOwnBefore
)
# write.csv() coerces the named list to a one-row data frame.
write.csv(Ownership, file = "MyData.csv")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement