Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Pull the per-record fields out of `rec`.
# NOTE(review): the field names look like Web of Science tags (C1 = author
# addresses, PY = publication year, ...) -- confirm against whatever
# builds `rec`.
C1 <- rec$C1
RP <- rec$RP
PY <- rec$PY
WC <- rec$WC
UT <- rec$UT
nr.recs <- length(PY)

# ---- Country extraction -----------------------------------------------------
# Produces:
#   numbers  number of ";"-separated addresses per element of C1
#   C1s      one country label per address, flattened across all records
#
# Every step is vectorised over all records, so the pipeline runs exactly
# once. Do not wrap it in a per-record loop: a second pass would recompute
# `numbers` from the already-flattened vector (all 1s) and would drop empty
# entries, because strsplit("", ";") yields character(0).

C1s <- gsub("\\[.*?\\] ", "", C1)  # remove "[author names] " assignments
C1s <- toupper(C1s)                # normalise case
C1s <- strsplit(C1s, ";")          # one character vector of addresses per record

# Address count per record; must be taken before the list is flattened.
numbers <- lengths(C1s)

C1s <- unlist(C1s, use.names = FALSE)
C1s <- gsub(",", ", ", C1s)        # guarantee a space after every comma

# An address that mentions HUMBOLDT and ends in GERMANY becomes "HUMBOLDT".
C1s <- gsub("(.*)(HUMBOLDT)(.*)(GERMANY$)", "\\2", C1s)

# Constituent countries are reported as "UNITED KINGDOM".
C1s <- gsub("ENGLAND|SCOTLAND|WALES|NORTH IRELAND", "UNITED KINGDOM", C1s)

# US addresses: rewrite "<2 letters> <5 digits>" (state and zip) or a trailing
# ", <2 letters>" (state only) to USA ...
C1s <- gsub(" [A-Z]{2} [0-9]{5}", " USA", C1s)
C1s <- gsub(", [A-Z]{2}$", ", USA", C1s)
# ... then collapse everything up to the last " USA" into "USA", which also
# covers addresses that have no comma before USA.
C1s <- gsub("(.*) USA", "USA", C1s)

# The country is the text after the last comma, provided it consists only of
# letters and spaces; addresses that do not match are left unchanged.
C1s <- sub("^.*, ([A-Za-z ]*)$", "\\1", C1s)

# Strip surrounding whitespace (the comma padding above can leave extra spaces).
C1s <- trimws(C1s)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement