Guest User

Untitled

a guest
Jul 20th, 2018
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.71 KB | None | 0 0
  # Fuzzy-match the field names of one source against those of another.
  #
  # Inputs are two comma-separated strings of field names. The result,
  # `match.s1.s2`, has one row per name in the first source with:
  #   s2.i   - index of the closest name in the second source
  #   s1.i   - index of the name in the first source
  #   s2name - closest name in the second source
  #   s1name - name in the first source
  #   adist  - distance between the two names

  # Field names of the first source.
  names <- "key, fund_name, keyword"
  names_split <- strsplit(names, ", ", fixed = TRUE)[[1]]

  # Field names of the second source.
  names2 <- "fund_name2, other_keyword"
  names_split2 <- strsplit(names2, ", ", fixed = TRUE)[[1]]

  # Distance matrix: rows follow names_split, columns follow names_split2.
  # Because partial = TRUE, each entry is the approximate (generalized
  # Levenshtein) distance from a first-source name to the best-matching
  # *substring* of a second-source name, not the whole-string distance, so a
  # name fully contained in the other one scores 0. Case is ignored.
  dist.name <- adist(
    names_split,
    names_split2,
    partial = TRUE,
    ignore.case = TRUE
  )

  # Smallest distance in each row, i.e. for each first-source name.
  min.name <- apply(dist.name, 1, min)

  # Column of the first occurrence of that minimum in each row.
  # seq_len() keeps this safe when there are no rows at all.
  n.s1 <- nrow(dist.name)
  s2.idx <- vapply(
    seq_len(n.s1),
    function(i) match(min.name[i], dist.name[i, ]),
    integer(1)
  )

  # Build the result in one step instead of growing it with rbind() inside a
  # loop. Rows are listed from the last first-source name to the first, which
  # is the order the previous loop produced by prepending each new row.
  row.order <- rev(seq_len(n.s1))
  match.s1.s2 <- data.frame(
    s2.i = s2.idx[row.order],
    s1.i = row.order,
    s2name = names_split2[s2.idx[row.order]],
    s1name = names_split[row.order],
    adist = min.name[row.order]
  )

  # Show the results: open the data viewer in an interactive session,
  # otherwise print, because View() needs a display and fails under Rscript.
  if (interactive()) {
    View(match.s1.s2)
  } else {
    print(match.s1.s2)
  }
Add Comment
Please, Sign In to add comment