Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Fuzzy-match the name fields of two sources by edit distance.
#
# Each source is supplied as one comma-separated string; splitting on ", "
# yields the individual names to compare. The script leaves the result in
# `match.s1.s2`, a data frame with one row per source-1 name holding:
#   s2.i   - index of the closest source-2 name
#   s1.i   - index of the source-1 name
#   s2name - the closest source-2 name
#   s1name - the source-1 name
#   adist  - the distance between the two
names <- "key, fund_name, keyword"
names_split <- strsplit(names, ", ")[[1]]
names2 <- "fund_name2, other_keyword"
names_split2 <- strsplit(names2, ", ")[[1]]

# Distance matrix: rows follow names_split, columns follow names_split2.
# NOTE: with partial = TRUE this is NOT the plain Levenshtein distance between
# the full strings; adist() then measures how many edits turn a source-1 name
# into the best-matching *substring* of a source-2 name, so a name fully
# contained in another scores 0. Case is ignored.
dist.name <- adist(
  names_split, names_split2,
  partial = TRUE, ignore.case = TRUE
)

# For every source-1 name pick the source-2 name at minimum distance.
# max.col() on the negated matrix returns the column of each row's minimum;
# ties.method = "first" keeps the left-most column when several tie.
s1_idx <- seq_len(nrow(dist.name))
s2_idx <- max.col(-dist.name, ties.method = "first")
min.name <- dist.name[cbind(s1_idx, s2_idx)]

# Build the result in one step instead of growing it row by row.
# Rows are ordered from the last source-1 name to the first, which is the
# established row order of this script's output.
row_order <- rev(s1_idx)
match.s1.s2 <- data.frame(
  s2.i = s2_idx[row_order],
  s1.i = row_order,
  s2name = names_split2[s2_idx[row_order]],
  s1name = names_split[row_order],
  adist = min.name[row_order]
)

# Inspect the result: open the data viewer in an interactive session, and
# fall back to printing where no viewer is available (e.g. Rscript).
if (interactive()) {
  View(match.s1.s2)
} else {
  print(match.s1.s2)
}
Add Comment
Please, Sign In to add comment