Advertisement
Guest User

Untitled

a guest
Aug 19th, 2019
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.18 KB | None | 0 0
  #' Load embeddings from a space-separated text file into an environment
  #'
  #' Every non-blank line is split on single spaces. The first field is used
  #' as the label; the remaining fields are converted with `as.double()` and
  #' rescaled to unit length before being stored under that label. If a label
  #' occurs more than once, the last occurrence wins.
  #'
  #' @param file_path Path handed to `readLines()`.
  #' @return A hashed environment (with an empty parent) that maps each label
  #'   to its unit-length numeric vector. An empty file yields an empty
  #'   environment.
  load_embedding <- function(file_path) {
    # The whole file is read into memory at once.
    lines <- readLines(file_path)

    # Blank or whitespace-only lines carry no label and would make the
    # `values[[1]]` lookup below fail, so drop them up front.
    lines <- lines[nzchar(trimws(lines))]

    embeddings_env <- new.env(hash = TRUE, parent = emptyenv())

    # Divide a vector by its Euclidean length. An all-zero vector has length
    # 0, so its components come out as NaN.
    normalize_vector <- function(a) {
      a / sqrt(sum(a^2))
    }

    # `strsplit()` is vectorised over lines; iterating its result directly
    # also makes the loop a no-op when there are no lines at all.
    for (values in strsplit(lines, " ", fixed = TRUE)) {
      label <- values[[1]]
      embeddings_env[[label]] <- normalize_vector(as.double(values[-1]))
    }

    embeddings_env
  }
  25.  
  #' Dot product of two vectors, used as their cosine similarity
  #'
  #' No normalisation happens here, so the result equals the cosine of the
  #' angle between `a` and `b` only when both already have unit length.
  #'
  #' @param a,b Numeric vectors of equal length.
  #' @return The value of `a %*% b`; for two plain vectors this is a
  #'   1 x 1 matrix.
  cosine_similarity <- function(a, b) {
    dot_product <- a %*% b
    dot_product
  }
  31.  
  32.  
  #' Rank stored embeddings by similarity to a reference vector
  #'
  #' Calls `cosine_similarity()` on every element of `embeddings` against
  #' `ref_item`, discards entries that are (numerically) identical to the
  #' reference, and returns the best remaining matches.
  #'
  #' @param embeddings Environment whose elements are numeric vectors.
  #'   Presumably the result of `load_embedding()`, i.e. unit vectors;
  #'   verify against the caller.
  #' @param ref_item Numeric vector to compare against. Assumed to be a unit
  #'   vector as well, otherwise the scores are not cosines.
  #' @param n_top Maximum number of matches to return.
  #' @return A named list of similarity scores in decreasing order. It holds
  #'   at most `n_top` entries, and fewer when not enough candidates remain.
  most_similar <- function(embeddings, ref_item, n_top = 10) {
    # One similarity score per label in the environment.
    cos_sims <- eapply(embeddings, cosine_similarity, b = ref_item)
    scores <- as.numeric(unlist(cos_sims))

    # A vector compared with itself should score exactly 1, but rounding can
    # leave it a hair below, so treat anything within a small tolerance of 1
    # as "the same element". NaN scores (e.g. from all-zero vectors) are
    # dropped too; an NA index would otherwise insert NULL entries.
    self_threshold <- 1 - sqrt(.Machine$double.eps)
    keep <- !is.na(scores) & scores < self_threshold
    cos_sims <- cos_sims[keep]
    scores <- scores[keep]

    ranked <- cos_sims[order(scores, decreasing = TRUE)]

    # Cap at the number of candidates; indexing with 1:n_top would pad the
    # result with NULL entries when fewer than n_top remain.
    ranked[seq_len(min(n_top, length(ranked)))]
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement