Advertisement
Guest User

Untitled

a guest
Mar 26th, 2019
83
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.08 KB | None | 0 0
  1. #fname: the file name of binary file <google_w2v.bin>
  2. #vocab: vocabulary dictionary
  3.  
  """
      load_bin_vec(fname, vocab)

  Load word vectors from a word2vec-style binary file.

  The file is expected to start with a text header line holding two
  whitespace-separated integers (number of words, vector length), followed by
  one record per word: the word's bytes, a single space, then the vector as
  raw `Float32` values. A newline between records is tolerated and ignored.

  # Arguments
  - `fname`: path of the binary file (e.g. `google_w2v.bin`).
  - `vocab`: dictionary whose keys are the words to keep; vectors of words
    that are not keys of `vocab` are skipped without being loaded.

  # Returns
  A `Dict` mapping each word found in both the file and `vocab` to its
  `Vector{Float32}`.

  # Throws
  - `ArgumentError` if the header does not contain two integer fields.
  - `EOFError` if the file ends before all announced records were read.
  """
  function load_bin_vec(fname, vocab)
      word_vecs = Dict()
      open(fname, "r") do f
          @show header = readline(f)
          fields = split(header)
          length(fields) >= 2 ||
              throw(ArgumentError("malformed header, expected \"<vocab_size> <vector_size>\": $(repr(header))"))
          vocab_size = parse(Int, fields[1])
          layer1_size = parse(Int, fields[2])
          @show binary_len = sizeof(Float32) * layer1_size
          for _ in 1:vocab_size
              # Collect the raw bytes of the word up to the separating space and
              # decode them in one go, so multi-byte UTF-8 words stay intact.
              bytes = UInt8[]
              while true
                  b = read(f, UInt8)          # throws EOFError on a truncated file
                  b == UInt8(' ') && break
                  # The previous record may be terminated by a newline; drop it.
                  b != UInt8('\n') && push!(bytes, b)
              end
              word = String(bytes)
              if haskey(vocab, word)
                  word_vecs[word] = read!(f, Vector{Float32}(undef, layer1_size))
              else
                  # Not requested: jump over the vector instead of reading it.
                  skip(f, binary_len)
              end
          end
      end
      return word_vecs
  end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement