Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Load word vectors from a word2vec-style binary file (e.g. <google_w2v.bin>).
#
# The file starts with a text header line "<vocab_size> <layer1_size>".
# Each entry that follows is a word terminated by a single space, then
# `layer1_size` raw Float32 values. Newline bytes encountered while reading
# a word are ignored.
#
# Arguments:
#   fname: the file name of the binary file
#   vocab: vocabulary dictionary; only words that are keys of `vocab` are kept
#
# Returns:
#   A Dict mapping each kept word (String) to its vector of Float32 values.
#
# Throws:
#   EOFError if the file ends before all announced entries have been read.
function load_bin_vec(fname, vocab)
    word_vecs = Dict()
    open(fname, "r") do f
        @show header = readline(f)
        # Both header fields are integer counts.
        vocab_size, layer1_size = map(x -> parse(Int, x), split(header))
        @show binary_len = sizeof(Float32) * layer1_size
        for _ in 1:vocab_size
            # Gather the raw bytes of the word and decode them together, so
            # multi-byte UTF-8 characters survive instead of being split
            # into one bogus Char per byte.
            bytes = UInt8[]
            while true
                b = read(f, UInt8)  # throws EOFError on a truncated file
                if b == UInt8(' ')
                    break
                end
                if b != UInt8('\n')
                    push!(bytes, b)
                end
            end
            word = String(bytes)
            if haskey(vocab, word)
                raw = read(f, binary_len)
                # A short read means the file was cut off mid-vector.
                length(raw) == binary_len || throw(EOFError())
                word_vecs[word] = reinterpret(Float32, raw)
            else
                # Not a wanted word: jump over its vector without loading it.
                skip(f, binary_len)
            end
        end
    end
    return word_vecs
end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement