Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import org.deeplearning4j.models.embeddings.WeightLookupTable;
- import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable;
- import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer;
- import org.deeplearning4j.models.word2vec.Word2Vec;
- import org.deeplearning4j.models.word2vec.wordstore.inmemory.InMemoryLookupCache;
- import org.deeplearning4j.text.sentenceiterator.SentenceIterator;
- import org.deeplearning4j.text.sentenceiterator.UimaSentenceIterator;
- import org.deeplearning4j.text.sentenceiterator.LineSentenceIterator;
- import org.deeplearning4j.text.sentenceiterator.SentencePreProcessor;
- import org.deeplearning4j.text.tokenization.tokenizer.TokenPreProcess;
- import org.deeplearning4j.text.tokenization.tokenizer.preprocessor.CommonPreprocessor;
- import org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory;
- import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory;
- import org.deeplearning4j.text.tokenization.tokenizer.preprocessor.EndingPreProcessor;
- import org.slf4j.Logger;
- import org.slf4j.LoggerFactory;
- import java.io.File;
- //import org.springframework.core.io.ClassPathResource;
- import java.util.ArrayList;
- import java.util.Collection;
- public class w2v {
- private static Logger log = LoggerFactory.getLogger(w2v.class);
- public static void main(String[] args) throws Exception {
- String filePath = "/home/xyang/workspace/resources/raw_sentences.txt";
- log.info("Load & Vectorize Sentences....");
- // Strip white space before and after for each line
- SentenceIterator iter = UimaSentenceIterator.createWithPath(filePath);
- // Split on white spaces in the line to get words
- TokenizerFactory t = new DefaultTokenizerFactory();
- t.setTokenPreProcessor(new CommonPreprocessor());
- InMemoryLookupCache cache = new InMemoryLookupCache();
- WeightLookupTable table = new InMemoryLookupTable.Builder()
- .vectorLength(100)
- .useAdaGrad(false)
- .cache(cache)
- .lr(0.025f).build();
- log.info("Building model....");
- Word2Vec vec = new Word2Vec.Builder()
- .minWordFrequency(5).iterations(1)
- .layerSize(100).lookupTable(table)
- .stopWords(new ArrayList<String>())
- .vocabCache(cache).seed(42)
- .windowSize(5).iterate(iter).tokenizerFactory(t).build();
- log.info("Fitting Word2Vec model....");
- vec.fit();
- log.info("Writing word vectors to text file....");
- // Write word
- WordVectorSerializer.writeWordVectors(vec, "pathToWriteto.txt");
- log.info("Closest Words:");
- Collection<String> lst = vec.wordsNearest("day", 10);
- System.out.println(lst);
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement