Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.util.Collection;
- import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer;
- import org.deeplearning4j.models.word2vec.Word2Vec;
- import org.deeplearning4j.text.sentenceiterator.BasicLineIterator;
- import org.deeplearning4j.text.sentenceiterator.SentenceIterator;
- import org.deeplearning4j.text.tokenization.tokenizer.preprocessor.CommonPreprocessor;
- import org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory;
- import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory;
- public class main {
- public static void main(String[] args) throws Exception {
- //Path du document pour apprentissage
- String filePath = "C:/Users/lucas/Desktop/M2/Web Semantique/Projet/web-semantique/src/main/resources/raw_sentences.txt";
- //Ouverture et tokenization du document
- System.out.println("Load & Vectorize Sentences....");
- SentenceIterator iter = new BasicLineIterator(filePath);
- TokenizerFactory t = new DefaultTokenizerFactory();
- t.setTokenPreProcessor(new CommonPreprocessor());
- //Build du model
- System.out.println("Building model....");
- Word2Vec vec = new Word2Vec.Builder()
- .minWordFrequency(5)
- .layerSize(100)
- .seed(42)
- .windowSize(5)
- .iterate(iter)
- .tokenizerFactory(t)
- .build();
- //Entrainement du model
- System.out.println("Fitting Word2Vec model....");
- vec.fit();
- //Affichage des mots les plus proches de "le"
- System.out.println("Closest Words:");
- Collection<String> lst = vec.wordsNearest("le", 3);
- System.out.println(lst);
- //Sauvegarde du model buildé pour pouvoir le recharger ultérieurement
- WordVectorSerializer.writeWord2VecModel(vec, "C:/Users/lucas/Desktop/M2/Web Semantique/Projet/web-semantique/src/main/resources/model.txt");
- }
- }
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement