Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
# Train skip-gram word embeddings with negative sampling.
#
# Pipeline: preprocess the CSV into (center, context) training pairs,
# then optimize a SkipgramNegSampling model with Adam, printing the
# running mean loss every 5 epochs.

# Build the training set from the 'comment' column of the CSV.
# NOTE(review): SkipGram/getBatch/negative_sampling/SkipgramNegSampling
# and USE_CUDA are defined elsewhere in this project.
train_data, word2index, unigram_table, vocab = SkipGram(
    data=None,
    columns='comment',
    cut=True,
    WINDOW_SIZE=2,
    dataname='newdata_pytorch.csv',
)
# Optionally persist the preprocessing artifacts for later reuse:
# np.save('word2index.npy', word2index)
# np.save('unigram_table.npy', unigram_table)
# np.save('vocab.npy', vocab)
# word2index = np.load(word2index_name).item()
# torch.save(train_data, 'train_data.pth')

# Hyperparameters.
EMBEDDING_SIZE = 300  # dimensionality of each word vector
BATCH_SIZE = 256
EPOCH = 100
NEG = 10              # number of negative samples per positive pair

losses = []
model = SkipgramNegSampling(len(word2index), EMBEDDING_SIZE)
if USE_CUDA:
    model = model.cuda()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(EPOCH):
    for i, batch in enumerate(getBatch(BATCH_SIZE, train_data)):
        inputs, targets = zip(*batch)
        inputs = torch.cat(inputs)    # B x 1
        targets = torch.cat(targets)  # B x 1
        negs = negative_sampling(targets, unigram_table, NEG)

        # Conventional pairing with optimizer.step(); equivalent to
        # model.zero_grad() here since the optimizer owns all model params.
        optimizer.zero_grad()
        loss = model(inputs, targets, negs)
        loss.backward()
        optimizer.step()

        # loss.item() is the supported scalar-extraction API (replaces the
        # deprecated loss.data.tolist() pattern).
        losses.append(loss.item())

    # Report and reset the accumulated mean loss every 5 epochs; the guard
    # avoids np.mean([]) -> nan if no batches were produced.
    if epoch % 5 == 0 and losses:
        print("Epoch : %d, mean_loss : %.02f" % (epoch, np.mean(losses)))
        losses = []

# torch.save(model, 'skipgram.pt')
torch.cuda.empty_cache()
Add Comment
Please sign in to add a comment