Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
# Train skip-gram word embeddings with negative sampling.
#
# Pipeline: preprocess the CSV into (center, context) training pairs,
# then optimize a SkipgramNegSampling model with Adam, printing the
# running mean loss every 5 epochs.

# Build the training set from the 'comment' column of the CSV.
# NOTE(review): SkipGram/getBatch/negative_sampling/SkipgramNegSampling
# and USE_CUDA are defined elsewhere in this project.
train_data, word2index, unigram_table, vocab = SkipGram(
    data=None,
    columns='comment',
    cut=True,
    WINDOW_SIZE=2,
    dataname='newdata_pytorch.csv',
)
# Optionally persist the preprocessing artifacts for later reuse:
# np.save('word2index.npy', word2index)
# np.save('unigram_table.npy', unigram_table)
# np.save('vocab.npy', vocab)
# word2index = np.load(word2index_name).item()
# torch.save(train_data, 'train_data.pth')

# Hyperparameters.
EMBEDDING_SIZE = 300  # dimensionality of each word vector
BATCH_SIZE = 256
EPOCH = 100
NEG = 10              # number of negative samples per positive pair

losses = []
model = SkipgramNegSampling(len(word2index), EMBEDDING_SIZE)
if USE_CUDA:
    model = model.cuda()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(EPOCH):
    for i, batch in enumerate(getBatch(BATCH_SIZE, train_data)):
        inputs, targets = zip(*batch)
        inputs = torch.cat(inputs)    # B x 1
        targets = torch.cat(targets)  # B x 1
        negs = negative_sampling(targets, unigram_table, NEG)

        # Conventional pairing with optimizer.step(); equivalent to
        # model.zero_grad() here since the optimizer owns all model params.
        optimizer.zero_grad()
        loss = model(inputs, targets, negs)
        loss.backward()
        optimizer.step()

        # loss.item() is the supported scalar-extraction API (replaces the
        # deprecated loss.data.tolist() pattern).
        losses.append(loss.item())

    # Report and reset the accumulated mean loss every 5 epochs; the guard
    # avoids np.mean([]) -> nan if no batches were produced.
    if epoch % 5 == 0 and losses:
        print("Epoch : %d, mean_loss : %.02f" % (epoch, np.mean(losses)))
        losses = []

# torch.save(model, 'skipgram.pt')
torch.cuda.empty_cache()
Add Comment
Please sign in to add a comment