import pickle

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader


class MyDataset(Dataset):
    """
    The dataset enumerates all possible pairs and keeps its own lists and
    cyclic indices for same-label and different-label pairs; each
    __getitem__ draws with probability 0.5 from either list, so the idx
    argument is ignored.
    """
    def __init__(self, pickle_path, train=True):
        # Load the pickle with the precomputed descriptors and labels.
        with open(pickle_path, 'rb') as f:
            self.pickle_dict = pickle.load(f)
        print('keys ', self.pickle_dict.keys())
        # Stack the training descriptors of every key into one array.
        self.descriptors = np.array(
            [self.pickle_dict[key]['trn_descriptors'] for key in self.pickle_dict['keys']]
        ).squeeze(-1)
        # Labels.
        self.train = train
        labels_name = 'trn_labels' if self.train else 'tst_labels'
        self.labels = self.pickle_dict[256][labels_name]
        self.size = len(self.labels)
        # Flattened N x N boolean matrix marking pairs with equal labels.
        y = self.labels[:, None]
        z = np.repeat(y, len(self.labels), -1)
        self.indexes = (y.T == z).flatten()
        # Flat indices of same-label and different-label pairs; pairs whose
        # row has label -1 are excluded from the "same" list.
        self.same_i, self.different_i = 0, 0
        mask = [x and self.labels[i // self.size] != -1
                for i, x in enumerate(self.indexes)]
        # np.where returns a tuple of arrays, so take element [0]; otherwise
        # len(self.same) would always be 1 and the cyclic indexing would break.
        self.different = np.where(np.logical_not(mask))[0]
        self.same = np.where(mask)[0]
        print('same length ', len(self.same))
        print('different length ', len(self.different))
    def __len__(self):
        return self.size ** 2
    def __getitem__(self, idx):
        # With probability 0.5 take the next same-label pair, otherwise the
        # next different-label pair; the idx argument is ignored on purpose.
        if torch.round(torch.rand(1)).item():
            i = self.same_i
            self.same_i = (self.same_i + 1) % len(self.same)
            true_x = self.same[i] // self.size
            true_y = self.same[i] % self.size
        else:
            i = self.different_i
            true_x = self.different[i] // self.size
            true_y = self.different[i] % self.size
            self.different_i = (self.different_i + 1) % len(self.different)
        # Pairwise dot products between the two items' descriptor sets,
        # flattened into the feature vector fed to the classifier.
        data = self.descriptors[:, true_x].dot(self.descriptors[:, true_y].T).flatten()
        label = int(self.labels[true_x] == self.labels[true_y])
        return data, label
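
# A minimal sanity-check sketch of the flat-index encoding MyDataset relies
# on: a pair (row, col) of an N x N pair matrix is stored as one flat index
# and recovered with `flat // N` and `flat % N`. The toy labels here are
# made up purely for illustration.
def _pair_index_demo():
    labels = np.array([0, 0, 1])
    n = len(labels)
    same = np.where((labels[:, None] == labels[None, :]).flatten())[0]
    for flat in same:
        row, col = flat // n, flat % n
        assert labels[row] == labels[col]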
## main
def moving_average(a, n=3):
    # Simple running mean over a window of n samples.
    ret = np.cumsum(a, dtype=float)
    ret[n:] = ret[n:] - ret[:-n]
    return ret[n - 1:] / n
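
# A quick usage example (illustrative values only): with a window of n=3,
# the running mean of [1, 2, 3, 4, 5] is [2.0, 3.0, 4.0].
def _moving_average_demo():
    smoothed = moving_average(np.array([1, 2, 3, 4, 5]), n=3)
    assert np.allclose(smoothed, [2.0, 3.0, 4.0])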
def plot_history(losses, accuracy, epoch, window):
    plt_loss = moving_average(np.array(losses), window)
    plt_accuracy = moving_average(np.array(accuracy), window)
    plt.figure()
    plt.subplot(121)
    plt.plot(np.linspace(0, epoch, len(plt_loss)), plt_loss)
    plt.title('loss')
    plt.subplot(122)
    plt.plot(np.linspace(0, epoch, len(plt_accuracy)), plt_accuracy)
    plt.title('accuracy')
    plt.savefig('models/learning_state.png')
    plt.close()  # avoid accumulating open figures across epochs
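
# A minimal sketch of the exponentially weighted average update used in the
# training loop below: each new value moves the running estimate by a
# fraction q. Converting tensors with .item() before this update keeps the
# running averages as plain floats, detached from the autograd graph. The
# values here are made up for illustration.
def _ewa_demo():
    ewa, q = 0.5, 0.1
    for value in [1.0, 0.0, 1.0, 1.0]:
        ewa = (1 - q) * ewa + q * value
    return ewa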
def main():
    epochs = 30
    bs = 20
    dev = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # Prepare model: a small two-layer classifier on the 25 pair features.
    model = nn.Sequential(nn.Linear(25, 100), nn.Linear(100, 1))
    model.to(dev)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.00005, momentum=0.9)
    model.train()
    loss = nn.BCEWithLogitsLoss()
    # Prepare dataset
    dataset = MyDataset('output/gl18-tl-resnet101-gem-w')
    loader = DataLoader(dataset, bs, shuffle=True)
    loader_len = len(loader)
    losses = []
    accuracy = []
    # Exponentially weighted averages (EWA) of accuracy and loss, weight q.
    ewa_a = .5
    q = 0.1
    ewa_l = .5
    best_p = 0  # best EWA accuracy seen so far
    print('starting ')
    # Train the network.
    for epoch in range(1, epochs + 1):
        for i, (x, y) in enumerate(loader):
            # Data preparation: cast inputs to float32 to match the weights.
            x = x.to(dev).float()
            y = y.to(dev).float()
            # Optimization step.
            optimizer.zero_grad()
            out = model(x)
            l = loss(out.flatten(), y.flatten())
            l.backward()
            optimizer.step()
            # Loss: .item() converts to a plain float and detaches the value
            # from the autograd graph before it enters the running average.
            ewa_l = (1 - q) * ewa_l + q * l.item()
            losses.append(ewa_l)
            # Accuracy.
            classify = torch.round(torch.sigmoid(out)).float().flatten()
            acc = (classify == y).float().mean().item()
            ewa_a = (1 - q) * ewa_a + q * acc
            accuracy.append(ewa_a)
            if i % 100 == 0:
                print(f'Epoch: {epoch}/{epochs}, batch {i}/{loader_len} EWA loss {ewa_l}, EWA accuracy {ewa_a}')
            if ewa_a > best_p:
                best_p = ewa_a
                torch.save(model.state_dict(), 'models/best')
        plot_history(losses, accuracy, epoch, len(loader) // 10)
if __name__ == '__main__':
    main()