# Usage instructions, Mgr. Hynek Mlčoušek, Brno, 2 May 2024
#
# 1. Save data of the following form into a local file on your PC. Every row
#    ends with 0 or 1 (this is supervised learning: 1 = ill,
#    0 = survived/healthy; in the output green means 0 and red means 1).
#    Leave out the leading "#" characters and mind the commas (","):
#
# [ [23.657800719276743,18.859916797201468,0],
# [22.573729142097473,17.96922325097786,0],
# [32.55342396968757,29.463651408558803,0],
# [6.718035041529263,25.704665468161718,1],
# [14.401918566243225,16.770856492924658,0],
# [17.457907312962234,21.76521470574044,0],
# [20.02796946568093,73.45445954770891,1],
# [30.295138369778076,62.901112886193246,1],
# [15.128977804449633,32.40267702110393,0],
# [30.179457395820013,58.982492125646104,1],
# [28.01649701854089,63.92781357637711,1],
# [16.791838457871147,42.33482314089884,0],
# [10.583694293380976,19.61926728942497,0],
# [26.634447074406467,91.96624817360987,1],
# [26.217868623367643,36.400293587062976,0],
# [17.689396788624936,60.79797114006423,1],
# [33.17193822527976,66.75277364959176,1],
# [23.793952755709153,22.57501437360518,0]]
#
# 2. Click the black button with the small triangle at the top left.
# 3. Below the code an option to open a dialog window appears; click it.
# 4. Select and upload the file you prepared in step 1.
# 5. Find the string "ZDE VLOZTE DATA OD NOVYCH PACIENTU" ("insert new-patient
#    data here") in this code and put into the array
#        new_persons_results = []
#    the data of a few new patients WITHOUT the trailing 0/1, but with the
#    same number of feature columns as the file from your local disk (for the
#    example above that number is 2).
#
# Right after training (whose progress you can follow on the blue progress
# bar) the code outputs, for every row of new_persons_results, white-grey-black
# squares produced by normalising the supplied data, followed by a final
# square that is either red or green.
# Along with that, a real number between 0 and 1 is printed that indicates how
# healthy (close to 0) or ill (close to 1) the patient is.
# Numbers in the middle of that range correspond to the "orange" traffic-light
# state.
# It is up to the physicians to set the thresholds (i.e. probabilities:
# numbers between 0 and 1) that decide whether a patient is red, orange or
# green.
# Comments and results on real data are welcome; ideally the matrix has
# hundreds of rows (patients) and tens of columns.
# Possible uses: oncological diagnosis vs. a healthy control group, diabetes
# (present/absent), testing a new drug against placebo, etc.
# The code also outputs a confusion matrix, i.e. the possible True Negatives
# and False Positives plus the correctly classified values, together with
# accuracy, F1 score, recall, etc.
# Note on the code: this is an experimental version which, besides the code
# actually needed, contains debugging output.
# At the start of the run the supplied matrix and its normalised version are
# printed as a check; scroll down with the slider on the right to reach the
# images and the other outputs.
# Thanks to Professor Petr Dostál for the idea behind this work and for the
# data provided, although real data are still needed.

import ast
import io
import os
import shutil
import sys

import numpy as np
import pandas as pd

# NOTE: tensorflow, matplotlib, seaborn, scikit-learn, tqdm, IPython and
# google.colab are imported inside the functions that use them, so the pure
# helpers below stay importable (and testable) outside of Colab.

# RGB colour of the label pixel: 0 -> green (healthy), 1 -> red (ill).
label_colors = {0: [0, 128, 0], 1: [255, 0, 0]}
label_colors_testing = {0: [0, 128, 0], 1: [255, 0, 0]}  # not used in this script

# At most this many leading rows of the uploaded matrix are used.
# NOTE(review): both slices start at row 0, so the "test" rows are the
# training rows and the reported metrics are training-set metrics.
num_training_rows = 100
num_testing_rows = 100

# Number of training epochs.
epochs = 138

### ZDE VLOZTE DATA OD NOVYCH PACIENTU
# (INSERT NEW-PATIENT DATA HERE.)  One inner list per new patient, WITHOUT the
# trailing 0/1 label and with the same number of features as the uploaded
# file, e.g. [[20.0, 70.0], [15.0, 18.0]].
new_persons_results = []


def min_max_normalize(values, min_values, max_values):
    """Scale *values* column-wise so that min_values -> 0 and max_values -> 1.

    A column whose minimum equals its maximum would cause a division by zero;
    for such columns the divisor is replaced by 1, so they map to
    ``value - min``.

    Returns a float ``numpy.ndarray`` with the shape of *values*.
    """
    low = np.asarray(min_values, dtype=float)
    span = np.asarray(max_values, dtype=float) - low
    span = np.where(span == 0, 1.0, span)
    return (np.asarray(values, dtype=float) - low) / span


# Function to create images based on predictions
def create_image(data, predictions):
    """Render *data* as a grey-scale image with one coloured label pixel per row.

    Every value is mapped linearly from [min(data), max(data)] (taken over the
    whole matrix) to a grey level 0..255.  The extra last column is coloured
    with ``label_colors[predictions[i]]``.

    Args:
        data: 2-D sequence (rows x features) of numbers.
        predictions: per-row labels that are keys of ``label_colors``.

    Returns:
        ``uint8`` array of shape (rows, features + 1, 3).
    """
    num_rows = len(data)
    num_columns = len(data[0]) if num_rows else 0
    image = np.zeros((num_rows, num_columns + 1, 3), dtype=np.uint8)
    if num_rows == 0:
        return image

    if num_columns:
        values = np.asarray(data, dtype=float)
        # The range is loop-invariant, so compute it once instead of per pixel.
        grey = np.interp(values, [values.min(), values.max()], [0, 255])
        image[:, :num_columns] = grey.astype(np.uint8)[:, :, None]

    # Use the specified colour for the last column based on the label.
    image[:, -1] = [label_colors[predictions[i]] for i in range(num_rows)]
    return image


def create_imageN(data, predictions, label_colors=None):
    """Render *data* as a grey-scale image, scaling every column separately.

    Each feature column is mapped linearly from its own [min, max] to 0..255.
    A constant column (always the case for single-row input) has no range of
    its own; its values are then assumed to be already normalised and are
    mapped from [0, 1] to 0..255 (values outside that range are clamped by
    ``np.interp``).

    Args:
        data: 2-D sequence (rows x features) of numbers.
        predictions: per-row labels (keys of *label_colors*) or, when
            *label_colors* is None, per-row numbers in [0, 1].
        label_colors: optional mapping label -> RGB triple.  When omitted the
            last pixel is a grey level derived from the prediction value.

    Returns:
        ``uint8`` array of shape (rows, features + 1, 3).
    """
    num_rows = len(data)
    num_columns = len(data[0]) if num_rows else 0
    image = np.zeros((num_rows, num_columns + 1, 3), dtype=np.uint8)
    if num_rows == 0:
        return image

    values = np.asarray(data, dtype=float)
    # Paint ALL feature columns; the label pixel lives in the extra column.
    for j in range(num_columns):
        column = values[:, j]
        low, high = column.min(), column.max()
        source_range = [low, high] if high > low else [0, 1]
        grey = np.interp(column, source_range, [0, 255])
        image[:, j] = grey.astype(np.uint8)[:, None]

    for i in range(num_rows):
        if label_colors is not None:
            image[i, -1] = label_colors[predictions[i]]
        else:
            # No colour table: show the prediction itself as a grey level.
            pixel_value = int(np.interp(predictions[i], [0, 1], [0, 255]))
            image[i, -1] = [pixel_value] * 3
    return image


def build_training_image(X_train_normalized, y_train):
    """Render the normalised training rows plus a label pixel per row.

    Grey levels are scaled over the whole normalised matrix.  The label pixel
    is green for label 0, red for label 1 and mid-grey for anything else.
    The image has exactly one row per training sample.
    """
    values = np.asarray(X_train_normalized, dtype=float)
    num_rows = len(values)
    num_columns = values.shape[1] if values.ndim == 2 else 0
    image = np.zeros((num_rows, num_columns + 1, 3), dtype=np.uint8)
    if num_rows == 0:
        return image

    if num_columns:
        grey = np.interp(values, [values.min(), values.max()], [0, 255])
        image[:, :num_columns] = grey.astype(np.uint8)[:, :, None]

    for i, label in enumerate(y_train):
        if label == 0:
            image[i, -1] = [0, 128, 0]
        elif label == 1:
            image[i, -1] = [255, 0, 0]
        else:
            image[i, -1] = [128, 128, 128]
    return image


def load_uploaded_matrix(uploaded, destination="/content/DATA2"):
    """Move the uploaded file(s) to *destination* and parse the matrix.

    Args:
        uploaded: mapping file name -> file content (bytes), as returned by
            ``google.colab.files.upload()``.
        destination: path the uploaded file is moved to before it is read.

    Returns:
        ``numpy.ndarray`` built from the Python list literal in the file.

    Raises:
        ValueError: if nothing was uploaded.
    """
    if not uploaded:
        raise ValueError("No file was uploaded.")

    for name, payload in uploaded.items():
        print('User uploaded file "{name}" with length {length} bytes'.format(
            name=name, length=len(payload)))
        # Debug preview only.  The file is a Python list literal, not real
        # CSV, so a parse failure here must not abort the run.
        try:
            preview = pd.read_csv(io.BytesIO(payload))
        except (pd.errors.ParserError, pd.errors.EmptyDataError) as exc:
            print(f"CSV preview not available: {exc}")
        else:
            print(preview.head())  # first five rows

    # NOTE: every uploaded file is moved to the same destination, so with
    # several uploads the last one wins.
    for filename in uploaded:
        original_path = f"/content/{filename}"
        shutil.move(original_path, destination)
        print(f"Soubor {filename} byl přesunut do {destination}")

    with open(destination, "r", encoding="utf-8") as file:
        code = file.read()
    # literal_eval only accepts Python literals, so the file cannot run code.
    return np.array(ast.literal_eval(code))


def _score_random_candidates(min_val, max_val, num_features, model,
                             min_values, max_values, num_candidates=1000):
    """Draw random feature vectors and score all of them in ONE predict call."""
    candidates = np.random.uniform(
        min_val, max_val, size=(num_candidates, num_features))
    normalized = min_max_normalize(candidates, min_values, max_values)
    scores = np.asarray(model.predict(normalized, verbose=0))
    return candidates, scores.reshape(num_candidates, -1)[:, 0]


# Function to find the best pair of values
def find_best_pair(min_val, max_val, num_features, model, min_values, max_values):
    """Random-search the feature vector with the HIGHEST model prediction.

    1000 vectors are drawn uniformly from [min_val, max_val), normalised with
    *min_values* / *max_values* and scored by ``model.predict`` in one batch.

    Returns:
        ``(best_vector, best_prediction)``; ``(None, 0)`` when no candidate
        scored above 0.
    """
    candidates, scores = _score_random_candidates(
        min_val, max_val, num_features, model, min_values, max_values)
    winner = int(np.argmax(scores))
    if scores[winner] > 0:
        return candidates[winner], scores[winner]
    return None, 0


def find_worst_pair(min_val, max_val, num_features, model, min_values, max_values):
    """Random-search the feature vector with the LOWEST model prediction.

    Same procedure as ``find_best_pair``.

    Returns:
        ``(worst_vector, worst_prediction)``; ``(None, 1)`` when no candidate
        scored below 1.
    """
    candidates, scores = _score_random_candidates(
        min_val, max_val, num_features, model, min_values, max_values)
    loser = int(np.argmin(scores))
    if scores[loser] < 1:
        return candidates[loser], scores[loser]
    return None, 1


def _to_binary_labels(probabilities):
    """Threshold model outputs at 0.5 and return a flat list of 0/1 ints."""
    return (np.asarray(probabilities).ravel() >= 0.5).astype(int).tolist()


def _enable_inline_plots():
    """Equivalent of ``%matplotlib inline`` that is also valid plain Python."""
    try:
        from IPython import get_ipython
    except ImportError:
        return
    shell = get_ipython()
    if shell is not None:
        shell.run_line_magic("matplotlib", "inline")


def main():
    """Upload the data, train the classifier, then plot and print the results."""
    import matplotlib.pyplot as plt
    import seaborn as sns
    import tensorflow as tf
    from google.colab import files
    from IPython.display import Javascript, display
    from sklearn.metrics import (
        accuracy_score,
        confusion_matrix,
        f1_score,
        precision_score,
        recall_score,
    )
    from tqdm import tqdm

    display(Javascript('IPython.OutputArea.auto_scroll_threshold = 9999;'))
    _enable_inline_plots()

    # Load data from a file: opens a dialog for picking a file on the PC.
    uploaded = files.upload()
    matrix = load_uploaded_matrix(uploaded)
    all_results = [list(row) for row in matrix]
    if not all_results:
        raise ValueError("The uploaded data contains no rows.")

    # The last column (0 or 1) is the label, the rest are the features.
    labels = [results[-1] for results in all_results]
    data = [results[:-1] for results in all_results]

    # Split the data into training and testing datasets.
    X_train, X_test = data[:num_training_rows], data[:num_testing_rows]
    y_train, y_test = labels[:num_training_rows], labels[:num_testing_rows]
    num_features = len(X_train[0])

    # Both sets are normalised with the min/max of the TRAINING data.
    min_values = np.min(X_train, axis=0)
    max_values = np.max(X_train, axis=0)
    X_train_normalized = min_max_normalize(X_train, min_values, max_values)
    X_test_normalized = min_max_normalize(X_test, min_values, max_values)

    print("Normalized Training Data:")
    print(X_train_normalized)
    print("Adenormalized",
          X_train_normalized * (max_values - min_values) + min_values,
          "Bdenormalized")

    # Define a simple neural network model.
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(128, activation='relu', input_shape=(num_features,)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    image_training = build_training_image(X_train_normalized, y_train)

    # Suppress superfluous TensorFlow output.
    tf.get_logger().setLevel('ERROR')
    tf.autograph.set_verbosity(0)

    # Train one epoch at a time so that tqdm can show the progress.
    accuracy_history = []
    image_testing_before_2nd_epoch = None
    snapshot_epoch = min(1, epochs - 1)  # after the 2nd epoch when possible
    y_train_array = np.array(y_train)
    for epoch in tqdm(range(epochs)):
        history = model.fit(X_train_normalized, y_train_array,
                            epochs=1, verbose=0, shuffle=True)
        accuracy_history.append(history.history['accuracy'][0])
        if epoch == snapshot_epoch:
            early_labels = _to_binary_labels(
                model.predict(X_test_normalized, verbose=0))
            image_testing_before_2nd_epoch = create_image(
                X_test_normalized, early_labels)

    if epochs > 0:
        print(f"HERE HERE Epoch: {epochs - 1}, Epochs: {epochs}\n")
        sys.stdout.flush()

    # Predictions of the fully trained model on the test rows (computed once).
    final_labels = _to_binary_labels(model.predict(X_test_normalized, verbose=0))
    image_testing_after_50_epochs = create_image(X_test_normalized, final_labels)

    last_person = None  # (image, label, prediction, normalised features)

    def present_person(idx, features):
        """Predict one person, show the image; return the summary or None."""
        if len(features) != num_features:
            print(f"New Person {idx}: expected {num_features} features, "
                  f"got {len(features)} - skipped.")
            return None
        normalized = min_max_normalize(features, min_values, max_values)
        prediction = model.predict(np.array([normalized]), verbose=0)[0][0]
        label = 1 if prediction >= 0.5 else 0
        image = create_imageN([normalized], [label], label_colors)
        plt.figure(figsize=(5, 5))
        plt.imshow(image)
        plt.title(f"New Person {idx}\nLabel: {label}, Prediction: {prediction}")
        plt.axis("off")
        plt.show()
        return image, label, prediction, normalized

    # Iterate over a snapshot: the list of persons is extended below and must
    # not grow while it is being iterated.
    persons = list(new_persons_results)
    for idx, features in enumerate(persons, start=1):
        shown = present_person(idx, features)
        if shown is not None:
            last_person = shown

    # Random search for the most "ill" and most "healthy" synthetic patient.
    best_pair, best_prediction = find_best_pair(
        min_values, max_values, num_features, model, min_values, max_values)
    worst_pair, worst_prediction = find_worst_pair(
        min_values, max_values, num_features, model, min_values, max_values)
    print(f"Best Pair: {best_pair}, Best Prediction: {best_prediction}")
    print(f"Worst Pair: {worst_pair}, Worst Prediction: {worst_prediction}")
    for extra in (best_pair, worst_pair):
        if extra is None:
            continue
        persons.append(extra)
        shown = present_person(len(persons), extra)
        if shown is not None:
            last_person = shown

    # Display the overview images.
    panels = [
        (image_training, "Training Data"),
        (image_testing_before_2nd_epoch, "Testing Data (2nd Epoch)"),
        (image_testing_after_50_epochs, f"Testing Data ({epochs} Epochs)"),
    ]
    if last_person is not None:
        panels.append(
            (last_person[0],
             f"New Person\nLabel: {last_person[1]},[{last_person[2]}]"))
    plt.figure(figsize=(25, 15))
    for position, (image, title) in enumerate(panels, start=1):
        if image is None:
            continue
        plt.subplot(2, 2, position)
        plt.imshow(image)
        plt.title(title)
        plt.axis("off")

    # Plot accuracy history.
    plt.figure(figsize=(12, 5))
    plt.plot(range(1, epochs + 1), accuracy_history, marker='o')
    plt.title('Accuracy Over Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.grid()

    if last_person is not None:
        print("Normalized PersonNEW Data:")
        print(last_person[3])
    plt.show()

    # Debug output of the raw data.
    print("X_train before normalization:")
    print(X_train)
    print("X_test before normalization:")
    print(X_test)
    print("KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK")
    print(X_test)
    print("HHHHHHHHHHHHHHHHHHHHHHHHHHHHHH")
    print(X_train)
    print("LLLLLLLLLLLLLLLLLLLLLLLLLLLLL")

    np.set_printoptions(threshold=np.inf, precision=4, suppress=True)

    # Evaluate the model: the same thresholded predictions as in the images.
    predictions = np.array(final_labels)
    y_test_binary = (np.array(y_test) > 0.5).astype(int)

    conf_matrix = confusion_matrix(y_test_binary, predictions)
    accuracy = accuracy_score(y_test_binary, predictions)
    precision = precision_score(y_test_binary, predictions)
    recall = recall_score(y_test_binary, predictions)
    f1 = f1_score(y_test_binary, predictions)

    # Display the confusion matrix.
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title('Confusion Matrix')
    plt.show()

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Confusion Matrix:\n{conf_matrix}")


if __name__ == "__main__":
    main()