Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import pandas as pd
- import os
- import sys
- import random
- from PIL import Image
- from sklearn.neural_network import MLPClassifier
- import pickle
# Geometry of the images this classifier expects: SIZE x SIZE RGB,
# with BORDER pixels trimmed from each end of the peak profile.
SIZE = 512
SIZE_TEXT = f"{SIZE}x{SIZE}"
PIXELS = SIZE * SIZE
BORDER = 24
def count_and_sort(df, columns):
    """Group *df* by *columns* and return the group sizes, largest first.

    The result has the grouping columns, a "count" column, and the extra
    "index" column produced by the trailing reset_index (kept for
    compatibility with the original behaviour).
    """
    counts = pd.DataFrame({"count": df.groupby(columns).size()}).reset_index()
    ordered = counts.sort_values("count", ascending=False)
    return ordered.reset_index()
def image_mask(pixels, size=None):
    """Per-pixel absolute deviation from the image's mean colour, scaled to [0, 1].

    Parameters
    ----------
    pixels : ndarray
        RGB pixel data; must have shape (size, size, 3).
    size : int, optional
        Expected width/height.  Defaults to the module-level SIZE, so
        existing callers are unaffected.

    Returns
    -------
    ndarray of the same shape as *pixels*, or None when the shape is wrong.
    """
    if size is None:
        size = SIZE
    # Tuple comparison also rejects non-3-D input cleanly, where the old
    # shape[2] index raised IndexError on 2-D (grayscale) arrays.
    if pixels.shape != (size, size, 3):
        return None
    mean_rgb = np.sum(pixels, axis=(0, 1)) / (size * size)
    data = np.abs(pixels - mean_rgb)
    peak = np.max(data)
    if peak == 0:
        # Uniform image: return the all-zero mask instead of dividing by 0.
        return data
    return data / peak
def calculate_peaks(data, size=None, border=None):
    """Collapse a mask to a 1-D, normalized edge-strength profile.

    Channels are weighted (2, 5, 3), summed, projected onto rows+columns,
    differentiated, and trimmed by *border* on each side.

    Parameters
    ----------
    data : ndarray of shape (size, size, 3)
        Mask as produced by image_mask.
    size, border : int, optional
        Default to the module-level SIZE and BORDER, so existing callers
        are unaffected.

    Returns
    -------
    1-D ndarray of length size - 2*border - 1, scaled to [-1, 1].
    """
    if size is None:
        size = SIZE
    if border is None:
        border = BORDER
    weighted = np.sum(data * (2, 5, 3), axis=2)
    profile = np.sum(weighted, axis=0) + np.sum(weighted, axis=1)
    deltas = np.diff(profile)[border:size - border]
    scale = np.max(np.abs(deltas))
    if scale == 0:
        # Flat profile: avoid 0/0 producing NaNs.
        return deltas
    return deltas / scale
def create_dataset(path):
    """Load every JPEG/PNG image under *path* into [(peaks, filepath), ...].

    Images that are not SIZE x SIZE RGB are skipped with a message, as are
    files that fail to open or parse.  Progress is printed every 100 images.
    """
    print("loading images from " + path)
    rows = []
    if not path.endswith("/"):
        path = path + "/"
    for f in os.listdir(path):
        try:
            # splitext takes the LAST extension; the old slice at the first
            # "." wrongly rejected names like "cat.v2.png".
            ext = os.path.splitext(f)[1].lower()
            if ext in (".jpeg", ".jpg", ".png"):
                image = Image.open(path + f)
                data = np.array(image)
                image.close()
                mask = image_mask(data)
                if mask is None:
                    print("skipped " + f + ", image not " + SIZE_TEXT)
                else:
                    peaks = calculate_peaks(mask)
                    rows.append((peaks, path + f))
                    if len(rows) % 100 == 0:
                        print(str(len(rows)) + " images processed")
        except Exception:
            # Narrowed from a bare except: so Ctrl-C / SystemExit still work.
            print("skipped: " + f)
    if len(rows) % 100 != 0:
        print(str(len(rows)) + " images processed")
    return rows
def train_model(realcats, gancats, training=0.5):
    """Fit an MLP to separate real (label 0) from GAN (label 1) images.

    *realcats* / *gancats* are lists of (peaks, filepath) tuples as produced
    by create_dataset.  *training* is the fraction of the shuffled pool used
    for fitting; the remainder is held out.

    Returns (fitted model, DataFrame with "actual"/"predicted"/"image"
    columns for the held-out samples).
    """
    samples = [(peaks, 0, name) for peaks, name in realcats]
    samples += [(peaks, 1, name) for peaks, name in gancats]
    random.shuffle(samples)

    split = int(training * len(samples))
    train_set = samples[:split]
    test_set = samples[split:]

    classifier = MLPClassifier(random_state=random.randint(0, 2147483647),
                               max_iter=1000, hidden_layer_sizes=[48, 12, 3])
    classifier = classifier.fit(np.stack([s[0] for s in train_set], axis=0),
                                [s[1] for s in train_set])

    held_out = np.stack([s[0] for s in test_set], axis=0)
    report = pd.DataFrame({"actual": [s[1] for s in test_set],
                           "predicted": classifier.predict(held_out),
                           "image": [s[2] for s in test_set]})
    return (classifier, report)
def train_loop(realcats, gancats, iterations=100, training=0.7):
    """Train *iterations* models and return the (model, DataFrame) pair with
    the best held-out accuracy.

    Bug fix: *best* used to be assigned only when a model beat a starting
    accuracy of 0, so a run where every model scored 0.0 would hit a
    NameError at return.  It is now seeded from the first iteration.
    """
    accuracy = 0.0
    best = None
    for i in range(iterations):
        result = train_model(realcats, gancats, training=training)
        df = result[1]
        score = len(df[df["actual"] == df["predicted"]]) / len(df.index)
        if best is None or score > accuracy:
            accuracy = score
            best = result
        if (i + 1) % 20 == 0:
            print(str(i + 1) + "/" + str(iterations) + ", best accuracy " + str(accuracy))
    print("accuracy: " + str(accuracy))
    return best
def classify_images(model, path):
    """Classify every eligible image under *path* with *model*.

    Returns a DataFrame with "image", "predicted" (0/1), and
    "predictedCategory" ("real"/"GAN") columns.
    """
    dataset = create_dataset(path)
    features = np.stack([row[0] for row in dataset], axis=0)
    names = [row[1] for row in dataset]
    result = pd.DataFrame({"image": names,
                           "predicted": model.predict(features)})
    result["predictedCategory"] = result["predicted"].apply(
        lambda label: "GAN" if label == 1 else "real")
    return result[["image", "predicted", "predictedCategory"]]
def main(argv):
    """Command-line entry point.

    Usage:
        train <real-dir> <gan-dir> <model-out>   fit and pickle the best model
        test  <image-dir> <model-in> <csv-out>   classify a directory of images

    Previously this ran at import time and raised IndexError when arguments
    were missing; it is now guarded and prints a usage line instead.
    """
    if len(argv) < 2:
        print("usage: train <real> <gan> <model> | test <images> <model> <csv>")
        return
    mode = argv[1].lower().strip()
    if mode == "train":
        realcats = create_dataset(argv[2])
        gancats = create_dataset(argv[3])
        result = train_loop(realcats, gancats)
        with open(argv[4], "wb") as fd:
            pickle.dump(result[0], fd)
    elif mode == "test":
        # SECURITY: pickle.load executes arbitrary code from the file --
        # only load model files you produced yourself.
        with open(argv[3], "rb") as fd:
            model = pickle.load(fd)
        df = classify_images(model, argv[2])
        df.to_csv(argv[4], index=False)


if __name__ == "__main__":
    main(sys.argv)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement