Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import os
- import pandas as pd
- from PIL import Image
- import time
- import sys
def count_and_sort(df: pd.DataFrame, columns) -> pd.DataFrame:
    """Count rows per group and return the groups ordered by frequency.

    Args:
        df: Frame to aggregate.
        columns: Column label or list of labels to group by.

    Returns:
        A new frame with one row per distinct group, most frequent first.
        Its columns are ``index`` (the group's position before sorting,
        i.e. in group-key order), the grouping column(s), and ``count``.
    """
    group_sizes = df.groupby(columns).size()
    counts = pd.DataFrame({"count": group_sizes}).reset_index()
    # A stable sort keeps groups with equal counts in group-key order, so
    # the result does not depend on the sorting algorithm's tie handling.
    ordered = counts.sort_values("count", ascending=False, kind="mergesort")
    # No drop=True: the pre-sort position is deliberately kept as "index",
    # matching the frame shape this function has always returned.
    return ordered.reset_index()
# Command line: <image_dir> <output_csv>.
# Both arguments are validated up front: the output path is only consumed
# after every image has been scored, so a missing argument would otherwise
# surface as an IndexError at the very end of a long run.
if len(sys.argv) < 3:
    sys.exit("usage: python SCRIPT IMAGE_DIR OUTPUT_CSV")

# Directory whose entries are scored.
path = sys.argv[1]
# One {"file": ..., "score": ...} dict is appended per processed image.
rows = []
# Every image is resized to each of these (width, height) pairs before scoring.
sizes = [(400, 400)]
# Wall-clock start time, used for the periodic "seconds elapsed" report.
t = time.time()
# Column order of the CSV written at the end of the script.
columns = ["file", "score", "predicted_category"]
def diff_score(values, limit):
    """Average the small steps between neighbouring values.

    Used to test for discolorations in smooth areas/background: only
    neighbour-to-neighbour differences strictly below ``limit`` contribute,
    so sharp edges are ignored and faint noise in flat regions is measured.

    Args:
        values: Sequence of numbers (e.g. one colour channel, flattened).
        limit: Exclusive upper bound; absolute differences >= ``limit``
            are not added to the total.

    Returns:
        Sum of the qualifying absolute differences divided by the number of
        adjacent pairs (``len(values) - 1``). Returns ``0.0`` when there are
        fewer than two values, since no adjacent pair exists.
    """
    pair_count = len(values) - 1
    if pair_count < 1:
        # Avoids dividing by zero (one value) or by -1 (no values).
        return 0.0

    total = 0
    for previous, current in zip(values, values[1:]):
        step = abs(previous - current)
        if step < limit:
            total += step
    # Normalised by ALL adjacent pairs, not just the ones under the limit.
    return total / pair_count
def _score_image(image_path, target_sizes):
    """Return the summed smooth-area noise score of one image file.

    For every size in ``target_sizes`` the image is resized, then scored in
    its original orientation and rotated by 90 degrees, which changes which
    pixels are adjacent in the flattened data. Each of the three colour
    channels is passed to ``diff_score`` with a limit of 2 and the results
    are added up.
    """
    score = 0
    for size in target_sizes:
        # The context manager closes the underlying file handle; resize()
        # returns an independent in-memory copy that outlives it.
        with Image.open(image_path) as source:
            # Grayscale/palette pixels are plain ints, not (r, g, b) tuples,
            # so they are converted first. RGB/RGBA inputs are left as they
            # are so their scores stay comparable with the cutoff.
            if source.mode in ("RGB", "RGBA"):
                rgb_ready = source
            else:
                rgb_ready = source.convert("RGB")
            image = rgb_ready.resize(size)

        for view in (image, image.rotate(90)):
            pixels = list(view.getdata())
            # Channels are added in r, g, b order to keep the floating-point
            # summation order stable.
            for channel in range(3):
                score += diff_score([pixel[channel] for pixel in pixels], 2)
    return score


# Read the destination now so a missing argument fails before the slow scan.
output_path = sys.argv[2]

# Hidden entries are skipped, as are sub-directories (Image.open cannot
# read them).
files = [
    name
    for name in os.listdir(path)
    if not name.startswith(".") and os.path.isfile(os.path.join(path, name))
]

for pos, f in enumerate(files, start=1):
    rows.append({"file": f, "score": _score_image(os.path.join(path, f), sizes)})
    if pos % 100 == 0:
        print(pos, "images processed")
        print(time.time() - t, "seconds elapsed")

# Explicit columns keep "score" present even when no image was found.
df = pd.DataFrame(rows, columns=["file", "score"])
print(len(df.index), "images processed total")

# Decision threshold on the score; presumably tuned empirically on labelled
# samples -- TODO confirm its origin before changing the scoring above.
cutoff = 1.1266882918018237
df["predicted_category"] = df["score"].apply(
    lambda s: "GAN generated" if s > cutoff else "real anime pic"
)
df[columns].to_csv(output_path, index=False)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement