Advertisement
999ms

Untitled

May 21st, 2020
1,811
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 9.58 KB | None | 0 0
  1. from PIL import Image
  2. from collections import deque
  3. from random import randrange as rnd
  4.  
  5. white = (255, 255, 255)
  6. black = (0, 0, 0)
  7.  
  8.  
  9. def get_path_to_save():
  10.     return 'C:\\Users\\User\\Desktop\\result\\'
  11.  
  12.  
  13. def get_path_to_image(index, s):
  14.     return 'C:\\Users\\User\\Desktop\\no_names\\  (%d).' % index + s
  15.  
  16.  
  17. def generate_steps(dlt, flag):
  18.     if flag:
  19.         return [(i, j) for i in range(-dlt, dlt + 1) for j in range(-dlt, dlt + 1)]
  20.     return [(i, j) for i in range(-dlt, dlt + 1) for j in range(-dlt, dlt + 1) if not (i == 0 and j == 0)]
  21.  
  22.  
  23. def connect(mp, steps):
  24.     w = len(mp)
  25.     h = len(mp[0])
  26.     colors = [[-1 for _ in range(h)] for _ in range(w)]
  27.  
  28.     def connect_bfs(start_x, start_y, color):
  29.         q = deque()
  30.         q.append((start_x, start_y))
  31.         colors[start_x][start_y] = color
  32.         while len(q):
  33.             cx, cy = q.popleft()
  34.             for dx, dy in steps:
  35.                 nx = cx + dx
  36.                 ny = cy + dy
  37.                 if min(nx, ny) < 0 or nx == w or ny == h:
  38.                     continue
  39.                 if colors[nx][ny] != -1 or mp[nx][ny] != mp[cx][cy]:
  40.                     continue
  41.                 colors[nx][ny] = color
  42.                 q.append((nx, ny))
  43.  
  44.     answer_size = 0
  45.     for x in range(w):
  46.         for y in range(h):
  47.             if colors[x][y] == -1:
  48.                 connect_bfs(x, y, answer_size)
  49.                 answer_size += 1
  50.  
  51.     ans = [[] for _ in range(answer_size)]
  52.  
  53.     for x in range(w):
  54.         for y in range(h):
  55.             ans[colors[x][y]].append((x, y))
  56.  
  57.     return ans
  58.  
  59.  
  60. def clear_corners(mp):
  61.     w = len(mp)
  62.     h = len(mp[0])
  63.     colors = [[-1 for _ in range(h)] for _ in range(w)]
  64.     steps = generate_steps(1, False)
  65.     delta = 3
  66.  
  67.     def clear_bfs(start_x, start_y, color):
  68.         q = deque()
  69.         q.append((start_x, start_y))
  70.         colors[start_x][start_y] = color
  71.         while len(q):
  72.             cx, cy = q.popleft()
  73.             for dx, dy in steps:
  74.                 nx = cx + dx
  75.                 ny = cy + dy
  76.                 if min(nx, ny) < 0 or nx == w or ny == h:
  77.                     continue
  78.                 if colors[nx][ny] != -1 or mp[nx][ny] != mp[cx][cy]:
  79.                     continue
  80.                 colors[nx][ny] = color
  81.                 q.append((nx, ny))
  82.  
  83.     for x in range(w):
  84.         for y in range(h):
  85.             if min(x, w - 1 - x, y, h - 1 - y) >= delta:
  86.                 continue
  87.             if mp[x][y] != white:
  88.                 clear_bfs(x, y, 0)
  89.  
  90.     for x in range(w):
  91.         for y in range(h):
  92.             if colors[x][y] != -1:
  93.                 mp[x][y] = white
  94.  
  95.     return mp
  96.  
  97.  
  98. def k_means(list_of_pixels, k):
  99.     arr = [[rnd(256), rnd(256), rnd(256)] for _ in range(k)]
  100.     n = len(list_of_pixels)
  101.  
  102.     def get_class(color):
  103.         dist = 10 ** 10
  104.         current_class = -1
  105.         for current_index in range(k):
  106.             cur_dist = (color[0] - arr[current_index][0]) ** 2 \
  107.                        + (color[1] - arr[current_index][1]) ** 2 \
  108.                        + (color[2] - arr[current_index][2]) ** 2
  109.             if cur_dist < dist:
  110.                 dist = cur_dist
  111.                 current_class = current_index
  112.         return current_class
  113.  
  114.     classes = [[] for _ in range(k)]
  115.  
  116.     for i in range(n):
  117.         classes[get_class(list_of_pixels[i][0])].append(i)
  118.  
  119.     error = 1
  120.     while error:
  121.         error = 0
  122.         dr = [0 for _ in range(k)]
  123.         dg = [0 for _ in range(k)]
  124.         db = [0 for _ in range(k)]
  125.         count = [1 for _ in range(k)]
  126.         for i in range(k):
  127.             class_r, class_g, class_b = arr[i]
  128.             for val in classes[i]:
  129.                 count[i] += list_of_pixels[val][1]
  130.                 r, g, b = list_of_pixels[val][0]
  131.                 dr[i] += (r - class_r) * list_of_pixels[val][1]
  132.                 dg[i] += (g - class_g) * list_of_pixels[val][1]
  133.                 db[i] += (b - class_b) * list_of_pixels[val][1]
  134.  
  135.             classes[i].clear()
  136.         for i in range(k):
  137.             error += abs(dr[i]) // count[i] + abs(dg[i]) // count[i] + abs(db[i]) // count[i]
  138.             arr[i][0] += dr[i] // count[i]
  139.             arr[i][1] += dg[i] // count[i]
  140.             arr[i][2] += db[i] // count[i]
  141.  
  142.         for i in range(n):
  143.             classes[get_class(list_of_pixels[i][0])].append(i)
  144.  
  145.     answer = dict()
  146.     for j in range(k):
  147.         for index in classes[j]:
  148.             answer[list_of_pixels[index][0]] = (arr[j][0], arr[j][1], arr[j][2])
  149.     return answer
  150.  
  151.  
  152. def filter_image(number_of_image):
  153.     name = get_path_to_image(number_of_image, "png")
  154.     try:
  155.         image = Image.open(name).convert('RGB')
  156.     except FileNotFoundError:
  157.         assert False
  158.  
  159.     w, h = image.size
  160.     image = image.load()
  161.  
  162.     pixels = [[image[x, y] for y in range(h)] for x in range(w)]
  163.  
  164.     mp = {}
  165.     for i in range(w):
  166.         for j in range(h):
  167.             cur = pixels[i][j]
  168.             if cur not in mp:
  169.                 mp[cur] = 0
  170.             mp[cur] += 1
  171.  
  172.     k = 10  # 5
  173.     color_mp = k_means(list(mp.items()), k)
  174.     for i in range(w):
  175.         for j in range(h):
  176.             pixels[i][j] = color_mp[pixels[i][j]]
  177.  
  178.     mp = {}
  179.     for i in range(w):
  180.         for j in range(h):
  181.             cur = pixels[i][j]
  182.             if cur not in mp:
  183.                 mp[cur] = 0
  184.             mp[cur] += 1
  185.  
  186.     presents = 15  # 10 # если больше presents процентов на картинке данного цвета, то будем считать, что это фон
  187.     total_size = w * h
  188.     background_colors = set()
  189.     background_colors.add(white)  # чтобы сразу убрать белые точки
  190.  
  191.     for color in mp:
  192.         if mp[color] / total_size * 100 >= presents:
  193.             background_colors.add(color)
  194.  
  195.     for x in range(w):
  196.         for y in range(h):
  197.             if pixels[x][y] in background_colors:
  198.                 pixels[x][y] = white
  199.  
  200.     return pixels
  201.  
  202.  
  203. def main_filter_captcha_2(number_of_image):
  204.     while True:
  205.         pixels = clear_corners(filter_image(number_of_image))
  206.         w = len(pixels)
  207.         h = len(pixels[0])
  208.         black_count = 0
  209.         for x in range(w):
  210.             for y in range(h):
  211.                 if pixels[x][y] != white:
  212.                     pixels[x][y] = black
  213.                     black_count += 1
  214.  
  215.         if 0.05 * w * h < black_count < 0.95 * w * h:
  216.             break
  217.  
  218.     steps = generate_steps(1, True)
  219.     classes = connect(pixels, steps)
  220.     low_border = 10  # размер связкой области
  221.  
  222.     for current_class in classes:
  223.         if len(current_class) <= low_border:
  224.             for point in current_class:
  225.                 cx, cy = point
  226.                 pixels[cx][cy] = white
  227.  
  228.     # cuts
  229.     oy = []
  230.     for x in range(w):
  231.         column = 0
  232.         for y in range(h):
  233.             if pixels[x][y] == black:
  234.                 column += 1
  235.         oy.append(column)
  236.  
  237.     cuts = []
  238.     low_border = 1
  239.     i = 0
  240.     start = 0
  241.     while i < w:
  242.         check = False
  243.         while i < w and oy[i] < low_border:
  244.             i += 1
  245.         if i < w:
  246.             check = True
  247.             start = i
  248.         while i < w and oy[i] >= low_border:
  249.             i += 1
  250.         if check:
  251.             cuts.append((start, i))
  252.         start = i
  253.  
  254.     if len(cuts) < 5:
  255.         cuts1 = []
  256.         difference = 5 - len(cuts)
  257.         if difference == 5:
  258.             i = 0
  259.             j = w - 1
  260.             for k in range(6):
  261.                 cuts1.append((i + (j - i) * k // 6, i + (j - i) * (k + 1) // 6))
  262.  
  263.         if difference == 4:
  264.             i = 0
  265.             j = w - 1
  266.             for k in range(5):
  267.                 cuts1.append((i + (j - i) * k // 5, i + (j - i) * (k + 1) // 5))
  268.         if difference < 4:
  269.             c = []
  270.             for i, j in cuts:
  271.                 if j - i <= 19:
  272.                     cuts1.append((i, j))
  273.                     continue
  274.                 else:
  275.                     c.append((i, j))
  276.             for i, j in c:
  277.                 if difference == 0:
  278.                     cuts1.append((i, j))
  279.                     continue
  280.                 if j - i > 60:
  281.                     coefficient = min(difference + 1, 4)
  282.                 elif j - i > 40:
  283.                     coefficient = min(difference + 1, 3)
  284.                 else:
  285.                     coefficient = min(difference + 1, 2)
  286.                 for k in range(coefficient):
  287.                     cuts1.append((int(i + (j - i) * k / coefficient), int(i + (j - i) * (k + 1) / coefficient)))
  288.                 difference = max(0, difference - coefficient)
  289.         cuts = [cuts1[i] for i in range(len(cuts1))]
  290.  
  291.     cuts2 = []
  292.     for (i, j) in cuts:
  293.         mn = h
  294.         mx = 0
  295.         for u in range(j - i):
  296.             for k in range(h):
  297.                 if pixels[u + i][k] == white:
  298.                     continue
  299.                 else:
  300.                     mn = min(mn, k)
  301.                     mx = max(mx, k)
  302.         cuts2.append((mn, mx))
  303.  
  304.     print(cuts2)
  305.  
  306.     img2 = Image.new('RGB', (w, h))
  307.     mp = img2.load()
  308.     for x in range(w):
  309.         for y in range(h):
  310.             mp[x, y] = pixels[x][y]
  311.     # img2.show()
  312.     itt = 0
  313.     for (mnx, mxx) in cuts:
  314.         (mnh, mxh) = cuts2[itt]
  315.         img3 = img2.crop((mnx, mnh, mxx, mxh))
  316.         size = (28, 28)
  317.         img3 = img3.resize(size)
  318.         # img3.show()
  319.         itt += 1
  320.         img3.save(get_path_to_save() + str(number_of_image) + "_" + str(itt) + ".jpeg", "JPEG")
  321.  
  322.     return pixels
  323.  
  324.  
  325. for number in range(1, 40):
  326.     print(number, end=': ')
  327.     pixel_map = main_filter_captcha_2(number)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement