Advertisement
Guest User

Untitled

a guest
Aug 20th, 2019
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.29 KB | None | 0 0
  1. import numpy as np
  2. import tqdm
  3. import copy
  4.  
  5. file_ = open("./d_pet_pictures.txt")
  6. list_string = file_.read().split("\n")
  7. count_photo = int(list_string[0])
  8. list_photo = list_string[1:]
  9. dict_vert_copy = {}
  10. batch_size = 9000
  11. list_batch_data = []#[{} for i in range(int(len(list_photo) / batch_size))]
  12.  
  13. #parsing
  14. ix_global = 0
  15. for ix, photo in enumerate(list_photo):
  16. elem_photo = photo.split()
  17. type_ = elem_photo[0]
  18. list_tag = elem_photo[2:]
  19. dict_vert_copy[ix] = list_tag
  20. if type_ == "H":
  21. continue
  22.  
  23. if ix_global % batch_size == 0:
  24. list_batch_data.append({})
  25. list_batch_data[ix_global // batch_size][ix] = list_tag
  26. ix_global += 1
  27.  
  28. print(f"count = {ix_global}")
  29.  
  30. def find_neigh_photo(ix_for_search):
  31.  
  32. tag_set_for_search = set(dict_vert[ix_for_search])
  33. score_list = []
  34. ix_list = []
  35.  
  36. for ix in list(dict_vert.keys()):
  37. if ix == ix_for_search:
  38. continue
  39.  
  40. current_tags = set(dict_vert[ix])
  41. intersec_set = tag_set_for_search.intersection(current_tags)
  42. score = min([len(tag_set_for_search - intersec_set), len(intersec_set), len(current_tags - intersec_set)])
  43.  
  44. score_list.append(score)
  45. ix_list.append(ix)
  46.  
  47. ix_max_inter = ix_list[np.argmax(score_list)]
  48.  
  49. return ix_max_inter
  50.  
  51. def calc_sum_tag(ix_global):
  52. if (len(ix_global) != 2):
  53. ix_first = ix_global[0]
  54. return list(set(dict_vert_copy[ix_first]))
  55. #print(ix_global)
  56. ix_first, ix_last = ix_global
  57.  
  58. return list(set(dict_vert_copy[ix_first]).union(set(dict_vert_copy[ix_last])))
  59.  
  60. def find_neigh_photo_last_frd(ix_first, ix_center, ix_last):
  61. ##ix_first левые два элементра
  62. ##ix_last правые два эл-та
  63. score_list = []
  64. ix_list = []
  65. if ix_first == -1:
  66. tag_first = set([])
  67. else:
  68. tag_first = set(calc_sum_tag(ix_first))
  69.  
  70. tag_last = set(calc_sum_tag(ix_last))
  71. for ix in list(dict_vert.keys()):
  72.  
  73. tag_center = set(calc_sum_tag([ix_center[0], ix]))
  74. score = 0
  75.  
  76. intersec_set = tag_first.intersection(tag_center)
  77. score += min([len(tag_first - intersec_set), len(intersec_set), len(tag_center - intersec_set)])
  78.  
  79. intersec_set = tag_last.intersection(tag_center)
  80. score += min([len(tag_last - intersec_set), len(intersec_set), len(tag_last - intersec_set)])
  81.  
  82. score_list.append(score)
  83. ix_list.append(ix)
  84.  
  85. ix_max_inter = ix_list[np.argmax(score_list)]
  86.  
  87. return ix_max_inter
  88.  
  89. def calc_score(result_ix_list):
  90. score = 0
  91. for ix in range(len(result_ix_list) - 1):
  92. tags_curr = set(calc_sum_tag(result_ix_list[ix]))
  93. tags_next = set(calc_sum_tag(result_ix_list[ix+1]))
  94.  
  95. intersec_set = set(tags_curr).intersection(tags_next)
  96. score += min([len(tags_curr - intersec_set), len(intersec_set), len(tags_next - intersec_set)])
  97. return score
  98.  
  99.  
  100. def find_min_tags_el(dict_vert):
  101. min_el = -1; ix_min_el = 0;
  102. for ix in list(dict_vert.keys()):
  103. if min_el < len(dict_vert[ix]):
  104. min_el = len(dict_vert[ix])
  105. ix_min_el = ix
  106. return ix_min_el
  107.  
  108. #processing
  109. def calc_showcast_batch(dict_vert):
  110. batch_size = len(list(dict_vert.keys()))
  111. total_score = 0
  112. prev_photo_ix = np.random.choice(list(dict_vert.keys()))
  113. tag_prev_photo = dict_vert[prev_photo_ix]
  114. print(f"start ix ph: {prev_photo_ix}, tags: {tag_prev_photo}")
  115. result_ix_list = [[prev_photo_ix]]
  116. #first forward
  117. ix = 0;
  118. for _ in tqdm.tqdm(range(batch_size // 2)):
  119.  
  120. ix_max_inter = find_neigh_photo(prev_photo_ix)
  121. dict_vert.pop(prev_photo_ix, None)
  122. result_ix_list.append([ix_max_inter])
  123. prev_photo_ix = ix_max_inter
  124.  
  125. #print(result_ix_list)
  126.  
  127. #last forward
  128. for ix, curr_element in tqdm.tqdm(enumerate(result_ix_list[:-1])):
  129. if (len(curr_element) != 2):
  130. if ix == 0:
  131. ix_max_inter = find_neigh_photo_last_frd(-1, result_ix_list[ix], result_ix_list[ix+1])
  132. result_ix_list[ix].append(ix_max_inter)
  133. dict_vert.pop(ix_max_inter, None)
  134. else:
  135. ix_max_inter = find_neigh_photo_last_frd(result_ix_list[ix-1], result_ix_list[ix], result_ix_list[ix+1])
  136. result_ix_list[ix].append(ix_max_inter)
  137. dict_vert.pop(ix_max_inter, None)
  138. pass
  139. return result_ix_list[:-1]
  140.  
  141. #print(list(dict_vert.keys()))
  142.  
  143. #result_ix_list[-1].append(dict_vert[list(dict_vert.keys())[0]])
  144.  
  145. result_ix_list = []
  146.  
  147. print(len(result_ix_list))
  148.  
  149. for dict_vert in list_batch_data:
  150. #print(dict_vert)
  151. result_ix_list += calc_showcast_batch(dict_vert)
  152. print(f"score = {calc_score(result_ix_list)}")
  153.  
  154. with open("out_sub_v.txt", "w") as fd:
  155. fd.write(str(len(result_ix_list))+"\n")
  156. for el in result_ix_list:
  157. fd.write(f"{el[0]} {el[1]}\n")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement