Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import tqdm
- import copy
# Read the whole input file. The first line holds the declared number of
# photos; every following line describes one photo.
# The context manager closes the handle as soon as the text has been read.
with open("./d_pet_pictures.txt") as file_:
    list_string = file_.read().split("\n")
count_photo = int(list_string[0])
list_photo = list_string[1:]

# photo index -> tag list, for every parsed photo regardless of its type.
dict_vert_copy = {}
# Maximum number of photos placed into a single batch dictionary.
batch_size = 9000
# One {photo index: tag list} dict per batch; only photos whose type is not
# "H" are batched.
list_batch_data = []

# parsing
ix_global = 0  # number of photos batched so far
for ix, photo in enumerate(list_photo):
    elem_photo = photo.split()
    if not elem_photo:
        # A blank line (typically the one produced by a trailing newline)
        # has no fields; skip it instead of failing on elem_photo[0].
        continue
    type_ = elem_photo[0]
    # Field 1 is skipped (presumably the tag count - verify against the
    # input format); everything after it is treated as tags.
    list_tag = elem_photo[2:]
    dict_vert_copy[ix] = list_tag
    if type_ == "H":
        continue
    # Open a new batch whenever the previous one is full.
    if ix_global % batch_size == 0:
        list_batch_data.append({})
    list_batch_data[ix_global // batch_size][ix] = list_tag
    ix_global += 1
print(f"count = {ix_global}")
def find_neigh_photo(ix_for_search):
    """Return the key of the best-scoring neighbour of photo ``ix_for_search``.

    Candidates are all keys of the module-level ``dict_vert`` except
    ``ix_for_search`` itself. A candidate's score is the minimum of three
    counts: tags only in the reference photo, tags shared by both photos,
    and tags only in the candidate. On ties the first candidate in dict
    iteration order wins.

    Raises:
        ValueError: if ``dict_vert`` contains no key other than
            ``ix_for_search``.
    """
    reference_tags = set(dict_vert[ix_for_search])

    def pair_score(candidate_ix):
        candidate_tags = set(dict_vert[candidate_ix])
        shared = reference_tags & candidate_tags
        return min(
            len(reference_tags - shared),
            len(shared),
            len(candidate_tags - shared),
        )

    candidates = [key for key in dict_vert if key != ix_for_search]
    # max() keeps the first maximal element, like argmax over a score list.
    return max(candidates, key=pair_score)
def calc_sum_tag(ix_global):
    """Return the distinct tags of a slide as a list (order unspecified).

    ``ix_global`` is a sequence of photo indices looked up in the
    module-level ``dict_vert_copy``. When it has exactly two entries the tags
    of both photos are merged; for any other length only the first entry is
    used.
    """
    if len(ix_global) == 2:
        left_ix, right_ix = ix_global
        merged = set(dict_vert_copy[left_ix]) | set(dict_vert_copy[right_ix])
        return list(merged)
    only_ix = ix_global[0]
    return list(set(dict_vert_copy[only_ix]))
def find_neigh_photo_last_frd(ix_first, ix_center, ix_last):
    """Pick the photo that best completes the slide started by ``ix_center``.

    Every key of the module-level ``dict_vert`` is tried as the second photo
    of the centre slide. A candidate's score is the sum of the transition
    scores between the completed centre slide and each of its two
    neighbours, where a transition score is the minimum of: tags only in the
    neighbour, tags shared by both, and tags only in the centre slide.

    Args:
        ix_first: photo indices of the slide to the left (the two left
            elements), or ``-1`` when there is no left slide; in that case
            the left transition contributes 0.
        ix_center: photo indices of the slide being completed; only its
            first entry is used.
        ix_last: photo indices of the slide to the right (the two right
            elements).

    Returns:
        The key of ``dict_vert`` with the highest score; on ties the first
        one in dict iteration order.

    Raises:
        ValueError: if ``dict_vert`` is empty.
    """

    def transition_score(neighbour_tags, center_tags):
        shared = neighbour_tags & center_tags
        return min(
            len(neighbour_tags - shared),
            len(shared),
            # Tags unique to the centre slide. The previous code measured
            # the neighbour's unique tags a second time for the right-hand
            # transition, so this side was never taken into account.
            len(center_tags - shared),
        )

    tag_first = set() if ix_first == -1 else set(calc_sum_tag(ix_first))
    tag_last = set(calc_sum_tag(ix_last))
    center_ix = ix_center[0]

    def candidate_score(candidate_ix):
        tag_center = set(calc_sum_tag([center_ix, candidate_ix]))
        return (transition_score(tag_first, tag_center)
                + transition_score(tag_last, tag_center))

    if not dict_vert:
        raise ValueError("no candidate photos left in dict_vert")
    # max() keeps the first maximal key, like argmax over a score list.
    return max(dict_vert, key=candidate_score)
def calc_score(result_ix_list):
    """Return the total transition score of a slideshow.

    ``result_ix_list`` is a sequence of slides, each a sequence of photo
    indices accepted by ``calc_sum_tag``. For every pair of consecutive
    slides the score is the minimum of: tags only in the current slide, tags
    shared by both, and tags only in the next slide. A slideshow with fewer
    than two slides scores 0.
    """

    def transition(current_slide, next_slide):
        current_tags = set(calc_sum_tag(current_slide))
        next_tags = set(calc_sum_tag(next_slide))
        shared = current_tags & next_tags
        return min(
            len(current_tags - shared),
            len(shared),
            len(next_tags - shared),
        )

    return sum(
        transition(current_slide, next_slide)
        for current_slide, next_slide in zip(result_ix_list, result_ix_list[1:])
    )
def find_min_tags_el(dict_vert):
    """Return the key of ``dict_vert`` whose value is the longest.

    NOTE(review): despite the "min" in the name, the comparison selects the
    entry with the MOST tags - confirm which behaviour is intended before
    relying on it.

    On ties the first key in iteration order wins. An empty mapping
    yields 0.
    """
    return max(dict_vert, key=lambda key: len(dict_vert[key]), default=0)
- #processing
def calc_showcast_batch(dict_vert):
    """Greedily arrange one batch of photos into two-photo slides.

    Pass 1 ("first forward") starts from a randomly chosen photo and chains
    ``len(dict_vert) // 2`` further photos, each one being the best
    neighbour of the previous photo as chosen by ``find_neigh_photo``.
    Pass 2 ("last forward") walks that chain, except for its final entry,
    and gives every entry a second photo chosen by
    ``find_neigh_photo_last_frd``.

    NOTE: both helpers read the module-level name ``dict_vert``, not this
    parameter. The function therefore only works when the caller has bound
    that global to the very same dict object that is passed in here.

    Args:
        dict_vert: mapping of photo index -> tag list. It is mutated: every
            photo placed into a slide is removed from it.

    Returns:
        A list of ``[photo_a, photo_b]`` index pairs. The final chain entry
        never receives a partner and is dropped from the result.
    """
    batch_size = len(dict_vert)
    prev_photo_ix = np.random.choice(list(dict_vert.keys()))
    tag_prev_photo = dict_vert[prev_photo_ix]
    print(f"start ix ph: {prev_photo_ix}, tags: {tag_prev_photo}")
    result_ix_list = [[prev_photo_ix]]

    # first forward: build the chain of slide "anchors"
    for _ in tqdm.tqdm(range(batch_size // 2)):
        ix_max_inter = find_neigh_photo(prev_photo_ix)
        # Remove the anchor only after its neighbour search, which looks the
        # anchor's own tags up in the dict.
        dict_vert.pop(prev_photo_ix, None)
        result_ix_list.append([ix_max_inter])
        prev_photo_ix = ix_max_inter

    # last forward: give every anchor but the last one a second photo
    for ix in tqdm.tqdm(range(len(result_ix_list) - 1)):
        # -1 tells the helper that the first slide has no left neighbour.
        left_slide = result_ix_list[ix - 1] if ix > 0 else -1
        ix_max_inter = find_neigh_photo_last_frd(
            left_slide, result_ix_list[ix], result_ix_list[ix + 1]
        )
        result_ix_list[ix].append(ix_max_inter)
        dict_vert.pop(ix_max_inter, None)

    return result_ix_list[:-1]
- #print(list(dict_vert.keys()))
- #result_ix_list[-1].append(dict_vert[list(dict_vert.keys())[0]])
# Build the slideshow batch by batch and write it out.
result_ix_list = []
print(len(result_ix_list))
# The loop variable has to be called ``dict_vert``: find_neigh_photo and
# find_neigh_photo_last_frd read that module-level name.
for dict_vert in list_batch_data:
    result_ix_list.extend(calc_showcast_batch(dict_vert))
print(f"score = {calc_score(result_ix_list)}")

# Output: the slide count on the first line, then one "a b" pair per slide.
with open("out_sub_v.txt", "w") as fd:
    fd.write(f"{len(result_ix_list)}\n")
    fd.writelines(f"{el[0]} {el[1]}\n" for el in result_ix_list)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement