Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import ast
- import networkx as nx
- from networkx.algorithms.community.lukes import lukes_partitioning
- from networkx.algorithms.community.modularity_max import greedy_modularity_communities
- import os
- import pickle
- import matplotlib.pyplot as plt
- import math
- import random
- import heapq
- import numpy as np
- import statistics
- import datetime
- GPICKLE = "graph.gpickle"
- class BalancedPartitioner:
- def __init__(self, graph, classes = 52):
- self.G = graph
- nx.set_node_attributes(self.G, None, "colour")
- self.classes = classes
- self.coloured_count = 0
- self.neighbour_ques = [[] for _ in range(classes)]
- self.total_weights = [0 for _ in range(classes)]
- self.emptied_ques = [False for _ in range(classes)]
- # assign colour to highest degrees
- sorted_nodes = sorted(self.G.degree, key=lambda x: x[1], reverse=True)
- # instead use sample
- node_sample = random.sample(sorted(self.G.nodes), classes)
- self.target = len(sorted_nodes)
- # or node attributes
- # easier to check if a node has been coloured
- # harder to convert to a partition list (probably not an issue)
- if classes > self.target:
- print("Classes > graph size")
- quit()
- for index in range(classes):
- # node = sorted_nodes[index][0]
- node = node_sample[index]
- self.colour_node(node, index)
- self.que_clean_up()
- while self.coloured_count < self.target:
- # print("{} of {}".format(self.coloured_count, self.target))
- # figure out which combination of queed nodes and class weights generates the minimum total weight
- # candidates = [self.total_weights[i] + self.neighbour_ques[i][0][0] for i in range(classes)]
- # print(candidates)
- min_colour = self.get_next_candidate()
- # add node node node to colour
- chosen_node = heapq.heappop(self.neighbour_ques[min_colour])[1]
- self.colour_node(chosen_node, min_colour)
- # run que clean up
- self.que_clean_up()
- def get_next_candidate(self):
- minimum = None
- min_colour = 0
- for i in range(self.classes):
- if not self.emptied_ques[i]:
- weight_sum = self.total_weights[i] + self.neighbour_ques[i][0][0]
- if (minimum is None) or (weight_sum < minimum) :
- min_colour = i
- minimum = weight_sum
- return min_colour
- def colour_node(self, node, colour):
- # colour node and track neighbours
- if self.G.nodes[node]["colour"] is not None:
- print("problem")
- print(node)
- print(self.G.nodes[node])
- quit()
- self.G.nodes[node]["colour"] = colour
- self.total_weights[colour] += self.G.nodes[node]["weight"]
- self.coloured_count += 1
- # que up neighbouring uncoloured nodes
- for neighbour in self.G.neighbors(node):
- attributes = self.G.nodes[neighbour]
- heapq.heappush(self.neighbour_ques[colour], (attributes["weight"], neighbour))
- def que_clean_up(self):
- # check if ques have uncoloured node at head and clean up ques if empty
- for colour in range(self.classes):
- if self.emptied_ques[colour]:
- continue
- # check for valid nodes
- while True:
- try:
- next_neighbour = self.neighbour_ques[colour][0][1]
- if self.G.nodes[next_neighbour]["colour"] is None:
- # valid option so break
- break
- else:
- # already coloured so pop object
- thing = heapq.heappop(self.neighbour_ques[colour])
- except:
- # track emptied que
- self.emptied_ques[colour] = True
- # print("Emptied candidates for colour : {}".format(colour))
- break
- # colour += 1
- def monte_carlo_partitioner(G, classes = 52, repetitions = 100000):
- best_partition = []
- smallest_weight_std = None
- for repeat_count in range(repetitions):
- print("Repetition {} of {}".format(repeat_count, repetitions))
- working_G = G.copy()
- balanced_part = BalancedPartitioner(working_G, classes = classes)
- partition = [[] for _ in range(52)]
- weights = [0 for _ in range(52)]
- for node in working_G.nodes:
- node_atr = working_G.nodes[node]
- partition[node_atr["colour"]].append(node)
- weights[node_atr["colour"]] += node_atr["weight"]
- weight_std = statistics.stdev(weights)
- print(weight_std)
- if smallest_weight_std is None or weight_std < smallest_weight_std:
- best_partition = partition
- smallest_weight_std = weight_std
- print(smallest_weight_std)
- return best_partition
- if __name__ == '__main__':
- G = nx.Graph()
- with open(GPICKLE, 'rb') as f:
- G = pickle.load(f)
- start_time = datetime.datetime.now()
- partition = monte_carlo_partitioner(G, classes = 52)
- end_time = datetime.datetime.now()
- print("Time : {}".format(end_time - start_time))
Advertisement
Add Comment
Please, Sign In to add comment