Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- """ A project matcher modelled after the Gale-Shapley algorithm to solve the
- stable marriage problem with respect to teammates and projects. The teammates
- represent the eligible suitors, and the projects the fickle object of their
- efforts to establish an engagement.
- The same preference is shared by both the Project and each Teammate. The
- Teammates want to end up on their first choice for Projects, and the Projects
- also want them to get their first choice, or at least the Project closest to
- the tops of their lists ordered from most interested to least.
- The ProjectMatcher adds a variation to the GS algorithm where the preferred
- distribution is not entirely based on the order of Teammate/Project preference,
- but also on the statistical mean and standard deviation of the overall
- teammate/project distribution. This approach can produce better matchings, in
- that all participants tend to mostly get their first picks, and some their
- second, and rarely are any stuck with their 3rd pick or worse.
- Parts of the algorithm are implemented in the classes as methods. Generators are
- used to provide the list of 'free' teammates and lists of project preferences,
- which isolates and moves some of the looping that one might expect to see in the
- ProjectMatcher.match() loop, which is most recognizably the GS algorithm in the
- application.
- The Gale-Shapley algorithm for reference:
- algorithm stable_matching is
- Initialize all m ∈ M and w ∈ W to free
- while ∃ free man m who still has a woman w to propose to do
- w := first woman on m's list to whom m has not yet proposed
- if w is free then
- (m, w) become engaged
- else some pair (m', w) already exists
- if w prefers m to m' then
- m' becomes free
- (m, w) become engaged
- else
- (m', w) remain engaged
- end if
- end if
- repeat
- Methods that implement a noticeable piece of the algorithm:
- Teammate.gen_project_preferences_cycle() - The teammate's ordered list of
- projects to propose to.
- Project.consider() - The Project decides whether it
- prefers one engagement over
- another.
- ProjectMatcher.gen_free_teammates_cycle() - Generates the list of 'free'
- Teammates still looking for a
- project.
- ProjectMatcher.match() - The main loop that drives the
- activity of proposals and
- engagements.
- """
- import io
- # import multiprocessing as mp
- import os
- # import pandas as pd
- import random
- import signal
- import sys
- import time
- from bisect import bisect_left
- from copy import deepcopy
- from itertools import cycle, accumulate
- from multiprocessing import cpu_count, Manager, Process, Value
- from random import randint, shuffle
- from statistics import mean, stdev
- __all__ = ('Teammate', 'Project', 'ProjectMatcher')
- class Teammate:
- """ Teammate - the suitor.
- Represents a teammate to be placed on projects as a romantic suitor in the
- stable marriage problem.
- """
- __slots__ = ('_id', '_projects', '_rankings', '_prefs', '_project',
- '_weights')
- def __init__(self, identifier, projects, rankings):
- """ Constructor.
- @param identifier - An ID assigned to the teammate.
- @param projects - The list of projects available.
- @param rankings - The rankings in order that the teammate gave the
- projects. These must be in the same order as the
- list/iterable passed to 'projects'.
- """
- self._id = identifier
- self._projects = list(projects)
- self._rankings = dict(zip((p.id for p in projects), rankings))
- self._weights = [2**i for i in range(len(projects), 0, -1)]
- self._prefs = None
- self._project = None
- @property
- def id(self):
- """ The ID given to the teammate. """
- return self._id
- def rank(self, project):
- """ Returns the rank the teammate gave the project. """
- return self._rankings[project.id]
- def clear(self):
- """ Removes the teammate from the project and clears its state."""
- self._project = None
- def reset(self):
- """ Clears the teammate's state and resets its preferences list. """
- self._project = None
- self._prefs = self.gen_project_preferences_cycle()
- @property
- def is_free(self):
- """ Indicates engagement status.
- True if the teammate isn't 'engaged' to a project. False otherwise.
- """
- return self._project is None
- @property
- def is_engaged(self):
- """ True if the teammate is 'engaged' to a project; False otherwise. """
- return self._project is not None
- def propose(self, project):
- """ Have the teammate propose to the project.
- If accepted, the teammate's engage() method will be invoked by the
- project.
- @param project - The project to propose to.
- @return - True if the proposal was accepted, False otherwise.
- """
- return project.consider(self)
- def engage(self, project):
- """ Establishes an engagement.
- If a project accepts a proposal, it invokes this method. Adds the
- teammate to the project's roster and changes the teammate's state to
- 'engaged' by setting its _project attribute to the project.
- """
- self._project = project
- project.add(self)
- def disengage(self, project):
- """ Breaks off engagement.
- If a project breaks the engagement with the teammate, this is invoked.
- Removes the teammate from the project and sets the teammate's state back
- to 'free'.
- @param project - The project that requested the disengagement.
- """
- self._project = None
- project.remove(self)
- @property
- def project_preferences(self):
- """ Returns the list of project preferences as a cyclical generator. """
- if not self._prefs:
- self._prefs = self.gen_project_preferences_cycle()
- return self._prefs
- def gen_project_preferences_cycle(self):
- """ Creates a cyclical list of project preferences as a generator.
- Sometimes the 2nd choice is added to the front of the list, and less
- often also the 3rd, and rarely a random weighted sample (favoring best
- scored projects). This breaks the tendency for all first comers to get
- their first picks sticking subsequent teammates with their 3rd, 4th, or
- worse pick. Starting at less desirable scores causes more milling for
- better matches when a teammate gets disengaged and has to propose again.
- """
- prefs = sorted(self._projects, key=self.rank)
- rndv = randint(1, 100)
- if rndv > 98: # 2%
- len_prefs = len(prefs)
- weights = self._weights
- prefs = wsample(prefs, weights, len_prefs // 2) + prefs
- elif rndv > 88: # 10%
- prefs = [prefs[2], prefs[1]] + prefs
- elif rndv > 58: # 30%
- prefs = [prefs[1]] + prefs
- return cycle(prefs)
- def __lt__(self, other):
- """ The id's are compared to determine less-than.
- The projects need this to keep their internal roster lists in order for
- project comparisons.
- """
- return self._id < other.id
- def __eq__(self, other):
- """ The id's are compared for equality. """
- return self._id == other.id
- class Project:
- """ The fickle object of the suitors' efforts.
- Represents a project in the placement strategy modeled after the Gale-
- Shapley algorithm. The Project is the object of the suitors' efforts
- to engage with. It can be 'engaged' to as many suitors as there are seats
- on the project.
- """
- __slots__ = ('_id', '_seats', '_team')
- def __init__(self, identifier, seats):
- """ Constructor.
- @param identifier - The id of the project.
- @param seats - The number of seats available.
- """
- self._id = identifier
- self._seats = seats
- self._team = []
- @property
- def id(self):
- """ The id given to the project when created. """
- return self._id
- @property
- def score(self):
- """ The sum of the ranks each teammate on the roster gave the project.
- """
- return sum(tm.rank(self) for tm in self._team)
- @property
- def ranks(self):
- """ The ranks each teammate gave the project.
- Returns a generator producing the rank each teammate gave the project.
- """
- return (tm.rank(self) for tm in self._team)
- def add(self, teammate):
- """ Adds the teammate to the roster. The internal list is sorted. """
- self._team.append(teammate)
- self._team.sort()
- def remove(self, teammate):
- """ Removes the teammate from the project. """
- self._team.remove(teammate)
- def clear(self):
- """ Clears the project's team roster. """
- self._team = []
- def consider(self, teammate):
- """ Consider engagement proposal.
- If accepted, the teammate's engage() method is invoked; if the project's
- roster is already full, one of the teammates will have its disengage()
- method invoked, which removes it from the project to make space for the
- new fiance.
- @param teammate - The teammate submitting the proposal.
- @return - True if accepted; False otherwise.
- """
- rank = teammate.rank(self)
- team = self._team
- if self.is_free or any(rank < tm.rank(self) for tm in team):
- if self.is_engaged:
- shuffle(team)
- tm_out = max(team, key=self.rank)
- tm_out.disengage(self)
- teammate.engage(self)
- return True
- return False
- @property
- def is_free(self):
- """ True if the project has available seats; False otherwise. """
- return len(self._team) < self._seats
- @property
- def is_engaged(self):
- """ True if there are no empty seats; False otherwise. """
- return len(self._team) >= self._seats
- @property
- def roster_str(self):
- """ Returns a string that can be printed to show which teammates are on
- the project.
- """
- return f"{self.id}: {[(tm.id, tm.rank(self)) for tm in self._team]}"
- @property
- def team(self):
- """ Returns a copy of the list of teammates on the project.
- """
- return list(self._team)
- def rank(self, teammate):
- """ Returns the ranking the teammate gave the project.
- """
- return teammate.rank(self)
- def __eq__(self, other):
- """ Two projects are equal if they have the same id and roster. """
- return self._id == other.id and self._team == other.team
- def __lt__(self, other):
- """ The id's and team lists are compared to determine less-than. """
- return self._id < other.id and self._team < other.team
- class ProjectMatcher:
- """ The matchmaking service.
- The ProjectMatcher has the teammates attempt engagements with the projects
- until all teammates and projects are fully 'engaged'. The suitability of the
- matchings are evaluated and given a score, then the process is repeated any
- number of times while keeping track of the distributions with the best
- (lowest) score.
- """
- __slots__ = ('_projects', '_teammates', '_free_teammates', '_shared_count',
- '_stdev_weight')
- def __init__(self, projects, teammates, stdev_weight=1):
- """ Constructor.
- @param projects - The projects the teammates will try to engage.
- @param teammates - The suitors seeking project engagements.
- @param stdev_weight - Value multiplied by the standard deviation of the
- mean sum of rank values of each project to
- determine the overall score of a distribution.
- """
- self._projects = list(projects)
- self._teammates = list(teammates)
- self._free_teammates = None
- self._shared_count = None
- self._stdev_weight = stdev_weight
- @staticmethod
- def from_dataframe(df):
- """ Creates a ProjectMatcher instance using a Pandas dataframe.
- The header row is used to assign id's to the projects, and the leftmost
- column of each row the teammate id's. The rank each teammate gave for
- each project aligns with the column headers. This method assumes an
- even distribution of seats among the projects for the teammates.
- @param df - The Pandas dataframe.
- @return - A ProjectMatcher instance.
- """
- n_seats = len(df.values) // (len(df.columns) - 1)
- projects = [Project(proj, n_seats) for proj in df.columns[1:]]
- teammates = [Teammate(tm[0], projects, tm[1:]) for tm in df.values]
- return ProjectMatcher(projects, teammates)
- def match(self, num_iterations=1000, report=False, timeout=0,
- stdev_weight=-1):
- """ Matches up Project's with Teammate's.
- Runs the loop that iterates over each teammate and has them propose
- to each project in their ordered lists of project preferences. The loop
- continues until all teammates are 'engaged', then the distribution is
- evaluated. If the distribution is the best so far, it's retained.
- The cycle is repeated 'num_iterations' times while keeping a list of the
- best scoring distributions. The list of best distributions is printed
- after the cycles are done. The first block of loops is most noticeably
- the Gale-Shapley algorithm.
- @param num_iterations - The number of times to generate a project
- distribution to find the best scoring
- combinations. Takes some experimentation to
- find the plateau where the same set is
- reliably generated on successive runs.
- @param report - If True, a printout of the best matches is
- done at the end of the iterations.
- @param timeout - If non-zero, the iterations will stop when
- the timeout expires, or 'num_iterations' has
- been met - whichever comes first.
- @param stdev_weight - Value multiplied by the standard deviation of
- the mean rank values of each project to
- determine the overall score of a distribution.
- If -1 (default) the weight given to the
- constructor is used instead. Raising this
- value may improve distributions with 4 or
- worse values showing up in output.
- @return - A tuple of the best score and list of
- project distributions with that score.
- """
- if stdev_weight > -1:
- self._stdev_weight = stdev_weight
- best_score = float('inf')
- best_distribs = []
- start = time.time()
- random.seed()
- for i in range(num_iterations):
- for teammate in self.free_teammates:
- for project in teammate.project_preferences:
- accepted = teammate.propose(project)
- if accepted:
- break
- score = self.evaluate()
- if score <= best_score and self._projects not in best_distribs:
- best_dist = deepcopy(self._projects)
- if score < best_score:
- best_score = score
- best_distribs = [best_dist]
- else:
- best_distribs.append(best_dist)
- if timeout and time.time() - start > timeout:
- print(f"Process {os.getpid()} timed out at "
- f"{time.time()-start:.3f} seconds "
- f"with {i} iterations completed...",
- file=sys.stderr, flush=True)
- break
- elif self._shared_count:
- with self._shared_count.get_lock():
- self._shared_count.value += 1
- if self._shared_count.value >= num_iterations:
- break
- self.reset()
- if report:
- self.report(best_distribs)
- return best_score, best_distribs
- @staticmethod
- def report(best_distribs):
- """ Prints out the best scoring assignments.
- @param best_distribs - The list of best scoring project distributions.
- """
- print("PROJECT ASSIGNMENTS:")
- for i, distrib in enumerate(best_distribs, 1):
- print(f" option {i}")
- for project in distrib:
- print(f" {project.roster_str}")
- print()
- def multiproc_match(self, num_iterations=1000, report=False, timeout=0,
- stdev_weight=-1, num_procs=0):
- """ Match making in parallel.
- Same as match(), but runs the ProjectMatcher in 'num_procs' number
- of processes in parallel, then collects and reports the resulting best
- distributions. It's good to limit the number of processes to the number
- of CPU cores on the system (the default).
- @param num_iterations - The number times to iteratively create
- project distributions to find the best ones.
- @param report - If True, a printout of the best matches is
- done at the end of the iterations.
- @param timeout - If non-zero, the iterations will stop when
- the timeout expires, or 'num_iterations' has
- been met - whichever comes first.
- @param stdev_weight - Value multiplied by the standard deviation of
- the mean rank values of each project to
- determine the overall score of a distribution.
- If -1 (default) the weight given to the
- constructor is used instead. Raising this
- may improve some distributions that have 4's
- or worse showing up in output distributions.
- @param num_procs - The number of processes to run in parallel.
- Defaults to the number of processor cores on
- the system if left at 0.
- @return - A tuple of the best score and list of
- project distributions with that score.
- """
- if stdev_weight > -1:
- self._stdev_weight = stdev_weight
- with Manager() as manager:
- SCORE, DISTR, LOCK = 0, 1, 2 # shared_data list indices.
- iter_count = Value('L', 0)
- best_distribs = manager.list()
- shared_data = manager.list([float('inf'),
- best_distribs,
- manager.RLock()])
- processes = []
- num_procs = num_procs or cpu_count()
- for _ in range(num_procs):
- process = Process(target=self._mp_match,
- args=(num_iterations, shared_data,
- iter_count, timeout, report))
- processes.append(process)
- for p in processes:
- p.start()
- for p in processes:
- p.join()
- score = shared_data[SCORE]
- distribs = list(best_distribs)
- if report:
- self.report(distribs)
- return score, distribs
- def _mp_match(self, num_iter, best, shared_count, duration, report):
- """ The method run in the processes by multiprocess_match().
- The internal use method that runs matcher.match() and updates 'best'
- with the best project distributions found.
- @param num_iter - The number of iterations to run.
- @param best - A remotely managed list where the first param is
- the best score so far, 2nd is a remote list of the
- best project distributions, 3rd is a remote RLock.
- @param shared_count - A multiprocessing.Value object used between
- processes to determine when num_iter has been met.
- @param duration - The timeout value.
- @param report - If true, print out messages.
- """
- SCORE, DISTR, LOCK = 0, 1, 2 # 'best' list indices.
- self._shared_count = shared_count
- start = time.time()
- score, distribs = self.match(num_iter, False, duration)
- with best[LOCK]:
- if score < best[SCORE]:
- best[SCORE] = score
- best[DISTR][:] = distribs
- elif score == best[SCORE]:
- m_dists = best[DISTR]
- dists = [d for d in distribs if d not in m_dists]
- m_dists.extend(dists)
- if report:
- print(f"Process {os.getpid()} completed "
- f"in {time.time() - start:.3f} seconds.")
- def evaluate(self):
- """ Scores the current project/teammate distribution.
- Scores the current distribution of teammates on projects. The lower
- the better. The lowest average team score (the score of each teammate
- added) among the projects plus the standard deviation.
- """
- s = [sum(10**rank for rank in p.ranks) for p in self._projects]
- m = mean(s)
- d = abs(stdev(s, m))
- return m + (d * self._stdev_weight)
- def gen_free_teammates_cycle(self):
- """ Creates a cyclical list of teammates who are 'free'.
- Teammates that are currently 'engaged' are skipped each cycle, but can
- reappear in the list if they are bumped from a project. The cycles
- continue until all teammates are 'engaged'. The list of teammates is
- shuffled each cycle to add variety to the order engagements are
- attempted.
- """
- teammates = self.teammates
- while any(tm.is_free for tm in teammates):
- shuffle(teammates)
- for tm in filter(lambda tm: tm.is_free, teammates):
- yield tm
- @property
- def projects(self):
- """ Returns a copy of the list of projects. """
- return list(self._projects)
- @property
- def teammates(self):
- """ Returns a copy of the list of teammates. """
- return list(self._teammates)
- @property
- def free_teammates(self):
- """ Returns the generator that produces 'free' teammates. """
- if not self._free_teammates:
- self._free_teammates = self.gen_free_teammates_cycle()
- return self._free_teammates
- def reset(self):
- """ Resets the simulation.
- Resets the projects, clearing their rosters, and setting all teammates
- back to 'free', and creates a new free_teammates generator.
- """
- self._free_teammates = self.gen_free_teammates_cycle()
- [p.clear() for p in self._projects]
- [t.reset() for t in self._teammates]
- def wsample(population, weights, k=1):
- """ Weighted sample.
- Returns a sample of size k who’s selection is influenced by the weight
- values. Optimized for speed by minimizing the need to recalculate the
- weights each time a selection is made internally.
- @param population - The population to sample.
- @param weights - The biases to apply to each item in the population.
- @param k - The size of the sample to take.
- @return - The sample.
- """
- wts = list(weights)
- accum = list(accumulate(wts))
- total = accum[-1]
- cull = 0.0
- tolr = 0.6 * total
- sampl = {}
- while len(sampl) < k:
- randval = random.random()
- index = bisect_left(accum, total * randval)
- if index not in sampl:
- sampl[index] = population[index]
- cull += accum[index]
- if cull > tolr:
- for i in sampl:
- wts[i] = 0
- cull = 0.0
- accum = list(accumulate(wts))
- total = accum[-1]
- tolr = 0.6 * total
- return list(sampl.values())
- class DummyPandas:
- """ A simple stand-in to parse CSV text like Pandas does.
- A simple immitation of a Pandas class that works with the code in the
- main block below.
- """
- def __init__(self, columns, rows):
- """ Constructor. """
- self._columns = columns
- self._rows = rows
- @property
- def values(self):
- """ Returns the "rows" of the object. """
- return self._rows
- @property
- def columns(self):
- """ Returns the "columns" of the object. """
- return self._columns
- @staticmethod
- def read_csv(file, sep):
- """ Parses simple CSV delimited by whitespace.
- Returns an instance of DummyPandas that can be used as a dataframe.
- """
- data_gen = DummyPandas._data_gen(file)
- headers = next(data_gen)
- row_data = [row for row in data_gen]
- return DummyPandas(headers, row_data)
- @staticmethod
- def _data_gen(file):
- """ A generator that reads the file and produces lists of data. """
- for line in file:
- data = line.split()
- if data:
- data = [int(d) if str.isnumeric(d) else d for d in data]
- yield data
- def sigint_handler(sig, frame):
- print(f"Ctrl+C received! Process {os.getpid()} exiting...")
- sys.exit(0)
- if __name__ == '__main__':
- signal.signal(signal.SIGINT, sigint_handler)
- # Comment this out and uncomment the pandas import at the top of the file
- # if you want to use the real Pandas.
- pd = DummyPandas
- df_a = pd.read_csv(io.StringIO("""
- teammate proj_A proj_B proj_C proj_D proj_E
- A 1 5 4 3 2
- B 2 4 5 1 3
- C 2 3 5 1 4
- D 3 1 5 4 2
- E 3 4 2 5 1
- F 1 3 2 5 4
- G 5 3 1 2 4
- H 3 1 5 4 2
- I 1 5 4 2 3
- J 2 4 3 1 5
- K 2 1 3 4 5
- L 1 5 4 3 2
- M 4 1 5 3 2
- N 4 2 1 5 3
- O 2 1 4 3 5
- """), sep=r'\s+')
- df_b = pd.read_csv(io.StringIO("""
- employee proj_A proj_B proj_C proj_D proj_E
- A1 1 5 3 4 2
- B1 5 4 1 2 3
- C1 2 3 4 1 5
- A2 4 2 1 3 5
- B2 4 5 3 2 1
- C2 3 1 2 5 4
- A3 1 2 4 3 5
- B3 2 3 1 5 4
- C3 5 3 4 1 2
- A4 4 5 3 2 1
- B4 5 3 4 2 1
- C4 1 2 3 4 5
- A5 1 3 2 5 4
- B5 2 1 3 5 4
- C5 2 1 4 5 4
- """), sep=r'\s+')
- df_c = pd.read_csv(io.StringIO("""
- employee proj_A proj_B proj_C proj_D proj_E proj_F proj_G proj_H proj_I proj_J
- A1 1 6 8 9 2 7 4 5 10 3
- A2 10 8 1 5 3 4 6 9 7 2
- A3 4 2 10 8 7 9 5 6 1 3
- B1 9 8 2 6 5 7 1 10 3 4
- B2 6 3 8 2 10 7 1 9 5 4
- B3 9 6 4 2 1 10 7 5 3 8
- C1 7 9 2 1 8 4 10 6 3 5
- C2 3 6 5 1 8 4 2 10 7 9
- C3 5 4 2 3 1 8 10 7 9 6
- D1 2 9 4 10 3 5 8 7 1 6
- D2 8 4 10 2 1 3 9 5 6 7
- D3 5 2 8 6 1 7 10 4 9 3
- E1 6 5 4 2 8 1 10 3 9 7
- E2 5 1 3 6 9 7 2 10 4 8
- E3 5 10 3 6 2 7 8 1 9 4
- F1 7 1 4 9 2 6 5 10 3 8
- F2 1 3 4 9 7 10 5 6 8 2
- F3 8 4 1 10 5 3 6 9 7 2
- G1 2 8 10 3 5 1 7 6 9 4
- G2 4 5 9 1 10 3 7 8 2 6
- G3 6 5 9 7 8 1 2 3 4 10
- H1 7 8 10 5 4 9 6 2 1 3
- H2 5 1 7 10 4 2 9 8 6 3
- H3 9 5 10 1 8 3 4 2 6 7
- I1 3 1 10 2 8 9 7 5 4 6
- I2 2 4 6 3 10 5 8 7 1 9
- I3 4 9 10 2 3 6 5 1 8 7
- J1 2 7 9 5 10 8 6 3 4 1
- J2 6 10 7 3 4 5 2 1 9 8
- J3 1 3 9 8 2 10 7 6 4 5
- """), sep=r'\s+')
- df_d = pd.read_csv(io.StringIO("""
- teammate proj_A proj_B proj_C proj_D proj_E proj_F proj_G proj_H proj_I proj_J
- A1 4 5 9 3 8 10 2 1 6 7
- A2 5 4 10 2 8 9 1 3 7 6
- A3 4 8 7 6 9 3 5 2 10 1
- B1 5 4 10 8 3 2 1 7 9 6
- B2 8 10 6 7 9 5 4 1 3 2
- B3 3 1 4 9 10 2 6 8 7 5
- C1 1 4 7 8 3 2 6 10 5 9
- C2 9 8 3 5 10 1 2 4 6 7
- C3 4 3 8 9 6 10 2 1 7 5
- D1 6 7 2 10 5 4 8 9 1 3
- D2 3 8 1 2 5 4 6 7 10 9
- D3 1 3 8 9 2 4 5 10 6 7
- E1 3 1 7 10 9 6 5 4 8 2
- E2 10 4 5 7 1 3 8 9 2 6
- E3 7 9 2 8 3 10 5 1 4 6
- F1 9 2 1 3 6 5 8 7 10 4
- F2 1 7 9 2 6 4 3 5 8 10
- F3 1 10 6 2 4 5 7 9 3 8
- G1 3 2 6 5 7 10 4 8 1 9
- G2 5 10 2 9 4 1 6 7 8 3
- G3 7 5 1 2 6 3 9 4 10 8
- H1 7 1 5 10 4 9 6 8 2 3
- H2 4 7 3 10 8 2 1 6 9 5
- H3 10 8 6 5 3 7 4 1 9 2
- I1 2 1 9 3 6 4 10 8 7 5
- I2 9 8 10 2 3 4 7 6 5 1
- I3 6 8 5 1 2 4 3 9 7 10
- J1 2 9 7 5 1 10 4 3 8 6
- J2 5 8 1 4 10 7 3 2 6 9
- J3 8 3 4 10 2 5 6 9 1 7
- """), sep=r'\s+')
- df_e = pd.read_csv(io.StringIO("""
- employee proj_A proj_B proj_C proj_D proj_E proj_F proj_G proj_H proj_I proj_J
- A1 9 6 3 7 8 1 10 2 4 5
- A2 9 8 10 7 3 1 6 2 5 4
- A3 9 8 6 3 1 7 2 5 10 4
- B1 8 9 6 1 3 7 10 2 5 4
- B2 9 6 3 8 7 5 1 2 10 4
- B3 9 2 6 7 10 3 1 4 8 5
- C1 9 3 7 6 1 8 4 10 2 5
- C2 9 3 8 7 6 1 5 2 10 4
- C3 9 3 7 6 10 8 4 1 2 5
- D1 3 6 9 1 8 7 5 2 4 10
- D2 9 3 8 7 6 1 5 2 4 10
- D3 9 3 8 1 7 6 2 10 4 5
- E1 9 8 7 6 3 1 2 4 10 5
- E2 9 6 3 7 10 4 8 1 2 5
- E3 9 6 3 7 1 5 8 2 10 4
- F1 9 6 8 1 3 4 10 2 7 5
- F2 9 8 7 3 6 1 10 2 4 5
- F3 9 3 6 1 2 8 10 4 7 5
- G1 9 3 6 7 8 4 1 2 10 5
- G2 9 3 6 1 8 7 2 4 10 5
- G3 3 6 1 9 8 7 2 10 4 5
- H1 9 10 8 6 3 1 7 2 4 5
- H2 9 6 3 1 2 7 8 10 4 5
- H3 9 3 6 1 7 8 5 10 4 2
- I1 2 9 3 6 7 8 1 10 5 4
- I2 9 1 5 6 8 3 4 7 10 2
- I3 3 9 7 10 5 2 8 6 4 1
- J1 9 2 7 6 3 8 10 4 1 5
- J2 9 6 10 1 7 3 2 8 4 5
- J3 9 10 2 4 7 6 3 1 8 5
- """), sep=r'\s+')
- pm_a = ProjectMatcher.from_dataframe(df_a)
- pm_b = ProjectMatcher.from_dataframe(df_b)
- pm_c = ProjectMatcher.from_dataframe(df_c)
- pm_d = ProjectMatcher.from_dataframe(df_d)
- pm_e = ProjectMatcher.from_dataframe(df_e)
- # mp.set_start_method('spawn')
- n_cpu = cpu_count()
- print()
- print("[[DATAFRAME A PROJECT MATCHINGS - should produce 3 options]]")
- pm_a.match(10**4, True)
- print("[[DATAFRAME B PROJECT MATCHINGS - should produce 4 options]]")
- pm_b.match(10**4, True)
- print("[[DATAFRAME C PROJECT MATCHINGS - should produce 4 options]]")
- n_cycles = 10**4 // n_cpu
- n_total = n_cycles * n_cpu
- print(
- f"{f'[{n_total} cycles total - {n_cycles} approx per CPU...]':^60}")
- print(f"{f'[Running on {n_cpu} CPUs...]':^60}")
- pm_c.multiproc_match(n_total, True)
- print("[[DATAFRAME D PROJECT MATCHINGS - should produce 10 options]]")
- n_cycles = 3 * 10**4 // n_cpu
- n_total = n_cycles * n_cpu
- print(f"{f'[{n_total} cycles total - {n_cycles} approx per CPU...]':^60}")
- print(f"{f'[Running on {n_cpu} CPUs...]':^60}")
- pm_d.multiproc_match(n_total, report=True, num_procs=n_cpu)
- print(f"[[DATAFRAME E PROJECT MATCHINGS - "
- f"(difficult) should produce 1 option]]")
- n_cycles = 4 * 10**5 // n_cpu
- n_total = n_cycles * n_cpu
- print(f"{f'[{n_total} cycles total - {n_cycles} approx per CPU...]':^60}")
- print(f"{f'[Running on {n_cpu} CPUs...]':^60}")
- # This is a difficult project to distribute to because I weighted the
- # projects heavily in favor of a few of them to simulate the teammates
- # being very interested in them and choosing them within their top few
- # choices - or close to that. It's not possible to generate a matrix of
- # all 1's and 2's with this one.
- pm_e.multiproc_match(n_total, True, stdev_weight=5000)
Add Comment
Please, Sign In to add comment