stablefitness.py

#!/usr/bin/env python3
""" A project matcher modelled after the Gale-Shapley algorithm to solve the
stable marriage problem with respect to teammates and projects. The teammates
represent the eligible suitors, and the projects the fickle object of their
efforts to establish an engagement.

The same preference is shared by both the Project and each Teammate. The
Teammates want to end up on their first choice for Projects, and the Projects
also want them to get their first choice, or at least the Project closest to
the tops of their lists ordered from most interested to least.

The ProjectMatcher adds a variation to the GS algorithm where the preferred
distribution is not entirely based on the order of Teammate/Project preference,
but also on the statistical mean and standard deviation of the overall
teammate/project distribution. This approach can produce better matchings, in
that all participants tend to mostly get their first picks, and some their
second, and rarely are any stuck with their 3rd pick or worse.

Parts of the algorithm are implemented in the classes as methods. Generators are
used to provide the list of 'free' teammates and lists of project preferences,
which isolates and moves some of the looping that one might expect to see in the
ProjectMatcher.match() loop, which is most recognizably the GS algorithm in the
application.

The Gale-Shapley algorithm for reference:

    algorithm stable_matching is
        Initialize all m ∈ M and w ∈ W to free
        while ∃ free man m who still has a woman w to propose to do
            w := first woman on m's list to whom m has not yet proposed
            if w is free then
                (m, w) become engaged
            else some pair (m', w) already exists
                if w prefers m to m' then
                    m' becomes free
                    (m, w) become engaged
                else
                    (m', w) remain engaged
                end if
            end if
        repeat

Methods that implement a noticeable piece of the algorithm:

    Teammate.gen_project_preferences_cycle()  - The teammate's ordered list of
                                                projects to propose to.
    Project.consider()                        - The Project decides whether it
                                                prefers one engagement over
                                                another.
    ProjectMatcher.gen_free_teammates_cycle() - Generates the list of 'free'
                                                Teammates still looking for a
                                                project.
    ProjectMatcher.match()                    - The main loop that drives the
                                                activity of proposals and
                                                engagements.
"""

import io
# import multiprocessing as mp
import os
# import pandas as pd
import random
import signal
import sys
import time
from bisect import bisect_left
from copy import deepcopy
from itertools import cycle, accumulate
from multiprocessing import cpu_count, Manager, Process, Value
from random import randint, shuffle
from statistics import mean, stdev


__all__ = ('Teammate', 'Project', 'ProjectMatcher')


class Teammate:
    """ Teammate - the suitor.
    Represents a teammate to be placed on projects as a romantic suitor in the
    stable marriage problem.
    """
    __slots__ = ('_id', '_projects', '_rankings', '_prefs', '_project',
                 '_weights')

    def __init__(self, identifier, projects, rankings):
        """ Constructor.
        @param identifier   - An ID assigned to the teammate.
        @param projects     - The list of projects available.
        @param rankings     - The rankings in order that the teammate gave the
                              projects. These must be in the same order as the
                              list/iterable passed to 'projects'.
        """
        self._id       = identifier
        self._projects = list(projects)
        self._rankings = dict(zip((p.id for p in projects), rankings))
        self._weights  = [2**i for i in range(len(projects), 0, -1)]
        self._prefs    = None
        self._project  = None

    @property
    def id(self):
        """ The ID given to the teammate. """
        return self._id

    def rank(self, project):
        """ Returns the rank the teammate gave the project. """
        return self._rankings[project.id]

    def clear(self):
        """ Removes the teammate from the project and clears its state."""
        self._project = None

    def reset(self):
        """ Clears the teammate's state and resets its preferences list. """
        self._project = None
        self._prefs   = self.gen_project_preferences_cycle()

    @property
    def is_free(self):
        """ Indicates engagement status.
        True if the teammate isn't 'engaged' to a project. False otherwise.
        """
        return self._project is None

    @property
    def is_engaged(self):
        """ True if the teammate is 'engaged' to a project; False otherwise. """
        return self._project is not None

    def propose(self, project):
        """ Have the teammate propose to the project.
        If accepted, the teammate's engage() method will be invoked by the
        project.
        @param project - The project to propose to.
        @return        - True if the proposal was accepted, False otherwise.
        """
        return project.consider(self)

    def engage(self, project):
        """ Establishes an engagement.
        If a project accepts a proposal, it invokes this method. Adds the
        teammate to the project's roster and changes the teammate's state to
        'engaged' by setting its _project attribute to the project.
         """
        self._project = project
        project.add(self)

    def disengage(self, project):
        """ Breaks off engagement.
        If a project breaks the engagement with the teammate, this is invoked.
        Removes the teammate from the project and sets the teammate's state back
        to 'free'.
        @param project - The project that requested the disengagement.
        """
        self._project = None
        project.remove(self)

    @property
    def project_preferences(self):
        """ Returns the list of project preferences as a cyclical generator. """
        if not self._prefs:
            self._prefs = self.gen_project_preferences_cycle()
        return self._prefs

    def gen_project_preferences_cycle(self):
        """ Creates a cyclical list of project preferences as a generator.
        Sometimes the 2nd choice is added to the front of the list, and less
        often also the 3rd, and rarely a random weighted sample (favoring best
        scored projects). This breaks the tendency for all first comers to get
        their first picks sticking subsequent teammates with their 3rd, 4th, or
        worse pick. Starting at less desirable scores causes more milling for
        better matches when a teammate gets disengaged and has to propose again.
        """
        prefs = sorted(self._projects, key=self.rank)
        rndv  = randint(1, 100)
        if rndv > 98:    # 2%
            len_prefs = len(prefs)
            weights   = self._weights
            prefs     = wsample(prefs, weights, len_prefs // 2) + prefs
        elif rndv > 88:  # 10%
            prefs = [prefs[2], prefs[1]] + prefs
        elif rndv > 58:  # 30%
            prefs = [prefs[1]] + prefs
        return cycle(prefs)

    def __lt__(self, other):
        """ The id's are compared to determine less-than.
        The projects need this to keep their internal roster lists in order for
        project comparisons.
        """
        return self._id < other.id

    def __eq__(self, other):
        """ The id's are compared for equality. """
        return self._id == other.id


class Project:
    """ The fickle object of the suitors' efforts.
    Represents a project in the placement strategy modeled after the Gale-
    Shapley algorithm. The Project is the object of the suitors' efforts
    to engage with. It can be 'engaged' to as many suitors as there are seats
    on the project.
    """
    __slots__ = ('_id', '_seats', '_team')

    def __init__(self, identifier, seats):
        """ Constructor.
        @param identifier   - The id of the project.
        @param seats        - The number of seats available.
        """
        self._id    = identifier
        self._seats = seats
        self._team  = []

    @property
    def id(self):
        """ The id given to the project when created. """
        return self._id

    @property
    def score(self):
        """ The sum of the ranks each teammate on the roster gave the project.
        """
        return sum(tm.rank(self) for tm in self._team)

    @property
    def ranks(self):
        """ The ranks each teammate gave the project.
        Returns a generator producing the rank each teammate gave the project.
        """
        return (tm.rank(self) for tm in self._team)

    def add(self, teammate):
        """ Adds the teammate to the roster. The internal list is sorted. """
        self._team.append(teammate)
        self._team.sort()

    def remove(self, teammate):
        """ Removes the teammate from the project. """
        self._team.remove(teammate)

    def clear(self):
        """ Clears the project's team roster. """
        self._team = []

    def consider(self, teammate):
        """ Consider engagement proposal.
        If accepted, the teammate's engage() method is invoked; if the project's
        roster is already full, one of the teammates will have its disengage()
        method invoked, which removes it from the project to make space for the
        new fiance.
        @param teammate - The teammate submitting the proposal.
        @return         - True if accepted; False otherwise.
        """
        rank = teammate.rank(self)
        team = self._team
        if self.is_free or any(rank < tm.rank(self) for tm in team):
            if self.is_engaged:
                shuffle(team)
                tm_out = max(team, key=self.rank)
                tm_out.disengage(self)
            teammate.engage(self)
            return True
        return False

    @property
    def is_free(self):
        """ True if the project has available seats; False otherwise. """
        return len(self._team) < self._seats

    @property
    def is_engaged(self):
        """ True if there are no empty seats; False otherwise. """
        return len(self._team) >= self._seats

    @property
    def roster_str(self):
        """ Returns a string that can be printed to show which teammates are on
        the project.
        """
        return f"{self.id}: {[(tm.id, tm.rank(self)) for tm in self._team]}"

    @property
    def team(self):
        """ Returns a copy of the list of teammates on the project.
        """
        return list(self._team)

    def rank(self, teammate):
        """ Returns the ranking the teammate gave the project.
        """
        return teammate.rank(self)

    def __eq__(self, other):
        """ Two projects are equal if they have the same id and roster. """
        return self._id == other.id and self._team == other.team

    def __lt__(self, other):
        """ The id's and team lists are compared to determine less-than. """
        return self._id < other.id and self._team < other.team


class ProjectMatcher:
    """ The matchmaking service.
    The ProjectMatcher has the teammates attempt engagements with the projects
    until all teammates and projects are fully 'engaged'. The suitability of the
    matchings are evaluated and given a score, then the process is repeated any
    number of times while keeping track of the distributions with the best
    (lowest) score.
    """
    __slots__ = ('_projects', '_teammates', '_free_teammates', '_shared_count',
                 '_stdev_weight')

    def __init__(self, projects, teammates, stdev_weight=1):
        """ Constructor.
        @param projects     - The projects the teammates will try to engage.
        @param teammates    - The suitors seeking project engagements.
        @param stdev_weight - Value multiplied by the standard deviation of the
                              mean sum of rank values of each project to
                              determine the overall score of a distribution.
        """
        self._projects       = list(projects)
        self._teammates      = list(teammates)
        self._free_teammates = None
        self._shared_count   = None
        self._stdev_weight   = stdev_weight

    @staticmethod
    def from_dataframe(df):
        """ Creates a ProjectMatcher instance using a Pandas dataframe.
        The header row is used to assign id's to the projects, and the leftmost
        column of each row the teammate id's. The rank each teammate gave for
        each project aligns with the column headers. This method assumes an
        even distribution of seats among the projects for the teammates.
        @param df - The Pandas dataframe.
        @return   - A ProjectMatcher instance.
        """
        n_seats   = len(df.values) // (len(df.columns) - 1)
        projects  = [Project(proj, n_seats) for proj in df.columns[1:]]
        teammates = [Teammate(tm[0], projects, tm[1:]) for tm in df.values]
        return ProjectMatcher(projects, teammates)

    def match(self, num_iterations=1000, report=False, timeout=0,
              stdev_weight=-1):
        """ Matches up Project's with Teammate's.
        Runs the loop that iterates over each teammate and has them propose
        to each project in their ordered lists of project preferences. The loop
        continues until all teammates are 'engaged', then the distribution is
        evaluated. If the distribution is the best so far, it's retained.
        The cycle is repeated 'num_iterations' times while keeping a list of the
        best scoring distributions. The list of best distributions is printed
        after the cycles are done. The first block of loops is most noticeably
        the Gale-Shapley algorithm.
        @param num_iterations   - The number of times to generate a project
                                  distribution to find the best scoring
                                  combinations. Takes some experimentation to
                                  find the plateau where the same set is
                                  reliably generated on successive runs.
        @param report           - If True, a printout of the best matches is
                                  done at the end of the iterations.
        @param timeout          - If non-zero, the iterations will stop when
                                  the timeout expires, or 'num_iterations' has
                                  been met - whichever comes first.
        @param stdev_weight     - Value multiplied by the standard deviation of
                                  the mean rank values of each project to
                                  determine the overall score of a distribution.
                                  If -1 (default) the weight given to the
                                  constructor is used instead. Raising this
                                  value may improve distributions with 4 or
                                  worse values showing up in output.
        @return                 - A tuple of the best score and list of
                                  project distributions with that score.
        """
        if stdev_weight > -1:
            self._stdev_weight = stdev_weight

        best_score    = float('inf')
        best_distribs = []
        start         = time.time()
        random.seed()

        for i in range(num_iterations):
            for teammate in self.free_teammates:
                for project in teammate.project_preferences:
                    accepted = teammate.propose(project)
                    if accepted:
                        break

            score  = self.evaluate()

            if score <= best_score and self._projects not in best_distribs:
                best_dist = deepcopy(self._projects)
                if score < best_score:
                    best_score    = score
                    best_distribs = [best_dist]
                else:
                    best_distribs.append(best_dist)

            if timeout and time.time() - start > timeout:
                print(f"Process {os.getpid()} timed out at "
                      f"{time.time()-start:.3f} seconds "
                      f"with {i} iterations completed...",
                      file=sys.stderr, flush=True)
                break
            elif self._shared_count:
                with self._shared_count.get_lock():
                    self._shared_count.value += 1
                    if self._shared_count.value >= num_iterations:
                        break

            self.reset()

        if report:
            self.report(best_distribs)

        return best_score, best_distribs

    @staticmethod
    def report(best_distribs):
        """ Prints out the best scoring assignments.
        @param best_distribs - The list of best scoring project distributions.
        """
        print("PROJECT ASSIGNMENTS:")
        for i, distrib in enumerate(best_distribs, 1):
            print(f"    option {i}")
            for project in distrib:
                print(f"        {project.roster_str}")
        print()

    def multiproc_match(self, num_iterations=1000, report=False, timeout=0,
                        stdev_weight=-1, num_procs=0):
        """ Match making in parallel.
        Same as match(), but runs the ProjectMatcher in 'num_procs' number
        of processes in parallel, then collects and reports the resulting best
        distributions. It's good to limit the number of processes to the number
        of CPU cores on the system (the default).
        @param num_iterations   - The number times to iteratively create
                                  project distributions to find the best ones.
        @param report           - If True, a printout of the best matches is
                                  done at the end of the iterations.
        @param timeout          - If non-zero, the iterations will stop when
                                  the timeout expires, or 'num_iterations' has
                                  been met - whichever comes first.
        @param stdev_weight     - Value multiplied by the standard deviation of
                                  the mean rank values of each project to
                                  determine the overall score of a distribution.
                                  If -1 (default) the weight given to the
                                  constructor is used instead. Raising this
                                  may improve some distributions that have 4's
                                  or worse showing up in output distributions.
        @param num_procs        - The number of processes to run in parallel.
                                  Defaults to the number of processor cores on
                                  the system if left at 0.
        @return                 - A tuple of the best score and list of
                                  project distributions with that score.
        """
        if stdev_weight > -1:
            self._stdev_weight = stdev_weight

        with Manager() as manager:
            SCORE, DISTR, LOCK = 0, 1, 2  # shared_data list indices.
            iter_count         = Value('L', 0)
            best_distribs      = manager.list()
            shared_data        = manager.list([float('inf'),
                                               best_distribs,
                                               manager.RLock()])
            processes = []
            num_procs = num_procs or cpu_count()
            for _ in range(num_procs):
                process = Process(target=self._mp_match,
                                  args=(num_iterations, shared_data,
                                        iter_count, timeout, report))
                processes.append(process)

            for p in processes:
                p.start()
            for p in processes:
                p.join()

            score    = shared_data[SCORE]
            distribs = list(best_distribs)

            if report:
                self.report(distribs)

            return score, distribs

    def _mp_match(self, num_iter, best, shared_count, duration, report):
        """ The method run in the processes by multiprocess_match().
        The internal use method that runs matcher.match() and updates 'best'
        with the best project distributions found.
        @param num_iter     - The number of iterations to run.
        @param best         - A remotely managed list where the first param is
                              the best score so far, 2nd is a remote list of the
                              best project distributions, 3rd is a remote RLock.
        @param shared_count - A multiprocessing.Value object used between
                              processes to determine when num_iter has been met.
        @param duration     - The timeout value.
        @param report       - If true, print out messages.
        """
        SCORE, DISTR, LOCK = 0, 1, 2  # 'best' list indices.
        self._shared_count = shared_count
        start              = time.time()
        score, distribs    = self.match(num_iter, False, duration)
        with best[LOCK]:
            if score < best[SCORE]:
                best[SCORE]    = score
                best[DISTR][:] = distribs
            elif score == best[SCORE]:
                m_dists = best[DISTR]
                dists   = [d for d in distribs if d not in m_dists]
                m_dists.extend(dists)
        if report:
            print(f"Process {os.getpid()} completed "
                  f"in {time.time() - start:.3f} seconds.")

    def evaluate(self):
        """ Scores the current project/teammate distribution.
        Scores the current distribution of teammates on projects. The lower
        the better. The lowest average team score (the score of each teammate
        added) among the projects plus the standard deviation.
        """
        s = [sum(10**rank for rank in p.ranks) for p in self._projects]
        m = mean(s)
        d = abs(stdev(s, m))
        return m + (d * self._stdev_weight)

    def gen_free_teammates_cycle(self):
        """ Creates a cyclical list of teammates who are 'free'.
        Teammates that are currently 'engaged' are skipped each cycle, but can
        reappear in the list if they are bumped from a project. The cycles
        continue until all teammates are 'engaged'. The list of teammates is
        shuffled each cycle to add variety to the order engagements are
        attempted.
        """
        teammates = self.teammates
        while any(tm.is_free for tm in teammates):
            shuffle(teammates)
            for tm in filter(lambda tm: tm.is_free, teammates):
                yield tm

    @property
    def projects(self):
        """ Returns a copy of the list of projects. """
        return list(self._projects)

    @property
    def teammates(self):
        """ Returns a copy of the list of teammates. """
        return list(self._teammates)

    @property
    def free_teammates(self):
        """ Returns the generator that produces 'free' teammates. """
        if not self._free_teammates:
            self._free_teammates = self.gen_free_teammates_cycle()
        return self._free_teammates

    def reset(self):
        """ Resets the simulation.
        Resets the projects, clearing their rosters, and setting all teammates
        back to 'free', and creates a new free_teammates generator.
        """
        self._free_teammates = self.gen_free_teammates_cycle()
        [p.clear() for p in self._projects]
        [t.reset() for t in self._teammates]


def wsample(population, weights, k=1):
    """ Weighted sample.
    Returns a sample of size k who’s selection is influenced by the weight
    values. Optimized for speed by minimizing the need to recalculate the
    weights each time a selection is made internally.
    @param population - The population to sample.
    @param weights    - The biases to apply to each item in the population.
    @param k          - The size of the sample to take.
    @return           - The sample.
    """
    wts   = list(weights)
    accum = list(accumulate(wts))
    total = accum[-1]
    cull  = 0.0
    tolr  = 0.6 * total
    sampl = {}
    while len(sampl) < k:
        randval = random.random()
        index   = bisect_left(accum, total * randval)
        if index not in sampl:
            sampl[index] = population[index]
            cull        += accum[index]
            if cull > tolr:
                for i in sampl:
                    wts[i] = 0
                cull   = 0.0
                accum  = list(accumulate(wts))
                total  = accum[-1]
                tolr   = 0.6 * total
    return list(sampl.values())


class DummyPandas:
    """ A simple stand-in to parse CSV text like Pandas does.
    A simple immitation of a Pandas class that works with the code in the
    main block below.
    """
    def __init__(self, columns, rows):
        """ Constructor. """
        self._columns = columns
        self._rows    = rows

    @property
    def values(self):
        """ Returns the "rows" of the object. """
        return self._rows

    @property
    def columns(self):
        """ Returns the "columns" of the object. """
        return self._columns

    @staticmethod
    def read_csv(file, sep):
        """ Parses simple CSV delimited by whitespace.
        Returns an instance of DummyPandas that can be used as a dataframe.
        """
        data_gen = DummyPandas._data_gen(file)
        headers  = next(data_gen)
        row_data = [row for row in data_gen]
        return DummyPandas(headers, row_data)

    @staticmethod
    def _data_gen(file):
        """ A generator that reads the file and produces lists of data. """
        for line in file:
            data = line.split()
            if data:
                data = [int(d) if str.isnumeric(d) else d for d in data]
                yield data


def sigint_handler(sig, frame):
    print(f"Ctrl+C received! Process {os.getpid()} exiting...")
    sys.exit(0)


if __name__ == '__main__':
    signal.signal(signal.SIGINT, sigint_handler)

    # Comment this out and uncomment the pandas import at the top of the file
    # if you want to use the real Pandas.
    pd = DummyPandas

    df_a = pd.read_csv(io.StringIO("""
        teammate  proj_A  proj_B  proj_C  proj_D  proj_E
               A       1       5       4       3       2
               B       2       4       5       1       3
               C       2       3       5       1       4
               D       3       1       5       4       2
               E       3       4       2       5       1
               F       1       3       2       5       4
               G       5       3       1       2       4
               H       3       1       5       4       2
               I       1       5       4       2       3
               J       2       4       3       1       5
               K       2       1       3       4       5
               L       1       5       4       3       2
               M       4       1       5       3       2
               N       4       2       1       5       3
               O       2       1       4       3       5
        """), sep=r'\s+')

    df_b = pd.read_csv(io.StringIO("""
        employee  proj_A  proj_B  proj_C  proj_D  proj_E
              A1       1       5       3       4       2
              B1       5       4       1       2       3
              C1       2       3       4       1       5
              A2       4       2       1       3       5
              B2       4       5       3       2       1
              C2       3       1       2       5       4
              A3       1       2       4       3       5
              B3       2       3       1       5       4
              C3       5       3       4       1       2
              A4       4       5       3       2       1
              B4       5       3       4       2       1
              C4       1       2       3       4       5
              A5       1       3       2       5       4
              B5       2       1       3       5       4
              C5       2       1       4       5       4
          """), sep=r'\s+')

    df_c = pd.read_csv(io.StringIO("""
employee proj_A proj_B proj_C proj_D proj_E proj_F proj_G proj_H proj_I proj_J
      A1      1      6      8      9      2      7      4      5     10      3
      A2     10      8      1      5      3      4      6      9      7      2
      A3      4      2     10      8      7      9      5      6      1      3
      B1      9      8      2      6      5      7      1     10      3      4
      B2      6      3      8      2     10      7      1      9      5      4
      B3      9      6      4      2      1     10      7      5      3      8
      C1      7      9      2      1      8      4     10      6      3      5
      C2      3      6      5      1      8      4      2     10      7      9
      C3      5      4      2      3      1      8     10      7      9      6
      D1      2      9      4     10      3      5      8      7      1      6
      D2      8      4     10      2      1      3      9      5      6      7
      D3      5      2      8      6      1      7     10      4      9      3
      E1      6      5      4      2      8      1     10      3      9      7
      E2      5      1      3      6      9      7      2     10      4      8
      E3      5     10      3      6      2      7      8      1      9      4
      F1      7      1      4      9      2      6      5     10      3      8
      F2      1      3      4      9      7     10      5      6      8      2
      F3      8      4      1     10      5      3      6      9      7      2
      G1      2      8     10      3      5      1      7      6      9      4
      G2      4      5      9      1     10      3      7      8      2      6
      G3      6      5      9      7      8      1      2      3      4     10
      H1      7      8     10      5      4      9      6      2      1      3
      H2      5      1      7     10      4      2      9      8      6      3
      H3      9      5     10      1      8      3      4      2      6      7
      I1      3      1     10      2      8      9      7      5      4      6
      I2      2      4      6      3     10      5      8      7      1      9
      I3      4      9     10      2      3      6      5      1      8      7
      J1      2      7      9      5     10      8      6      3      4      1
      J2      6     10      7      3      4      5      2      1      9      8
      J3      1      3      9      8      2     10      7      6      4      5
  """), sep=r'\s+')

    df_d = pd.read_csv(io.StringIO("""
teammate proj_A proj_B proj_C proj_D proj_E proj_F proj_G proj_H proj_I proj_J
      A1      4      5      9      3      8     10      2      1      6      7
      A2      5      4     10      2      8      9      1      3      7      6
      A3      4      8      7      6      9      3      5      2     10      1
      B1      5      4     10      8      3      2      1      7      9      6
      B2      8      10     6      7      9      5      4      1      3      2
      B3      3      1      4      9     10      2      6      8      7      5
      C1      1      4      7      8      3      2      6     10      5      9
      C2      9      8      3      5     10      1      2      4      6      7
      C3      4      3      8      9      6     10      2      1      7      5
      D1      6      7      2     10      5      4      8      9      1      3
      D2      3      8      1      2      5      4      6      7     10      9
      D3      1      3      8      9      2      4      5     10      6      7
      E1      3      1      7     10      9      6      5      4      8      2
      E2      10     4      5      7      1      3      8      9      2      6
      E3      7      9      2      8      3     10      5      1      4      6
      F1      9      2      1      3      6      5      8      7     10      4
      F2      1      7      9      2      6      4      3      5      8     10
      F3      1     10      6      2      4      5      7      9      3      8
      G1      3      2      6      5      7     10      4      8      1      9
      G2      5     10      2      9      4      1      6      7      8      3
      G3      7      5      1      2      6      3      9      4     10      8
      H1      7      1      5     10      4      9      6      8      2      3
      H2      4      7      3     10      8      2      1      6      9      5
      H3      10     8      6      5      3      7      4      1      9      2
      I1      2      1      9      3      6      4     10      8      7      5
      I2      9      8     10      2      3      4      7      6      5      1
      I3      6      8      5      1      2      4      3      9      7     10
      J1      2      9      7      5      1     10      4      3      8      6
      J2      5      8      1      4     10      7      3      2      6      9
      J3      8      3      4     10      2      5      6      9      1      7
    """), sep=r'\s+')

    df_e = pd.read_csv(io.StringIO("""
employee proj_A proj_B proj_C proj_D proj_E proj_F proj_G proj_H proj_I proj_J
      A1      9      6      3      7      8      1     10      2      4      5
      A2      9      8     10      7      3      1      6      2      5      4
      A3      9      8      6      3      1      7      2      5     10      4
      B1      8      9      6      1      3      7     10      2      5      4
      B2      9      6      3      8      7      5      1      2     10      4
      B3      9      2      6      7     10      3      1      4      8      5
      C1      9      3      7      6      1      8      4     10      2      5
      C2      9      3      8      7      6      1      5      2     10      4
      C3      9      3      7      6     10      8      4      1      2      5
      D1      3      6      9      1      8      7      5      2      4     10
      D2      9      3      8      7      6      1      5      2      4     10
      D3      9      3      8      1      7      6      2     10      4      5
      E1      9      8      7      6      3      1      2      4     10      5
      E2      9      6      3      7     10      4      8      1      2      5
      E3      9      6      3      7      1      5      8      2     10      4
      F1      9      6      8      1      3      4     10      2      7      5
      F2      9      8      7      3      6      1     10      2      4      5
      F3      9      3      6      1      2      8     10      4      7      5
      G1      9      3      6      7      8      4      1      2     10      5
      G2      9      3      6      1      8      7      2      4     10      5
      G3      3      6      1      9      8      7      2     10      4      5
      H1      9     10      8      6      3      1      7      2      4      5
      H2      9      6      3      1      2      7      8     10      4      5
      H3      9      3      6      1      7      8      5     10      4      2
      I1      2      9      3      6      7      8      1     10      5      4
      I2      9      1      5      6      8      3      4      7     10      2
      I3      3      9      7     10      5      2      8      6      4      1
      J1      9      2      7      6      3      8     10      4      1      5
      J2      9      6     10      1      7      3      2      8      4      5
      J3      9     10      2      4      7      6      3      1      8      5
    """), sep=r'\s+')

    pm_a = ProjectMatcher.from_dataframe(df_a)
    pm_b = ProjectMatcher.from_dataframe(df_b)
    pm_c = ProjectMatcher.from_dataframe(df_c)
    pm_d = ProjectMatcher.from_dataframe(df_d)
    pm_e = ProjectMatcher.from_dataframe(df_e)

    # mp.set_start_method('spawn')
    n_cpu = cpu_count()

    print()
    print("[[DATAFRAME A PROJECT MATCHINGS - should produce 3 options]]")

    pm_a.match(10**4, True)

    print("[[DATAFRAME B PROJECT MATCHINGS - should produce 4 options]]")

    pm_b.match(10**4, True)

    print("[[DATAFRAME C PROJECT MATCHINGS - should produce 4 options]]")
    n_cycles = 10**4 // n_cpu
    n_total  = n_cycles * n_cpu
    print(
        f"{f'[{n_total} cycles total - {n_cycles} approx per CPU...]':^60}")
    print(f"{f'[Running on {n_cpu} CPUs...]':^60}")

    pm_c.multiproc_match(n_total, True)

    print("[[DATAFRAME D PROJECT MATCHINGS - should produce 10 options]]")
    n_cycles = 3 * 10**4 // n_cpu
    n_total  = n_cycles * n_cpu
    print(f"{f'[{n_total} cycles total - {n_cycles} approx per CPU...]':^60}")
    print(f"{f'[Running on {n_cpu} CPUs...]':^60}")

    pm_d.multiproc_match(n_total, report=True, num_procs=n_cpu)

    print(f"[[DATAFRAME E PROJECT MATCHINGS - "
          f"(difficult) should produce 1 option]]")
    n_cycles = 4 * 10**5 // n_cpu
    n_total  = n_cycles * n_cpu
    print(f"{f'[{n_total} cycles total - {n_cycles} approx per CPU...]':^60}")
    print(f"{f'[Running on {n_cpu} CPUs...]':^60}")

    # This is a difficult project to distribute to because I weighted the
    # projects heavily in favor of a few of them to simulate the teammates
    # being very interested in them and choosing them within their top few
    # choices - or close to that. It's not possible to generate a matrix of
    # all 1's and 2's with this one.
    pm_e.multiproc_match(n_total, True, stdev_weight=5000)