codegoblin

stablefitness.py

May 13th, 2020
155
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 38.57 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. """ A project matcher modelled after the Gale-Shapley algorithm to solve the
  3. stable marriage problem with respect to teammates and projects. The teammates
  4. represent the eligible suitors, and the projects the fickle object of their
  5. efforts to establish an engagement.
  6.  
  7. The same preference is shared by both the Project and each Teammate. The
  8. Teammates want to end up on their first choice for Projects, and the Projects
  9. also want them to get their first choice, or at least the Project closest to
  10. the tops of their lists ordered from most interested to least.
  11.  
  12. The ProjectMatcher adds a variation to the GS algorithm where the preferred
  13. distribution is not entirely based on the order of Teammate/Project preference,
  14. but also on the statistical mean and standard deviation of the overall
  15. teammate/project distribution. This approach can produce better matchings, in
  16. that all participants tend to mostly get their first picks, and some their
  17. second, and rarely are any stuck with their 3rd pick or worse.
  18.  
  19. Parts of the algorithm are implemented in the classes as methods. Generators are
  20. used to provide the list of 'free' teammates and lists of project preferences,
  21. which isolates and moves some of the looping that one might expect to see in the
  22. ProjectMatcher.match() loop, which is most recognizably the GS algorithm in the
  23. application.
  24.  
  25. The Gale-Shapley algorithm for reference:
  26.  
  27.    algorithm stable_matching is
  28.        Initialize all m ∈ M and w ∈ W to free
  29.        while ∃ free man m who still has a woman w to propose to do
  30.            w := first woman on m's list to whom m has not yet proposed
  31.            if w is free then
  32.                (m, w) become engaged
  33.            else some pair (m', w) already exists
  34.                if w prefers m to m' then
  35.                    m' becomes free
  36.                    (m, w) become engaged
  37.                else
  38.                    (m', w) remain engaged
  39.                end if
  40.            end if
  41.        repeat
  42.        
  43. Methods that implement a noticeable piece of the algorithm:
  44.  
  45.    Teammate.gen_project_preferences_cycle()  - The teammate's ordered list of
  46.                                                projects to propose to.
  47.    Project.consider()                        - The Project decides whether it
  48.                                                prefers one engagement over
  49.                                                another.
  50.    ProjectMatcher.gen_free_teammates_cycle() - Generates the list of 'free'
  51.                                                Teammates still looking for a
  52.                                                project.
  53.    ProjectMatcher.match()                    - The main loop that drives the
  54.                                                activity of proposals and
  55.                                                engagements.
  56. """
  57.  
  58. import io
  59. # import multiprocessing as mp
  60. import os
  61. # import pandas as pd
  62. import random
  63. import signal
  64. import sys
  65. import time
  66. from bisect import bisect_left
  67. from copy import deepcopy
  68. from itertools import cycle, accumulate
  69. from multiprocessing import cpu_count, Manager, Process, Value
  70. from random import randint, shuffle
  71. from statistics import mean, stdev
  72.  
  73.  
  74. __all__ = ('Teammate', 'Project', 'ProjectMatcher')
  75.  
  76.  
  77. class Teammate:
  78.     """ Teammate - the suitor.
  79.    Represents a teammate to be placed on projects as a romantic suitor in the
  80.    stable marriage problem.
  81.    """
  82.     __slots__ = ('_id', '_projects', '_rankings', '_prefs', '_project',
  83.                  '_weights')
  84.    
  85.     def __init__(self, identifier, projects, rankings):
  86.         """ Constructor.
  87.        @param identifier   - An ID assigned to the teammate.
  88.        @param projects     - The list of projects available.
  89.        @param rankings     - The rankings in order that the teammate gave the
  90.                              projects. These must be in the same order as the
  91.                              list/iterable passed to 'projects'.
  92.        """
  93.         self._id       = identifier
  94.         self._projects = list(projects)
  95.         self._rankings = dict(zip((p.id for p in projects), rankings))
  96.         self._weights  = [2**i for i in range(len(projects), 0, -1)]
  97.         self._prefs    = None
  98.         self._project  = None
  99.  
  100.     @property
  101.     def id(self):
  102.         """ The ID given to the teammate. """
  103.         return self._id
  104.  
  105.     def rank(self, project):
  106.         """ Returns the rank the teammate gave the project. """
  107.         return self._rankings[project.id]
  108.  
  109.     def clear(self):
  110.         """ Removes the teammate from the project and clears its state."""
  111.         self._project = None
  112.  
  113.     def reset(self):
  114.         """ Clears the teammate's state and resets its preferences list. """
  115.         self._project = None
  116.         self._prefs   = self.gen_project_preferences_cycle()
  117.        
  118.     @property
  119.     def is_free(self):
  120.         """ Indicates engagement status.
  121.        True if the teammate isn't 'engaged' to a project. False otherwise.
  122.        """
  123.         return self._project is None
  124.        
  125.     @property
  126.     def is_engaged(self):
  127.         """ True if the teammate is 'engaged' to a project; False otherwise. """
  128.         return self._project is not None
  129.  
  130.     def propose(self, project):
  131.         """ Have the teammate propose to the project.
  132.        If accepted, the teammate's engage() method will be invoked by the
  133.        project.
  134.        @param project - The project to propose to.
  135.        @return        - True if the proposal was accepted, False otherwise.
  136.        """
  137.         return project.consider(self)
  138.  
  139.     def engage(self, project):
  140.         """ Establishes an engagement.
  141.        If a project accepts a proposal, it invokes this method. Adds the
  142.        teammate to the project's roster and changes the teammate's state to
  143.        'engaged' by setting its _project attribute to the project.
  144.         """
  145.         self._project = project
  146.         project.add(self)
  147.  
  148.     def disengage(self, project):
  149.         """ Breaks off engagement.
  150.        If a project breaks the engagement with the teammate, this is invoked.
  151.        Removes the teammate from the project and sets the teammate's state back
  152.        to 'free'.
  153.        @param project - The project that requested the disengagement.
  154.        """
  155.         self._project = None
  156.         project.remove(self)
  157.  
  158.     @property
  159.     def project_preferences(self):
  160.         """ Returns the list of project preferences as a cyclical generator. """
  161.         if not self._prefs:
  162.             self._prefs = self.gen_project_preferences_cycle()
  163.         return self._prefs
  164.  
  165.     def gen_project_preferences_cycle(self):
  166.         """ Creates a cyclical list of project preferences as a generator.
  167.        Sometimes the 2nd choice is added to the front of the list, and less
  168.        often also the 3rd, and rarely a random weighted sample (favoring best
  169.        scored projects). This breaks the tendency for all first comers to get
  170.        their first picks sticking subsequent teammates with their 3rd, 4th, or
  171.        worse pick. Starting at less desirable scores causes more milling for
  172.        better matches when a teammate gets disengaged and has to propose again.
  173.        """
  174.         prefs = sorted(self._projects, key=self.rank)
  175.         rndv  = randint(1, 100)
  176.         if rndv > 98:    # 2%
  177.             len_prefs = len(prefs)
  178.             weights   = self._weights
  179.             prefs     = wsample(prefs, weights, len_prefs // 2) + prefs
  180.         elif rndv > 88:  # 10%
  181.             prefs = [prefs[2], prefs[1]] + prefs
  182.         elif rndv > 58:  # 30%
  183.             prefs = [prefs[1]] + prefs
  184.         return cycle(prefs)
  185.        
  186.     def __lt__(self, other):
  187.         """ The id's are compared to determine less-than.
  188.        The projects need this to keep their internal roster lists in order for
  189.        project comparisons.
  190.        """
  191.         return self._id < other.id
  192.        
  193.     def __eq__(self, other):
  194.         """ The id's are compared for equality. """
  195.         return self._id == other.id
  196.  
  197.  
  198. class Project:
  199.     """ The fickle object of the suitors' efforts.
  200.    Represents a project in the placement strategy modeled after the Gale-
  201.    Shapley algorithm. The Project is the object of the suitors' efforts
  202.    to engage with. It can be 'engaged' to as many suitors as there are seats
  203.    on the project.
  204.    """
  205.     __slots__ = ('_id', '_seats', '_team')
  206.    
  207.     def __init__(self, identifier, seats):
  208.         """ Constructor.
  209.        @param identifier   - The id of the project.
  210.        @param seats        - The number of seats available.
  211.        """
  212.         self._id    = identifier
  213.         self._seats = seats
  214.         self._team  = []
  215.  
  216.     @property
  217.     def id(self):
  218.         """ The id given to the project when created. """
  219.         return self._id
  220.  
  221.     @property
  222.     def score(self):
  223.         """ The sum of the ranks each teammate on the roster gave the project.
  224.        """
  225.         return sum(tm.rank(self) for tm in self._team)
  226.  
  227.     @property
  228.     def ranks(self):
  229.         """ The ranks each teammate gave the project.
  230.        Returns a generator producing the rank each teammate gave the project.
  231.        """
  232.         return (tm.rank(self) for tm in self._team)
  233.  
  234.     def add(self, teammate):
  235.         """ Adds the teammate to the roster. The internal list is sorted. """
  236.         self._team.append(teammate)
  237.         self._team.sort()
  238.  
  239.     def remove(self, teammate):
  240.         """ Removes the teammate from the project. """
  241.         self._team.remove(teammate)
  242.  
  243.     def clear(self):
  244.         """ Clears the project's team roster. """
  245.         self._team = []
  246.  
  247.     def consider(self, teammate):
  248.         """ Consider engagement proposal.
  249.        If accepted, the teammate's engage() method is invoked; if the project's
  250.        roster is already full, one of the teammates will have its disengage()
  251.        method invoked, which removes it from the project to make space for the
  252.        new fiance.
  253.        @param teammate - The teammate submitting the proposal.
  254.        @return         - True if accepted; False otherwise.
  255.        """
  256.         rank = teammate.rank(self)
  257.         team = self._team
  258.         if self.is_free or any(rank < tm.rank(self) for tm in team):
  259.             if self.is_engaged:
  260.                 shuffle(team)
  261.                 tm_out = max(team, key=self.rank)
  262.                 tm_out.disengage(self)
  263.             teammate.engage(self)
  264.             return True
  265.         return False
  266.        
  267.     @property
  268.     def is_free(self):
  269.         """ True if the project has available seats; False otherwise. """
  270.         return len(self._team) < self._seats
  271.        
  272.     @property
  273.     def is_engaged(self):
  274.         """ True if there are no empty seats; False otherwise. """
  275.         return len(self._team) >= self._seats
  276.  
  277.     @property
  278.     def roster_str(self):
  279.         """ Returns a string that can be printed to show which teammates are on
  280.        the project.
  281.        """
  282.         return f"{self.id}: {[(tm.id, tm.rank(self)) for tm in self._team]}"
  283.        
  284.     @property
  285.     def team(self):
  286.         """ Returns a copy of the list of teammates on the project.
  287.        """
  288.         return list(self._team)
  289.        
  290.     def rank(self, teammate):
  291.         """ Returns the ranking the teammate gave the project.
  292.        """
  293.         return teammate.rank(self)
  294.        
  295.     def __eq__(self, other):
  296.         """ Two projects are equal if they have the same id and roster. """
  297.         return self._id == other.id and self._team == other.team
  298.        
  299.     def __lt__(self, other):
  300.         """ The id's and team lists are compared to determine less-than. """
  301.         return self._id < other.id and self._team < other.team
  302.  
  303.  
  304. class ProjectMatcher:
  305.     """ The matchmaking service.
  306.    The ProjectMatcher has the teammates attempt engagements with the projects
  307.    until all teammates and projects are fully 'engaged'. The suitability of the
  308.    matchings are evaluated and given a score, then the process is repeated any
  309.    number of times while keeping track of the distributions with the best
  310.    (lowest) score.
  311.    """
  312.     __slots__ = ('_projects', '_teammates', '_free_teammates', '_shared_count',
  313.                  '_stdev_weight')
  314.    
  315.     def __init__(self, projects, teammates, stdev_weight=1):
  316.         """ Constructor.
  317.        @param projects     - The projects the teammates will try to engage.
  318.        @param teammates    - The suitors seeking project engagements.
  319.        @param stdev_weight - Value multiplied by the standard deviation of the
  320.                              mean sum of rank values of each project to
  321.                              determine the overall score of a distribution.
  322.        """
  323.         self._projects       = list(projects)
  324.         self._teammates      = list(teammates)
  325.         self._free_teammates = None
  326.         self._shared_count   = None
  327.         self._stdev_weight   = stdev_weight
  328.  
  329.     @staticmethod
  330.     def from_dataframe(df):
  331.         """ Creates a ProjectMatcher instance using a Pandas dataframe.
  332.        The header row is used to assign id's to the projects, and the leftmost
  333.        column of each row the teammate id's. The rank each teammate gave for
  334.        each project aligns with the column headers. This method assumes an
  335.        even distribution of seats among the projects for the teammates.
  336.        @param df - The Pandas dataframe.
  337.        @return   - A ProjectMatcher instance.
  338.        """
  339.         n_seats   = len(df.values) // (len(df.columns) - 1)
  340.         projects  = [Project(proj, n_seats) for proj in df.columns[1:]]
  341.         teammates = [Teammate(tm[0], projects, tm[1:]) for tm in df.values]
  342.         return ProjectMatcher(projects, teammates)
  343.  
  344.     def match(self, num_iterations=1000, report=False, timeout=0,
  345.               stdev_weight=-1):
  346.         """ Matches up Project's with Teammate's.
  347.        Runs the loop that iterates over each teammate and has them propose
  348.        to each project in their ordered lists of project preferences. The loop
  349.        continues until all teammates are 'engaged', then the distribution is
  350.        evaluated. If the distribution is the best so far, it's retained.
  351.        The cycle is repeated 'num_iterations' times while keeping a list of the
  352.        best scoring distributions. The list of best distributions is printed
  353.        after the cycles are done. The first block of loops is most noticeably
  354.        the Gale-Shapley algorithm.
  355.        @param num_iterations   - The number of times to generate a project
  356.                                  distribution to find the best scoring
  357.                                  combinations. Takes some experimentation to
  358.                                  find the plateau where the same set is
  359.                                  reliably generated on successive runs.
  360.        @param report           - If True, a printout of the best matches is
  361.                                  done at the end of the iterations.
  362.        @param timeout          - If non-zero, the iterations will stop when
  363.                                  the timeout expires, or 'num_iterations' has
  364.                                  been met - whichever comes first.
  365.        @param stdev_weight     - Value multiplied by the standard deviation of
  366.                                  the mean rank values of each project to
  367.                                  determine the overall score of a distribution.
  368.                                  If -1 (default) the weight given to the
  369.                                  constructor is used instead. Raising this
  370.                                  value may improve distributions with 4 or
  371.                                  worse values showing up in output.
  372.        @return                 - A tuple of the best score and list of
  373.                                  project distributions with that score.
  374.        """
  375.         if stdev_weight > -1:
  376.             self._stdev_weight = stdev_weight
  377.            
  378.         best_score    = float('inf')
  379.         best_distribs = []
  380.         start         = time.time()
  381.         random.seed()
  382.        
  383.         for i in range(num_iterations):
  384.             for teammate in self.free_teammates:
  385.                 for project in teammate.project_preferences:
  386.                     accepted = teammate.propose(project)
  387.                     if accepted:
  388.                         break
  389.  
  390.             score  = self.evaluate()
  391.  
  392.             if score <= best_score and self._projects not in best_distribs:
  393.                 best_dist = deepcopy(self._projects)
  394.                 if score < best_score:
  395.                     best_score    = score
  396.                     best_distribs = [best_dist]
  397.                 else:
  398.                     best_distribs.append(best_dist)
  399.  
  400.             if timeout and time.time() - start > timeout:
  401.                 print(f"Process {os.getpid()} timed out at "
  402.                       f"{time.time()-start:.3f} seconds "
  403.                       f"with {i} iterations completed...",
  404.                       file=sys.stderr, flush=True)
  405.                 break
  406.             elif self._shared_count:
  407.                 with self._shared_count.get_lock():
  408.                     self._shared_count.value += 1
  409.                     if self._shared_count.value >= num_iterations:
  410.                         break
  411.            
  412.             self.reset()
  413.            
  414.         if report:
  415.             self.report(best_distribs)
  416.            
  417.         return best_score, best_distribs
  418.            
  419.     @staticmethod
  420.     def report(best_distribs):
  421.         """ Prints out the best scoring assignments.
  422.        @param best_distribs - The list of best scoring project distributions.
  423.        """
  424.         print("PROJECT ASSIGNMENTS:")
  425.         for i, distrib in enumerate(best_distribs, 1):
  426.             print(f"    option {i}")
  427.             for project in distrib:
  428.                 print(f"        {project.roster_str}")
  429.         print()
  430.        
  431.     def multiproc_match(self, num_iterations=1000, report=False, timeout=0,
  432.                         stdev_weight=-1, num_procs=0):
  433.         """ Match making in parallel.
  434.        Same as match(), but runs the ProjectMatcher in 'num_procs' number
  435.        of processes in parallel, then collects and reports the resulting best
  436.        distributions. It's good to limit the number of processes to the number
  437.        of CPU cores on the system (the default).
  438.        @param num_iterations   - The number times to iteratively create
  439.                                  project distributions to find the best ones.
  440.        @param report           - If True, a printout of the best matches is
  441.                                  done at the end of the iterations.
  442.        @param timeout          - If non-zero, the iterations will stop when
  443.                                  the timeout expires, or 'num_iterations' has
  444.                                  been met - whichever comes first.
  445.        @param stdev_weight     - Value multiplied by the standard deviation of
  446.                                  the mean rank values of each project to
  447.                                  determine the overall score of a distribution.
  448.                                  If -1 (default) the weight given to the
  449.                                  constructor is used instead. Raising this
  450.                                  may improve some distributions that have 4's
  451.                                  or worse showing up in output distributions.
  452.        @param num_procs        - The number of processes to run in parallel.
  453.                                  Defaults to the number of processor cores on
  454.                                  the system if left at 0.
  455.        @return                 - A tuple of the best score and list of
  456.                                  project distributions with that score.
  457.        """
  458.         if stdev_weight > -1:
  459.             self._stdev_weight = stdev_weight
  460.            
  461.         with Manager() as manager:
  462.             SCORE, DISTR, LOCK = 0, 1, 2  # shared_data list indices.
  463.             iter_count         = Value('L', 0)
  464.             best_distribs      = manager.list()
  465.             shared_data        = manager.list([float('inf'),
  466.                                                best_distribs,
  467.                                                manager.RLock()])
  468.             processes = []
  469.             num_procs = num_procs or cpu_count()
  470.             for _ in range(num_procs):
  471.                 process = Process(target=self._mp_match,
  472.                                   args=(num_iterations, shared_data,
  473.                                         iter_count, timeout, report))
  474.                 processes.append(process)
  475.                
  476.             for p in processes:
  477.                 p.start()
  478.             for p in processes:
  479.                 p.join()
  480.  
  481.             score    = shared_data[SCORE]
  482.             distribs = list(best_distribs)
  483.                
  484.             if report:
  485.                 self.report(distribs)
  486.            
  487.             return score, distribs
  488.  
  489.     def _mp_match(self, num_iter, best, shared_count, duration, report):
  490.         """ The method run in the processes by multiprocess_match().
  491.        The internal use method that runs matcher.match() and updates 'best'
  492.        with the best project distributions found.
  493.        @param num_iter     - The number of iterations to run.
  494.        @param best         - A remotely managed list where the first param is
  495.                              the best score so far, 2nd is a remote list of the
  496.                              best project distributions, 3rd is a remote RLock.
  497.        @param shared_count - A multiprocessing.Value object used between
  498.                              processes to determine when num_iter has been met.
  499.        @param duration     - The timeout value.
  500.        @param report       - If true, print out messages.
  501.        """
  502.         SCORE, DISTR, LOCK = 0, 1, 2  # 'best' list indices.
  503.         self._shared_count = shared_count
  504.         start              = time.time()
  505.         score, distribs    = self.match(num_iter, False, duration)
  506.         with best[LOCK]:
  507.             if score < best[SCORE]:
  508.                 best[SCORE]    = score
  509.                 best[DISTR][:] = distribs
  510.             elif score == best[SCORE]:
  511.                 m_dists = best[DISTR]
  512.                 dists   = [d for d in distribs if d not in m_dists]
  513.                 m_dists.extend(dists)
  514.         if report:
  515.             print(f"Process {os.getpid()} completed "
  516.                   f"in {time.time() - start:.3f} seconds.")
  517.  
  518.     def evaluate(self):
  519.         """ Scores the current project/teammate distribution.
  520.        Scores the current distribution of teammates on projects. The lower
  521.        the better. The lowest average team score (the score of each teammate
  522.        added) among the projects plus the standard deviation.
  523.        """
  524.         s = [sum(10**rank for rank in p.ranks) for p in self._projects]
  525.         m = mean(s)
  526.         d = abs(stdev(s, m))
  527.         return m + (d * self._stdev_weight)
  528.  
  529.     def gen_free_teammates_cycle(self):
  530.         """ Creates a cyclical list of teammates who are 'free'.
  531.        Teammates that are currently 'engaged' are skipped each cycle, but can
  532.        reappear in the list if they are bumped from a project. The cycles
  533.        continue until all teammates are 'engaged'. The list of teammates is
  534.        shuffled each cycle to add variety to the order engagements are
  535.        attempted.
  536.        """
  537.         teammates = self.teammates
  538.         while any(tm.is_free for tm in teammates):
  539.             shuffle(teammates)
  540.             for tm in filter(lambda tm: tm.is_free, teammates):
  541.                 yield tm
  542.                    
  543.     @property
  544.     def projects(self):
  545.         """ Returns a copy of the list of projects. """
  546.         return list(self._projects)
  547.        
  548.     @property
  549.     def teammates(self):
  550.         """ Returns a copy of the list of teammates. """
  551.         return list(self._teammates)
  552.        
  553.     @property
  554.     def free_teammates(self):
  555.         """ Returns the generator that produces 'free' teammates. """
  556.         if not self._free_teammates:
  557.             self._free_teammates = self.gen_free_teammates_cycle()
  558.         return self._free_teammates
  559.  
  560.     def reset(self):
  561.         """ Resets the simulation.
  562.        Resets the projects, clearing their rosters, and setting all teammates
  563.        back to 'free', and creates a new free_teammates generator.
  564.        """
  565.         self._free_teammates = self.gen_free_teammates_cycle()
  566.         [p.clear() for p in self._projects]
  567.         [t.reset() for t in self._teammates]
  568.  
  569.  
  570. def wsample(population, weights, k=1):
  571.     """ Weighted sample.
  572.    Returns a sample of size k who’s selection is influenced by the weight
  573.    values. Optimized for speed by minimizing the need to recalculate the
  574.    weights each time a selection is made internally.
  575.    @param population - The population to sample.
  576.    @param weights    - The biases to apply to each item in the population.
  577.    @param k          - The size of the sample to take.
  578.    @return           - The sample.
  579.    """
  580.     wts   = list(weights)
  581.     accum = list(accumulate(wts))
  582.     total = accum[-1]
  583.     cull  = 0.0
  584.     tolr  = 0.6 * total
  585.     sampl = {}
  586.     while len(sampl) < k:
  587.         randval = random.random()
  588.         index   = bisect_left(accum, total * randval)
  589.         if index not in sampl:
  590.             sampl[index] = population[index]
  591.             cull        += accum[index]
  592.             if cull > tolr:
  593.                 for i in sampl:
  594.                     wts[i] = 0
  595.                 cull   = 0.0
  596.                 accum  = list(accumulate(wts))
  597.                 total  = accum[-1]
  598.                 tolr   = 0.6 * total
  599.     return list(sampl.values())
  600.  
  601.  
  602. class DummyPandas:
  603.     """ A simple stand-in to parse CSV text like Pandas does.
  604.    A simple immitation of a Pandas class that works with the code in the
  605.    main block below.
  606.    """
  607.     def __init__(self, columns, rows):
  608.         """ Constructor. """
  609.         self._columns = columns
  610.         self._rows    = rows
  611.        
  612.     @property
  613.     def values(self):
  614.         """ Returns the "rows" of the object. """
  615.         return self._rows
  616.        
  617.     @property
  618.     def columns(self):
  619.         """ Returns the "columns" of the object. """
  620.         return self._columns
  621.        
  622.     @staticmethod
  623.     def read_csv(file, sep):
  624.         """ Parses simple CSV delimited by whitespace.
  625.        Returns an instance of DummyPandas that can be used as a dataframe.
  626.        """
  627.         data_gen = DummyPandas._data_gen(file)
  628.         headers  = next(data_gen)
  629.         row_data = [row for row in data_gen]
  630.         return DummyPandas(headers, row_data)
  631.        
  632.     @staticmethod
  633.     def _data_gen(file):
  634.         """ A generator that reads the file and produces lists of data. """
  635.         for line in file:
  636.             data = line.split()
  637.             if data:
  638.                 data = [int(d) if str.isnumeric(d) else d for d in data]
  639.                 yield data
  640.                
  641.                
  642. def sigint_handler(sig, frame):
  643.     print(f"Ctrl+C received! Process {os.getpid()} exiting...")
  644.     sys.exit(0)
  645.  
  646.  
  647. if __name__ == '__main__':
  648.     signal.signal(signal.SIGINT, sigint_handler)
  649.    
  650.     # Comment this out and uncomment the pandas import at the top of the file
  651.     # if you want to use the real Pandas.
  652.     pd = DummyPandas
  653.  
  654.     df_a = pd.read_csv(io.StringIO("""
  655.        teammate  proj_A  proj_B  proj_C  proj_D  proj_E
  656.               A       1       5       4       3       2
  657.               B       2       4       5       1       3
  658.               C       2       3       5       1       4
  659.               D       3       1       5       4       2
  660.               E       3       4       2       5       1
  661.               F       1       3       2       5       4
  662.               G       5       3       1       2       4
  663.               H       3       1       5       4       2
  664.               I       1       5       4       2       3
  665.               J       2       4       3       1       5
  666.               K       2       1       3       4       5
  667.               L       1       5       4       3       2
  668.               M       4       1       5       3       2
  669.               N       4       2       1       5       3
  670.               O       2       1       4       3       5
  671.        """), sep=r'\s+')
  672.        
  673.     df_b = pd.read_csv(io.StringIO("""
  674.        employee  proj_A  proj_B  proj_C  proj_D  proj_E
  675.              A1       1       5       3       4       2
  676.              B1       5       4       1       2       3
  677.              C1       2       3       4       1       5
  678.              A2       4       2       1       3       5
  679.              B2       4       5       3       2       1
  680.              C2       3       1       2       5       4
  681.              A3       1       2       4       3       5
  682.              B3       2       3       1       5       4
  683.              C3       5       3       4       1       2
  684.              A4       4       5       3       2       1
  685.              B4       5       3       4       2       1
  686.              C4       1       2       3       4       5
  687.              A5       1       3       2       5       4
  688.              B5       2       1       3       5       4
  689.              C5       2       1       4       5       4
  690.          """), sep=r'\s+')
  691.  
  692.     df_c = pd.read_csv(io.StringIO("""
  693. employee proj_A proj_B proj_C proj_D proj_E proj_F proj_G proj_H proj_I proj_J
  694.      A1      1      6      8      9      2      7      4      5     10      3
  695.      A2     10      8      1      5      3      4      6      9      7      2
  696.      A3      4      2     10      8      7      9      5      6      1      3
  697.      B1      9      8      2      6      5      7      1     10      3      4
  698.      B2      6      3      8      2     10      7      1      9      5      4
  699.      B3      9      6      4      2      1     10      7      5      3      8
  700.      C1      7      9      2      1      8      4     10      6      3      5
  701.      C2      3      6      5      1      8      4      2     10      7      9
  702.      C3      5      4      2      3      1      8     10      7      9      6
  703.      D1      2      9      4     10      3      5      8      7      1      6
  704.      D2      8      4     10      2      1      3      9      5      6      7
  705.      D3      5      2      8      6      1      7     10      4      9      3
  706.      E1      6      5      4      2      8      1     10      3      9      7
  707.      E2      5      1      3      6      9      7      2     10      4      8
  708.      E3      5     10      3      6      2      7      8      1      9      4
  709.      F1      7      1      4      9      2      6      5     10      3      8
  710.      F2      1      3      4      9      7     10      5      6      8      2
  711.      F3      8      4      1     10      5      3      6      9      7      2
  712.      G1      2      8     10      3      5      1      7      6      9      4
  713.      G2      4      5      9      1     10      3      7      8      2      6
  714.      G3      6      5      9      7      8      1      2      3      4     10
  715.      H1      7      8     10      5      4      9      6      2      1      3
  716.      H2      5      1      7     10      4      2      9      8      6      3
  717.      H3      9      5     10      1      8      3      4      2      6      7
  718.      I1      3      1     10      2      8      9      7      5      4      6
  719.      I2      2      4      6      3     10      5      8      7      1      9
  720.      I3      4      9     10      2      3      6      5      1      8      7
  721.      J1      2      7      9      5     10      8      6      3      4      1
  722.      J2      6     10      7      3      4      5      2      1      9      8
  723.      J3      1      3      9      8      2     10      7      6      4      5
  724.  """), sep=r'\s+')
  725.  
  726.     df_d = pd.read_csv(io.StringIO("""
  727. teammate proj_A proj_B proj_C proj_D proj_E proj_F proj_G proj_H proj_I proj_J
  728.      A1      4      5      9      3      8     10      2      1      6      7
  729.      A2      5      4     10      2      8      9      1      3      7      6
  730.      A3      4      8      7      6      9      3      5      2     10      1
  731.      B1      5      4     10      8      3      2      1      7      9      6
  732.      B2      8      10     6      7      9      5      4      1      3      2
  733.      B3      3      1      4      9     10      2      6      8      7      5
  734.      C1      1      4      7      8      3      2      6     10      5      9
  735.      C2      9      8      3      5     10      1      2      4      6      7
  736.      C3      4      3      8      9      6     10      2      1      7      5
  737.      D1      6      7      2     10      5      4      8      9      1      3
  738.      D2      3      8      1      2      5      4      6      7     10      9
  739.      D3      1      3      8      9      2      4      5     10      6      7
  740.      E1      3      1      7     10      9      6      5      4      8      2
  741.      E2      10     4      5      7      1      3      8      9      2      6
  742.      E3      7      9      2      8      3     10      5      1      4      6
  743.      F1      9      2      1      3      6      5      8      7     10      4
  744.      F2      1      7      9      2      6      4      3      5      8     10
  745.      F3      1     10      6      2      4      5      7      9      3      8
  746.      G1      3      2      6      5      7     10      4      8      1      9
  747.      G2      5     10      2      9      4      1      6      7      8      3
  748.      G3      7      5      1      2      6      3      9      4     10      8
  749.      H1      7      1      5     10      4      9      6      8      2      3
  750.      H2      4      7      3     10      8      2      1      6      9      5
  751.      H3      10     8      6      5      3      7      4      1      9      2
  752.      I1      2      1      9      3      6      4     10      8      7      5
  753.      I2      9      8     10      2      3      4      7      6      5      1
  754.      I3      6      8      5      1      2      4      3      9      7     10
  755.      J1      2      9      7      5      1     10      4      3      8      6
  756.      J2      5      8      1      4     10      7      3      2      6      9
  757.      J3      8      3      4     10      2      5      6      9      1      7
  758.    """), sep=r'\s+')
  759.  
  760.     df_e = pd.read_csv(io.StringIO("""
  761. employee proj_A proj_B proj_C proj_D proj_E proj_F proj_G proj_H proj_I proj_J
  762.      A1      9      6      3      7      8      1     10      2      4      5
  763.      A2      9      8     10      7      3      1      6      2      5      4
  764.      A3      9      8      6      3      1      7      2      5     10      4
  765.      B1      8      9      6      1      3      7     10      2      5      4
  766.      B2      9      6      3      8      7      5      1      2     10      4
  767.      B3      9      2      6      7     10      3      1      4      8      5
  768.      C1      9      3      7      6      1      8      4     10      2      5
  769.      C2      9      3      8      7      6      1      5      2     10      4
  770.      C3      9      3      7      6     10      8      4      1      2      5
  771.      D1      3      6      9      1      8      7      5      2      4     10
  772.      D2      9      3      8      7      6      1      5      2      4     10
  773.      D3      9      3      8      1      7      6      2     10      4      5
  774.      E1      9      8      7      6      3      1      2      4     10      5
  775.      E2      9      6      3      7     10      4      8      1      2      5
  776.      E3      9      6      3      7      1      5      8      2     10      4
  777.      F1      9      6      8      1      3      4     10      2      7      5
  778.      F2      9      8      7      3      6      1     10      2      4      5
  779.      F3      9      3      6      1      2      8     10      4      7      5
  780.      G1      9      3      6      7      8      4      1      2     10      5
  781.      G2      9      3      6      1      8      7      2      4     10      5
  782.      G3      3      6      1      9      8      7      2     10      4      5
  783.      H1      9     10      8      6      3      1      7      2      4      5
  784.      H2      9      6      3      1      2      7      8     10      4      5
  785.      H3      9      3      6      1      7      8      5     10      4      2
  786.      I1      2      9      3      6      7      8      1     10      5      4
  787.      I2      9      1      5      6      8      3      4      7     10      2
  788.      I3      3      9      7     10      5      2      8      6      4      1
  789.      J1      9      2      7      6      3      8     10      4      1      5
  790.      J2      9      6     10      1      7      3      2      8      4      5
  791.      J3      9     10      2      4      7      6      3      1      8      5
  792.    """), sep=r'\s+')
  793.    
  794.     pm_a = ProjectMatcher.from_dataframe(df_a)
  795.     pm_b = ProjectMatcher.from_dataframe(df_b)
  796.     pm_c = ProjectMatcher.from_dataframe(df_c)
  797.     pm_d = ProjectMatcher.from_dataframe(df_d)
  798.     pm_e = ProjectMatcher.from_dataframe(df_e)
  799.    
  800.     # mp.set_start_method('spawn')
  801.     n_cpu = cpu_count()
  802.  
  803.     print()
  804.     print("[[DATAFRAME A PROJECT MATCHINGS - should produce 3 options]]")
  805.    
  806.     pm_a.match(10**4, True)
  807.  
  808.     print("[[DATAFRAME B PROJECT MATCHINGS - should produce 4 options]]")
  809.    
  810.     pm_b.match(10**4, True)
  811.  
  812.     print("[[DATAFRAME C PROJECT MATCHINGS - should produce 4 options]]")
  813.     n_cycles = 10**4 // n_cpu
  814.     n_total  = n_cycles * n_cpu
  815.     print(
  816.         f"{f'[{n_total} cycles total - {n_cycles} approx per CPU...]':^60}")
  817.     print(f"{f'[Running on {n_cpu} CPUs...]':^60}")
  818.          
  819.     pm_c.multiproc_match(n_total, True)
  820.    
  821.     print("[[DATAFRAME D PROJECT MATCHINGS - should produce 10 options]]")
  822.     n_cycles = 3 * 10**4 // n_cpu
  823.     n_total  = n_cycles * n_cpu
  824.     print(f"{f'[{n_total} cycles total - {n_cycles} approx per CPU...]':^60}")
  825.     print(f"{f'[Running on {n_cpu} CPUs...]':^60}")
  826.          
  827.     pm_d.multiproc_match(n_total, report=True, num_procs=n_cpu)
  828.  
  829.     print(f"[[DATAFRAME E PROJECT MATCHINGS - "
  830.           f"(difficult) should produce 1 option]]")
  831.     n_cycles = 4 * 10**5 // n_cpu
  832.     n_total  = n_cycles * n_cpu
  833.     print(f"{f'[{n_total} cycles total - {n_cycles} approx per CPU...]':^60}")
  834.     print(f"{f'[Running on {n_cpu} CPUs...]':^60}")
  835.    
  836.     # This is a difficult project to distribute to because I weighted the
  837.     # projects heavily in favor of a few of them to simulate the teammates
  838.     # being very interested in them and choosing them within their top few
  839.     # choices - or close to that. It's not possible to generate a matrix of
  840.     # all 1's and 2's with this one.
  841.     pm_e.multiproc_match(n_total, True, stdev_weight=5000)
Add Comment
Please, Sign In to add comment