bond_percolation_jugfile.py

# encoding: utf-8

"""
jugfile to run the bond percolation on the square grid experiment
"""

# Python 2/3 compatibility
from __future__ import (
    absolute_import, division, print_function, unicode_literals,
)

# http://python-future.org/compatible_idioms.html#zip-izip
from builtins import map, zip

from functools import reduce
import itertools

import numpy as np
import scipy.stats

from jug import Task, TaskGenerator, barrier
from jug.utils import CustomHash
import jug.mapreduce

import percolate
import percolate.hpc

import scripts.helpers as helpers
from scripts.bond_percolation_conf import (
    BOND_PERCOLATION_HPC_DATA_FILENAME
)

# script parameters
SYSTEM_DIMENSIONS = 2 ** np.arange(3, 11)
RUNS_PER_TASK = 10
NUMBER_OF_TASKS = 1000
NUMBER_OF_RUNS = NUMBER_OF_TASKS * RUNS_PER_TASK
PS = np.linspace(0.49, 0.51, num=101)
SPANNING_CLUSTER = True
UINT32_MAX = 4294967296  # 2 ** 32: seeds are drawn from [0, 2 ** 32)
SEED = 201508162248 % UINT32_MAX
ALPHA = 2 * scipy.stats.norm.cdf(-1.0)  # 1 sigma
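# Note: for bond percolation on the square lattice the critical occupation
# probability is exactly p_c = 1/2, so PS brackets the critical point.
# ALPHA = 2 * Phi(-1) is about 0.3173, the two-sided tail probability at one
# standard deviation, so the confidence intervals computed below are
# presumably 1-sigma intervals.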


@TaskGenerator
def prepare_percolation_graph(dimension):
    """Build the square grid of the given linear size and precompute the
    percolation graph structure"""
    graph = percolate.spanning_2d_grid(length=dimension)
    return percolate.percolate.percolation_graph(
        graph=graph, spanning_cluster=SPANNING_CLUSTER
    )


@TaskGenerator
def compute_convolution_factors_for_single_p(n, p):
    """Compute the binomial PMF (convolution factors) for n bonds at
    occupation probability p"""
    return percolate.percolate._binomial_pmf(
        n=n, p=p,
    )


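# The convolution in bond_run below follows the Newman-Ziff idea: the
# canonical (fixed-p) statistics are presumably obtained by weighting the
# microcanonical statistics (fixed number n of occupied bonds) with the
# binomial PMF,
#
#     Q(p) = sum_{n=0}^{M} binom(M, n) * p**n * (1 - p)**(M - n) * Q_n,
#
# where M is the total number of bonds.  A rough sanity check of the
# convolution factors (illustration only, not part of this jugfile's task
# graph):
'''
>>> import percolate
>>> weights = percolate.percolate._binomial_pmf(n=100, p=0.5)
>>> weights.sum()  # the factors form a (binomial) probability distribution
'''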
def bond_run(perc_graph_result, seed, ps, convolution_factors_tasks):
    """
    Perform a single run (realization) over all microstates and return the
    canonical cluster statistics
    """
    microcanonical_statistics = percolate.hpc.bond_microcanonical_statistics(
        seed=seed, **perc_graph_result
    )

    # initialize statistics array
    canonical_statistics = np.empty(
        ps.size,
        dtype=percolate.hpc.canonical_statistics_dtype(
            spanning_cluster=SPANNING_CLUSTER,
        )
    )

    # loop over all p's and convolve the microcanonical statistics into
    # canonical statistics
    # http://docs.scipy.org/doc/numpy/reference/arrays.nditer.html#modifying-array-values
    for row, convolution_factors_task in zip(
        np.nditer(canonical_statistics, op_flags=['writeonly']),
        convolution_factors_tasks,
    ):
        # load task result
        # http://jug.readthedocs.org/en/latest/api.html#jug.Task.load
        assert not convolution_factors_task.is_loaded()
        convolution_factors_task.load()
        # fetch task result
        my_convolution_factors = convolution_factors_task.result

        # convolve to canonical statistics
        row[...] = percolate.hpc.bond_canonical_statistics(
            microcanonical_statistics=microcanonical_statistics,
            convolution_factors=my_convolution_factors,
            spanning_cluster=SPANNING_CLUSTER,
        )
        # explicitly unload task to save memory
        # http://jug.readthedocs.org/en/latest/api.html#jug.Task.unload
        convolution_factors_task.unload()

    # initialize canonical averages for reduce
    ret = percolate.hpc.bond_initialize_canonical_averages(
        canonical_statistics=canonical_statistics,
        spanning_cluster=SPANNING_CLUSTER,
    )

    return ret


@TaskGenerator
def bond_task(
    perc_graph_result, seeds, ps, convolution_factors_tasks_iterator
):
    """
    Perform a number of runs

    The number of runs is the number of seeds

    convolution_factors_tasks_iterator needs to be an iterator

    We shield the convolution factor tasks from jug's value/result mechanism
    by supplying an iterator over the list of tasks, so the tasks are
    evaluated lazily (see the sketch after this function)
    http://github.com/luispedro/jug/blob/43f0d80a78f418fd3aa2b8705eaf7c4a5175fff7/jug/task.py#L100
    http://github.com/luispedro/jug/blob/43f0d80a78f418fd3aa2b8705eaf7c4a5175fff7/jug/task.py#L455
    """

    # restore the list of convolution factors tasks
    convolution_factors_tasks = list(convolution_factors_tasks_iterator)

    return reduce(
        percolate.hpc.bond_reduce,
        map(
            bond_run,
            itertools.repeat(perc_graph_result),
            seeds,
            itertools.repeat(ps),
            itertools.repeat(convolution_factors_tasks),
        )
    )

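# Sketch of the shielding trick used in bond_task, assuming jug's usual
# argument handling: when resolving task arguments, jug descends into lists,
# tuples and dicts and loads every Task it finds, but passes objects it does
# not recognize, such as iterators, through unchanged.  Illustration only;
# task_a and task_b stand for arbitrary jug Tasks:
'''
>>> from jug import value
>>> value([task_a, task_b])        # loads both task results eagerly
>>> value(iter([task_a, task_b]))  # the iterator is passed through untouched
'''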

@TaskGenerator
def write_to_disk(dimension, canonical_averages):
    import h5py

    # Read/write if exists, create otherwise
    f = h5py.File(BOND_PERCOLATION_HPC_DATA_FILENAME, mode='a')

    key = '{}'.format(dimension)

    # dataset should not exist yet!
    if key in f:
        raise RuntimeError('dataset {} already exists'.format(key))

    f.create_dataset(
        name=key,
        data=canonical_averages,
    )

    f.close()


@TaskGenerator
def write_sysinfo():
    import h5py

    f = h5py.File(BOND_PERCOLATION_HPC_DATA_FILENAME, mode='a')
    helpers.dump_sys_info(f.attrs)
    f.close()


def dummy_hash(x):
    """
    Supplies a constant dummy hash
    """
    return 'dummy_hash'.encode('utf-8')


convolution_factors = {}
perc_graph_results = {}
reduced_canonical_averages = {}
final_canonical_averages = {}

# initialize random number generator
rng = np.random.RandomState(seed=SEED)
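# Note: the fixed SEED matters beyond reproducibility: jug re-executes this
# jugfile on every invocation, so the seeds (and therefore the bond_task
# hashes) presumably have to come out identical each time for jug to
# recognize previously computed results.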


write_sysinfo()

# in general: avoid premature optimization and design the simulation for
# large system sizes (small system sizes take much less time anyway)
for dimension in SYSTEM_DIMENSIONS:
    num_edges = 2 * dimension * (dimension - 1)
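    # (an L-by-L grid has L * (L - 1) horizontal and L * (L - 1) vertical
    # bonds, hence the 2 * L * (L - 1) above; auxiliary spanning-detection
    # edges, if any, are not counted)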
    # computing the binomial PMF takes ca. 1 s per 10^6 states, times the
    # number of p values
    '''
    >>> import timeit
    >>> s = """\
    ... [
    ...     percolate.percolate._binomial_pmf(n=1e4, p=p)
    ...     for p in 100 * [0.5]
    ... ]
    ... """
    >>> timeit.timeit(stmt=s, setup='import percolate', number=1)
    '''
    # so we store all binomial PMFs (one task per dimension and p) on disk
    convolution_factors[dimension] = [
        compute_convolution_factors_for_single_p(
            n=num_edges, p=p,
        )
        for p in PS
    ]

# finish all tasks up to here first before proceeding
# http://jug.readthedocs.org/en/latest/barrier.html#barriers
barrier()

for dimension in SYSTEM_DIMENSIONS:
    # building the graph takes ca. 10 s per 10^6 nodes
    '''
    >>> import timeit
    >>> timeit.timeit(
    ...     stmt='percolate.spanning_2d_grid(1000)',
    ...     setup='import percolate',
    ...     number=1
    ... )
    '''
    # that is why we store every graph on disk (as a task)
    perc_graph_results[dimension] = prepare_percolation_graph(dimension)

    seeds = rng.randint(UINT32_MAX, size=NUMBER_OF_RUNS)

    # now process the actual tasks
    bond_tasks = list()
    for my_seeds in np.split(seeds, NUMBER_OF_TASKS):
        # pass an iterator over the list of convolution factor tasks to keep
        # jug from automatically fetching each task's result (the iterator is
        # not resolved, so the tasks are evaluated lazily)
        convolution_iterator = iter(
            convolution_factors[dimension]
        )
        # an iterator cannot be pickled / hashed, so supply a dummy hash
        convolution_iterator_hashed = CustomHash(
            convolution_iterator, dummy_hash,
        )
        bond_tasks.append(
            bond_task(
                perc_graph_result=perc_graph_results[dimension],
                seeds=my_seeds,
                ps=PS,
                convolution_factors_tasks_iterator=convolution_iterator_hashed,
            )
        )

    # reduce the per-task canonical averages into one result per system size
    reduced_canonical_averages[dimension] = jug.mapreduce.reduce(
        reducer=percolate.hpc.bond_reduce,
        inputs=bond_tasks,
        reduce_step=100,
    )
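    # Conceptually (ignoring how jug splits the work into sub-tasks of
    # reduce_step inputs each), the reduction above presumably amounts to:
    '''
    >>> from functools import reduce
    >>> from jug import value
    >>> reduce(percolate.hpc.bond_reduce, [value(t) for t in bond_tasks])
    '''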

    # finalize canonical averages
    final_canonical_averages[dimension] = Task(
        percolate.hpc.finalize_canonical_averages,
        number_of_nodes=perc_graph_results[dimension]['num_nodes'],
        ps=PS,
        canonical_averages=reduced_canonical_averages[dimension],
        alpha=ALPHA,
    )

    # write to disk
    write_to_disk(
        dimension=dimension,
        canonical_averages=final_canonical_averages[dimension],
    )
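
# A possible way to drive this jugfile (standard jug command line; adjust the
# number of worker processes to your cluster or batch system):
#
#     jug execute bond_percolation_jugfile.py   # run tasks; start several
#                                               # processes for parallelism
#     jug status bond_percolation_jugfile.py    # monitor progress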