- [for Spencer Wilson]
- full credit to Matthew Rocklin [NVIDIA]
- modified by Naureen Ghani for [SWC HPC]
- [enter cluster]
- ssh ssh.swc.ucl.ac.uk -l spencerw
- [do not run computations on the log-in node; only submit jobs from here]
- [enter the cpu/gpu nodes to do all computation]
- ssh gpu-380-10
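- [optional one-hop alternative from your own machine, assuming the log-in node can forward to the compute node:]
- ssh -J spencerw@ssh.swc.ucl.ac.uk spencerw@gpu-380-10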
- module load miniconda
- [setup conda env:]
- conda create -n dask-tutorial python=3.7 anaconda
- conda activate dask-tutorial
- conda install -c conda-forge vim
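- [optional check that the new env is active:]
- python --version   # should report Python 3.7.x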
- [get dask + dependencies:]
- conda install dask
- conda install -c conda-forge dask-jobqueue
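- [optional check that both packages import cleanly:]
- python -c "import dask, dask_jobqueue; print(dask.__version__, dask_jobqueue.__version__)"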
- [open ipython terminal:]
- ipython
- import dask
- import dask.distributed
- pip install dask-jobqueue # redundant if the conda install above succeeded
- from dask_jobqueue import SLURMCluster
- [note: must specify gpu partition if needed, goes to cpu by default]
- cluster = SLURMCluster(queue='gpu',    # SLURM partition
-                        processes=6,    # worker processes per job
-                        cores=24,       # total threads per job; 24/6 = 4 threads per process
-                        memory="2GB",   # total memory per job, divided across the 6 processes
-                        env_extra=['export LANG="en_US.utf8"',    # lines prepended to each job script
-                                   'export LANGUAGE="en_US.utf8"',
-                                   'export LC_ALL="en_US.utf8"'])
- cluster.scale(10) # this may take a few seconds to launch
- from dask.distributed import Client
- client = Client(cluster)
- client # displays a summary of active workers, cores, and memory
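- [the scheduler also serves a live dashboard, by default on port 8787; a sketch for viewing it from your own machine, assuming the hostnames above:]
- print(client.dashboard_link)
- [then forward the port through the log-in node and open localhost:8787 in a browser:]
- ssh -L 8787:gpu-380-10:8787 spencerw@ssh.swc.ucl.ac.uk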
- [open a separate terminal + ssh to log-in node:]
- squeue -u spencerw
- [you should see the jobs launched for the 10 requested workers]
- [return to ipython:]
- cluster.scale(20) # double the number of workers
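- [alternative sketch: cluster.adapt lets dask add and remove workers automatically between bounds]
- cluster.adapt(minimum=10, maximum=20)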
- [run a simple computation to test dask:]
- import time
- def slow_increment(x):
-     time.sleep(1)    # simulate one second of work
-     return x + 1
- from dask.distributed import progress
- futures = client.map(slow_increment, range(5000)) # submit 5000 tasks to the workers
- progress(futures)
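- [once the progress bar completes, pull the results back; gather blocks until all futures finish:]
- results = client.gather(futures)
- results[:3]   # [1, 2, 3]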
- print(cluster.job_script()) # show the sbatch script submitted for each worker job
- exit()
- [back on the log-in node:]
- ls ~/.config
- ls ~/.config/dask
- vi ~/.config/dask/jobqueue.yaml
- [uncomment the settings under "slurm"]
- [on the log-in node, to see full details for each node:]
- sinfo --Node --long
- [the S:C:T column = sockets:cores:threads per node]
- ifconfig # list network interfaces (for the "interface" setting in jobqueue.yaml)
- [edit the slurm section of jobqueue.yaml accordingly; a sketch follows below]
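- [a minimal sketch of the slurm section, reusing the values from the SLURMCluster call above; walltime and interface are placeholders to adapt from sinfo/ifconfig:]
- jobqueue:
-   slurm:
-     name: dask-worker
-     queue: gpu
-     cores: 24
-     processes: 6
-     memory: 2GB
-     walltime: '00:30:00'
-     interface: eth0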
- [hit esc]
- :wq [write + quit in vim editor]