Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Using the Rmpi library with Snow

Note

To start:

  • This assumes you have installed the snow library yourself into your home directory.

  • You know the location of this library, for example: ~/R/x86_64-pc-linux-gnu-library/4.2/snow/

  • You have NOT copied the .Rprofile file into your home folder. If you have, this example will appear to stall and will eventually time out.

Expand
Title: snow_test.R
Code Block
library(Rmpi)
library(snow)

# Simulate sample means of standard-normal draws.
#
# Args:
#   rep_worker: number of sample means to generate (a non-negative count).
#   n_used:     number of rnorm() draws averaged into each sample mean.
#
# Returns:
#   A numeric vector of length `rep_worker`, one mean per repetition;
#   numeric(0) when `rep_worker` is 0.
simu <- function(rep_worker, n_used) {
  # seq_len() yields an empty sequence for 0, whereas `1 : rep_worker` would
  # iterate over c(1, 0). vapply() also allocates the result once instead of
  # growing it element by element, and pins the element type to one double.
  vapply(
    seq_len(rep_worker),
    function(i) mean(rnorm(n_used)),
    numeric(1)
  )
}

# Report how many MPI processes are available to this job. The bare sprintf()
# call is intentional: at top level its value is auto-printed into the log.
universe_size <- mpi.universe.size()
sprintf("Num of Processes: %d", universe_size)

# Use every process except one as a snow worker.
worker_count <- universe_size - 1
mpi_cluster <- makeCluster(worker_count, type = "MPI")

# 100 tasks, each asking simu() for a single mean of 1e4 normal draws.
draws_per_mean <- 1e4
reps_per_task <- rep(1, times = 100)

# clusterApply() hands one element of reps_per_task to simu() per call and
# forwards draws_per_mean as simu()'s second argument.
task_means <- clusterApply(mpi_cluster, reps_per_task, simu, draws_per_mean)

# Bind the per-task results side by side (one column per task) and save them.
means_by_column <- do.call(cbind, task_means)
write.csv(means_by_column, file = "values.csv")

stopCluster(mpi_cluster)
Expand
Title: run.sh
Code Block
#!/bin/bash
#SBATCH --job-name=rmpi_snow_test
#SBATCH --nodes=2
#SBATCH --ntasks-per-node=16
#SBATCH --cpus-per-task=1
#SBATCH --time=2:00
#SBATCH --mail-type=ALL
#SBATCH --mail-user=<your-email-addr>
#SBATCH --account=<your-project>

# Slurm batch script that runs snow_test.R across the allocated MPI tasks.
# Replace the <...> placeholders above before submitting with: sbatch run.sh

# Record the job id in the Slurm output file so it can be matched to the R log.
echo "SLURM_JOB_ID: ${SLURM_JOB_ID}"

# Modules to Load
module load gcc/12.2.0 openmpi/4.1.4 r-rmpi/0.7-1-ompi

# https://stat.ethz.ch/pipermail/r-sig-hpc/2019-November/002105.html
# This sets the PATH environment variable to where to find the RMPISNOW command.
# Adjust the path if your snow library is installed somewhere else.
# $HOME is used instead of ~ so the path does not depend on tilde expansion.
export PATH="${PATH}:${HOME}/R/x86_64-pc-linux-gnu-library/4.2/snow/"

# Run the R script in batch mode through RMPISNOW; the R session log is written
# to a per-job file named after the Slurm job id.
mpirun RMPISNOW CMD BATCH --no-restore --no-save --quiet snow_test.R "snow_test_${SLURM_JOB_ID}.log"
Expand
Title: Example Run and Output
Code Block
[]$ ls
run.sh  snow_test.R

[]$ sbatch run.sh
Submitted batch job 13433495
# Job should take less than 10 seconds to complete.

[]$ ls
run.sh  slurm-13433495.out  snow_test_13433495.log  values.csv  snow_test.R

[]$ cat slurm-13433495.out
SLURM_JOB_ID: 13433495

[]$ cat snow_test_13433495.log
Loading required package: utils
> library(Rmpi)
> library(snow)
>
> simu <- function(rep_worker, n_used) {
+   theta_simu <- c()
+   for (i in 1 : rep_worker) {
+     theta_simu[i] <- mean(rnorm(n_used))
+   }
+   theta_simu
+ }
>
> num_of_processes <- mpi.universe.size()
> sprintf("Num of Processes: %d", num_of_processes)
[1] "Num of Processes: 32"
>
> cluster <- makeCluster(num_of_processes - 1, type = "MPI")
>
> n_used <- 1e4
> rep_worker_list <- rep(1, 100)
>
> theta <- clusterApply(cluster, rep_worker_list, simu, n_used)
>
> theta_cbind <- do.call(cbind, theta)
> write.csv(theta_cbind, file="values.csv")
>
> stopCluster(cluster)

# Output will be of the form:
[]$ cat values.csv
"","V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","V11","V12","V13","V14","V15","V16","V17","V18","V19","V20","V21","V22","V23","V24","V25","V26","V27","V28","V29","V30","V31","V32","V33","V34","V35","V36","V37","V38","V39","V40","V41","V42","V43","V44","V45","V46","V47","V48","V49","V50","V51","V52","V53","V54","V55","V56","V57","V58","V59","V60","V61","V62","V63","V64","V65","V66","V67","V68","V69","V70","V71","V72","V73","V74","V75","V76","V77","V78","V79","V80","V81","V82","V83","V84","V85","V86","V87","V88","V89","V90","V91","V92","V93","V94","V95","V96","V97","V98","V99","V100"
"1",0.0110153488801525,0.00906305581302429,0.00108685858240707,0.00867668055186904,0.00578274342442965,-0.00530732478112944,0.0120954477596871,-0.00359434785869044,0.00835107071072111,2.64364921843532e-05,0.0152691100103968,-0.0135202565458591,0.00241255871463997,0.00137419862397849,0.00115252792794432,-0.0175922825490621,0.00110849307208272,-0.00937694151181359,-0.0131149112354201,0.00596388487426565,0.0219874640222224,-0.00547747285343229,0.00416837900267555,0.0139932426057719,-0.0234641772417162,0.00433451519201003,-0.00525816814860096,0.00414031282343361,0.00130366800166443,0.000413263824619443,0.0104087338213028,0.00149700313038921,0.011502836202072,-0.00715751527844509,0.0119589163613294,0.0220381656609346,-0.016000771903997,-0.00183947331801148,-0.00284276679070134,-0.00694346146022534,-0.0258218986912262,0.00473135994639517,0.00831409717862001,0.0182355174080247,-0.00931334883761317,0.00529566801009098,-0.00302557027855197,0.00346677904211363,0.00571337545701443,-0.00586232060572412,-0.00256997376593396,0.0165929336422106,0.00796493065507422,-0.00438474136670677,0.0062288833102191,-0.0175721248899911,0.00165933692067554,-0.00237930737409138,0.00121451126970138,0.00623046211970692,0.00559793460867063,-0.00412640828783677,-0.00764407338711362,-0.000460569630436792,-0.0107502392297747,-0.00421225031438457,-0.000926513045440252,0.00334739878419211,-0.00452111805160875,-0.0046740544706875,0.0155997050952078,-0.0234192042710321,0.00324579902597707,-0.0151148830758793,-0.000523464705140069,-0.00175640010460385,-0.010243166679217,-0.00668035373700306,-0.0119873621894053,-0.0141762507674786,-0.00783107010368886,0.0115902891884065,-0.00762658494377125,-0.0223384107212392,-0.00379425267947311,0.0138895890210734,0.00392029947365504,0.00248380077423007,-0.0064247327395136,0.00434147149528924,-0.00572841369840578,0.00966805999144852,-0.0122907653345613,0.00596172188548434,-0.0122757100311107,-0.000426327204500513,0.00108879897276763,0.00975469886227781,0.00675195747959386
,-0.00288208828533988

This example is based on a script and discussion here.