Skip to content

Commit f4c7a0b

Browse files
committed
Added script for generating URS files on Summit
1 parent d8f0b04 commit f4c7a0b

1 file changed

Lines changed: 131 additions & 0 deletions

File tree

scripts/summit/gen_urs_summit.pl

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
#!/usr/bin/perl
#
# Generate the URS (resource set) files used to run Chimbuko on Summit.
#
# Usage:
#   gen_urs_summit.pl <total number of nodes> <ranks per node> \
#                     <cores per rank main> <gpus per rank main>
#
# Arguments:
#   total number of nodes - number of hosts (nodes) the application runs on,
#                           plus one for the provDB and pserver. The job
#                           script should allocate this many nodes.
#   ranks per node        - number of MPI ranks per host for the application
#                           (must be even so ranks split evenly over sockets).
#   cores per rank main   - number of cores assigned to each MPI rank of the
#                           main application.
#   gpus per rank main    - number of GPUs assigned to each MPI rank of the
#                           main application (may be 0).
#
# Output (written to the current directory):
#   services.urs - one resource set covering every core of host 1, for the
#                  Chimbuko services.
#   ad.urs       - one single-core resource set per AD rank.
#   main.urs     - one resource set per main-application rank.
#
# Exit status is 0 on success and non-zero on any usage, validation or I/O
# error. All validation happens before any file is written, so a rejected
# configuration leaves no partial output behind.

use strict;
use warnings;

# Print an error message to STDERR and terminate with a failure status.
sub fail {
    my ($message) = @_;
    print STDERR $message;
    exit 1;
}

if (scalar @ARGV != 4) {
    fail("Expect: <total number of nodes> <ranks per node> <cores per rank main> <gpus per rank main>\n");
}

# Reject non-numeric input up front; otherwise Perl would silently treat it
# as 0 in the arithmetic below.
my @arg_names = (
    'total number of nodes',
    'ranks per node',
    'cores per rank main',
    'gpus per rank main',
);
for my $i (0 .. $#arg_names) {
    fail("Error: <$arg_names[$i]> must be a non-negative integer, got '$ARGV[$i]'\n")
        unless $ARGV[$i] =~ /\A\d+\z/;
}

my ($nhosts, $nranks_per_host, $ncores_per_rank_main, $ngpus_per_rank_main) = @ARGV;

# Summit characteristics
my $nsockets           = 2;      # sockets per node
my $ncores_host        = 42;     # cores per node
my $ncores_socket      = 21;     # cores per socket
my $socket_offset_core = 21;     # core offset of second socket
my $socket_offset_gpu  = 3;      # 3 GPUs per socket
my $ngpus_host         = $nsockets * $socket_offset_gpu;
my $mem_per_socket     = 309624; # get this number by assigning 2 ranks and using the -S option to view the output URS file

# We assume 1 core per rank of the AD.
my $ncores_per_host_ad = $nranks_per_host;

# ---------------------------------------------------------------------------
# Validate the requested layout before writing anything.
# ---------------------------------------------------------------------------
if ($nranks_per_host < 1 || $ncores_per_rank_main < 1) {
    fail("Error: <ranks per node> and <cores per rank main> must be at least 1\n");
}

# Ranks are split evenly over the two sockets. Because the AD uses one core
# per rank, an even rank count also guarantees an even AD core count.
if ($nranks_per_host % $nsockets != 0) {
    fail("Expect number of ranks to be a multiple of 2!\n");
}

my $ngpus_needed_host = $ngpus_per_rank_main * $nranks_per_host;
if ($ngpus_needed_host > $ngpus_host) {
    fail("Error: too many GPUs per host: need ${ngpus_needed_host} but only ${ngpus_host} are available\n");
}

my $ncores_needed_host = $nranks_per_host * $ncores_per_rank_main + $ncores_per_host_ad;
if ($ncores_needed_host > $ncores_host) {
    fail("Error: too many CPU cores per host: need ${ncores_needed_host} but only ${ncores_host} are available\n");
}

if ($nhosts < 2) {
    # One node is reserved for the services, so nothing is left for the job.
    warn "Warning: <total number of nodes> is ${nhosts}; ad.urs and main.urs will be empty\n";
}

# ---------------------------------------------------------------------------
# Derived per-socket quantities. With an even split, the per-host core check
# above already guarantees that each socket's share fits in $ncores_socket.
# ---------------------------------------------------------------------------
my $nranks_per_socket      = $nranks_per_host / $nsockets;
my $ncores_per_socket_ad   = $ncores_per_host_ad / $nsockets;
my $ncores_per_socket_main = $ncores_per_rank_main * $nranks_per_socket;

# Divide memory equally over all cores
my $mem_per_core      = $mem_per_socket / $ncores_socket;
my $mem_per_rank_main = $ncores_per_rank_main * $mem_per_core;

print "Assigning ${ncores_per_rank_main} cores per rank to main program, ${ncores_per_socket_main} cores per socket.\n";
print "Assigning ${ncores_per_socket_ad} cores per socket to the AD\n";
print "Assigning ${mem_per_core} MB memory per core\n";

# ---------------------------------------------------------------------------
# Generate URS for Chimbuko services: every core and all memory of host 1.
# ---------------------------------------------------------------------------
open(my $services_fh, '>', 'services.urs')
    or fail("Error: cannot open services.urs for writing: $!\n");

# Each core id is followed by a space, matching the established file layout.
my $services_cpus = join('', map { "$_ " } 0 .. $ncores_host - 1);
print {$services_fh} "RS 0: { host: 1, cpu: ${services_cpus}, mem: 0-${mem_per_socket} 1-${mem_per_socket} }\n";

close($services_fh)
    or fail("Error: failed to write services.urs: $!\n");

# ---------------------------------------------------------------------------
# Generate URS for AD and main application.
# ---------------------------------------------------------------------------
my $nhosts_job    = $nhosts - 1;
my $hoststart_job = 2;    # 0 is launch node, 1 is first compute node

open(my $ad_fh, '>', 'ad.urs')
    or fail("Error: cannot open ad.urs for writing: $!\n");
open(my $main_fh, '>', 'main.urs')
    or fail("Error: cannot open main.urs for writing: $!\n");

for my $h (0 .. $nhosts_job - 1) {
    my $host = $hoststart_job + $h;

    for my $s (0 .. $nsockets - 1) {
        # Rank numbers run consecutively over hosts, then sockets.
        my $rank_off = $h * $nranks_per_host + $s * $nranks_per_socket;

        # The AD occupies the first cores of each socket. Ranks are pinned
        # to individual cores explicitly to prevent all AD ranks being piled
        # on the first core.
        my $ad_corestart = $s * $socket_offset_core;
        for my $r (0 .. $nranks_per_socket - 1) {
            my $rank      = $r + $rank_off;
            my $rank_core = $ad_corestart + $r;
            print {$ad_fh} "RS ${rank}: { host: ${host}, cpu: ${rank_core}, mem: ${s}-${mem_per_core} }\n";
        }

        # The main application takes the cores following the AD's, and the
        # GPUs belonging to this socket.
        my $corestart = $ad_corestart + $ncores_per_socket_ad;
        my $gpustart  = $s * $socket_offset_gpu;

        for my $r (0 .. $nranks_per_socket - 1) {
            my $rank    = $r + $rank_off;
            my $coreend = $corestart + $ncores_per_rank_main - 1;
            my $cpu_str = join('', map { "$_ " } $corestart .. $coreend);

            # The gpu field is omitted entirely when no GPUs are requested.
            my $gpu_str = '';
            if ($ngpus_per_rank_main > 0) {
                my $gpuend = $gpustart + $ngpus_per_rank_main - 1;
                $gpu_str = ', gpu: ' . join(' ', $gpustart .. $gpuend);
            }

            print {$main_fh} "RS ${rank}: { host: ${host}, cpu: ${cpu_str}${gpu_str}, mem: ${s}-${mem_per_rank_main} }\n";

            # Advance to the first core/GPU not yet handed out, so that
            # consecutive ranks receive contiguous, non-overlapping ids.
            $gpustart += $ngpus_per_rank_main;
            $corestart = $coreend + 1;
        }
    }
}

close($ad_fh)
    or fail("Error: failed to write ad.urs: $!\n");
close($main_fh)
    or fail("Error: failed to write main.urs: $!\n");
131+

0 commit comments

Comments
 (0)