|
#!/usr/bin/perl
#
# Generate jsrun URS/ERF resource-set files for running Chimbuko alongside a
# main application on ORNL Summit nodes.
#
# Usage: <total number of nodes> <ranks per node> <cores per rank main> <gpus per rank main>
#
# Outputs:
#   services.urs - a single resource set spanning one full node for the
#                  Chimbuko provenance DB and parameter server
#   ad.urs       - one resource set per rank for the anomaly-detection (AD)
#                  module (1 core per AD rank)
#   main.urs     - one resource set per rank for the main application
use strict;
use warnings;

if (scalar @ARGV != 4) {
    print "Expect: <total number of nodes> <ranks per node> <cores per rank main> <gpus per rank main>\n";
    exit 1;
}

# We will assume 1 core per rank of the AD

my $nhosts = $ARGV[0];                # number of hosts (nodes) the application runs on, plus one for the provDB and pserver; the job script should allocate this many nodes
my $nranks_per_host = $ARGV[1];       # number of MPI ranks per host (node) for the application
my $ncores_per_rank_main = $ARGV[2];  # number of cores to assign per MPI rank to the main app
my $ngpus_per_rank_main = $ARGV[3];   # number of GPUs per MPI rank for the application

my $ncores_per_host_ad = $nranks_per_host;    # 1 core per AD rank

# Summit has 6 GPUs per node
if ($ngpus_per_rank_main * $nranks_per_host > 6) {
    print STDERR "Error: too many GPUs per rank\n";
    exit 1;    # nonzero status so batch scripts can detect the failure
}
# Summit has 42 usable cores per node, shared between the main app and the AD
if ($nranks_per_host * $ncores_per_rank_main + $ncores_per_host_ad > 42) {
    print STDERR "Error: too many CPUs per node\n";
    exit 1;
}


# Summit node characteristics
my $ncores_host = 42;          # cores per node
my $ncores_socket = 21;        # cores per socket
my $socket_offset_core = 21;   # core offset of second socket
my $socket_offset_gpu = 3;     # 3 GPUs per socket
my $mem_per_socket = 309624;   # MB; get this number by assigning 2 ranks and using the -S option to view the output URS file

# Generate URS for Chimbuko services: one resource set using every core and
# the memory of both sockets on host 1 (the services node)
open my $svc_fh, '>', 'services.urs' or die "Cannot open services.urs: $!";

print {$svc_fh} "RS 0: { host: 1, cpu: ";
for (my $c = 0; $c < $ncores_host; $c++) {
    print {$svc_fh} "$c ";
}
print {$svc_fh} ", mem: 0-${mem_per_socket} 1-${mem_per_socket} }\n";

close $svc_fh or die "Cannot close services.urs: $!";



# Generate ERF for AD and main application.  Ranks are divided evenly between
# the two sockets, so both per-host counts must be even.
if ($nranks_per_host % 2 != 0) {
    print STDERR "Expect number of ranks to be a multiple of 2!\n";
    exit 1;
}
if ($ncores_per_host_ad % 2 != 0) {
    print STDERR "Expect number of host cores for the AD to be a multiple of 2!\n";
    exit 1;
}

my $nranks_per_socket = $nranks_per_host / 2;
my $ncores_per_socket_ad = $ncores_per_host_ad / 2;
my $ncores_per_socket_main = $ncores_per_rank_main * $nranks_per_socket;

if ($ncores_per_socket_main + $ncores_per_socket_ad > $ncores_socket) {
    print STDERR "Too many cores per socket!\n";
    exit 1;
}

# Divide each socket's memory equally over all of its cores
my $mem_per_core = $mem_per_socket / $ncores_socket;

print "Assigning ${ncores_per_rank_main} cores per rank to main program, ${ncores_per_socket_main} cores per socket.\n";
print "Assigning ${ncores_per_socket_ad} cores per socket to the AD\n";
print "Assigning ${mem_per_core} MB memory per core\n";

my $mem_per_rank_main = $ncores_per_rank_main * $mem_per_core;


# Host 0 is the launch node, host 1 holds the Chimbuko services; the job and
# AD occupy the remaining $nhosts-1 compute nodes starting at host 2
my $nhosts_job = $nhosts - 1;
my $hoststart_job = 2;

open my $ad_fh, '>', 'ad.urs' or die "Cannot open ad.urs: $!";

open my $main_fh, '>', 'main.urs' or die "Cannot open main.urs: $!";

for (my $h = 0; $h < $nhosts_job; $h++) {
    my $host = $hoststart_job + $h;

    for (my $s = 0; $s < 2; $s++) {
        my $rank_off = $h * $nranks_per_host + $s * $nranks_per_socket;

        my $corestart = $s * $socket_offset_core;

        # AD: one core per rank at the start of the socket.
        # We manually assign the ranks to cores to prevent all ranks being
        # piled onto the first core.
        my $coreend = $corestart + $ncores_per_socket_ad - 1;

        for (my $r = 0; $r < $nranks_per_socket; $r++) {
            my $rank = $r + $rank_off;
            my $rank_core = $corestart + $r;
            print {$ad_fh} "RS ${rank}: { host: ${host}, cpu: ${rank_core}, mem: ${s}-${mem_per_core} }\n";
        }

        my $gpustart = $s * $socket_offset_gpu;
        $corestart = $coreend + 1;

        # Main application: contiguous cores (after the AD cores) and GPUs
        for (my $r = 0; $r < $nranks_per_socket; $r++) {
            my $rank = $r + $rank_off;
            $coreend = $corestart + $ncores_per_rank_main - 1;

            my $gpuend = $gpustart + $ngpus_per_rank_main - 1;
            my $gpu_str = ", gpu:";
            for (my $g = $gpustart; $g <= $gpuend; $g++) {
                $gpu_str = "$gpu_str $g";
            }
            if ($ngpus_per_rank_main == 0) {
                $gpu_str = "";    # omit the gpu field entirely for CPU-only runs
            }

            print {$main_fh} "RS ${rank}: { host: ${host}, cpu: ";
            for (my $c = $corestart; $c <= $coreend; $c++) {
                print {$main_fh} "$c ";
            }
            print {$main_fh} "${gpu_str}, mem: ${s}-${mem_per_rank_main} }\n";

            # BUGFIX: the next rank's GPU range starts immediately after this
            # rank's last GPU.  Previously this added $ngpus_per_rank_main to
            # $gpuend, skipping GPU indices (and overrunning the 6 GPUs per
            # node) whenever more than one GPU per rank was requested.
            $gpustart = $gpuend + 1;
            $corestart = $coreend + 1;
        }
    }
}
close $ad_fh or die "Cannot close ad.urs: $!";
close $main_fh or die "Cannot close main.urs: $!";
| 131 | + |
0 commit comments