Skip to content

Commit ccdcbd6

Browse files
committed
Updated ubuntu18.04-cuda10.1 dockerfiles
Mocu example docker image now uses launch scripts
1 parent cdb791d commit ccdcbd6

12 files changed

Lines changed: 249 additions & 127 deletions

File tree

docker/ubuntu18.04-cuda10.1/openmpi4.0.4/Dockerfile.ad.provdb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ SHELL ["/bin/bash", "-c"]
1616

1717
ENV CPATH=/opt/cereal/include:${CPATH}
1818

19-
RUN source /spack/spack/share/spack/setup-env.sh && spack load mochi-sonata && \
19+
RUN source /spack/spack/share/spack/setup-env.sh && spack load boost mochi-sonata && \
2020
CC=mpicc CXX=mpicxx ../Downloads/PerformanceAnalysis/configure --with-adios2=/opt/adios2 --with-network=ZMQ --with-perf-metric --prefix=/opt/chimbuko/ad && \
2121
make -j 4 install && cd /opt/chimbuko/ad/test
2222
#&& ./run_all.sh DOCKER_SETUP_MOCHI

docker/ubuntu18.04-cuda10.1/openmpi4.0.4/Dockerfile.base

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ RUN apt-get update && \
1515
bzip2 libbz2-dev zlib1g zlib1g-dev \
1616
curl libcurl4-openssl-dev libgtest-dev zip unzip \
1717
libopenblas-base libopenblas-dev libnuma-dev \
18-
openssl libssl-dev && \
18+
openssl libssl-dev psmisc iproute2 && \
1919
apt autoremove -y && \
2020
rm -rf /var/lib/apt/lists/*
2121

docker/ubuntu18.04-cuda10.1/openmpi4.0.4/Dockerfile.chimbuko.benchmark_suite

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,13 @@ COPY --from=chimbuko/viz:ubuntu18.04-cuda10.1 /opt/chimbuko/viz /opt/chimbuko/vi
55
ENV PATH=/opt/tau2/x86_64/bin/:/opt/chimbuko/ad/bin:${PATH}
66
ENV TAU_OPTIONS="-optShared -optRevert -optVerbose -optCompInst"
77

8+
#Fix redis path
9+
ENV PATH=/opt/chimbuko/viz/redis-stable/src/:${PATH}
10+
811
#Install debugging/editing tools; the apt package lists are removed in the same layer so they do not persist in the image
RUN apt-get update && apt-get install -y gdb emacs-nox vim psmisc && rm -rf /var/lib/apt/lists/*
912

1013
#Create a setup script
11-
RUN echo "source /spack/spack/share/spack/setup-env.sh && spack load py-mochi-sonata" >> /root/.bashrc
14+
RUN echo "source /spack/spack/share/spack/setup-env.sh && spack load boost py-mochi-sonata" >> /root/.bashrc
1215

1316
FROM base AS build
1417

docker/ubuntu18.04-cuda10.1/openmpi4.0.4/Dockerfile.chimbuko.mocu

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ COPY --from=chimbuko/viz:ubuntu18.04 /opt /opt
88

99
ENV PATH=/opt/chimbuko/ad/bin/:${PATH}
1010

11-
COPY run_mocu_chimbuko.sh /Downloads/MOCU_Kuramoto/
11+
COPY chimbuko_config.sh run_mocu_chimbuko.sh /Downloads/MOCU_Kuramoto/
12+
1213
#Use the absolute COPY destination so this step does not depend on the current WORKDIR
RUN chmod a+x /Downloads/MOCU_Kuramoto/run_mocu_chimbuko.sh
1314

1415
ENTRYPOINT [ "./run_mocu_chimbuko.sh" ]

docker/ubuntu18.04-cuda10.1/openmpi4.0.4/Dockerfile.mochi

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,26 @@ FROM chimbuko/base:ubuntu18.04-cuda10.1 AS mochi-install
33
SHELL ["/bin/bash", "-c"]
44

55
ENV SPACK_ROOT=/spack/spack
6-
RUN echo FORCE_REDOWNLOAD_12_08_20_14_09 >> /dev/null && \
6+
RUN echo FORCE_REDOWNLOAD_6_22_21_1722 >> /dev/null && \
77
mkdir /spack && cd /spack && \
88
git clone https://github.com/spack/spack.git
99

1010
RUN source /spack/spack/share/spack/setup-env.sh && \
1111
mkdir /sds && cd /sds && \
12-
git clone https://xgitlab.cels.anl.gov/sds/sds-repo.git && \
13-
spack repo add sds-repo
12+
git clone https://github.com/mochi-hpc/mochi-spack-packages.git && \
13+
spack repo add mochi-spack-packages
1414

15-
COPY packages.yaml /root/.spack/
15+
COPY modules.yaml packages.yaml /root/.spack/
16+
COPY spack-hack-mochi/ /opt/spack-hack-mochi
1617

17-
RUN source /spack/spack/share/spack/setup-env.sh && \
18-
spack install mercury ^libfabric@1.9.0 && \
19-
spack install mochi-sonata@master ^libfabric@1.9.0 && \
20-
spack install py-mochi-sonata ^mochi-sonata@master ^python@3.6.0 ^libfabric@1.9.0 && \
21-
spack gc -y && spack clean -a
18+
RUN source /spack/spack/share/spack/setup-env.sh && spack repo add /opt/spack-hack-mochi
19+
20+
#--no-cache-dir keeps pip's download cache out of the image layer
RUN pip3 install --no-cache-dir cython pkgconfig
21+
RUN source /spack/spack/share/spack/setup-env.sh && spack install py-mochi-sonata ^python@3.6.0 ^libfabric@1.9.0 ^py-numpy@1.19.5 ^argobots@1.1+stackunwind+tool ^mochi-margo@0.9.99 ^mercury@2.0.1 && \
22+
spack gc -y && spack clean -a
23+
24+
COPY fixup_spack_python.sh /root/.spack/
25+
#Invoke through bash explicitly so the build does not depend on the script's executable bit surviving the COPY
RUN bash /root/.spack/fixup_spack_python.sh
2226

2327
WORKDIR /
2428

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
#Note: This configuration file is sourced into the bash environment for Chimbuko startup scripts, thus the user must follow correct shell conventions
2+
#Please do not remove any of the variables!
3+
4+
#IMPORTANT NOTE: Variables that cannot be left as default are marked as <------------ ***SET ME***
5+
6+
service_node_iface=eth0 #network interface upon which communication to the service node is performed <------------ ***SET ME***
7+
8+
####################################
9+
#Options for visualization module
10+
####################################
11+
use_viz=1 #enable or disable the visualization
12+
viz_root=/opt/chimbuko/viz #the root directory of the visualization module <------------ ***SET ME (if using viz)***
13+
viz_worker_port=6379 #the port on which to run the redis server for the visualization backend
14+
viz_port=5002 #the port on which to run the webserver
15+
export C_FORCE_ROOT=1 #required only for docker runs, allows celery to execute properly as root user <----------------- *** SET ME (if using Docker)
16+
17+
############################################################
18+
#General options for Chimbuko backend (pserver, ad, provdb)
19+
############################################################
20+
backend_root="infer" #The root install directory of the PerformanceAnalysis libraries. If set to "infer" it will be inferred from the path of the executables
21+
chimbuko_services="infer" #The location of the Chimbuko service script. If set to "infer" it will be inferred from backend_root
22+
23+
####################################
24+
#Options for the provenance database
25+
####################################
26+
use_provdb=1 #enable or disable the provDB. If disabled the provenance data will be written as JSON ASCII into the ${provdb_writedir} set below
27+
provdb_extra_args="" #any extra command line arguments to pass
28+
provdb_nshards=4 #number of database shards
29+
provdb_engine="ofi+tcp;ofi_rxm" #the OFI libfabric provider used for the Mochi stack
30+
provdb_port=5000 #the port of the provenance database
31+
provdb_nthreads=4 #number of worker threads; should be >= the number of shards
32+
provdb_writedir=chimbuko/provdb #the directory in which the provenance database is written. Chimbuko creates chimbuko/provdb which can be used as a default
33+
provdb_commit_freq=10000 #interval (in ms) at which the provenance database is committed to disk. If set to 0 it will commit only at the end
34+
35+
#With "verbs" provider (used for infiniband, iWarp, etc) we need to also specify the domain, which can be found by running fi_info (on a compute node)
36+
provdb_domain=mlx5_0 #only needed for verbs provider <------------ ***SET ME (if using verbs)***
37+
38+
39+
####################################
40+
#Options for the parameter server
41+
####################################
42+
use_pserver=1 #enable or disable the pserver
43+
pserver_extra_args="" #any extra command line arguments to pass
44+
pserver_port=5559 #port for parameter server
45+
pserver_nt=2 #number of worker threads
46+
####################################
47+
#Options for the AD module
48+
####################################
49+
ad_extra_args="-perf_outputpath chimbuko/logs -perf_step 1 -trace_connect_timeout 300" #any extra command line arguments to pass. Note: chimbuko/logs is automatically created by services script
50+
ad_win_size=5 #number of events around an anomaly to store; provDB entry size is proportional to this so keep it small!
51+
ad_alg="sstd" #the anomaly detection algorithm. Valid values are "hbos" and "sstd"
52+
ad_outlier_hbos_threshold=0.99 #the percentile of events outside of which are considered anomalies by the HBOS algorithm
53+
ad_outlier_sstd_sigma=12 #number of standard deviations that defines an outlier in the SSTD algorithm
54+
####################################
55+
#Options for TAU
56+
#Note: Only the TAU_ADIOS2_PATH, TAU_ADIOS2_FILE_PREFIX, EXE_NAME and TAU_ADIOS2_ENGINE variables are used by the Chimbuko services script and there only to generate the suggested
57+
# command to launch the AD (output to chimbuko/vars/chimbuko_ad_cmdline.var); they can be overridden by the run script if desired providing the appropriate modifications
58+
# are made to the AD launch command. The remainder of the variables are used only by TAU and can be freely overridden.
59+
####################################
60+
export TAU_ADIOS2_ENGINE=SST #online communication engine (alternative BP4 although this goes through the disk system and may be slower unless the BPfiles are stored on a burst disk)
61+
export TAU_ADIOS2_ONE_FILE=FALSE #a different connection file for each rank
62+
export TAU_ADIOS2_PERIODIC=1 #enable/disable ADIOS2 periodic output
63+
export TAU_ADIOS2_PERIOD=1000000 #period in us between ADIOS2 io steps
64+
export TAU_THREAD_PER_GPU_STREAM=1 #force GPU streams to appear as different TAU virtual threads
65+
export TAU_THROTTLE=0 #enable/disable throttling of short-running functions
66+
67+
export TAU_MAKEFILE=/opt/tau2/x86_64/lib/Makefile.tau-papi-mpi-pthread-python-cupti-pdt-adios2 #The TAU makefile to use <------------ ***SET ME***
68+
69+
#Note: the following 2 variables are not used by the service script but are included here for use from the user's run script allowing the application to be launched with either "${TAU_EXEC} <app>" or "${TAU_PYTHON} <app>"
70+
#Note: the "binding" -T ... is used by Tau to find the appropriate configuration. It can typically be inferred from the name of the Makefile. If using a non-MPI job the 'mpi' should be changed to 'serial' and a non-MPI build of
71+
# ADIOS2/TAU must exist
72+
#Suggestion: It is useful to test the command without Chimbuko first to ensure TAU picks up the correct binding; this can be done by 'export TAU_ADIOS2_ENGINE=BPFile' and then running the application with Tau but without Chimbuko.
73+
TAU_EXEC="tau_exec -T papi,mpi,pthread,python,cupti,pdt,adios2 -adios2_trace -cupti -um" #how to execute tau_exec; the -T arguments should mirror the makefile name <------------ ***SET ME***
74+
TAU_PYTHON="tau_python -T serial,papi,pthread,python,cupti,pdt,adios2 -tau-python-interpreter=python3 -adios2_trace -cupti -um -tau-python-args=-u" #how to execute tau_python. Note that passing -u to python forces it to not buffer stdout so we can pipe it
75+
#to tee in realtime <--- SET ME (if !python3)
76+
77+
export EXE_NAME=python3.6 #the name of the executable (without path) <------------ ***SET ME***
78+
79+
TAU_ADIOS2_PATH=chimbuko/adios2 #path where the adios2 files are to be stored. Chimbuko services creates the directory chimbuko/adios2 in the working directory and this should be used by default
80+
TAU_ADIOS2_FILE_PREFIX=tau-metrics #the prefix of tau adios2 files; full filename is ${TAU_ADIOS2_FILE_PREFIX}-${EXE_NAME}-${RANK}.bp
81+
82+
83+
84+
85+
86+
87+
88+
89+
90+
91+
92+
93+
94+
###########################################################################
95+
# NON-USER VARIABLES BELOW = DON'T MODIFY THESE!!
96+
###########################################################################
97+
#Extra processing
98+
export TAU_ADIOS2_FILENAME="${TAU_ADIOS2_PATH}/${TAU_ADIOS2_FILE_PREFIX}"
99+
100+
if [[ ${backend_root} == "infer" ]]; then
101+
#Infer the install root as the parent of the directory that contains provdb_admin.
#dirname strips only the final path component (the sed substitution removed the first "provdb_admin" anywhere in the path), and the quoting protects paths containing whitespace
backend_root=$( readlink -f "$(dirname "$(which provdb_admin)")/.." )
102+
fi
103+
104+
if [[ ${chimbuko_services} == "infer" ]]; then
105+
chimbuko_services="${backend_root}/scripts/launch/run_services.sh"
106+
if [ ! -f "${chimbuko_services}" ]; then
107+
echo "Could not infer service script location: service script does not exist at ${chimbuko_services}!"
108+
exit 1
109+
fi
110+
fi
111+
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#!/bin/bash
2+
cd /spack/spack/opt/spack/linux-ubuntu18.04-broadwell/gcc-7.5.0
3+
pydirs=$(ls | grep '^py-')
4+
echo $pydirs
5+
echo "PWD is" $(pwd)
6+
for p in $pydirs; do
7+
echo "cd to ${p}"
8+
#Skip packages without a lib/python3.6 directory: otherwise the mv/ln below would run in the wrong directory and the trailing 'cd -' would leave the install tree, breaking every later iteration
cd "${p}/lib/python3.6" || { echo "Skipping ${p}: no lib/python3.6 directory" >&2; continue; }
9+
ls -lrt
10+
mv site-packages site-packages-old
11+
ln -s ../python3/dist-packages site-packages
12+
ls -lrt
13+
cd -
14+
done
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
modules:
2+
prefix_inspections:
3+
lib: [LIBRARY_PATH]
4+
lib64: [LIBRARY_PATH]
5+
include:
6+
- C_INCLUDE_PATH
7+
- CPLUS_INCLUDE_PATH
8+
- INCLUDE

docker/ubuntu18.04-cuda10.1/openmpi4.0.4/run_mocu_chimbuko.sh

Lines changed: 16 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -3,135 +3,37 @@
33
############################# START OF USER INPUT ########################
44
ntasks=2 #Requires 2 GPUs; reduce to 1 if only 1 GPU!
55
############################# END OF USER INPUT ########################
6-
7-
export TAU_MAKEFILE=/opt/tau2/x86_64/lib/Makefile.tau-papi-pthread-python-cupti-pdt-adios2
8-
export TAU_ADIOS2_PERIODIC=1
9-
export TAU_ADIOS2_PERIOD=1000000
10-
export TAU_ADIOS2_ENGINE=SST
11-
#export CHIMBUKO_VERBOSE=1
12-
13-
source /spack/spack/share/spack/setup-env.sh
14-
spack load py-mochi-sonata
156

16-
rm -f ./results/*
7+
echo "Loading spack modules. Please be patient!"
8+
source /spack/spack/share/spack/setup-env.sh
9+
spack load py-mochi-sonata boost
10+
export PATH=/opt/chimbuko/viz/redis-stable/src/:${PATH}
1711

18-
#Start the provenance database
19-
extra_args=""
20-
ps_extrargs=""
12+
rm -rf chimbuko
13+
export CHIMBUKO_CONFIG=chimbuko_config.sh
14+
source ${CHIMBUKO_CONFIG}
2115

2216
if (( 1 )); then
23-
rm provdb.*.unqlite provider.address
24-
25-
ip=eth0
26-
port=1234
27-
28-
echo "Instantiating provenance database"
29-
provdb_admin ${ip}:${port} 2>&1 | tee provdb.log &
30-
31-
sleep 1
32-
if ! [[ -f provider.address ]]; then
33-
echo "Provider address file not created after 1 second"
34-
exit 1
35-
fi
17+
echo "Running services"
18+
${chimbuko_services} 2>&1 | tee services.log &
19+
echo "Waiting"
20+
while [ ! -f chimbuko/vars/chimbuko_ad_cmdline.var ]; do sleep 1; done
3621

37-
prov_add=$(cat provider.address)
38-
extra_args="-provdb_addr ${prov_add}"
39-
ps_extraargs="-provdb_addr ${prov_add}"
40-
echo "Enabling provenance database in AD/PS with arg: ${extra_args}"
22+
#Get all the cmdline args for the AD
23+
ad_opts=$(cat chimbuko/vars/chimbuko_ad_opts.var)
4124
fi
4225

43-
#Run the viz
44-
using_viz=0
45-
if (( 1 )); then
46-
using_viz=1
47-
ip=$(hostname -i)
48-
run_dir=$(pwd)
49-
echo "Run dir ${run_dir}"
50-
51-
rm -rf viz_data
52-
mkdir viz_data
53-
viz_dir=$(readlink -f viz_data)
54-
PWD=$(pwd)
55-
viz_install=/opt/chimbuko/viz
56-
57-
export SERVER_CONFIG="production"
58-
if [ -z "${CHIMBUKO_VERBOSE}x" ]; then
59-
export SERVER_CONFIG="development"
60-
fi
61-
62-
export DATABASE_URL="sqlite:///${viz_dir}/main.sqlite"
63-
export ANOMALY_STATS_URL="sqlite:///${viz_dir}/anomaly_stats.sqlite"
64-
export ANOMALY_DATA_URL="sqlite:///${viz_dir}/anomaly_data.sqlite"
65-
export FUNC_STATS_URL="sqlite:///${viz_dir}/func_stats.sqlite"
66-
export PROVENANCE_DB="${PWD}/"
67-
export PROVDB_ADDR=$(cat provider.address)
68-
export SHARDED_NUM=1
69-
export C_FORCE_ROOT=1
70-
#export SIMULATION_JSON="${WORK_DIR}/${DATA_NAME}/stats/"
71-
72-
cd ${viz_install}
73-
74-
echo "run redis ..."
75-
redis-stable/src/redis-server redis-stable/redis.conf 2>&1 | tee ${run_dir}/redis.log &
76-
sleep 5
77-
78-
echo "run celery ..."
79-
python3 manager.py celery --loglevel=info 2>&1 | tee ${run_dir}/celery.log &
80-
sleep 5
81-
82-
echo "create db ..."
83-
python3 manager.py createdb 2>&1 | tee ${run_dir}/create_db.log
84-
sleep 2
85-
86-
echo "run webserver (server config ${SERVER_CONFIG}) with provdb on ${PROVDB_ADDR}... Logging to ${run_dir}/viz.log"
87-
python3 manager.py runserver --host 0.0.0.0 --port 5002 --debug 2>&1 | tee ${run_dir}/viz.log &
88-
sleep 2
89-
90-
cd -
91-
92-
ws_addr="http://${ip}:5002/api/anomalydata"
93-
ps_extraargs+=" -ws_addr ${ws_addr}"
94-
fi
95-
96-
#Start the parameter server
97-
if (( 1 )); then
98-
ip=$(hostname -i)
99-
pserver_port=1235
100-
pserver_addr=tcp://${ip}:${pserver_port}
101-
pserver_nt=1
102-
pserver_logdir="."
103-
echo "Instantiating pserver"
104-
echo "Pserver $pserver_addr with extra args: '${ps_extraargs}'"
105-
pserver -nt ${pserver_nt} -logdir ${pserver_logdir} -port ${pserver_port} -stat_outputdir . ${ps_extraargs} 2>&1 | tee pserver.log &
106-
extra_args="${extra_args} -pserver_addr ${pserver_addr}"
107-
sleep 2
108-
fi
109-
110-
#Start the AD and application
111-
rm -rf *.bp *.sst
112-
11326
for (( i=0; i<${ntasks}; i++ ));
11427
do
11528
#Make sure each instance has a separate file to write to as this is not an MPI process and hence all the ranks will be 0
116-
filename_tau=tau-metrics-${i}
29+
filename_tau=${TAU_ADIOS2_PATH}/tau-metrics-${i}
11730
filename_chimbuko=tau-metrics-${i}-python3.6
11831

11932
#Overwrite the rank index in the data (which is always 0 here) with a new rank index labeling the instances
120-
driver ${TAU_ADIOS2_ENGINE} . ${filename_chimbuko} -prov_outputpath . ${extra_args} -perf_outputpath . -rank ${i} -override_rank 0 2>&1 | tee ad_${i}.log &
121-
#sstSinker ${TAU_ADIOS2_ENGINE} . ${filename_chimbuko} 0 2>&1 | tee sinker_${i}.log &
33+
driver ${TAU_ADIOS2_ENGINE} ${TAU_ADIOS2_PATH} ${filename_chimbuko} -rank ${i} -override_rank 0 ${ad_opts} 2>&1 | tee chimbuko/logs/ad_${i}.log &
12234
sleep 1
123-
TAU_ADIOS2_FILENAME=${filename_tau} CUDA_DEVICE=$i tau_python -T serial,papi,pthread,python,cupti,pdt,adios2 -cupti -um -v -tau-python-interpreter=python3 -adios2_trace runMainForPerformanceMeasure.py -n ${ntasks} -i $i 2>&1 | tee main_${i}.log &
35+
TAU_ADIOS2_FILENAME=${filename_tau} CUDA_DEVICE=$i ${TAU_PYTHON} runMainForPerformanceMeasure.py -n ${ntasks} -i $i 2>&1 | tee main_${i}.log &
12436
done
12537

126-
echo "Waiting for processes to end"
127-
wait $(pidof tau_python)
128-
129-
#Shutdown the viz
130-
if (( ${using_viz} == 1 )); then
131-
echo "Shutting down viz"
132-
cd /opt/chimbuko/viz
133-
./webserver/shutdown_webserver.sh
134-
fi
135-
13638
wait
13739

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#!/bin/bash
2+
export TAU_MAKEFILE=/opt/tau2/x86_64/lib/Makefile.tau-papi-pthread-python-cupti-pdt-adios2
3+
export TAU_ADIOS2_PERIODIC=1
4+
export TAU_ADIOS2_PERIOD=1000000
5+
export TAU_ADIOS2_ENGINE=BPFile
6+
7+
rm -f ./results/*
8+
9+
ntasks=1
10+
11+
12+
for (( i=0; i<${ntasks}; i++ ));
13+
do
14+
TAU_ADIOS2_FILENAME=tau-metrics-${i} CUDA_DEVICE=$i tau_python -T serial,papi,pthread,python,cupti,pdt,adios2 -cupti -um -env -vv -tau-python-interpreter=python3 -adios2_trace runMainForPerformanceMeasure.py -n ${ntasks} -i $i 2>&1 | tee main.log &
15+
done
16+
wait

0 commit comments

Comments
 (0)