Skip to content
This repository was archived by the owner on Nov 17, 2023. It is now read-only.

Commit 82384cb

Browse files
authored
[v1.9.x] Backport docker cache fixes (#20537)
* [v1.x] Fix docker cache build pipeline (#20385)
* libturbojpeg0-dev is only for ubuntu 18.04, so don't install on other ubuntu versions.
* Change logic to not install on 16.04 (works on 18.04 and 20.04.)
* Fix docker registry variable - was inadvertently changed to DOCKER_CACHE_REGISTRY from DOCKER_ECR_REGISTRY.
* Use our local ECR registry for docker-compose images as well.
* Revert changing DOCKER_CACHE_REGISTRY variable.
* Override DOCKER_CACHE_REGISTRY for docker-compose in docker cache pipeline.
* Login to ECR if using ECR for docker registry.
* Import docker_cache when needed.
* Add support for awscli v2, use docker credentials already saved from docker_cache.py run. (#20458)
* [v1.x] Docker cache enhancements (#20525)
* Properly initialize set so that docker cache builds exclude the centos7 dockerfile.
* Change the number of parallel builds to 1 for easier debugging and bandwidth control, also parameterize the number of build retries and reduce from 10 to 1 for easier debugging.
* Remove old dockerfiles.
* Update docker-compose config to use new ECR-based naming scheme.
* Refactor build.py and docker_cache.py to support building and pushing containers for the docker-compose based platforms. Add options for selecting architecture (defaults to x86) since aarch64 container builds will fail on x86 systems.
* Remove old commands for building docker-compose based containers since they are not built in docker_cache.py.
* Fix running of docker-compose based containers via build.py.
1 parent b5e9ae8 commit 82384cb

9 files changed

Lines changed: 94 additions & 283 deletions

ci/Jenkinsfile_docker_cache

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,6 @@ core_logic: {
3838
timeout(time: total_timeout, unit: 'MINUTES') {
3939
utils.init_git()
4040
sh "ci/docker_cache.py --docker-registry ${env.DOCKER_ECR_REGISTRY}"
41-
sh "cd ci && python3 ./docker_login.py --secret-name ${env.DOCKERHUB_SECRET_NAME} && docker-compose -f docker/docker-compose.yml build --parallel && docker-compose -f docker/docker-compose.yml push && docker logout"
4241
}
4342
}
4443
}

ci/Jenkinsfile_utils.groovy

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -148,7 +148,7 @@ def collect_test_results_windows(original_file_name, new_file_name) {
148148

149149

150150
def docker_run(platform, function_name, use_nvidia, shared_mem = '500m', env_vars = "", build_args = "") {
151-
def command = "ci/build.py %ENV_VARS% %BUILD_ARGS% --docker-registry ${env.DOCKER_CACHE_REGISTRY} %USE_NVIDIA% --platform %PLATFORM% --docker-build-retries 3 --shm-size %SHARED_MEM% /work/runtime_functions.sh %FUNCTION_NAME%"
151+
def command = "ci/build.py %ENV_VARS% %BUILD_ARGS% --docker-registry ${env.DOCKER_ECR_REGISTRY} %USE_NVIDIA% --platform %PLATFORM% --docker-build-retries 3 --shm-size %SHARED_MEM% /work/runtime_functions.sh %FUNCTION_NAME%"
152152
command = command.replaceAll('%ENV_VARS%', env_vars.length() > 0 ? "-e ${env_vars}" : '')
153153
command = command.replaceAll('%BUILD_ARGS%', env_vars.length() > 0 ? "${build_args}" : '')
154154
command = command.replaceAll('%USE_NVIDIA%', use_nvidia ? '--nvidiadocker' : '')

ci/build.py

Lines changed: 54 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -42,24 +42,36 @@
4242

4343
from util import *
4444

45-
DOCKER_COMPOSE_WHITELIST = ('centos7_cpu', 'centos7_gpu_cu92', 'centos7_gpu_cu100',
46-
'centos7_gpu_cu101', 'centos7_gpu_cu102', 'centos7_gpu_cu110',
47-
'centos7_gpu_cu112')
48-
4945
# Files for docker compose
50-
DOCKER_COMPOSE_FILES = set(('docker/build.centos7'))
46+
DOCKER_COMPOSE_FILES = set(['docker/build.centos7'])
47+
48+
# keywords to identify arm-based dockerfiles
49+
AARCH_FILE_KEYWORDS = ['armv', 'aarch64']
5150

5251
def get_dockerfiles_path():
5352
return "docker"
5453

54+
def get_docker_compose_platforms(path: str = get_dockerfiles_path()):
55+
platforms = set()
56+
with open(os.path.join(path, "docker-compose.yml"), "r") as f:
57+
compose_config = yaml.load(f.read(), yaml.SafeLoader)
58+
for platform in compose_config["services"]:
59+
platforms.add(platform)
60+
return platforms
5561

56-
def get_platforms(path: str = get_dockerfiles_path(), legacy_only=False) -> List[str]:
57-
"""Get a list of architectures given our dockerfiles"""
62+
63+
def get_platforms(path: str = get_dockerfiles_path(), arch='x86') -> List[str]:
64+
"""Get a list of platforms given our dockerfiles"""
5865
dockerfiles = glob.glob(os.path.join(path, "Dockerfile.*"))
5966
dockerfiles = set(filter(lambda x: x[-1] != '~', dockerfiles))
6067
files = set(map(lambda x: re.sub(r"Dockerfile.(.*)", r"\1", x), dockerfiles))
61-
if legacy_only:
62-
files = files - DOCKER_COMPOSE_FILES
68+
files = files - DOCKER_COMPOSE_FILES
69+
files.update(["build."+x for x in get_docker_compose_platforms()])
70+
arm_files = set(filter(lambda x: any(y in x for y in AARCH_FILE_KEYWORDS), files))
71+
if arch == 'x86':
72+
files = files - arm_files
73+
elif arch == 'aarch64':
74+
files = arm_files
6375
platforms = list(map(lambda x: os.path.split(x)[1], sorted(files)))
6476
return platforms
6577

@@ -87,14 +99,21 @@ def _hash_file(ctx, filename):
8799
break
88100
ctx.update(d)
89101

102+
def is_docker_compose(platform: str) -> bool:
103+
""":return: boolean whether specified platform container uses docker-compose"""
104+
platlist = get_docker_compose_platforms()
105+
platform = platform.split(".")[1] if any(x in platform for x in ['build.', 'publish.']) else platform
106+
return platform in platlist
107+
108+
90109
def get_docker_tag(platform: str, registry: str) -> str:
91110
""":return: docker tag to be used for the container"""
92-
if platform in DOCKER_COMPOSE_WHITELIST:
111+
platform = platform if any(x in platform for x in ['build.', 'publish.']) else 'build.{}'.format(platform)
112+
if is_docker_compose(platform):
93113
with open("docker/docker-compose.yml", "r") as f:
94114
compose_config = yaml.load(f.read(), yaml.SafeLoader)
95-
return compose_config["services"][platform]["image"].replace('${DOCKER_CACHE_REGISTRY}', registry)
115+
return compose_config["services"][platform.split(".")[1]]["image"].replace('${DOCKER_CACHE_REGISTRY}', registry)
96116

97-
platform = platform if any(x in platform for x in ['build.', 'publish.']) else 'build.{}'.format(platform)
98117
if not registry:
99118
registry = "mxnet_local"
100119
dockerfile = get_dockerfile(platform)
@@ -121,9 +140,9 @@ def build_docker(platform: str, registry: str, num_retries: int, no_cache: bool,
121140
:return: Id of the top level image
122141
"""
123142
tag = get_docker_tag(platform=platform, registry=registry)
124-
125143
# docker-compose
126-
if platform in DOCKER_COMPOSE_WHITELIST:
144+
if is_docker_compose(platform):
145+
docker_compose_platform = platform.split(".")[1] if any(x in platform for x in ['build.', 'publish.']) else platform
127146
logging.info('Building docker container tagged \'%s\' based on ci/docker/docker-compose.yml', tag)
128147
# We add a user with the same group as the executing non-root user so files created in the
129148
# container match permissions of the local user. Same for the group.
@@ -132,7 +151,7 @@ def build_docker(platform: str, registry: str, num_retries: int, no_cache: bool,
132151
"--build-arg", "GROUP_ID={}".format(os.getgid())]
133152
if cache_intermediate:
134153
cmd.append('--no-rm')
135-
cmd.append(platform)
154+
cmd.append(docker_compose_platform)
136155
else:
137156
logging.info("Building docker container tagged '%s'", tag)
138157
#
@@ -286,17 +305,24 @@ def docker_run_cmd(cmd):
286305
return 0
287306

288307

289-
def list_platforms() -> str:
290-
return "\nSupported platforms:\n{}".format('\n'.join(get_platforms()))
308+
def list_platforms(arch='x86') -> str:
309+
return "\nSupported platforms:\n{}".format('\n'.join(get_platforms(arch=arch)))
291310

292311

293312
def load_docker_cache(platform, tag, docker_registry) -> None:
294313
"""Imports tagged container from the given docker registry"""
295314
if docker_registry:
296-
if platform in DOCKER_COMPOSE_WHITELIST:
315+
if is_docker_compose(platform):
316+
docker_compose_platform = platform.split(".")[1] if any(x in platform for x in ['build.', 'publish.']) else platform
297317
env = os.environ.copy()
298318
env["DOCKER_CACHE_REGISTRY"] = docker_registry
299-
cmd = ['docker-compose', '-f', 'docker/docker-compose.yml', 'pull', platform]
319+
if "dkr.ecr" in docker_registry:
320+
try:
321+
import docker_cache
322+
docker_cache._ecr_login(docker_registry)
323+
except Exception:
324+
logging.exception('Unable to login to ECR...')
325+
cmd = ['docker-compose', '-f', 'docker/docker-compose.yml', 'pull', docker_compose_platform]
300326
logging.info("Running command: 'DOCKER_CACHE_REGISTRY=%s %s'", docker_registry, ' '.join(cmd))
301327
check_call(cmd, env=env)
302328
return
@@ -335,6 +361,11 @@ def main() -> int:
335361
help="platform",
336362
type=str)
337363

364+
parser.add_argument("-A", "--architecture",
365+
help="Architecture of images to build (x86 or aarch64). Default is x86.",
366+
default='x86',
367+
dest='architecture')
368+
338369
parser.add_argument("-b", "--build-only",
339370
help="Only build the container, don't build the project",
340371
action='store_true')
@@ -401,7 +432,7 @@ def main() -> int:
401432
for e in args.environment])
402433

403434
if args.list:
404-
print(list_platforms())
435+
print(list_platforms(arch=args.architecture))
405436
elif args.platform:
406437
platform = args.platform
407438
tag = get_docker_tag(platform=platform, registry=args.docker_registry)
@@ -445,9 +476,9 @@ def main() -> int:
445476
return ret
446477

447478
elif args.all:
448-
platforms = get_platforms()
479+
platforms = get_platforms(arch=args.architecture)
449480
platforms = [platform for platform in platforms if 'build.' in platform]
450-
logging.info("Building for all architectures: %s", platforms)
481+
logging.info("Building for all platforms: %s", platforms)
451482
logging.info("Artifacts will be produced in the build/ directory.")
452483
for platform in platforms:
453484
tag = get_docker_tag(platform=platform, registry=args.docker_registry)
@@ -474,7 +505,7 @@ def main() -> int:
474505

475506
else:
476507
parser.print_help()
477-
list_platforms()
508+
list_platforms(arch=args.architecture)
478509
print("""
479510
Examples:
480511

ci/docker/Dockerfile.build.ubuntu_gpu_cu80

Lines changed: 0 additions & 75 deletions
This file was deleted.

ci/docker/Dockerfile.build.ubuntu_gpu_cu90

Lines changed: 0 additions & 81 deletions
This file was deleted.

0 commit comments

Comments
 (0)