#!/usr/bin/env python3
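"""Download pre-built compiler packages from a container registry.

Each compiler is published as an OCI image (by default under
ghcr.io/decompme/compilers). This script pulls the image layers directly
over the registry HTTP API and unpacks them into the compilers directory,
so no Docker daemon is required. The set of compilers comes from
compilers.<os>.yaml next to this script.

Example invocation (the platform/compiler IDs here are illustrative; the
real ones come from the yaml config):

    ./download.py --platforms n64 --compilers ido5.3
"""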
import argparse
import datetime
import logging
import os
import platform
import queue
import shutil
import sys
import tempfile
import threading
from pathlib import Path

import requests
import yaml

logger = logging.getLogger(__name__)


def get_token(docker_registry, github_repo, docker_image, session=None):
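    """Request an anonymous pull token for ``docker_image`` from the registry.

    Returns the token string, or None if the token endpoint does not answer
    with HTTP 200 (typically because the image does not exist).
    """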
    token_url = f"https://{docker_registry}/token?scope=repository:{github_repo}/{docker_image}:pull"
    getter = requests if session is None else session
    resp = getter.get(token_url, timeout=10)
    if resp.status_code != 200:
        # most likely the image does not exist in the remote registry
        return None
    return resp.json().get("token", "")


def get_remote_image_digest(
    docker_image,
    docker_registry="ghcr.io",
    github_repo="decompme/compilers",
    tag="latest",
    session=None,
):
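    """Return the remote content digest for ``docker_image`` at ``tag``.

    The digest is read from the docker-content-digest header of the manifest
    response; None is returned when the manifest cannot be fetched.
    """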
    token = get_token(docker_registry, github_repo, docker_image, session=session)
    image_url = (
        f"https://{docker_registry}/v2/{github_repo}/{docker_image}/manifests/{tag}"
    )
    headers = {
        "Accept": "application/vnd.oci.image.index.v1+json",
        "Authorization": f"Bearer {token}",
    }
    getter = requests if session is None else session
    resp = getter.get(image_url, headers=headers, timeout=10)
    if resp.status_code != 200:
        logger.debug(
            "Unable to get image manifest for %s:%s from %s: %s",
            docker_image,
            tag,
            github_repo,
            resp.text,
        )
        return None
    digest = resp.headers.get("docker-content-digest")
    return digest


def get_compiler_raw(
    platform_id,
    compiler_id,
    compilers_dir=Path("/tmp"),
    force=False,
    docker_registry="ghcr.io",
    github_repo="decompme/compilers",
    tag="latest",
    session=None,
):
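    """Download and unpack one compiler image into ``compilers_dir``.

    Returns True on a successful download, False if extraction failed, and
    None when the compiler was skipped (already up to date) or not found.
    """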
    logger.info("Processing %s (%s)", compiler_id, platform_id)

    # fail fast if we cannot create the download cache
    download_cache = compilers_dir / ".download_cache"
    download_cache.mkdir(parents=True, exist_ok=True)

    clean_compiler_id = compiler_id.lower().replace("+", "plus")
    docker_image = f"{platform_id}/{clean_compiler_id}"

    logger.debug("Checking for %s in registry", docker_image)
    remote_image_digest = get_remote_image_digest(
        docker_image,
        docker_registry=docker_registry,
        github_repo=github_repo,
        tag=tag,
        session=session,
    )
    if remote_image_digest is None:
        host_arch = platform.system().lower()
        logger.debug(
            "%s not found in registry, checking for '%s' specific version",
            docker_image,
            host_arch,
        )
        docker_image = f"{platform_id}/{clean_compiler_id}/{host_arch}"
        remote_image_digest = get_remote_image_digest(
            docker_image,
            docker_registry=docker_registry,
            github_repo=github_repo,
            tag=tag,
            session=session,
        )
        if remote_image_digest is None:
            logger.error("%s not found in registry!", docker_image)
            return None

    compiler_dir = compilers_dir / platform_id / compiler_id
    image_digest = compiler_dir / ".image_digest"
    if (
        image_digest.exists()
        and image_digest.read_text() == remote_image_digest
        and not force
    ):
        logger.debug(
            "%s image is present and at latest version, skipping!", compiler_id
        )
        return None

    # First, get a token to do our operations with
    token = get_token(
        docker_registry,
        github_repo,
        docker_image,
        session=session,
    )

    # Then, get the container image index. This lists all the container
    # images associated with this tag. There may be different images for
    # different OSes/platforms, but in our case we only expect one.
    image_url = (
        f"https://{docker_registry}/v2/{github_repo}/{docker_image}/manifests/{tag}"
    )
    headers = {
        "Accept": "application/vnd.oci.image.index.v1+json",
        "Authorization": f"Bearer {token}",
    }
    getter = requests if session is None else session
    resp = getter.get(image_url, headers=headers, timeout=10)
    if resp.status_code != 200:
        return None
    data = resp.json()

    # Grab the first image manifest and download it.
    manifest = next(
        manifest
        for manifest in data["manifests"]
        if manifest["mediaType"] == "application/vnd.oci.image.manifest.v1+json"
    )
    digest = manifest["digest"]
    mime = manifest["mediaType"]

    headers["Accept"] = mime
    url = (
        f"https://{docker_registry}/v2/{github_repo}/{docker_image}/manifests/{digest}"
    )
    resp = getter.get(url, headers=headers, timeout=10)
    if resp.status_code != 200:
        return None
    data = resp.json()

    with tempfile.TemporaryDirectory() as temp_dir:
        # Then, download all the layers
        for layer_num, layer in enumerate(data["layers"]):
            digest = layer["digest"]
            mime = layer["mediaType"]
            url = f"https://{docker_registry}/v2/{github_repo}/{docker_image}/blobs/{digest}"
            resp = getter.get(url, headers=headers, stream=True)
            resp.raise_for_status()
            # TODO: Get extension from mime
            tar_file = (
                Path(temp_dir) / f"{platform_id}_{compiler_id}_{layer_num}.tar.gz"
            )
            logger.debug("Writing layer %s (%s) to %s...", layer_num, digest, tar_file)
            with tar_file.open("wb") as f:
                for chunk in resp.iter_content(chunk_size=10 * 1024 * 1024):
                    f.write(chunk)

        # Extract all the layers
        for layer_num, layer in enumerate(data["layers"]):
            tar_file = (
                Path(temp_dir) / f"{platform_id}_{compiler_id}_{layer_num}.tar.gz"
            )
            logger.debug("Extracting layer %s to %s", tar_file, download_cache)
            shutil.unpack_archive(tar_file, download_cache)

    # And finally, move the compiler_container_path (the prefix where the
    # compiler is stored in the docker archive) to the final destination.
    compiler_container_path = f"compilers/{platform_id}/{compiler_id}"
    compiler_download_cache = download_cache / compiler_container_path
    if not compiler_download_cache.exists():
        logger.error("Extracting failed to create %s", compiler_download_cache)
        return False

    if compiler_dir.exists():
        logger.debug("Removing existing compiler dir: %s", compiler_dir)
        shutil.rmtree(compiler_dir)

    logger.debug("shutil.move %s -> %s", compiler_download_cache, compiler_dir)
    shutil.move(str(compiler_download_cache), str(compiler_dir))

    image_digest.write_text(remote_image_digest)

    logger.info(
        "%s (%s) was successfully downloaded into %s",
        compiler_id,
        platform_id,
        compiler_dir,
    )

    return True


class DownloadThread(threading.Thread):
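    """Worker thread that drains the download queue and fetches compilers.

    Each thread gets its own requests.Session, since Session objects are not
    documented as safe to share across threads.
    """
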
    def __init__(
        self,
        download_queue: queue.Queue,
        results_queue: queue.Queue,
        *args,
        compilers_dir=Path("/tmp"),
        force=False,
        docker_registry="ghcr.io",
        github_repo="decompme/compilers",
        **kwargs,
    ):
        self.download_queue = download_queue
        self.results_queue = results_queue
        self.compilers_dir = compilers_dir
        self.force = force
        self.docker_registry = docker_registry
        self.github_repo = github_repo
        self.session = requests.Session()
        super().__init__(*args, **kwargs)

    def run(self):
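        """Pull items off the download queue until it is empty, logging and
        swallowing any per-item exception so one failure cannot kill the
        thread."""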
        while True:
            try:
                item = self.download_queue.get_nowait()
            except queue.Empty:
                break

            try:
                self.process_item(item)
            except Exception as e:
                logger.error("Exception thrown while processing item: %s", e)
            finally:
                self.download_queue.task_done()

    def process_item(self, item):
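        """Download one (platform_id, compiler_id) pair and record the result."""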
        platform_id, compiler_id = item
        result = get_compiler_raw(
            platform_id,
            compiler_id,
            compilers_dir=self.compilers_dir,
            force=self.force,
            docker_registry=self.docker_registry,
            github_repo=self.github_repo,
            session=self.session,
        )
        self.results_queue.put(result)


def main():
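    """Parse arguments, build the download queue, and run the worker threads."""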
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--force",
        help="Force (re)downloading of compilers",
        action="store_true",
    )
    parser.add_argument(
        "--compilers-dir",
        type=str,
        default=None,
        help="Directory where compilers will be stored",
    )
    parser.add_argument(
        "--platforms", type=str, nargs="+", help="Only run for these platforms"
    )
    parser.add_argument(
        "--compilers", type=str, nargs="+", help="Only run for these compilers"
    )
    parser.add_argument(
        "--docker-registry",
        type=str,
        default="ghcr.io",
        help="Docker registry where the packages live",
    )
    parser.add_argument(
        "--github-repo",
        type=str,
        default="decompme/compilers",
        help="Name of the GitHub repo that owns the packages",
    )
    parser.add_argument(
        "--threads", type=int, default=8, help="Number of download threads to use"
    )
    parser.add_argument("--verbose", action="store_true", help="Enable DEBUG log level")
    args = parser.parse_args()

    if args.verbose:
        logger.setLevel("DEBUG")

    if args.compilers_dir:
        compilers_dir = Path(args.compilers_dir)
    else:
        compilers_dir = Path(os.path.dirname(os.path.realpath(__file__)))

    # platform.system() reports the OS name ("linux" or "darwin"), which
    # selects the matching compiler configuration file
    host_arch = platform.system().lower()
    if host_arch not in ["darwin", "linux"]:
        logger.critical(
            "No compiler configuration is available for host system '%s', exiting",
            host_arch,
        )
        sys.exit(0)

    download_queue = queue.Queue()
    results_queue = queue.Queue()

    compilers_yaml = (
        Path(os.path.dirname(os.path.realpath(__file__)))
        / f"compilers.{host_arch}.yaml"
    )
    compilers_config = yaml.safe_load(compilers_yaml.read_text())

    for platform_id, compilers in compilers_config.items():
        if args.platforms is not None:
            platform_enabled = platform_id in args.platforms
        else:
            # platforms are considered enabled unless explicitly disabled
            platform_enabled = (
                os.environ.get(f"ENABLE_{platform_id.upper()}_SUPPORT", "YES").upper()
                != "NO"
            )
        if args.compilers is not None:
            compilers = filter(lambda x: x in args.compilers, compilers)

        if platform_enabled:
            for compiler in compilers:
                download_queue.put(
                    (platform_id, compiler),
                )

    if download_queue.qsize() == 0:
        logger.warning(
            "No platforms/compilers configured or enabled for host system (%s)",
            host_arch,
        )
        return

    threads = []
    for _ in range(args.threads):
        thread = DownloadThread(
            download_queue,
            results_queue,
            compilers_dir=compilers_dir,
            force=args.force,
            docker_registry=args.docker_registry,
            github_repo=args.github_repo,
        )
        threads.append(thread)

    start = datetime.datetime.now()
    for thread in threads:
        thread.start()

    download_queue.join()
    for thread in threads:
        thread.join(timeout=0.1)

    results = []
    while True:
        try:
            item = results_queue.get(timeout=1)
            results.append(item)
        except queue.Empty:
            break
    end = datetime.datetime.now()

    compilers_downloaded = len(list(filter(lambda x: x, results)))
    logger.info(
        "Updated %i / %i compiler(s) in %.2f second(s)",
        compilers_downloaded,
        len(results),
        (end - start).total_seconds(),
    )


if __name__ == "__main__":
    logging.basicConfig(
        handlers=[logging.StreamHandler()],
        level=logging.INFO,
        format=("%(asctime)s.%(msecs)03d %(levelname)s %(funcName)s %(message)s"),
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    main()