feat(server): Machine learning's image optimisations (#1908)

* Use multi-stage build to slim down ML image size
* Use gunicorn as WSGI server in ML image
* Configure gunicorn server for ML use case
* Use requirements.txt file to install python dependencies in ML image
* Make ML listen IP configurable
2024-11-16 02:18:50 -07:00 · 2023-03-01 15:37:12 +00:00 · 2023-03-01 15:37:12 +00:00 · 977740045a
commit 977740045a
parent 2a1dcbc28b
3 changed files with 79 additions and 10 deletions
--- a/machine-learning/Dockerfile
+++ b/machine-learning/Dockerfile
@ -1,19 +1,26 @@
-FROM python:3.10
+FROM python:3.10 as builder
 # Builder stage: creates a virtualenv at /opt/venv and installs the pinned
 # packages listed in requirements.txt into it.
 ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=true
 COPY requirements.txt ./
 # --no-deps: install exactly the packages listed in requirements.txt and do
 # not let pip pull in additional dependencies on its own.
 RUN python -m venv /opt/venv && \
    /opt/venv/bin/pip install --upgrade pip setuptools wheel && \
    /opt/venv/bin/pip install --no-deps -r requirements.txt
 # Runtime stage: slim base image that receives only the prebuilt virtualenv
 # copied out of the builder stage.
 FROM python:3.10-slim
 COPY --from=builder /opt/venv /opt/venv
 # Prepending /opt/venv/bin to PATH makes the virtualenv's executables the
 # ones that are found first.
 ENV TRANSFORMERS_CACHE=/cache \
     PYTHONDONTWRITEBYTECODE=1 \
     PYTHONUNBUFFERED=1 \
-    PIP_NO_CACHE_DIR=true
+    PATH="/opt/venv/bin:$PATH"
 WORKDIR /usr/src/app
 # NOTE(review): the venv/pip lines below carry no +/- marker in this view;
 # presumably they are the pre-change install steps that the requirements.txt
 # install in the builder stage replaces - confirm against the real file.
 RUN python -m venv /opt/venv
 ENV PATH="/opt/venv/bin:$PATH"
 RUN pip install --pre torch  -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
 RUN pip install transformers tqdm numpy scikit-learn scipy nltk sentencepiece flask Pillow
 RUN pip install --no-deps sentence-transformers
 COPY . .
 # The container command changes from running src/main.py directly to
 # launching gunicorn with the application target "src.main:server".
-CMD ["python", "src/main.py"]
+CMD ["gunicorn", "src.main:server"]
--- a/machine-learning/gunicorn.conf.py
+++ b/machine-learning/gunicorn.conf.py
@ -0,0 +1,29 @@
 """
 Gunicorn configuration options.
 https://docs.gunicorn.org/en/stable/settings.html
 """
 import os
 # Set the bind address based on the env
 port = os.getenv("MACHINE_LEARNING_PORT") or "3003"
 listen_ip = os.getenv("MACHINE_LEARNING_IP") or "0.0.0.0"
 bind = [f"{listen_ip}:{port}"]
 # Preload the Flask app / models etc. before starting the server
 preload_app = True
 # Logging settings - log to stdout and set log level
 accesslog = "-"
 loglevel = os.getenv("MACHINE_LEARNING_LOG_LEVEL") or "info"
 # Worker settings
 # ----------------------
 # It is important these are chosen carefully as per
 # https://pythonspeed.com/articles/gunicorn-in-docker/
 # Otherwise we get workers failing to respond to heartbeat checks,
 # especially as requests take a long time to complete.
 workers = 2
 threads = 4
 worker_tmp_dir = "/dev/shm"
 timeout = 60
--- a/machine-learning/requirements.txt
+++ b/machine-learning/requirements.txt
@ -0,0 +1,33 @@
 certifi==2022.12.7
 charset-normalizer==3.0.1
 click==8.1.3
 filelock==3.9.0
 Flask==2.2.3
 gunicorn==20.1.0
 huggingface-hub==0.12.1
 idna==3.4
 importlib-metadata==6.0.0
 itsdangerous==2.1.2
 Jinja2==3.1.2
 joblib==1.2.0
 MarkupSafe==2.1.2
 nltk==3.8.1
 numpy==1.24.2
 packaging==23.0
 Pillow==9.4.0
 PyYAML==6.0
 regex==2022.10.31
 requests==2.28.2
 scikit-learn==1.2.1
 scipy==1.10.1
 sentence-transformers==2.2.2
 sentencepiece==0.1.97
 threadpoolctl==3.1.0
 tokenizers==0.13.2
 torch==1.13.1 -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
 tqdm==4.64.1
 transformers==4.26.1
 typing-extensions==4.5.0
 urllib3==1.26.14
 Werkzeug==2.2.3
 zipp==3.15.0