feat(server): Machine learning's image optimisations (#1908)

* Use multi stage build to slim down ML image size

* Use gunicorn as WSGI server in ML image

* Configure gunicorn server for ML use case

* Use requirements.txt file to install python dependencies in ML image

* Make ML listen IP configurable
This commit is contained in:
Olly Welch 2023-03-01 15:37:12 +00:00 committed by GitHub
parent 2a1dcbc28b
commit 977740045a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 79 additions and 10 deletions

View File

@@ -1,19 +1,26 @@
# Build stage: use the full python image (compilers/headers available) to
# install all Python dependencies into an isolated virtualenv.
FROM python:3.10 AS builder

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=true

COPY requirements.txt ./

# Pin everything via requirements.txt; --no-deps ensures only the listed,
# locked versions are installed (no surprise transitive upgrades).
RUN python -m venv /opt/venv && \
    /opt/venv/bin/pip install --upgrade pip setuptools wheel && \
    /opt/venv/bin/pip install --no-deps -r requirements.txt

# Runtime stage: slim base; only the prebuilt virtualenv is carried over,
# keeping compilers and build caches out of the final image.
FROM python:3.10-slim

COPY --from=builder /opt/venv /opt/venv

# Putting the venv first on PATH makes its python/gunicorn the default.
ENV TRANSFORMERS_CACHE=/cache \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PATH="/opt/venv/bin:$PATH"

WORKDIR /usr/src/app

COPY . .

# Exec-form CMD; gunicorn picks up bind/workers/timeouts from gunicorn.conf.py.
CMD ["gunicorn", "src.main:server"]

View File

@@ -0,0 +1,29 @@
"""
Gunicorn configuration options.
https://docs.gunicorn.org/en/stable/settings.html
"""
import os
# Set the bind address based on the env
port = os.getenv("MACHINE_LEARNING_PORT") or "3003"
listen_ip = os.getenv("MACHINE_LEARNING_IP") or "0.0.0.0"
bind = [f"{listen_ip}:{port}"]
# Preload the Flask app / models etc. before starting the server
preload_app = True
# Logging settings - log to stdout and set log level
accesslog = "-"
loglevel = os.getenv("MACHINE_LEARNING_LOG_LEVEL") or "info"
# Worker settings
# ----------------------
# It is important these are chosen carefully as per
# https://pythonspeed.com/articles/gunicorn-in-docker/
# Otherwise we get workers failing to respond to heartbeat checks,
# especially as requests take a long time to complete.
workers = 2
threads = 4
worker_tmp_dir = "/dev/shm"
timeout = 60

View File

@@ -0,0 +1,33 @@
certifi==2022.12.7
charset-normalizer==3.0.1
click==8.1.3
filelock==3.9.0
Flask==2.2.3
gunicorn==20.1.0
huggingface-hub==0.12.1
idna==3.4
importlib-metadata==6.0.0
itsdangerous==2.1.2
Jinja2==3.1.2
joblib==1.2.0
MarkupSafe==2.1.2
nltk==3.8.1
numpy==1.24.2
packaging==23.0
Pillow==9.4.0
PyYAML==6.0
regex==2022.10.31
requests==2.28.2
scikit-learn==1.2.1
scipy==1.10.1
sentence-transformers==2.2.2
sentencepiece==0.1.97
threadpoolctl==3.1.0
tokenizers==0.13.2
--find-links https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
torch==1.13.1
tqdm==4.64.1
transformers==4.26.1
typing-extensions==4.5.0
urllib3==1.26.14
Werkzeug==2.2.3
zipp==3.15.0