# Global build arguments. ARGs declared before FROM are only in scope for FROM
# lines; a stage has to redeclare them (`ARG NAME`) before it can use them.
# HABANA_VERSION selects the release directory in the base image path below.
ARG HABANA_VERSION=1.15.1
# Full base image reference; override with `--build-arg BASEIMAGE=...`.
ARG BASEIMAGE=vault.habana.ai/gaudi-docker/${HABANA_VERSION}/rhel9.2/habanalabs/pytorch-installer-2.2.0

FROM ${BASEIMAGE} AS runtime
# base image has PyTorch fork with Habana plugins in self-compiled Python 3.10
# PYTHON is the interpreter name; it is exported as ENV below, used to create
# the virtual environment and to build the lib/<python>/site-packages path.
ARG PYTHON=python3.10

# Persist the interpreter name and the virtual environment prefix. APP_ROOT
# must be set in an earlier instruction than the ones that expand it, because
# substitution inside a single ENV instruction uses the previous values.
ENV PYTHON="${PYTHON}"
ENV APP_ROOT="/opt/app-root"

# pip behaviour: no "new version available" check, no .pyc compilation.
ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_NO_COMPILE=1

# "Activate" the virtual environment for every later RUN step and for the
# running container: venv bin/ first on PATH, plus a matching shell prompt.
ENV VIRTUAL_ENV="${APP_ROOT}" \
    PATH="${APP_ROOT}/bin:${PATH}" \
    PS1="(app-root) \w\$ "

# Create the virtual environment with the interpreter named by ${PYTHON}:
#   --system-site-packages  keeps the interpreter's site-packages importable
#                           from inside the venv
#   --upgrade-deps          upgrades the venv's bundled packaging tools
# Then create the src/ directory that later becomes HOME / WORKDIR, and drop
# byte-code caches in the same layer so they never reach the image.
# `find -exec ... {} +` replaces `find | xargs`: the pipeline hid a failing
# `find` (no pipefail in /bin/sh) and split paths on whitespace.
RUN ${PYTHON} -m venv --upgrade-deps --system-site-packages "${VIRTUAL_ENV}" && \
    mkdir "${VIRTUAL_ENV}/src" && \
    find "${VIRTUAL_ENV}" -name __pycache__ -prune -exec rm -rf {} +

# Install the project's sitecustomize.py into the venv's site-packages
# directory and the debug-* helper scripts into the venv's bin/ directory
# (which is first on PATH).
COPY containers/sitecustomize.py ${VIRTUAL_ENV}/lib/${PYTHON}/site-packages/
COPY containers/bin/debug-* ${VIRTUAL_ENV}/bin/

# Install third-party dependencies before the full source tree is copied, so
# this layer is reused until one of the requirements files changes.
#
# -mno-avx: work around a build problem with llama-cpp-python and gcc.
# flash-attn is compiled from source, bitsandbytes has a manylinux wheel
#
# Steps:
#   1. constraints.txt is requirements.txt with every "[...]" (extras) part
#      stripped; it is passed to pip via -c when building llama_cpp_python.
#   2. PIP_NO_CACHE_DIR is exported for all pip calls in this step; it is
#      meant to keep pip's download cache out of the layer.
#   3. llama_cpp_python is forced to build from source (--no-binary) with the
#      CFLAGS / CMAKE_ARGS / FORCE_CMAKE settings below.
#   4. Everything in requirements.txt and requirements-hpu.txt is installed.
#
# The destination of a multi-source COPY must end with "/".
# Every command is chained with `&&` so the first failure aborts the build.
COPY requirements.txt requirements-hpu.txt /tmp/
RUN sed 's/\[.*\]//' /tmp/requirements.txt >/tmp/constraints.txt && \
    export PIP_NO_CACHE_DIR=off && \
    "${VIRTUAL_ENV}/bin/pip" install wheel && \
    CMAKE_ARGS="" \
        CFLAGS="-mno-avx" \
        FORCE_CMAKE=1 \
        "${VIRTUAL_ENV}/bin/pip" install --no-binary llama_cpp_python -c /tmp/constraints.txt llama_cpp_python && \
    "${VIRTUAL_ENV}/bin/pip" install -r /tmp/requirements.txt -r /tmp/requirements-hpu.txt && \
    rm /tmp/constraints.txt && \
    find "${VIRTUAL_ENV}" -name __pycache__ -prune -exec rm -rf {} +

# Copy the build context and install the project itself with its "habana"
# extra into the virtual environment, print the resulting package set into
# the build log, and remove byte-code caches in the same layer.
# NOTE(review): /tmp/instructlab stays in the image because it is added by
# its own COPY layer; a later `rm` would not shrink the image.
COPY . /tmp/instructlab
RUN "${VIRTUAL_ENV}/bin/pip" install "/tmp/instructlab[habana]" && \
    "${VIRTUAL_ENV}/bin/pip" list && \
    find "${VIRTUAL_ENV}" -name __pycache__ -prune -exec rm -rf {} +

# Use the venv's src/ directory (created together with the venv) as both the
# home directory and the working directory of the container.
ENV HOME="${VIRTUAL_ENV}/src"
WORKDIR "${HOME}"
# Hard-coded path; equals ${HOME} as long as APP_ROOT keeps its default
# value. This is the mount point shown in the `usage` label.
VOLUME ["/opt/app-root/src"]
# Default to an interactive shell.
CMD ["/bin/bash"]


# Runtime defaults baked into the image. Each one can be overridden when the
# container is started, e.g. `-e HABANA_VISIBLE_MODULES="0,1"`.
ENV HABANA_VISIBLE_MODULES="all"
ENV TSAN_OPTIONS='ignore_noninstrumented_modules=1'
ENV PT_HPU_LAZY_MODE=0 \
    PT_HPU_ENABLE_EAGER_CACHE=TRUE \
    PT_HPU_EAGER_4_STAGE_PIPELINE_ENABLE=TRUE \
    PT_ENABLE_INT64_SUPPORT=1

# workaround for race condition in libgomp / oneMKL (HS-1795)
ENV OMP_NUM_THREADS=8

# HABANA_VERSION is declared before FROM and is therefore out of scope inside
# the stage. Redeclare it (without a value, so the global default or the
# --build-arg value is inherited); otherwise ${HABANA_VERSION} expands to an
# empty string in the labels below. It is declared this late so that changing
# the version does not invalidate more cached layers than necessary.
ARG HABANA_VERSION

# requires habanalabs-container-runtime package with OCI hooks
LABEL com.github.instructlab.instructlab.target="hpu" \
      name="instructlab-hpu-${HABANA_VERSION}" \
      summary="PyTorch, llama.cpp, and InstructLab for Intel Gaudi / Habana Labs ${HABANA_VERSION}" \
      usage="podman run -ti --privileged -v ./data:/opt/app-root/src:z ..."
