# Base image: Bitnami Spark, pinned to version 3.1.2.
FROM docker.io/bitnami/spark:3.1.2
# Run the following build steps as root (the apt-get install below needs it).
# NOTE(review): the image stays root at runtime; the usage example at the end mounts credentials under /root/.aws.
USER root

# TODO: Finish. To be used for dev, when we want to use yaetos code from the repo (i.e. latest) and not from PyPI (i.e. older).

# Pip installs
# git is installed first; presumably it is needed to pip-install code straight from a repository -- TODO confirm.
# ca-certificates is listed explicitly because --no-install-recommends would otherwise skip it
# (git only "recommends" it), and HTTPS clones need it.
# The apt package lists are removed in the same layer so they do not bloat the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
    && rm -rf /var/lib/apt/lists/*
# ... (placeholder: the remaining install steps have not been written yet)

# TODO: consider declaring all yaetos requirements in the package definition, to avoid having to install them separately.
# Uncomment the 2 lines below to install extra packages. This requires creating a requirements_extra.txt file in conf/. The file is first copied to a tmp dir so that Docker can cache this step (no re-install as long as requirements_extra.txt doesn't change).
# COPY conf/requirements_extra.txt /tmp/requirements_extra.txt
# RUN pip3 install -r /tmp/requirements_extra.txt

# Setup environment variables
# Each PYTHONPATH instruction prepends to the value built so far, so entries
# added later come first in the final search path. The instructions are kept
# separate because an ENV instruction only sees values set by earlier instructions.
# The Spark entries are added exactly once (a second, identical instruction
# would only duplicate them in the path).

# Location where the yaetos code is expected inside the container.
ENV PYSPARK_AWS_ETL_HOME=/mnt/yaetos/
ENV PYTHONPATH=$PYSPARK_AWS_ETL_HOME:$PYTHONPATH
# ENV SPARK_HOME /usr/local/spark # already set in base docker image
ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/build:$PYTHONPATH

# Location where the external pipelines (jobs) are expected inside the container.
ENV PYSPARK_AWS_ETL_JOBS_HOME=/mnt/external_pipelines/
ENV PYTHONPATH=$PYSPARK_AWS_ETL_JOBS_HOME:$PYTHONPATH


# Ports exposed for monitoring:
#   4040 - SparkContext web UI (only available for the duration of the application)
#   8080 - Spark master's web UI
#   8081 - Spark worker web UI
EXPOSE 4040
EXPOSE 8080
EXPOSE 8081

# Default command: start a bash shell (the usage example below runs the container with -it).
CMD ["/bin/bash"]

# Usage: docker run -it -p 4040:4040 -p 8080:8080 -p 8081:8081 -v ~/.aws:/root/.aws -h spark <image_id>
# or update launch_env.sh and execute it.
