#!/bin/bash -eE

# Print the help text to stdout and terminate the script with status 0.
# The name of the script shown in the example is derived from $0.
function usage() {
    cat <<EOS
HPC-X rebuild helper script.
It rebuilds OMPI and UCX from HPC-X package using system compiler. In addition,
new init script and environmental module files are created.
To use custom compiler simply define \$CC/\$CXX/\$FC/\$F77 env variables.

Usage:
1. Run this script without any options to start a default rebuild process.
The new OMPI will be installed into <HPCX root>/hpcx-rebuild[-\$CC].
<HPCX root> is the directory where the HPCX package was installed.

2. Target directory can be redefined with \${ompi_prefix} env variable:
\$ cd <HPCX_ROOT>
\$ env ompi_prefix=\$PWD/new-ompi utils/$(basename "$0")
Note: ompi_prefix must be a first-level subpath of the <HPCX root> directory.

Options:
  -h --help             Show this message
  -mt --multithreading  Use HPCX MT env as base (see README for details)
  --cuda                Recompile with CUDA support
  --rebuild-ucx         Rebuild UCX
  --with-hcoll          Enable HCOLL (--with-hcoll=<path> for OMPI, disabled by default)
  --ompi-extra-config   Custom OMPI configure (except UCX/HCOLL paths)
  --ucx-extra-config    Custom UCX configure (in addition to default ones)
EOS

    exit 0
}

# Print the configure switch that enables CUDA support.
# Globals:  CUDA_HOME (read) - optional CUDA installation path
# Outputs:  "--with-cuda=<CUDA_HOME>" when CUDA_HOME is non-empty,
#           plain "--with-cuda" otherwise
function with_cuda() {
    # Keep the value local so that calling the function outside of a
    # command substitution does not create a global variable.
    local cuda_flag="--with-cuda"
    if [ -n "${CUDA_HOME:-}" ]; then
        cuda_flag="--with-cuda=${CUDA_HOME}"
    fi
    echo "${cuda_flag}"
}

# Defaults for the command line switches ("yes"/"no" flags and extra
# configure options passed through to OMPI / UCX).
mt="no"
cuda="no"
rebuild_ucx="no"
with_hcoll="no"
ompi_extra_conf_opts=""
ucx_extra_conf_opts=""

# Parse the command line into the global option variables above.
# Globals:   mt, cuda, rebuild_ucx, with_hcoll, ompi_extra_conf_opts,
#            ucx_extra_conf_opts (written)
# Arguments: the script's positional parameters
# Exits:     0 after printing help (-h/--help);
#            1 on an unknown option or a missing option value
function parse_args() {
    local arg
    # Loop on the argument count rather than on "$1" being non-empty, so an
    # empty argument is reported instead of silently ending the parsing.
    while [ $# -gt 0 ]; do
        arg=$1
        shift
        case "$arg" in
        -h | --help)
            usage
            ;;
        -mt | --multithreading)
            mt="yes"
            ;;
        --cuda)
            cuda="yes"
            ;;
        --rebuild-ucx)
            rebuild_ucx="yes"
            ;;
        --with-hcoll)
            with_hcoll="yes"
            ;;
        --ompi-extra-config)
            if [ -z "${1:-}" ]; then
                echo "ERROR: empty value for --ompi-extra-config"
                exit 1
            fi
            ompi_extra_conf_opts=$1
            shift
            ;;
        --ucx-extra-config)
            if [ -z "${1:-}" ]; then
                echo "ERROR: empty value for --ucx-extra-config"
                exit 1
            fi
            ucx_extra_conf_opts=$1
            shift
            ;;
        *)
            echo -e "ERROR: unknown option $arg\n"
            # usage terminates its shell with status 0; run it in a subshell
            # so that the script itself can still report the failure.
            (usage)
            exit 1
            ;;
        esac
    done
}

parse_args "$@"

# Tell the user whether HCOLL support was requested on the command line
case "${with_hcoll}" in
no)
    echo "INFO: HCOLL is disabled (use --with-hcoll to enable it)"
    ;;
*)
    echo "INFO: Building OMPI with HCOLL"
    ;;
esac

set -eE

# Setup: HPCX_ROOT is the parent of the directory holding this script; the
# OMPI prefix defaults to <HPCX_ROOT>/hpcx-rebuild[-$CC] unless ompi_prefix
# is already set in the environment.
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
HPCX_ROOT=$(cd "$DIR/.." && pwd)
default_ompi_prefix="${HPCX_ROOT}/hpcx-rebuild"
if [ -n "$CC" ]; then
    default_ompi_prefix="${default_ompi_prefix}-$CC"
fi
: "${ompi_prefix:=${default_ompi_prefix}}"

# ompi_prefix must be a first-level subpath of HPCX_ROOT
root_path=$(realpath -m "${HPCX_ROOT}")
ompi_prefix=$(realpath -m "${ompi_prefix}")
# Strip the literal "<root>/" prefix. Unlike an unanchored sed substitution,
# this only matches at the start of the path and never interprets characters
# of the root path as a regular expression. "%/" keeps a root of "/" working.
subdir=${ompi_prefix#"${root_path%/}/"}
# A valid result is a single non-empty path component; anything that still
# contains a slash is either nested deeper or located outside of the root.
case "${subdir}" in
"" | */*)
    echo "ERROR: ompi_prefix must be a first level subdir of <HPCX root>"
    exit 1
    ;;
esac

if [ -d "${ompi_prefix}" ]; then
    echo "ERROR: Directory '${ompi_prefix}' already exists, exit."
    exit 1
fi

# Stock init script and module file used as templates for the new ones
base_module_file="hpcx"
base_init_script="hpcx-init.sh"

# Refuse to continue when an HPC-X environment is already active
if [[ -n "${HPCX_DIR}" ]]; then
    echo "HPCX environment should not be loaded during OMPI rebuild."
    echo "Run 'hpcx_unload' or 'module unload <hpcx-module>'"
    exit 1
fi

# Load the stock environment only long enough to record the component
# locations it exports, then unload it again.
# shellcheck disable=SC1090
source "${HPCX_ROOT}/${base_init_script}"
hpcx_load
sharp_dir=${HPCX_SHARP_DIR}
hcoll_dir=${HPCX_HCOLL_DIR}
ucc_dir=${HPCX_UCC_DIR}
ucx_dir=${HPCX_UCX_DIR}
hpcx_unload

set -eE

# Optional UCX rebuild (--rebuild-ucx): unpack the UCX tarball found in
# <HPCX_ROOT>/sources, configure it, install it into <HPCX_ROOT>/ucx/<name>
# and point ucx_dir at the new installation. <name> is the last component
# of ompi_prefix.
if [ "${rebuild_ucx}" = "yes" ]; then
    name=$(basename "${ompi_prefix}")
    ucx_prefix="$HPCX_ROOT/ucx/$name"
    if [ -d "${ucx_prefix}" ]; then
        echo "ERROR: directory '${ucx_prefix}' already exists"
        exit 1
    fi

    # unpack sources
    cd "${HPCX_ROOT}/sources"
    echo "INFO: Unpacking UCX..."
    # Pick the first matching tarball with a glob instead of parsing ls
    # output; a missing tarball is reported instead of aborting silently.
    ucx_tar=""
    for candidate in *ucx*.tar.gz; do
        if [ -f "${candidate}" ]; then
            ucx_tar=${candidate}
            break
        fi
    done
    if [ -z "${ucx_tar}" ]; then
        echo "ERROR: UCX source tarball not found in '$PWD'"
        exit 1
    fi
    ucx_unpack=$(basename -s ".tar.gz" "${ucx_tar}")
    # ":?" aborts instead of running "rm -rf" on an empty name
    rm -rf "${ucx_unpack:?}"
    tar -zxf "${ucx_tar}"
    cd "${ucx_unpack}"

    # configure depends on mode
    conf_params=""
    if [ "$mt" = "yes" ]; then
        conf_params="--enable-mt"
    fi
    if [ "$cuda" = "yes" ]; then
        conf_params="${conf_params} $(with_cuda) --with-gdrcopy"
    fi
    conf_params="${conf_params} --prefix=${ucx_prefix} ${ucx_extra_conf_opts}"

    rc=0
    ucx_config_log="$PWD/ucx-configure.log"
    echo "INFO: Running UCX configure script see '${ucx_config_log}'"
    # conf_params is a space separated option list and must be word-split
    # shellcheck disable=SC2086
    ./contrib/configure-release ${conf_params} >"${ucx_config_log}" 2>&1 || rc=$?
    if [ $rc -ne 0 ]; then
        echo "ERROR: configuring UCX, see log for details: '${ucx_config_log}'"
        exit 1
    fi

    # make install
    ucx_make_install_log="$PWD/ucx-make-install.log"
    echo "INFO: Running UCX make install, see '${ucx_make_install_log}'"
    make -j9 install >"${ucx_make_install_log}" 2>&1 || rc=$?
    if [[ $rc -ne 0 ]]; then
        echo "ERROR: UCX make install failed, see log for details: '${ucx_make_install_log}'"
        # Stop here: OMPI must not be built against a broken UCX install
        exit 1
    fi

    # update ucx_dir
    ucx_dir=${ucx_prefix}

    echo "INFO: UCX is installed into '${ucx_prefix}'"
fi

# UCX and UCC locations are mandatory; HCOLL is checked only when it was
# enabled with --with-hcoll.
for component in UCX UCC HCOLL; do
    case "${component}" in
    UCX)
        location=${ucx_dir}
        ;;
    UCC)
        location=${ucc_dir}
        ;;
    HCOLL)
        [[ "${with_hcoll}" == "yes" ]] || continue
        location=${hcoll_dir}
        ;;
    esac
    if [[ -z "${location}" ]]; then
        echo "ERROR: Cannot define ${component} location, check '${base_init_script}'"
        exit 1
    fi
done

# All checks passed: print a summary of what is going to be used
printf '%s\n' \
    "Ready to rebuild" \
    "  HPCX_ROOT:      ${HPCX_ROOT}" \
    "  OMPI PREFIX:    ${ompi_prefix}" \
    "  UCX location:   ${ucx_dir}" \
    "  UCC location:   ${ucc_dir}"
[[ "${with_hcoll}" != "yes" ]] || printf '%s\n' "  HCOLL location: ${hcoll_dir}"

# Rebuild process according to README file:
# unpack the OMPI sources, configure against the selected UCX/UCC (and
# optionally HCOLL), then build and install into ompi_prefix.
cd "${HPCX_ROOT}/sources/"
echo "INFO: Unpacking OMPI..."
# Remove the tree left by a previous run before unpacking a fresh one
rm -rf "${HPCX_ROOT}/sources/openmpi-gitclone"
tar -zxf openmpi-gitclone.tar.gz
cd openmpi-gitclone

export LD_LIBRARY_PATH="${ucx_dir}/lib:${ucc_dir}/lib:${ucc_dir}/lib/ucc:${LD_LIBRARY_PATH}"

with_hcoll_par="--without-hcoll"
if [[ "${with_hcoll}" == "yes" ]]; then
    export LD_LIBRARY_PATH="${hcoll_dir}/lib:${LD_LIBRARY_PATH}"
    export LD_LIBRARY_PATH="${sharp_dir}/lib:${LD_LIBRARY_PATH}"
    with_hcoll_par="--with-hcoll=${hcoll_dir}"
fi

ompi_config_log="$PWD/ompi-configure.log"
rc=0

# Mandatory part of the configure command; the CUDA switch is added here
# once, whichever set of extra options is selected below.
cmd="./configure --prefix=${ompi_prefix} --with-ucx=${ucx_dir} --with-ucc=${ucc_dir} ${with_hcoll_par}"
if [ "$cuda" = "yes" ]; then
    cmd="$cmd $(with_cuda)"
fi

if [ -n "${ompi_extra_conf_opts}" ]; then
    echo "INFO: Using custom configure cmd for OMPI"
    cmd="$cmd ${ompi_extra_conf_opts}"
else
    # Try to get original configure cmd first. If not possible - use default.
    orig_configure_cmd=""
    orig_config_log=""
    # Take the first config_*.log whose file name does not mention ucx. A glob
    # loop (rather than "ls | grep -v") does not abort the script under
    # "set -e" when no log exists, and ignores "ucx" in the directory part.
    for candidate in "${HPCX_ROOT}/sources/config_"*.log; do
        case "$(basename "${candidate}")" in
        *ucx*)
            continue
            ;;
        esac
        if [ -f "${candidate}" ]; then
            orig_config_log=${candidate}
            break
        fi
    done
    if [ -n "${orig_config_log}" ]; then
        # Extract the options of the recorded configure invocation and drop
        # the ones that are supplied explicitly in $cmd above.
        orig_configure_cmd=$(grep "/configure " "${orig_config_log}" | grep -v "Configured" | perl -pne 's/^.*configure\s+//;s/--prefix=\S+//;s/--with-hcoll=\S+//;s/--with-ucx=\S+//;s/--with-ucc=\S+//;s/--with-cuda(=\S+)?//')
    fi

    # Only a command with at least one non-blank character counts as found
    if [[ "${orig_configure_cmd}" =~ [^[:space:]] ]]; then
        echo "INFO: Using HPC-X original configure cmd for OMPI"
        cmd="$cmd ${orig_configure_cmd}"
    else
        echo "INFO: Using default configure cmd for OMPI"
        cmd="$cmd --with-slurm --with-pmi --with-platform=contrib/platform/mellanox/optimized"
    fi
fi

echo "INFO: Running configure script, see '${ompi_config_log}'"
# cmd is a space separated command line and is word-split on purpose;
# NOTE(review): options containing quoted whitespace are not preserved.
$cmd >"${ompi_config_log}" 2>&1 || rc=$?
if [[ $rc -ne 0 ]]; then
    echo "ERROR: configuring OMPI, see log for details: '${ompi_config_log}'"
    exit 1
fi

ompi_make_install_log="$PWD/ompi-make-install.log"
echo "INFO: Running make install, see '${ompi_make_install_log}'"
make -j9 all >"${ompi_make_install_log}" 2>&1 || rc=$?
# Do not attempt the install step when the build itself has failed
if [[ $rc -eq 0 ]]; then
    make -j9 install >>"${ompi_make_install_log}" 2>&1 || rc=$?
fi
if [[ $rc -ne 0 ]]; then
    echo "ERROR: make install failed, see log for details: '${ompi_make_install_log}'"
    exit 1
fi

# Derive a new init script and module file from the stock ones and rewrite
# the OMPI (and, if rebuilt, UCX) paths inside them.
echo "INFO: Setting up new init script and module file..."
name=$(basename "${ompi_prefix}")
new_init="${name}.sh"
new_mod_file="modulefiles/$name"
cd "${HPCX_ROOT}"
cp "${base_init_script}" "${new_init}"
cp "modulefiles/${base_module_file}" "${new_mod_file}"

# Collect the substitutions per file, then apply each set in one sed run
init_edits=(-e "s;{HPCX_DIR}/ompi;{HPCX_DIR}/$name;")
mod_edits=(-e "s;hpcx_dir/ompi;hpcx_dir/$name;")
if [[ "${rebuild_ucx}" == "yes" ]]; then
    init_edits+=(-e "s;{HPCX_DIR}/ucx;{HPCX_DIR}/ucx/$name;")
    mod_edits+=(-e "s;hpcx_dir/ucx;hpcx_dir/ucx/$name;")
fi
sed -i "${init_edits[@]}" "${new_init}"
sed -i "${mod_edits[@]}" "${new_mod_file}"
echo "INFO: Done"

# Show the user how to load the freshly created environment
printf '%s\n' \
    "" \
    "HPCX with the new OMPI is ready to use." \
    "" \
    "You can load it with init script:" \
    "\$ source ${new_init}" \
    "\$ hpcx_load" \
    "" \
    "or use environmental module:" \
    "\$ module load ${new_mod_file}"
