Using AlphaFold on izar
To use AlphaFold on izar, you can access preinstalled versions of the Singularity/Apptainer containers.
Below is an example sbatch script showing how to use them; a short submission sketch also follows the list below. The file is
available on izar at
/ssoft/spack/external/alphafold/v2.3.2-1/example_slurm_job.sh.
It is an adapted version of the script coming from this GitHub project.
- The AlphaFold Genetic Database can be found here:
  /work/scitas-share/datasets/AlphaFold_Datasets/Databases
- The AlphaFold Protein Structure Database can be found here:
  /work/scitas-share/datasets/AlphaFold_Datasets/{v?,latest}
  (the latest version currently available is v4)
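A minimal submission sketch is shown below; the working directory (~/alphafold_test) is an assumption, so adapt the paths to your own setup.

# Copy the provided example job script into a working directory of your choice (hypothetical path)
mkdir -p ~/alphafold_test && cd ~/alphafold_test
cp /ssoft/spack/external/alphafold/v2.3.2-1/example_slurm_job.sh .
# Place the FASTA input referenced by the script (T1050.fasta in the example) in the same directory,
# then submit the job and monitor it
sbatch example_slurm_job.sh
squeue -u $USER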
example_slurm_job.sh
#!/bin/bash
# Copyright 2023 David Chin
#
# This file is part of alphafold_singularity.
#
# alphafold_singularity is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# alphafold_singularity is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with alphafold_singularity. If not, see <https://www.gnu.org/licenses/>.
#SBATCH --time=2:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --mem=20000
#SBATCH --cpus-per-task=20
#SBATCH --gres=gpu:1
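# Limit the CPU threads used by OpenMM (structure relaxation) and XLA to the CPUs allocated by Slurm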
export OPENMM_CPU_THREADS=$SLURM_CPUS_PER_TASK
export XLA_FLAGS="--xla_cpu_multi_thread_eigen=false intra_op_parallelism_threads=$SLURM_CPUS_PER_TASK"
### NOTE
### This job script cannot be used without modification for your specific environment.
module load gcc python
export ALPHAFOLD_DIR=/ssoft/spack/external/alphafold/v2.3.2-1
export ALPHAFOLD_DATADIR="/work/scitas-share/datasets/AlphaFold_Datasets/Databases"
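# Create a private temporary directory for this job's intermediate files and output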
export TMP=$(mktemp -d)
### Check values of some environment variables
echo INFO: SLURM_GPUS_ON_NODE=$SLURM_GPUS_ON_NODE
echo INFO: SLURM_JOB_GPUS=$SLURM_JOB_GPUS
echo INFO: SLURM_STEP_GPUS=$SLURM_STEP_GPUS
echo INFO: ALPHAFOLD_DIR=$ALPHAFOLD_DIR
echo INFO: ALPHAFOLD_DATADIR=$ALPHAFOLD_DATADIR
echo INFO: TMP=$TMP
###
### README This runs AlphaFold 2.3.2 on the T1050.fasta file
###
# AlphaFold should use all GPU devices available to the job by default.
#
# To run the CASP14 evaluation, use:
# --model_preset=monomer_casp14
# --db_preset=full_dbs (or delete the line; default is "full_dbs")
#
# On a test system with 4x Tesla V100-SXM2, this took about 50 minutes.
#
# To benchmark by running multiple JAX model evaluations (NB: this
# significantly increases run time), add:
# --benchmark
#
# On a test system with 4x Tesla V100-SXM2, this took about 6 hours.
# Create output directory in $TMP (removed at the end of this script).
output_dir=$TMP/output
mkdir -p $output_dir
echo INFO: output_dir=$output_dir
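# Activate the Python virtual environment provided with the AlphaFold installation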
source ${ALPHAFOLD_DIR}/.venv/bin/activate
# Run AlphaFold; default is to use GPUs
python3 ${ALPHAFOLD_DIR}/singularity/run_singularity.py \
--use_gpu \
--output_dir=$output_dir \
--data_dir=${ALPHAFOLD_DATADIR} \
--fasta_paths=T1050.fasta \
--max_template_date=2020-05-14 \
--model_preset=monomer \
--db_preset=reduced_dbs
echo INFO: AlphaFold returned $?
### Copy AlphaFold output back to the directory where the "sbatch" command was issued.
mkdir $SLURM_SUBMIT_DIR/Output-$SLURM_JOB_ID
cp -R $output_dir $SLURM_SUBMIT_DIR/Output-$SLURM_JOB_ID
rm -rf $TMP
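To run AlphaFold on your own input, it is usually enough to adjust the flags of the final run_singularity.py call. A minimal sketch, reusing only flags that already appear in the script above and assuming a hypothetical input file my_protein.fasta placed in the submission directory:

# Hypothetical adaptation: predict your own protein using the full databases
python3 ${ALPHAFOLD_DIR}/singularity/run_singularity.py \
    --use_gpu \
    --output_dir=$output_dir \
    --data_dir=${ALPHAFOLD_DATADIR} \
    --fasta_paths=my_protein.fasta \
    --max_template_date=2020-05-14 \
    --model_preset=monomer \
    --db_preset=full_dbs

Using full_dbs instead of reduced_dbs generally needs more memory and a longer run time, so you may also have to raise the #SBATCH --time and --mem values at the top of the script.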