Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
executable file 55 lines (45 sloc) 2.79 KB
#! /usr/local/bin/bash
### USER SPECIFIED VARIABLES ###
QUERY='query/Chlamydomonas_reinhardtii_chloroplast.faa' # path to input protein fasta file
JOBS=2 # Split query file into subjobs that have no more than 20k sequences each. EG if your query has 28,000 sequences, set JOBS equal to 2 (or greater); if query has 63,000 sequences, set JOBS equal to 4 (or greater).
MAXJOBS=2 # max number of jobs to run at a time
# SLURM variables
USER=$EMAIL # user email; either enter manually or add $EMAIL to your .bashrc
QUEUE='jwisecav' # user queue
THREADS=16
TIME='24:00:00' # max walltime
# HMMER/DIAMOND variables
PROGRAM='diamond' # can be 'phmmer' or 'diamond'
DB="/depot/jwisecav/data/dbs/custom-refseq-2021-01-27/refseq_release204_w_supp_cd95.dmnd" # path to database
HMMEROPTS='--F1 1e-5 --F2 1e-7 --F3 1e-10 --noali --notextw -o /dev/null' # HMMER Options
DMNDOPTS='blastp --sensitive --evalue 1e-3 --max-target-seqs 0' # Diamond blast options
# Parsing variables
MAX_HITS_PER_QUERY=0 # maximum number of hits to keep per query sequence (use 0 to retain all hits)
MAX_HITS_PER_SUBJECT=0 # maximum number of hits to keep per uniq taxnomy id (use 0 to retain all hits)
EVALUE_THRESHOLD=1 # upper bound for evalue to include in parsed output
### Directory information ###
QUERYDIR="$PWD/query"
SPLITDIR="$PWD/split"
SCRIPTS="$PWD/scripts"
HMMRDIR="$PWD/phmmer-out" # output directory for phmmer
DMNDIR="$PWD/diamond-out" # output directory for diamond or diamond-parsed phmmer results
SLURMDIR="$PWD/slurm-out"
echo "Creating working directories"
if [ ! -d "$SPLITDIR" ]; then mkdir $SPLITDIR; fi
if [ $PROGRAM == 'phmmer' ]; then if [ ! -d "$HMMRDIR" ]; then mkdir $HMMRDIR; fi; fi
if [ $PROGRAM == 'diamond' ]; then if [ ! -d "$DMNDIR" ]; then mkdir $DMNDIR; fi; fi
if [ ! -d "$SLURMDIR" ]; then mkdir $SLURMDIR; fi
export QUERY THREADS HMMRDIR DMNDIR
export PROGRAM DB HMMEROPTS DMNDOPTS JOBS PWD
export MAX_HITS_PER_QUERY MAX_HITS_PER_SUBJECT EVALUE_THRESHOLD
## 1 - Create job array input files
echo "Creating job array input files"
module load conda-env/env.genomics
python $SCRIPTS/create_jobarray_fasta.py $QUERY $SPLITDIR $JOBS
## 2 - Submit query self-search
echo "Submitting query self-search job"
CMD=`sbatch --account=$QUEUE --job-name=selfsearch --mail-user=$USER --nodes=1 --cpus-per-task=$THREADS --time=$TIME --output=$SLURMDIR/%x-%j.out $SCRIPTS/run_selfsearch.sub`
FIRST=`echo $CMD | sed "s/Submitted batch job //"`
## 3 - Submit phmmer/diamond jobs against database
echo "Submitting phmmer/diamond jobs: 1-$JOBS max running: $MAXJOBS $SCRIPTS/run_search.sub"
SECOND=`sbatch --dependency=afterok:$FIRST --array=1-$JOBS%$MAXJOBS --account=$QUEUE --job-name=search --mail-user=$USER --nodes=1 --cpus-per-task=$THREADS --time=$TIME --output=$SLURMDIR/%x-%A-%a.out $SCRIPTS/run_search.sub`