Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
phylowise/1_SearchPipe.sh
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
executable file
55 lines (45 sloc)
2.79 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/local/bin/bash | |
### USER SPECIFIED VARIABLES ### | |
QUERY='query/Chlamydomonas_reinhardtii_chloroplast.faa' # path to input protein fasta file | |
JOBS=2 # Split query file into subjobs that have no more than 20k sequences each. EG if your query has 28,000 sequences, set JOBS equal to 2 (or greater); if query has 63,000 sequences, set JOBS equal to 4 (or greater). | |
MAXJOBS=2 # max number of jobs to run at a time | |
# SLURM variables | |
USER=$EMAIL # user email; either enter manually or add $EMAIL to your .bashrc | |
QUEUE='jwisecav' # user queue | |
THREADS=16 | |
TIME='24:00:00' # max walltime | |
# HMMER/DIAMOND variables | |
PROGRAM='diamond' # can be 'phmmer' or 'diamond' | |
DB="/depot/jwisecav/data/dbs/custom-refseq-2021-01-27/refseq_release204_w_supp_cd95.dmnd" # path to database | |
HMMEROPTS='--F1 1e-5 --F2 1e-7 --F3 1e-10 --noali --notextw -o /dev/null' # HMMER Options | |
DMNDOPTS='blastp --sensitive --evalue 1e-3 --max-target-seqs 0' # Diamond blast options | |
# Parsing variables | |
MAX_HITS_PER_QUERY=0 # maximum number of hits to keep per query sequence (use 0 to retain all hits) | |
MAX_HITS_PER_SUBJECT=0 # maximum number of hits to keep per uniq taxnomy id (use 0 to retain all hits) | |
EVALUE_THRESHOLD=1 # upper bound for evalue to include in parsed output | |
### Directory information ### | |
QUERYDIR="$PWD/query" | |
SPLITDIR="$PWD/split" | |
SCRIPTS="$PWD/scripts" | |
HMMRDIR="$PWD/phmmer-out" # output directory for phmmer | |
DMNDIR="$PWD/diamond-out" # output directory for diamond or diamond-parsed phmmer results | |
SLURMDIR="$PWD/slurm-out" | |
echo "Creating working directories" | |
if [ ! -d "$SPLITDIR" ]; then mkdir $SPLITDIR; fi | |
if [ $PROGRAM == 'phmmer' ]; then if [ ! -d "$HMMRDIR" ]; then mkdir $HMMRDIR; fi; fi | |
if [ $PROGRAM == 'diamond' ]; then if [ ! -d "$DMNDIR" ]; then mkdir $DMNDIR; fi; fi | |
if [ ! -d "$SLURMDIR" ]; then mkdir $SLURMDIR; fi | |
export QUERY THREADS HMMRDIR DMNDIR | |
export PROGRAM DB HMMEROPTS DMNDOPTS JOBS PWD | |
export MAX_HITS_PER_QUERY MAX_HITS_PER_SUBJECT EVALUE_THRESHOLD | |
## 1 - Create job array input files | |
echo "Creating job array input files" | |
module load conda-env/env.genomics | |
python $SCRIPTS/create_jobarray_fasta.py $QUERY $SPLITDIR $JOBS | |
## 2 - Submit query self-search | |
echo "Submitting query self-search job" | |
CMD=`sbatch --account=$QUEUE --job-name=selfsearch --mail-user=$USER --nodes=1 --cpus-per-task=$THREADS --time=$TIME --output=$SLURMDIR/%x-%j.out $SCRIPTS/run_selfsearch.sub` | |
FIRST=`echo $CMD | sed "s/Submitted batch job //"` | |
## 3 - Submit phmmer/diamond jobs against database | |
echo "Submitting phmmer/diamond jobs: 1-$JOBS max running: $MAXJOBS $SCRIPTS/run_search.sub" | |
SECOND=`sbatch --dependency=afterok:$FIRST --array=1-$JOBS%$MAXJOBS --account=$QUEUE --job-name=search --mail-user=$USER --nodes=1 --cpus-per-task=$THREADS --time=$TIME --output=$SLURMDIR/%x-%A-%a.out $SCRIPTS/run_search.sub` |