Realized that transcriptomes v2.1 and v3.1 (extracted from BLASTx-annotated FastAs from 20200605) didn’t have any associated stats.
Used built-in Trinity scripts to generate assembly stats on Mox.
SBATCH script (GitHub):
#!/bin/bash
## SLURM batch header: the "#SBATCH" lines below are read by sbatch at
## submission time; to Bash itself they are ordinary comments.
## Job Name
#SBATCH --job-name=cbai_trinity_stats_v2.1_v3.1
## Allocation Definition
#SBATCH --account=coenv
#SBATCH --partition=coenv
## Resources
## Nodes
#SBATCH --nodes=1
## Walltime (days-hours:minutes:seconds format)
#SBATCH --time=15-00:00:00
## Memory per node
#SBATCH --mem=120G
## Turn on e-mail notification
#SBATCH --mail-type=ALL
#SBATCH --mail-user=samwhite@uw.edu
## Specify the working directory for this job
## (relative output paths used later in this script resolve against it)
#SBATCH --chdir=/gscratch/scrubbed/samwhite/outputs/20200819_cbai_trinity_stats_v2.1_v3.1
# Script to generate C.bairdi transcriptome v2.1 and v3.1 Trinity stats.
###################################################################################
# These variables need to be set by user

# Directory holding the transcriptome FastA files
transcriptomes_dir="/gscratch/srlab/sam/data/C_bairdi/transcriptomes"

# Program locations
samtools="/gscratch/srlab/programs/samtools-1.10/samtools"
trinity_dir="/gscratch/srlab/programs/trinityrnaseq-v2.9.0"

# Transcriptome FastA files to process (full paths, one per element)
transcriptomes_array=(
  "${transcriptomes_dir}/cbai_transcriptome_v2.1.fasta"
  "${transcriptomes_dir}/cbai_transcriptome_v3.1.fasta"
)

# Programs, keyed by a short label.
# Each value is a command line (program path plus optional sub-command).
declare -A programs_array=(
  [trinity_stats]="${trinity_dir}/util/TrinityStats.pl"
  [trinity_gene_trans_map]="${trinity_dir}/util/support_scripts/get_Trinity_gene_to_trans_map.pl"
  [trinity_fasta_seq_length]="${trinity_dir}/util/misc/fasta_seq_length.pl"
  [samtools_faidx]="${samtools} faidx"
)
###################################################################################
# Abort the script as soon as any command returns a non-zero exit status
set -o errexit

# Load the Mox Python 3 module so that Python modules are available
module load intel-python3_2017
# Loop through each transcriptome and generate its supporting files in the
# current working directory:
#   <name>_assembly_stats.txt  - TrinityStats.pl output
#   <name>.gene_trans_map      - gene-to-transcript map
#   <name>.seq_lens            - sequence lengths
#   <name>.fai                 - FastA index
#   <name>.checksum.md5        - MD5 checksum of the FastA
# The loop variable is the array INDEX, not the FastA path.
#
# NOTE: programs_array values are expanded unquoted on purpose so that entries
# holding a command plus sub-command (e.g. "samtools faidx") are split into
# separate words.
for transcriptome in "${!transcriptomes_array[@]}"
do
  # Variables
  transcriptome_fasta="${transcriptomes_array[$transcriptome]}"
  # FastA filename with the leading directory stripped
  transcriptome_name="${transcriptome_fasta##*/}"
  assembly_stats="${transcriptome_name}_assembly_stats.txt"

  # Assembly stats
  ${programs_array[trinity_stats]} "${transcriptome_fasta}" \
    > "${assembly_stats}"

  # Create gene map files
  ${programs_array[trinity_gene_trans_map]} "${transcriptome_fasta}" \
    > "${transcriptome_name}".gene_trans_map

  # Create sequence lengths file (used for differential gene expression)
  ${programs_array[trinity_fasta_seq_length]} "${transcriptome_fasta}" \
    > "${transcriptome_name}".seq_lens

  # Create FastA index
  # With no region argument, samtools faidx writes the index to
  # <FastA>.fai next to the input file and prints nothing to stdout.
  # Redirecting stdout therefore produced an EMPTY .fai file which the rsync
  # below then copied over the real index. Build the index in place and copy
  # the real file into the working directory instead (-p keeps the timestamp
  # so rsync sees the two copies as identical).
  ${programs_array[samtools_faidx]} "${transcriptome_fasta}"
  cp -p "${transcriptome_fasta}.fai" "${transcriptome_name}.fai"

  # Copy files to transcriptomes directory
  # Runs before the checksum is generated, so the .checksum.md5 file
  # stays in the working directory only.
  rsync -av \
    "${transcriptome_name}"* \
    "${transcriptomes_dir}"

  # Capture FastA checksums for verification
  echo ""
  echo "Generating checksum for ${transcriptome_name}"
  md5sum "${transcriptome_fasta}" > "${transcriptome_name}".checksum.md5
  echo "Finished generating checksum for ${transcriptome_name}"
  echo ""
done
# Document programs in PATH (primarily for program version ID)
# Appends a timestamp, the SLURM job ID and one PATH entry per line
# to system_path.log in the current working directory.
{
  date
  echo ""
  echo "System PATH for $SLURM_JOB_ID"
  echo ""
  # Ten-dash separator: "%0.s" consumes each brace-expanded argument while
  # printing nothing for it, so only the literal "-" is emitted per argument.
  printf "%0.s-" {1..10}
  # printf above emits no newline; terminate the separator line so the first
  # PATH entry is not glued onto the dashes.
  echo ""
  echo "${PATH}" | tr : \\n
} >> system_path.log
# Capture program options
## Note: Trinity util/support scripts don't have options/help menus
# Best effort: stdout and stderr of each "--help" call are appended to
# program_options.log, and a failing call must not abort the script
# (hence the trailing "|| true" on the whole group).
for program in "${!programs_array[@]}"; do
  {
    printf '%s\n\n' "Program options for ${program}: "
    # Unquoted on purpose: the stored value may be "command sub-command"
    ${programs_array[$program]} --help
    printf '\n\n%s\n\n\n' "----------------------------------------------"
  } &>> program_options.log || true
done
RESULTS
Output folder: