iseq

1
2
3
4
5
6
7
8
9
10
11
12
13
14
# download.sh
#
# Download every run accession listed in <dir>/SRR_Acc_List.txt with iseq,
# saving the results in <dir>/raw.
#
# Usage: sh download.sh <dir>
#   <dir>  directory that contains SRR_Acc_List.txt (one accession per line)
#
# Exits 1 when <dir> is missing or cannot be entered, when the accession list
# is unreadable, or when raw/ cannot be created. A failed download is reported
# on stderr and the remaining accessions are still processed.

if [ -z "${1:-}" ]; then
  echo "Usage: sh download.sh <dir>" >&2
  exit 1
fi

cd "$1" || { echo "Error: Unable to change directory to $1" >&2; exit 1; }

if [ ! -r SRR_Acc_List.txt ]; then
  echo "Error: SRR_Acc_List.txt not found or not readable in $1" >&2
  exit 1
fi

# -p so that re-running after a partial download does not fail on an
# already existing raw/ directory.
mkdir -p raw || { echo "Error: Unable to create directory $1/raw" >&2; exit 1; }

# Enter raw/ exactly once. Doing the cd inside the loop fails from the second
# accession onwards because raw/raw does not exist.
cd raw || { echo "Error: Unable to change directory to $1/raw" >&2; exit 1; }

# Default IFS is kept on purpose: it trims surrounding blanks from each line.
# The `|| [ -n "$Run" ]` part handles a last line without a trailing newline.
while read -r Run || [ -n "$Run" ]; do
  # Skip blank lines in the accession list.
  [ -n "$Run" ] || continue
  # stdin is taken from /dev/null so that iseq (or a tool it spawns) cannot
  # swallow the rest of the accession list. Flags are unchanged from the
  # original call (-a / -g: presumably Aspera transfer and gzip output;
  # confirm against `iseq --help`).
  iseq -i "$Run" -a -g </dev/null ||
    echo "Warning: iseq failed for $Run" >&2
done < ../SRR_Acc_List.txt

## bash
sh download.sh [path]
## directory structure
/path
/path/SRR_Acc_List.txt
  • The script creates a `raw` directory inside `path`; the downloaded data is saved there.

pysradb

1
2
3
import pandas as pd
from pysradb.sraweb import SRAweb
db = SRAweb()
1
2
3
4
5
6
# GEO series to look up; change this accession to process another dataset.
gse = "GSE197726"
# Map the GEO series (GSE) to its SRA study (SRP) accession.
srp = db.gse_to_srp(gse)
# Bare expression: shows the mapping table when run as a notebook cell.
srp
## output
study_alias study_accession
0 GSE197726 SRP362072
1
2
3
4
# Fetch the detailed metadata of all runs (SRR) that belong to the GSE.
study_accessions = srp["study_accession"]
df_meta = db.sra_metadata(study_accessions, detailed=True)
# List the available columns, then preview the first five rows.
print(df_meta.columns)
df_meta.head(5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# modify
# Columns to keep from the run metadata. Adjust this list for each dataset:
# `.loc` raises KeyError if any listed column is absent from df_meta.
col = [
    'run_accession', 'library_name', 'study_title', 'organism_name',
    'library_strategy', 'library_source', 'source_name', 'tissue',
    'circadian time', 'genotype', 'treatment', 'sex', 'cell type',
    'age',
]

# Take an explicit copy so the in-place insert below works on an independent
# frame rather than on a selection of df_meta.
df_index = df_meta.loc[:, col].copy()

## modify
# Optional, dataset-specific clean-up recipes for a time column; uncomment and
# adapt the ones that apply.
# df_index = df_index.rename(columns={'time point': 'time'})
# df_index['time'] = df_index['time'].apply(lambda x: 'CT' + str(x))
# df_index['time'] = df_index['time'].apply(lambda x: str(x).replace(" ",""))
# df_index['time'] = df_index['experiment_title'].apply(lambda x: str(x).split(';')[0].split('_')[-2])
# df_index['time'] = df_index['time'].apply(lambda x: 'ZT' + str(x))
# df_index = df_index.drop('experiment_title', axis=1)

# Prepend the GEO accession as the first column; the scalar is broadcast to
# every row.
df_index.insert(0, 'gse_accession', gse)
# Bare expression: shows the resulting table when run as a notebook cell.
df_index

fasterq-dump

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# run one
fasterq-dump -e 10 -p --split-3 -O [outdir] [sra]

# Convert every SRR* entry found in a directory to FASTQ with fasterq-dump.
#
# Usage: sh <script> <dir>
#   <dir>  directory holding the downloaded runs (entries matching SRR*[0-9]);
#          the FASTQ files are written to <dir>/fastq
#
# Exits 1 when <dir> is missing or cannot be entered, or when the output
# directory cannot be created. A failed conversion is reported on stderr and
# the remaining runs are still processed.

if [ -z "${1:-}" ]; then
  echo "Usage: $0 <dir>" >&2
  exit 1
fi

cd "$1" || { echo "Error: Unable to change directory to $1" >&2; exit 1; }

# Resolve the directory after the cd: re-using a relative "$1" from inside
# the directory would point at the wrong place.
workdir=$(pwd) || exit 1
fqdir="${workdir}/fastq"

# -p so that a re-run does not fail on an already existing fastq/ directory.
mkdir -p "$fqdir" || { echo "Error: Unable to create directory ${fqdir}" >&2; exit 1; }

for sra_file in "$workdir"/SRR*[0-9]; do
  # An unmatched glob stays literal; skip it instead of passing it to the tool.
  [ -e "$sra_file" ] || continue
  start_time=$(date +"%Y-%m-%d %H:%M:%S")
  fasterq-dump -e 10 -p --split-3 -O "$fqdir" "$sra_file" ||
    echo "Warning: fasterq-dump failed for ${sra_file}" >&2
  end_time=$(date +"%Y-%m-%d %H:%M:%S")
  echo "Sample ID: ${sra_file} - Start Time: ${start_time} - End Time: ${end_time}"
done

parallel-fastq-dump

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# run one
parallel-fastq-dump -s [sra_file] -t 8 -O [outdir] --split-files --gzip

#
# Convert every SRR* entry found in a directory to gzipped FASTQ with
# parallel-fastq-dump.
#
# Usage: sh <script> <dir>
#   <dir>  directory holding the downloaded runs (entries matching SRR*[0-9]);
#          the FASTQ files are written to <dir>/fastq
#
# Exits 1 when <dir> is missing or cannot be entered, or when the output
# directory cannot be created. A failed conversion is reported on stderr and
# the remaining runs are still processed.

if [ -z "${1:-}" ]; then
  echo "Usage: $0 <dir>" >&2
  exit 1
fi

cd "$1" || { echo "Error: Unable to change directory to $1" >&2; exit 1; }

# Resolve the directory after the cd: re-using a relative "$1" from inside
# the directory would point at the wrong place.
workdir=$(pwd) || exit 1
fqdir="${workdir}/fastq"

# -p so that a re-run does not fail on an already existing fastq/ directory.
mkdir -p "$fqdir" || { echo "Error: Unable to create directory ${fqdir}" >&2; exit 1; }

for sra_file in "$workdir"/SRR*[0-9]; do
  # An unmatched glob stays literal; skip it instead of passing it to the tool.
  [ -e "$sra_file" ] || continue
  start_time=$(date +"%Y-%m-%d %H:%M:%S")
  parallel-fastq-dump -s "$sra_file" -t 8 -O "$fqdir" --split-files --gzip ||
    echo "Warning: parallel-fastq-dump failed for ${sra_file}" >&2
  end_time=$(date +"%Y-%m-%d %H:%M:%S")
  echo "Sample ID: ${sra_file} - Start Time: ${start_time} - End Time: ${end_time}"
done