Metagenome数据库构建_物种组成_丰度估计 2023.01.04

时间:2023-01-05 19:57:09

物种组成_丰度估计

***下载数据库
### kraken2 database
wget https://genome-idx.s3.amazonaws.com/kraken/k2_pluspf_8gb_20210517.tar.gz
tar -zxvf k2_pluspf_8gb_20210517.tar.gz
### bracken database
singularity exec ../../software/MetaGenome.sif bracken-build -d K2/ -t 4
***分类kraken & 丰度估计bracken
*step1: kraken-物种组成
singularity exec ../../software/MetaGenome.sif kraken2 --threads 4 --quick --paired --db ../../Database/K2/ --report A1.kreport --output A1.kraken ../../P1.data_filter/02.contaminant/A1_1.clean.fq.gz ../../P1.data_filter/02.contaminant/A1_2.clean.fq.gz
singularity exec ../../software/MetaGenome.sif kraken2 --threads 4 --quick --paired --db ../../Database/K2/ --report A2.kreport --output A2.kraken ../../P1.data_filter/02.contaminant/A2_1.clean.fq.gz ../../P1.data_filter/02.contaminant/A2_2.clean.fq.gz
*step2:bracken-丰度计算
singularity exec ../../software/MetaGenome.sif bracken -d ../../Database/K2/ -i A1.kreport -o out_S/A1.bracken.S -w out_S/A1.kreport -l S -t 10
singularity exec ../../software/MetaGenome.sif bracken -d ../../Database/K2/ -i A2.kreport -o out_S/A2.bracken.S -w out_S/A2.kreport -l S -t 10
*step3:样本结果合并 转换成表格***
singularity exec ../../software/MetaGenome.sif kraken-biom ./out_S/*.kreport --max D -o ./out_S/S.biom
singularity exec ../../software/MetaGenome.sif biom convert -i ./out_S/S.biom -o ./out_S/S.count.tsv.tmp --to-tsv --header-key taxonomy
表格内容调整
sed 's/; g__\([^;]\+\); s__/; g__\1; s__\1 /' ./out_S/S.count.tsv.tmp > ./out_S/S.taxID.count.tsv

sed '/^#/! s/^[0-9]\+\t\(.*[A-Za-z]\+__\([^;]\+\)\)$/\2\t\1/' ./out_S/S.taxID.count.tsv > ./out_S/S.taxName.count.tsv

sed '1d; 2s/^#//' ./out_S/S.taxName.count.tsv |awk -F "\t" -v 'OFS=\t' '{$NF = ""; { print $0 }}' | sed 's/\t$//' > ./out_S/S.count.tsv

绘图-barplot 热图(聚类) krona图
singularity exec ../../software/MetaGenome.sif Rscript ../script/draw_taxonBarplot.R out_S/S.count.tsv 20 out_S/S.count.out
singularity exec ../../software/MetaGenome.sif kreport2krona.py -r A1.kreport -o A1.kreport.krona
singularity exec ../../software/MetaGenome.sif ktImportText -o A1.kreport.krona.html A1.kreport.krona


大量样本 批量生成 step1 2 3的运行脚本

A A1 ../../dataFQ/A1_1.fq.gz ../../dataFQ/A1_2.fq.gz
A A2 ../../dataFQ/A2_1.fq.gz ../../dataFQ/A2_2.fq.gz
A A3 ../../dataFQ/A3_1.fq.gz ../../dataFQ/A3_2.fq.gz
C C1 ../../dataFQ/C1_1.fq.gz ../../dataFQ/C1_2.fq.gz
C C2 ../../dataFQ/C2_1.fq.gz ../../dataFQ/C2_2.fq.gz
C C3 ../../dataFQ/C3_1.fq.gz ../../dataFQ/C3_2.fq.gz
B B1 ../../dataFQ/B1_1.fq.gz ../../dataFQ/B1_2.fq.gz
B B2 ../../dataFQ/B2_1.fq.gz ../../dataFQ/B2_2.fq.gz
B B3 ../../dataFQ/B3_1.fq.gz ../../dataFQ/B3_2.fq.gz

cat sample.txt | while read group sample fq1 fq2
do
echo singularity exec ../../software/MetaGenome.sif kraken2 --threads 4 --quick --paired --db ../../Database/K2/ --report $sample.kreport --output $sample.kraken $fq1 $fq2
done > step1.run_kraken2.sh


for level in D P C O F G S
do
mkdir out_$level
cut -f 2 sample.txt | while read sample
do
echo singularity exec ../../software/MetaGenome.sif bracken -d ../../Database/K2/ -i $sample.kreport -o out_$level/$sample.bracken.$level -w out_$level/$sample.kreport -l $level -t 4
done > step2.run_bracken.$level.sh


echo "
singularity exec ../../software/MetaGenome.sif kraken-biom ./out_$level/*.kreport --max D -o ./out_$level/$level.biom
singularity exec ../../software/MetaGenome.sif biom convert -i ./out_$level/$level.biom -o ./out_$level/$level.count.tsv.tmp --to-tsv --header-key taxonomy

sed 's/; g__\([^;]\+\); s__/; g__\1; s__\1 /' ./out_$level/$level.count.tsv.tmp > ./out_$level/$level.taxID.count.tsv

sed '/^#/! s/^[0-9]\+\t\(.*[A-Za-z]\+__\([^;]\+\)\)$/\2\t\1/' ./out_$level/$level.taxID.count.tsv > ./out_$level/$level.taxName.count.tsv

sed '1d; 2s/^#//' ./out_$level/$level.taxName.count.tsv |awk -F \"\t\" -v 'OFS=\t' '{\$NF = \"\"; { print \$0 }}' | sed 's/\t$//' > ./out_$level/$level.count.tsv

singularity exec ../../software/MetaGenome.sif Rscript ../script/draw_taxonBarplot.R out_$level/$level.count.tsv 20 out_$level/$level.count.out

" > step3.run_abundance.$level.sh

done