Python批量处理：将文件夹中的所有pdb文件批量生成dssp文件：

# -*- coding: utf-8 -*-
"""Batch-convert PDB files to DSSP files.

For every entry in /ifs/home/fanchao/Manesh_pdb (the original note mentions
215 files) the dssp2 executable is run once, writing the result into
/ifs/home/fanchao/Manesh_dssp under the same base name with a ``.dssp``
extension.

Created on Sat Jun 20 19:36:34 2015

@author: chaofn
"""

import os
import subprocess

PDB_INPUT_DIR = '/ifs/home/fanchao/Manesh_pdb'
DSSP_OUTPUT_DIR = '/ifs/home/fanchao/Manesh_dssp'
DSSP_EXECUTABLE = '/ifs/share/lib/dssp/dssp2'


def build_dssp_jobs(file_names, pdb_dir=PDB_INPUT_DIR, dssp_dir=DSSP_OUTPUT_DIR):
    """Return one ``(input_path, output_path)`` pair per entry of file_names.

    Args:
        file_names: Bare file names (no directory part) of the input files.
        pdb_dir: Directory that contains the input files.
        dssp_dir: Directory that receives the generated ``.dssp`` files.

    Returns:
        A list of ``(input_path, output_path)`` tuples, in the same order as
        file_names. Every name is included; the last one is not skipped.
    """
    jobs = []
    for name in file_names:
        # splitext strips only the final extension, so names containing
        # several dots keep their full stem and cannot collide on output.
        stem = os.path.splitext(name)[0]
        jobs.append((os.path.join(pdb_dir, name),
                     os.path.join(dssp_dir, stem + '.dssp')))
    return jobs


def convert_all_pdb_files():
    """Run dssp2 once for every file listed in PDB_INPUT_DIR.

    The exit status of each dssp2 run is ignored, so one failing input does
    not stop the rest of the batch.
    """
    for input_file, output_file in build_dssp_jobs(os.listdir(PDB_INPUT_DIR)):
        # An argument list (no shell) keeps file names with spaces or shell
        # metacharacters intact.
        subprocess.call([DSSP_EXECUTABLE, '-i', input_file, '-o', output_file])


if __name__ == '__main__':
    convert_all_pdb_files()

批量处理数据：遍历dssp文件夹中的所有文件，从每个文件中提取符合要求的数据，并写入另一个文件夹中对应的txt文件：

 # -*- coding: utf-8 -*-
"""Extract the records of one chain from every DSSP file in a folder.

For each file in G:/Manesh_dssp the matching ``.fasta`` file in
G:/Manesh_fasta supplies a chain letter (taken from its first line), and the
DSSP lines belonging to that chain are condensed into a ``.txt`` file in
G:/Manesh_ACC.

Created on Sun Jun 21 13:03:19 2015

@author: chaofan
"""

import os
import re

DSSP_SOURCE_DIR = 'G:/Manesh_dssp'
FASTA_SOURCE_DIR = 'G:/Manesh_fasta'
ACC_OUTPUT_DIR = 'G:/Manesh_ACC'

# Number of leading lines skipped in every DSSP file before records are read.
# NOTE(review): presumably the DSSP header block; confirm it is always 28 lines.
DSSP_SKIPPED_LINES = 28

# A single capital letter enclosed between ':' and '|'.
CHAIN_PATTERN = re.compile(r':([A-Z])\|')


def parse_chain_letter(header_line):
    """Return the capital letter found between ':' and '|' in header_line.

    Args:
        header_line: First line of a FASTA file.

    Returns:
        The single-character chain letter.

    Raises:
        ValueError: If header_line contains no ``:X|`` marker.
    """
    match = CHAIN_PATTERN.search(header_line)
    if match is None:
        raise ValueError('no chain letter of the form ":X|" in %r' % header_line)
    return match.group(1)


def select_chain_records(dssp_lines, chain):
    """Build the output rows for the DSSP lines that belong to chain.

    The first DSSP_SKIPPED_LINES lines are ignored. A remaining line is kept
    when its character at index 11 equals chain; the row written for it joins
    line[7:10], line[11], line[13] and line[35:38] with single spaces.
    NOTE(review): these slices presumably hold the residue number, chain id,
    amino acid and solvent accessibility; confirm against the DSSP format.

    Args:
        dssp_lines: Iterable of text lines (for example an open file).
        chain: Chain letter to keep.

    Returns:
        A list of newline-terminated row strings.
    """
    rows = []
    for index, line in enumerate(dssp_lines):
        if index < DSSP_SKIPPED_LINES:
            continue
        # Lines too short to hold indexes 11 and 13 (e.g. a blank trailing
        # line) are skipped instead of raising IndexError.
        if len(line) > 13 and line[11] == chain:
            # '\n' is the last joined item, so each row ends with ' \n'.
            rows.append(' '.join([line[7:10], line[11], line[13], line[35:38], '\n']))
    return rows


def extract_all_chain_records():
    """Write one condensed ``.txt`` file for every file in DSSP_SOURCE_DIR."""
    for filename in os.listdir(DSSP_SOURCE_DIR):
        stem = os.path.splitext(filename)[0]
        # ``with`` closes each of the three files even when a step fails.
        with open('%s/%s.fasta' % (FASTA_SOURCE_DIR, stem)) as fasta_file:
            chain = parse_chain_letter(fasta_file.readline())
        with open('%s/%s' % (DSSP_SOURCE_DIR, filename)) as dssp_file:
            rows = select_chain_records(dssp_file, chain)
        with open('%s/%s.txt' % (ACC_OUTPUT_DIR, stem), 'w') as out_file:
            out_file.writelines(rows)


if __name__ == '__main__':
    extract_all_chain_records()

秒客网

python批量处理

相关文章