本文实例讲述了Python使用defaultdict读取文件各列的方法。分享给大家供大家参考，具体如下：
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
|
#!/usr/bin/python
"""USAGE: python *.py align_SNP_site out_file [ref_fasta]

Group the values of column 1 of a tab-separated file by column 4 and by
column 7, count how often each value occurs per group, and write both
tables to ``out_file``.

Three diagnostic numbers are printed to stdout, in this order: the number
of distinct column-4 keys, the number of distinct column-7 keys, and the
number of ``>`` header lines found in the reference FASTA file.

The code runs unchanged on Python 2.7 and Python 3.
"""
import sys
from collections import Counter, defaultdict
from operator import itemgetter

USAGE = "USAGE: python *.py align_SNP_site out_file [ref_fasta]"

# First field of the header row; rows starting with it carry no data.
HEADER_TAG = "SNP pattern"

# FASTA file inspected when no third command-line argument is given.
DEFAULT_REF_FASTA = "some_example.fasta"

# Highest column index read from each data row is 7, so 8 fields are needed.
MIN_COLUMNS = 8


def _group_columns(path):
    """Return two dicts mapping column 4 and column 7 to lists of column 1.

    Blank lines and the header row are skipped.

    Raises:
        ValueError: if a data row has fewer than ``MIN_COLUMNS`` fields.
    """
    by_col4 = defaultdict(list)
    by_col7 = defaultdict(list)
    with open(path) as handle:
        for lineno, raw in enumerate(handle, 1):
            # Strip only the line terminator: a full strip() would also eat
            # leading/trailing tabs and silently shift the column indices.
            line = raw.rstrip("\r\n")
            if not line.strip():
                continue
            cols = line.split("\t")
            if cols[0] == HEADER_TAG:
                continue
            if len(cols) < MIN_COLUMNS:
                raise ValueError(
                    "%s:%d: expected at least %d tab-separated columns, got %d"
                    % (path, lineno, MIN_COLUMNS, len(cols))
                )
            by_col4[cols[4]].append(cols[1])
            by_col7[cols[7]].append(cols[1])
    return by_col4, by_col7


def _count_fasta_headers(path):
    """Return the number of lines in *path* that start with ``>``."""
    with open(path) as handle:
        # Stream the file; only the count is needed, not the header text.
        return sum(1 for line in handle if line.startswith(">"))


def _write_counts(out, groups, descending):
    """Write one row per key: the key, then value/count pairs, tab-separated.

    Pairs are ordered by count (largest first when *descending* is true);
    ties keep first-seen order because the sort is stable. Every field,
    including the last one, is followed by a tab before the newline.
    """
    for key, values in groups.items():
        ranked = sorted(
            Counter(values).items(), key=itemgetter(1), reverse=descending
        )
        out.write("%s\t" % key)
        for value, count in ranked:
            out.write("%s\t%d\t" % (value, count))
        out.write("\n")


def main(argv=None):
    """Run the script and return a process exit status.

    Args:
        argv: argument list without the program name; defaults to
            ``sys.argv[1:]``. Expected items are the input table, the
            output path and, optionally, the reference FASTA path.

    Returns:
        0 on success, 2 when fewer than two arguments are supplied.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        sys.stderr.write(USAGE + "\n")
        return 2

    table_path, out_path = args[0], args[1]
    ref_fasta = args[2] if len(args) > 2 else DEFAULT_REF_FASTA

    by_col4, by_col7 = _group_columns(table_path)
    print(len(by_col4))
    print(len(by_col7))
    print(_count_fasta_headers(ref_fasta))

    with open(out_path, "w") as out:
        # The first table lists the most frequent value first, the second
        # the least frequent first; both orders are kept from the original.
        _write_counts(out, by_col4, descending=True)
        _write_counts(out, by_col7, descending=False)
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
希望本文所述对大家Python程序设计有所帮助。