# pfam_threading.py  (68 lines, 63 loc, 1.99 KB)
# NOTE: this header replaces page-navigation text and a line-number gutter
# (1-68) that were captured along with the file; neither is part of the script.
import os
import subprocess
import threading
def init():
    """Return the hard-coded run settings as a three-item list.

    The items are, in order: the input directory path, the PfamScan
    directory path, and the chunk size (180).
    """
    return [
        "/home/haidong/PycharmProjects/Anti-crispr/bacteria_sub/subdir_2",
        "/home/haidong/huangle/PfamScan",
        180,
    ]
def _run_hmmscan_for_dir(GenDir, crisper_model_path, hmmscan_parser_path):
    """Write the hmmscan result and parsed-result files for one directory.

    Reads ``aca_predict.txt`` inside ``GenDir`` and writes
    ``aca_predict_result.txt`` and ``aca_predict_result_parse.txt`` next to it.
    """
    acaPath = os.path.join(GenDir, 'aca_predict.txt')
    acaHmmPath = os.path.join(GenDir, 'aca_predict_result.txt')
    acaHmmParsePath = os.path.join(GenDir, 'aca_predict_result_parse.txt')

    try:
        input_is_empty = os.path.getsize(acaPath) == 0
    except OSError as err:
        # A missing or unreadable input must not abort the rest of the chunk.
        print("skipping %s: %s" % (GenDir, err))
        return

    if input_is_empty:
        # Nothing to scan: leave both result files in place, empty.
        for out_path in (acaHmmPath, acaHmmParsePath):
            with open(out_path, "w"):
                pass
        return

    # Argument lists (no shell) keep paths containing spaces or shell
    # metacharacters intact; the redirections are done with file handles.
    with open(os.devnull, "w") as devnull:
        status = subprocess.call(
            ["hmmscan", "--domtblout", acaHmmPath, crisper_model_path, acaPath],
            stdout=devnull,
        )
    if status != 0:
        print("hmmscan exited with status %d for %s" % (status, acaPath))

    # The parser still runs after a failed scan so the parse file is always
    # (re)written, as it was when both commands went through the shell.
    with open(acaHmmParsePath, "w") as parsed:
        status = subprocess.call(
            ["bash", hmmscan_parser_path, acaHmmPath],
            stdout=parsed,
        )
    if status != 0:
        print("hmmscan-parser exited with status %d for %s" % (status, acaHmmPath))


def myfunc(i, InputDir, chunk_size, PfamScanDir):
    """Run hmmscan and its parser over the i-th chunk of InputDir's entries.

    The entries of ``InputDir`` are split into groups of ``chunk_size`` and
    only group number ``i`` is processed. For each entry, ``aca_predict.txt``
    is scanned against ``crisper_model.hmm`` with ``hmmscan --domtblout`` and
    the table is post-processed by ``hmmscan-parser.sh`` (both looked up in
    ``PfamScanDir``). An empty ``aca_predict.txt`` yields two empty result
    files without invoking either tool.

    Args:
        i: Index of the chunk to process.
        InputDir: Directory whose entries each hold an ``aca_predict.txt``.
        chunk_size: Number of entries per chunk.
        PfamScanDir: Directory holding the HMM model and the parser script.

    Raises:
        IndexError: If ``i`` is not a valid chunk index.
    """
    crisper_model_path = os.path.join(PfamScanDir, 'crisper_model.hmm')
    hmmscan_parser_path = os.path.join(PfamScanDir, 'hmmscan-parser.sh')

    # os.listdir returns entries in arbitrary order; sorting makes every
    # worker that lists the directory split it into the same chunks.
    Dirs = sorted(os.listdir(InputDir))
    myDirList = list(chunks(Dirs, chunk_size))[i]

    # Progress is counted per worker from 1; the chunk index ``i`` is no
    # longer reused (and mutated) as the counter.
    for processed, onedir in enumerate(myDirList, 1):
        _run_hmmscan_for_dir(
            os.path.join(InputDir, onedir),
            crisper_model_path,
            hmmscan_parser_path,
        )
        if processed % 100 == 0:
            print(processed)
def chunks(l, n):
    """Generate consecutive slices of ``l``, each at most ``n`` items long.

    The final slice is shorter when ``len(l)`` is not a multiple of ``n``.
    """
    for start in range(0, len(l), n):
        stop = start + n
        yield l[start:stop]
# Load the run settings returned by init(): input directory, PfamScan
# directory and chunk size, in that order.
arr = init()
InputDir, PfamScanDir, chunk_size = arr[0], arr[1], arr[2]

# Split the input directory listing into chunks and create one worker
# thread per chunk; each worker receives its chunk index.
Dirs = os.listdir(InputDir)
Dir_arr = list(chunks(Dirs, chunk_size))
Dir_len = len(Dir_arr)
threads = [
    threading.Thread(
        target=myfunc,
        args=(chunk_index, InputDir, chunk_size, PfamScanDir),
    )
    for chunk_index in range(Dir_len)
]

# Start the workers only after all of them have been created.
for worker in threads:
    worker.start()