-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_prokka.py
59 lines (52 loc) · 1.67 KB
/
run_prokka.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import argparse
from multiprocessing import Pool
from pathlib import Path
from pyBioinfo_modules.basic.decompress import (getRootAndFiles,
getStemIfCompressed)
from pyBioinfo_modules.bio_sequences.bio_seq_file_extensions import \
FNA_EXTENSIONS
from pyBioinfo_modules.wrappers.prokka import runProkka
def buildParser() -> argparse.ArgumentParser:
    """Build the command-line parser for this script.

    Options:
        inputFiles: one or more files/folders given on the command line.
        --parallel: size of the worker pool (default 1). The original,
            misspelled ``--parrllel`` is still accepted as an alias.
        --threads: value forwarded to each run as ``cpu`` (default 4).
        --dry: boolean flag forwarded to each run as ``dry``.
    """
    parser = argparse.ArgumentParser(description="Run prokka for fasta files")
    parser.add_argument(
        "inputFiles",
        nargs="+",
        help="Input files/folders containing fasta files (can be gz or xz)",
    )
    # The flag used to exist only under the misspelled name; that spelling is
    # kept so existing command lines keep working. Note that prefixes shorter
    # than "--parr" now match both spellings and may be rejected as ambiguous
    # by argparse's abbreviation handling.
    parser.add_argument(
        "--parallel",
        "--parrllel",
        dest="parallel",
        type=int,
        default=1,
        help="Number of prokka run in parallel",
    )
    parser.add_argument(
        "--threads",
        type=int,
        default=4,
        help="Number of threads for each prokka run",
    )
    parser.add_argument("--dry", action="store_true")
    return parser


def main(argv=None):
    """Parse the command line and dispatch one runProkka call per target file.

    Args:
        argv: argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``, which is what the script entry point uses.

    Returns:
        A list with the return value of every runProkka call, in the order
        the target files were produced by getRootAndFiles.

    Raises:
        Whatever a worker raised: ``AsyncResult.get()`` re-raises it here.
    """
    args = buildParser().parse_args(argv)
    outputRoot, targetFiles = getRootAndFiles(args.inputFiles, FNA_EXTENSIONS)

    # The context manager guarantees the worker processes are cleaned up,
    # including when a job fails. Leaving the block terminates the pool, so
    # every result has to be collected before the block ends.
    with Pool(args.parallel) as runnerPool:
        results = []
        for f in targetFiles:
            # Each input gets its own "<stem>_prokka" directory under the
            # common output root.
            prokkaDir = outputRoot / (getStemIfCompressed(f) + "_prokka")
            results.append(
                runnerPool.apply_async(
                    runProkka,
                    kwds={
                        "fastaPath": f,
                        "gcode": 11,
                        "gram": "pos",
                        "center": "MBT",
                        "genus": None,
                        "species": None,
                        "strain": None,
                        "locustag": None,
                        "cpu": args.threads,
                        "output": prokkaDir,
                        "dry": args.dry,
                        "silent": True,
                    },
                )
            )
        # get() blocks until the corresponding job has finished.
        returns = [res.get() for res in results]
    return returns


# Worker processes started with the "spawn"/"forkserver" methods re-import
# this module; the guard keeps them from re-running the script (and from
# trying to create pools of their own).
if __name__ == "__main__":
    main()