Skip to content

Commit

Permalink
Genericize database line-by-line instead of buffering it entirely in memory.
Browse files Browse the repository at this point in the history
  • Loading branch information
reid-wagner committed Jun 26, 2024
1 parent fa446d8 commit f441870
Showing 1 changed file with 5 additions and 6 deletions.
11 changes: 5 additions & 6 deletions tools/fragpipe/genericize_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,9 @@
# Input and output database paths are taken from the first and second
# command-line arguments, respectively.
input_db_file = sys.argv[1]
output_db_file = sys.argv[2]

with open(input_db_file) as f:
input_db = f.readlines()
def sub_header(line):
    """Return *line* with a ``generic|`` tag inserted after a leading ``>``.

    Only lines that begin with ``>`` are candidates. The tag is added unless
    the text right after ``>`` already starts with one of the recognised
    prefixes: ``sp|``, ``tr|``, ``db|``, ``AP_``, ``NP_``, ``YP_``, ``XP_``,
    ``WP_``, ``ENSP``, ``UniRef``, ``nxp`` or ``generic``. Every other line
    is returned unchanged.
    """
    # The negative lookahead leaves already-recognised headers untouched, so
    # applying this function twice gives the same result as applying it once.
    return re.sub(
        r'^>(?!sp\||tr\||db\||AP_|NP_|YP_|XP_|WP_|ENSP|UniRef|nxp|generic)',
        '>generic|',
        line,
    )

subbed_lines = [re.sub(r'^>(?!sp\||tr\||db\||AP_|NP_|YP_|XP_|WP_|ENSP|UniRef|nxp|generic)', '>generic|', l) for l in input_db]

with open(output_db_file, 'w') as f:
f.writelines(subbed_lines)
# Stream the input one line at a time so the whole database is never held in
# memory; each line is passed through sub_header() and written out immediately.
with open(input_db_file) as in_file, open(output_db_file, 'w') as out_file:
    for line in in_file:
        out_file.write(sub_header(line))

0 comments on commit f441870

Please sign in to comment.