-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #7 from monarch-initiative/develop
Develop
- Loading branch information
Showing
51 changed files
with
1,313 additions
and
2,756 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Set-up | ||
|
||
TODO -- how to setup Java etc. | ||
|
||
## Download command | ||
Before running the batch command, run the download command to get the necessary files | ||
|
||
``` | ||
java -jar target/phenopacket2prompt.jar download | ||
``` | ||
|
||
## Batch command | ||
To run the batch command, first download the latest release from the | ||
[releases](https://github.com/monarch-initiative/phenopacket-store/releases) section of the phenopacket-store | ||
repository. Unpack either all_phenopackets.tgz or all_phenopackets.zip (the files are identical except for the | ||
method of compression). | ||
|
||
``` | ||
java -jar target/phenopacket2prompt.jar batch -d <all_phenopackets> | ||
``` | ||
Replace `<all_phenopackets>` with the actual path on your system. | ||
|
||
The app should create a folder "prompts" with two subdirectories, "en" and "es", containing the English and Spanish prompts, respectively. | ||
There are some errors that still need to be fixed, but several thousand prompts should appear. | ||
|
||
## Todo | ||
also output a file with expected diagnosis |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
176 changes: 176 additions & 0 deletions
176
src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,176 @@ | ||
package org.monarchinitiative.phenopacket2prompt.cmd; | ||
|
||
|
||
import org.monarchinitiative.phenol.base.PhenolRuntimeException; | ||
import org.monarchinitiative.phenol.io.OntologyLoader; | ||
import org.monarchinitiative.phenol.ontology.data.Ontology; | ||
import org.monarchinitiative.phenopacket2prompt.international.HpInternational; | ||
import org.monarchinitiative.phenopacket2prompt.international.HpInternationalOboParser; | ||
import org.monarchinitiative.phenopacket2prompt.model.PhenopacketDisease; | ||
import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; | ||
import org.monarchinitiative.phenopacket2prompt.output.PromptGenerator; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
import picocli.CommandLine; | ||
|
||
import java.io.BufferedWriter; | ||
import java.io.File; | ||
import java.io.FileWriter; | ||
import java.io.IOException; | ||
import java.nio.file.Files; | ||
import java.nio.file.Paths; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.concurrent.Callable; | ||
|
||
@CommandLine.Command(name = "batch", aliases = {"B"}, | ||
mixinStandardHelpOptions = true, | ||
description = "Translate batch of phenopackets and output prompts") | ||
public class GbtTranslateBatchCommand implements Callable<Integer> { | ||
private final static Logger LOGGER = LoggerFactory.getLogger(GbtTranslateBatchCommand.class); | ||
|
||
|
||
@CommandLine.Option(names = {"--hp"}, | ||
description = "path to HP json file") | ||
private String hpoJsonPath = "data/hp.json"; | ||
|
||
@CommandLine.Option(names = {"--translations"}, | ||
description = "path to translations file") | ||
private String translationsPath = "data/hp-international.obo"; | ||
|
||
@CommandLine.Option(names = {"-d", "--dir"}, description = "Path to directory with JSON phenopacket files", required = true) | ||
private String ppktDir; | ||
|
||
@Override | ||
public Integer call() throws Exception { | ||
File hpJsonFile = new File(hpoJsonPath); | ||
if (! hpJsonFile.isFile()) { | ||
throw new PhenolRuntimeException("Could not find hp.json at " + hpJsonFile.getAbsolutePath()); | ||
} | ||
Ontology hpo = OntologyLoader.loadOntology(hpJsonFile); | ||
LOGGER.info("HPO version {}", hpo.version().orElse("n/a")); | ||
File translationsFile = new File(translationsPath); | ||
if (! translationsFile.isFile()) { | ||
System.err.printf("Could not find translations file at %s. Try download command", translationsPath); | ||
return 1; | ||
} | ||
HpInternationalOboParser oboParser = new HpInternationalOboParser(translationsFile); | ||
Map<String, HpInternational> internationalMap = oboParser.getLanguageToInternationalMap(); | ||
LOGGER.info("Got {} translations", internationalMap.size()); | ||
List<File> ppktFiles = getAllPhenopacketJsonFiles(); | ||
createDir("prompts"); | ||
outputPromptsEnglish(ppktFiles, hpo); | ||
PromptGenerator spanish = PromptGenerator.spanish(hpo, internationalMap.get("es")); | ||
outputPromptsInternational(ppktFiles, hpo, "es", spanish); | ||
return 0; | ||
} | ||
|
||
|
||
|
||
private String getFileName(String phenopacketID) { | ||
return phenopacketID.replaceAll("[^\\w]", phenopacketID).replaceAll("/","_") + "-prompt.txt"; | ||
} | ||
|
||
|
||
|
||
private void outputPromptsInternational(List<File> ppktFiles, Ontology hpo, String languageCode, PromptGenerator generator) { | ||
String dirpath = String.format("prompts/%s", languageCode); | ||
createDir(dirpath); | ||
List<String> diagnosisList = new ArrayList<>(); | ||
for (var f: ppktFiles) { | ||
PpktIndividual individual = new PpktIndividual(f); | ||
List<PhenopacketDisease> diseaseList = individual.getDiseases(); | ||
if (diseaseList.size() != 1) { | ||
System.err.println(String.format("[ERROR] Got %d diseases for %s.\n", diseaseList.size(), individual.getPhenopacketId())); | ||
continue; | ||
} | ||
PhenopacketDisease pdisease = diseaseList.get(0); | ||
String promptFileName = getFileName( individual.getPhenopacketId()); | ||
String diagnosisLine = String.format("%s\t%s\t%s\t%s", pdisease.getDiseaseId(), pdisease.getLabel(), promptFileName, f.getAbsolutePath()); | ||
try { | ||
diagnosisList.add(diagnosisLine); | ||
String prompt = generator.createPrompt(individual); | ||
outputPrompt(prompt, promptFileName, dirpath); | ||
} catch (Exception e) { | ||
e.printStackTrace(); | ||
} | ||
} | ||
} | ||
|
||
|
||
private void outputPromptsEnglish(List<File> ppktFiles, Ontology hpo) { | ||
createDir("prompts/en"); | ||
PromptGenerator generator = PromptGenerator.english(hpo); | ||
List<String> diagnosisList = new ArrayList<>(); | ||
for (var f: ppktFiles) { | ||
PpktIndividual individual = new PpktIndividual(f); | ||
List<PhenopacketDisease> diseaseList = individual.getDiseases(); | ||
if (diseaseList.size() != 1) { | ||
System.err.println(String.format("[ERROR] Got %d diseases for %s.\n", diseaseList.size(), individual.getPhenopacketId())); | ||
continue; | ||
} | ||
PhenopacketDisease pdisease = diseaseList.get(0); | ||
String promptFileName = getFileName( individual.getPhenopacketId()); | ||
String diagnosisLine = String.format("%s\t%s\t%s\t%s", pdisease.getDiseaseId(), pdisease.getLabel(), promptFileName, f.getAbsolutePath()); | ||
try { | ||
diagnosisList.add(diagnosisLine); | ||
String prompt = generator.createPrompt(individual); | ||
outputPrompt(prompt, promptFileName, "prompts/en"); | ||
} catch (Exception e) { | ||
e.printStackTrace(); | ||
} | ||
} | ||
} | ||
|
||
|
||
|
||
private void outputPrompt(String prompt, String promptFileName, String dir) { | ||
File outpath = new File(dir + File.separator + promptFileName); | ||
try (BufferedWriter bw = new BufferedWriter(new FileWriter(outpath))) { | ||
bw.write(prompt); | ||
} catch (IOException e) { | ||
e.printStackTrace(); | ||
} | ||
System.out.print("."); | ||
} | ||
|
||
|
||
|
||
private void createDir(String path) { | ||
File pathAsFile = new File(path); | ||
if (!Files.exists(Paths.get(path))) { | ||
pathAsFile.mkdir(); | ||
} | ||
} | ||
|
||
|
||
|
||
|
||
|
||
private List<File> getAllPhenopacketJsonFiles() { | ||
List<String> ppktDirectories = new ArrayList<>(); | ||
List<File> ppktFiles = new ArrayList<>(); | ||
File[] items = new File(this.ppktDir).listFiles(); | ||
// We know that all phenopackets are located in the subdirectories | ||
if (!ppktDir.substring(ppktDir.length() - 1).equals("/")) { | ||
ppktDir += "/"; | ||
} | ||
for (File item : items) { | ||
if (item.isDirectory()) | ||
ppktDirectories.add(ppktDir+item.getName()); | ||
} | ||
for (var f: ppktDirectories) { | ||
File subdir = new File(f); | ||
File[] files = subdir.listFiles(); | ||
for (var ff : files) { | ||
if (ff.isFile() && ff.getAbsolutePath().endsWith(".json")) { | ||
ppktFiles.add(ff); | ||
} | ||
} | ||
} | ||
System.out.printf("Retrieved %d files.\n", ppktFiles.size()); | ||
return ppktFiles; | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
85 changes: 0 additions & 85 deletions
85
src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/OntoGptCommand.java
This file was deleted.
Oops, something went wrong.
25 changes: 0 additions & 25 deletions
25
src/main/java/org/monarchinitiative/phenopacket2prompt/legacy/AdditionalConcept.java
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.