Skip to content

Commit

Permalink
Merge pull request #7 from monarch-initiative/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
pnrobinson authored Apr 26, 2024
2 parents a1dc18c + b2c4429 commit 3c157fd
Show file tree
Hide file tree
Showing 51 changed files with 1,313 additions and 2,756 deletions.
9 changes: 6 additions & 3 deletions .github/workflows/documentation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: mkdocs-generation
on:
push:
branches:
- main
- develop
permissions:
contents: write
jobs:
Expand All @@ -20,6 +20,9 @@ jobs:
key: ${{ github.ref }}
path: .cache

- run: python3 -m pip install .[docs]

- run: pip install mkdocs-material
- run: pip install mkdocs-material[imaging]
- run: pip install mkdocs-material-extensions
- run: pip install pillow cairosvg
- run: pip install mkdocstrings[python]
- run: mkdocs gh-deploy --force
3 changes: 0 additions & 3 deletions docs/_static/style.css

This file was deleted.

27 changes: 27 additions & 0 deletions docs/setup.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Set-up

TODO -- how to set up Java etc.

## Download command
Before running the batch command, run the download command to get the necessary files:

```
java -jar target/phenopacket2prompt.jar download
```

## Batch command
To run the batch command, first download the latest release from the
[releases](https://github.com/monarch-initiative/phenopacket-store/releases) section of the phenopacket-store
repository. Unpack either all_phenopackets.tgz or all_phenopackets.zip (the files are identical except for the
method of compression).

```
java -jar target/phenopacket2prompt.jar batch -d <all_phenopackets>
```
Replace `<all_phenopackets>` with the actual path on your system.

The app should create a folder "prompts" with two subdirectories, "en" and "es", containing the English and Spanish prompts, respectively.
There are some errors that still need to be fixed, but several thousand prompts should appear.

## Todo
also output a file with expected diagnosis
1 change: 1 addition & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ nav:
- Languages:
- "Template": 'languages.md'
- "English": "english.md"
- Setup: "setup.md"

plugins:
- search
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

<groupId>org.monarchinitiative</groupId>
<artifactId>phenopacket2prompt</artifactId>
<version>0.3.11</version>
<version>0.3.14</version>

<name>phenopacket2prompt</name>
<url>https://github.com/monarch-initiative/phenopacket2prompt</url>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ public static void main(String[] args){
args = new String[]{"-h"};
}
CommandLine cline = new CommandLine(new Main())
.addSubcommand("batch", new GbtTranslateBatchCommand())
.addSubcommand("download", new DownloadCommand())
.addSubcommand("gpt", new OntoGptCommand())
.addSubcommand("translate", new GptTranslateCommand())
;
cline.setToggleBooleanFlags(false);
Expand All @@ -30,4 +30,8 @@ public Integer call() {
// work done in subcommands
return 0;
}




}
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
package org.monarchinitiative.phenopacket2prompt.cmd;


import org.monarchinitiative.phenol.base.PhenolRuntimeException;
import org.monarchinitiative.phenol.io.OntologyLoader;
import org.monarchinitiative.phenol.ontology.data.Ontology;
import org.monarchinitiative.phenopacket2prompt.international.HpInternational;
import org.monarchinitiative.phenopacket2prompt.international.HpInternationalOboParser;
import org.monarchinitiative.phenopacket2prompt.model.PhenopacketDisease;
import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual;
import org.monarchinitiative.phenopacket2prompt.output.PromptGenerator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import picocli.CommandLine;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;

/**
 * Picocli subcommand ("batch") that reads every phenopacket JSON file found in the
 * immediate subdirectories of the directory given with {@code -d/--dir} and writes one
 * prompt file per phenopacket to {@code prompts/en} (English) and {@code prompts/es}
 * (Spanish).
 *
 * <p>Phenopackets that do not list exactly one disease are reported on stderr and skipped.
 */
@CommandLine.Command(name = "batch", aliases = {"B"},
        mixinStandardHelpOptions = true,
        description = "Translate batch of phenopackets and output prompts")
public class GbtTranslateBatchCommand implements Callable<Integer> {
    private static final Logger LOGGER = LoggerFactory.getLogger(GbtTranslateBatchCommand.class);

    /** Root output directory; one subdirectory per language code is created beneath it. */
    private static final String PROMPT_DIR = "prompts";
    /** Suffix appended to the sanitized phenopacket identifier to form the prompt file name. */
    private static final String PROMPT_FILE_SUFFIX = "-prompt.txt";
    private static final String ENGLISH = "en";
    private static final String SPANISH = "es";

    @CommandLine.Option(names = {"--hp"},
            description = "path to HP json file")
    private String hpoJsonPath = "data/hp.json";

    @CommandLine.Option(names = {"--translations"},
            description = "path to translations file")
    private String translationsPath = "data/hp-international.obo";

    @CommandLine.Option(names = {"-d", "--dir"}, description = "Path to directory with JSON phenopacket files", required = true)
    private String ppktDir;

    /**
     * Loads the ontology and the translations, collects the phenopacket files and writes
     * the English and Spanish prompts.
     *
     * @return 0 on success; 1 if the translations file is missing or has no Spanish entry
     * @throws PhenolRuntimeException if hp.json is missing, the phenopacket directory cannot
     *                                be listed, or an output directory cannot be created
     */
    @Override
    public Integer call() throws Exception {
        File hpJsonFile = new File(hpoJsonPath);
        if (!hpJsonFile.isFile()) {
            throw new PhenolRuntimeException("Could not find hp.json at " + hpJsonFile.getAbsolutePath());
        }
        Ontology hpo = OntologyLoader.loadOntology(hpJsonFile);
        LOGGER.info("HPO version {}", hpo.version().orElse("n/a"));

        File translationsFile = new File(translationsPath);
        if (!translationsFile.isFile()) {
            System.err.printf("Could not find translations file at %s. Try download command%n", translationsPath);
            return 1;
        }
        HpInternationalOboParser oboParser = new HpInternationalOboParser(translationsFile);
        Map<String, HpInternational> internationalMap = oboParser.getLanguageToInternationalMap();
        LOGGER.info("Got {} translations", internationalMap.size());

        // Fail early with a readable message instead of handing null to the generator.
        HpInternational spanishTerms = internationalMap.get(SPANISH);
        if (spanishTerms == null) {
            System.err.printf("Translations file %s has no entry for language \"%s\"%n", translationsPath, SPANISH);
            return 1;
        }

        List<File> ppktFiles = getAllPhenopacketJsonFiles();
        createDir(PROMPT_DIR);
        outputPromptsEnglish(ppktFiles, hpo);
        outputPromptsInternational(ppktFiles, hpo, SPANISH, PromptGenerator.spanish(hpo, spanishTerms));
        return 0;
    }

    /**
     * Derives a file-system-safe prompt file name from a phenopacket identifier.
     *
     * @param phenopacketID identifier of the phenopacket
     * @return the identifier with every character outside {@code [A-Za-z0-9_]} replaced by
     *         an underscore, followed by {@code -prompt.txt}
     */
    private String getFileName(String phenopacketID) {
        // The replacement must be a literal underscore; "/" is already covered by [^\w].
        return phenopacketID.replaceAll("[^\\w]", "_") + PROMPT_FILE_SUFFIX;
    }

    /**
     * Writes prompts for all phenopackets in the given language.
     *
     * @param ppktFiles    phenopacket JSON files
     * @param hpo          ontology (unused here; kept so both output methods share a shape)
     * @param languageCode name of the subdirectory of {@code prompts} to write to
     * @param generator    generator producing the prompt text for that language
     */
    private void outputPromptsInternational(List<File> ppktFiles, Ontology hpo, String languageCode, PromptGenerator generator) {
        outputPrompts(ppktFiles, languageCode, generator);
    }

    /**
     * Writes English prompts for all phenopackets to {@code prompts/en}.
     *
     * @param ppktFiles phenopacket JSON files
     * @param hpo       ontology used to build the English prompt generator
     */
    private void outputPromptsEnglish(List<File> ppktFiles, Ontology hpo) {
        outputPrompts(ppktFiles, ENGLISH, PromptGenerator.english(hpo));
    }

    /**
     * Shared implementation of the per-language output loop: creates the language
     * directory and writes one prompt file per phenopacket that has exactly one disease.
     */
    private void outputPrompts(List<File> ppktFiles, String languageCode, PromptGenerator generator) {
        String dirpath = PROMPT_DIR + File.separator + languageCode;
        createDir(dirpath);
        for (File ppktFile : ppktFiles) {
            PpktIndividual individual = new PpktIndividual(ppktFile);
            List<PhenopacketDisease> diseaseList = individual.getDiseases();
            if (diseaseList.size() != 1) {
                System.err.printf("[ERROR] Got %d diseases for %s.%n", diseaseList.size(), individual.getPhenopacketId());
                continue;
            }
            String promptFileName = getFileName(individual.getPhenopacketId());
            try {
                String prompt = generator.createPrompt(individual);
                outputPrompt(prompt, promptFileName, dirpath);
            } catch (Exception e) {
                // Best effort: one failing phenopacket must not abort the whole batch.
                LOGGER.error("Could not create prompt for {}", ppktFile.getAbsolutePath(), e);
            }
        }
    }

    /**
     * Writes a single prompt to {@code dir/promptFileName} as UTF-8 and prints a progress dot.
     * An I/O failure is logged and does not stop the batch.
     */
    private void outputPrompt(String prompt, String promptFileName, String dir) {
        // Files.newBufferedWriter(Path) encodes as UTF-8 regardless of the platform default.
        try (BufferedWriter bw = Files.newBufferedWriter(Paths.get(dir, promptFileName))) {
            bw.write(prompt);
        } catch (IOException e) {
            LOGGER.error("Could not write prompt file {} in {}", promptFileName, dir, e);
            return;
        }
        System.out.print(".");
    }

    /**
     * Creates the directory (and any missing parents) if it does not exist yet.
     *
     * @throws PhenolRuntimeException if the directory does not exist and cannot be created
     */
    private void createDir(String path) {
        File dir = new File(path);
        if (!dir.isDirectory() && !dir.mkdirs()) {
            throw new PhenolRuntimeException("Could not create directory " + dir.getAbsolutePath());
        }
    }

    /**
     * Collects all {@code *.json} files located in the immediate subdirectories of the
     * phenopacket directory; files directly in that directory are not considered.
     *
     * @return the phenopacket files found (possibly empty)
     * @throws PhenolRuntimeException if the phenopacket directory cannot be listed
     */
    private List<File> getAllPhenopacketJsonFiles() {
        File topDir = new File(this.ppktDir);
        File[] items = topDir.listFiles();
        // listFiles() returns null when the path is not a directory or cannot be read.
        if (items == null) {
            throw new PhenolRuntimeException("Could not list phenopacket directory at " + topDir.getAbsolutePath());
        }
        List<File> ppktFiles = new ArrayList<>();
        for (File item : items) {
            if (!item.isDirectory()) {
                continue;
            }
            File[] candidates = item.listFiles();
            if (candidates == null) {
                LOGGER.warn("Could not list directory {}", item.getAbsolutePath());
                continue;
            }
            for (File candidate : candidates) {
                if (candidate.isFile() && candidate.getName().endsWith(".json")) {
                    ppktFiles.add(candidate);
                }
            }
        }
        System.out.printf("Retrieved %d files.%n", ppktFiles.size());
        return ppktFiles;
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
mixinStandardHelpOptions = true,
description = "Translate phenopackets and output prompts")
public class GptTranslateCommand implements Callable<Integer> {
Logger LOGGER = LoggerFactory.getLogger(GptTranslateCommand.class);
private final static Logger LOGGER = LoggerFactory.getLogger(GptTranslateCommand.class);


@CommandLine.Option(names = {"--hp"},
Expand Down

This file was deleted.

This file was deleted.

Loading

0 comments on commit 3c157fd

Please sign in to comment.