diff --git a/cmd/vectorize.go b/cmd/vectorize.go new file mode 100644 index 000000000..308fba5a1 --- /dev/null +++ b/cmd/vectorize.go @@ -0,0 +1,145 @@ +package cmd + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "os" + "path" + "path/filepath" + + "github.com/google/uuid" + "github.com/labdao/plex/internal/ipfs" + "github.com/labdao/plex/internal/ipwl" + "github.com/spf13/cobra" +) + +var ( + ioPath string + toolCid string +) + +var vectorizeCmd = &cobra.Command{ + Use: "vectorize", + Short: "Transform an IO JSON file into a list of outputs", + Long: `Transform an IO JSON file into a list of outputs.`, + Run: func(cmd *cobra.Command, args []string) { + _, err := VectorizeOutputs(ioPath, toolCid, outputDir) + if err != nil { + fmt.Println("Error:", err) + os.Exit(1) + } + }, +} + +func VectorizeOutputs(ioPath string, toolCid string, outputDir string) (map[string]ipwl.OutputValues, error) { + isCID := ipfs.IsValidCID(ioPath) + id := uuid.New() + workDirPath := "" + + var cwd string + var err error + if outputDir == "" { + cwd, err = os.Getwd() + if err != nil { + return nil, err + } + } else { + cwd, err = filepath.Abs(outputDir) + if err != nil { + return nil, err + } + } + + workDirPath = path.Join(cwd, id.String()) + err = os.Mkdir(workDirPath, 0755) + if err != nil { + return nil, err + } + + var ioJsonFilPath string + if isCID { + ioJsonFilPath = path.Join(workDirPath, "io.json") + err = ipfs.DownloadFileContents(ioPath, ioJsonFilPath) + if err != nil { + return nil, err + } + } else { + ioJsonFilPath, err = filepath.Abs(ioPath) + if err != nil { + return nil, err + } + } + + file, err := os.Open(ioJsonFilPath) + if err != nil { + return nil, err + } + defer file.Close() + + bytes, err := ioutil.ReadAll(file) + if err != nil { + return nil, err + } + + var ios []ipwl.IO + err = json.Unmarshal(bytes, &ios) + if err != nil { + return nil, err + } + + outputMap := make(map[string]ipwl.OutputValues) + for i, io := range ios { + if io.Tool.IPFS == toolCid { + for key, output := range io.Outputs { + fileOutput, ok := output.(ipwl.FileOutput) + if ok { + ov := outputMap[key] + + filePath := fmt.Sprintf("entry-%d/outputs/%s", i, fileOutput.FilePath) + absoluteFilePath := path.Join(workDirPath, filePath) + + // Check if the file is already downloaded + if _, err := os.Stat(absoluteFilePath); os.IsNotExist(err) { + // Download the file from IPFS to the local file path + err = ipfs.UnwrapAndDownloadFileContents(fileOutput.IPFS, absoluteFilePath) + if err != nil { + return nil, err + } + } + + ov.FilePaths = append(ov.FilePaths, absoluteFilePath) + ov.CIDs = append(ov.CIDs, fileOutput.IPFS) + outputMap[key] = ov + } + } + } + } + + // Save the output map to a JSON file + outputVectorsPath := path.Join(workDirPath, "output-vectors.json") + outputVectorsFile, err := os.Create(outputVectorsPath) + if err != nil { + return nil, err + } + defer outputVectorsFile.Close() + + jsonData, err := json.MarshalIndent(outputMap, "", " ") + if err != nil { + return nil, err + } + outputVectorsFile.Write(jsonData) + + // Exact text is used by Python SDK, do not modify + fmt.Println("Output Vectors were saved at:", outputVectorsPath) + + return outputMap, nil +} + +func init() { + vectorizeCmd.Flags().StringVarP(&ioPath, "ioPath", "i", "", "CID or file path of IO JsON") + vectorizeCmd.Flags().StringVarP(&toolCid, "toolCid", "t", "", "Only vectorize output CIDs") + vectorizeCmd.Flags().StringVarP(&outputDir, "outputDir", "o", "", "Only vectorize output CIDs") + + rootCmd.AddCommand(vectorizeCmd) +} diff --git a/internal/ipfs/ipfs.go b/internal/ipfs/ipfs.go index b8926d0d0..13ac85aff 100644 --- a/internal/ipfs/ipfs.go +++ b/internal/ipfs/ipfs.go @@ -147,6 +147,77 @@ func DownloadToTempDir(cid string) (string, error) { return downloadPath, nil } +func UnwrapAndDownloadFileContents(cid, outputFilePath string) error { + // First download the CID content to a temporary file + tempDirPath, err := DownloadToTempDir(cid) + if err != nil { + return err + } + + // Ensure that the temporary directory is deleted after we are done + defer os.RemoveAll(tempDirPath) + + onlyOneFile, tempFilePath, err := onlyOneFile(tempDirPath) + if err != nil { + return err + } + + if !onlyOneFile { + return fmt.Errorf("more than one file in the CID %s content", cid) + } + + // Now copy the downloaded content to the output file path + inputFile, err := os.Open(tempFilePath) + if err != nil { + return err + } + defer inputFile.Close() + + // Ensure the directory exists + outputDir := filepath.Dir(outputFilePath) + if _, err := os.Stat(outputDir); os.IsNotExist(err) { + err = os.MkdirAll(outputDir, 0755) + if err != nil { + return err + } + } + + outputFile, err := os.Create(outputFilePath) + if err != nil { + return err + } + defer outputFile.Close() + + _, err = io.Copy(outputFile, inputFile) + if err != nil { + return err + } + + return nil +} + +func onlyOneFile(dirPath string) (bool, string, error) { + files, err := ioutil.ReadDir(dirPath) + if err != nil { + return false, "", err + } + + var filePath string + fileCount := 0 + for _, file := range files { + if !file.IsDir() { + fileCount++ + filePath = filepath.Join(dirPath, file.Name()) + } + } + + if fileCount == 1 { + return true, filePath, nil + } else { + return false, "", nil + } +} + func DownloadFileContents(cid, filepath string) error { ipfsNodeUrl, err := DeriveIpfsNodeUrl() if err != nil { diff --git a/internal/ipwl/io.go b/internal/ipwl/io.go index 6d895409c..6ada3ea3e 100644 --- a/internal/ipwl/io.go +++ b/internal/ipwl/io.go @@ -153,3 +153,8 @@ func PrintIOGraphStatus(ioList []IO) { fmt.Printf("IOs in %s state: %d\n", state, count) } } + +type OutputValues struct { + FilePaths []string `json:"filePaths"` + CIDs []string `json:"cids"` +} diff --git a/python/dev/dev_start_here.ipynb b/python/dev/dev_start_here.ipynb index 680a600aa..55a63500e 100644 --- a/python/dev/dev_start_here.ipynb +++ b/python/dev/dev_start_here.ipynb @@ -10,9 +10,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 144, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 144, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Run this code to setup the Python PLEX module\n", "\n", @@ -59,7 +70,11 @@ "protein_paths = [f\"{PLEX_DIR}/testdata/binding/abl/7n9g.pdb\"]\n", "small_molecule_paths = [f\"{PLEX_DIR}/testdata/binding/pdbbind_processed_size2/6d08/6d08_ligand.sdf\"]\n", "\n", - "initial_io_json_cid = plex_init(CoreTools.EQUIBIND.value, plex_path=\"./plex\", protein=protein_paths, small_molecule=small_molecule_paths)\n", + "initial_io_json_cid = plex_init(CoreTools.EQUIBIND.value,\n", + " plex_path=\"./plex\",\n", + " protein=protein_paths,\n", + " small_molecule=small_molecule_paths)\n", + "\n", "print(f\"Initial IO JSON CID: {initial_io_json_cid}\")\n" ] }, @@ -70,9 +85,63 @@ "outputs": [], "source": [ "\n", - "completed_io_json_cid, io_json_local_filepath = plex_run(initial_io_json_cid, plex_path=\"./plex\")\n", - "print(f\"Completed IO JSON CID: {completed_io_json_cid}\")\n", - "print(f\"IO JSON Local Filepath: {io_json_local_filepath}\")\n" + "CACHE = True\n", + "if CACHE:\n", + " completed_io_json_cid = \"QmTAFuQTLHrS6dmC4BcpxSRPaFxeWHkpVBFUJRk7vwNGRa\"\n", + " io_json_file_path = \"./jobs/e8091127-37f2-4aa5-b468-1180d603c9bf/io.json\"\n", + "else:\n", + " completed_io_json_cid, io_json_file_path = plex_run(initial_io_json_cid, plex_path=\"./plex\")\n", + "\n", + "print(f\"Completed IO JSON CID: {completed_io_json_cid}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 143, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Plex version (v0.8.0) up to date.\n", + "Output Vectors were saved at: /Users/mcmenemy/code/plex/jobs/a945bbed-a680-4179-bcb2-bcc4e84a8f76/output-vectors.json\n", + "{}\n" + ] + } + ], + "source": [ + "from plex.sdk import plex_vectorize\n", + "\n", + "output_vectors = plex_vectorize(io_json_file_path, CoreTools.EQUIBIND.value, outputDir=\"./jobs\", plex_path=\"./plex\")\n", + "print(output_vectors)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "initial_io_json_cid = plex_init(\n", + " CoreTools.ODDT.value,\n", + " plex_path=\"./plex\",\n", + " protein=output_vectors[\"protein\"][\"filePaths\"],\n", + " small_molecule=output_vectors[\"best_docked_small_molecule\"][\"filePaths\"])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "CACHE = False\n", + "if CACHE:\n", + " completed_io_json_cid = \"QmV7rBLVQZGRqkg81pd9sMFEtmHKsJBazTdVvMbiDcpAKe\"\n", + " io_json_file_path = \"./jobs/62d74918-8fea-484c-a0f7-856d77dfa247/io.json\"\n", + "else:\n", + " completed_io_json_cid, io_json_file_path = plex_run(initial_io_json_cid, plex_path=\"./plex\")\n" ] }, { @@ -84,7 +153,7 @@ "from plex.sdk import plex_mint\n", "\n", "print(completed_io_json_cid)\n", - "plex_mint(completed_io_json_cid)" + "plex_mint(completed_io_json_cid, plex_path=\"./plex\")" ] }, { diff --git a/python/plex/sdk.py b/python/plex/sdk.py index 0b329c018..068af8b11 100644 --- a/python/plex/sdk.py +++ b/python/plex/sdk.py @@ -1,6 +1,7 @@ import json import os import subprocess +import tempfile from enum import Enum from typing import Dict, List @@ -47,6 +48,24 @@ def plex_init(toolpath: str, scatteringMethod="dotProduct", plex_path="plex", ** return io_json_cid +def plex_vectorize(io_path: str, tool_cid: str, outputDir="", plex_path="plex"): + cwd = os.getcwd() + plex_work_dir = os.environ.get("PLEX_WORK_DIR", os.path.dirname(os.path.dirname(cwd))) + + cmd = [plex_path, "vectorize", "-i", io_path, "-t", tool_cid, "-o", outputDir] + with subprocess.Popen(cmd, stdout=subprocess.PIPE, bufsize=1, universal_newlines=True, cwd=plex_work_dir) as p: + outvects = "" + for line in p.stdout: + if "Output Vectors were saved at:" in line: + parts = line.split() + io_vector_outpath = parts[-1] + with open(io_vector_outpath, 'r') as f: + outvects = json.load(f) + os.remove(io_vector_outpath) + print(line, end='') + return outvects + + def plex_upload(filePath: str, plex_path="plex"): cmd = [plex_path, "upload", "-p", filePath] diff --git a/upgrade.go b/upgrade.go index 8d41c21a8..449ca5080 100644 --- a/upgrade.go +++ b/upgrade.go @@ -18,7 +18,7 @@ import ( ) const ( - CurrentPlexVersion = "v0.8.0" + CurrentPlexVersion = "v0.8.1" ReleaseURL = "https://api.github.com/repos/labdao/plex/releases/latest" ToolsURL = "https://api.github.com/repos/labdao/plex/contents/tools?ref=main" )