Skip to content

Commit

Permalink
vectorize command works (#453)
Browse files Browse the repository at this point in the history
  • Loading branch information
thetechnocrat-dev authored Jun 30, 2023
1 parent 165c103 commit ebaea50
Show file tree
Hide file tree
Showing 6 changed files with 317 additions and 8 deletions.
145 changes: 145 additions & 0 deletions cmd/vectorize.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
package cmd

import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"path"
"path/filepath"

"github.com/google/uuid"
"github.com/labdao/plex/internal/ipfs"
"github.com/labdao/plex/internal/ipwl"
"github.com/spf13/cobra"
)

// Flag values for the vectorize command; they are bound to the command-line
// flags in init and passed to VectorizeOutputs by vectorizeCmd.
var (
	// ioPath is the value of the --ioPath/-i flag.
	ioPath string
	// toolCid is the value of the --toolCid/-t flag.
	toolCid string
)

// vectorizeCmd is the "vectorize" subcommand. It forwards the parsed flag
// values to VectorizeOutputs and discards the returned map; on failure the
// error is printed to stdout and the process exits with status 1.
var vectorizeCmd = &cobra.Command{
	Use:   "vectorize",
	Short: "Transform an IO JSON file into a list of outputs",
	Long:  `Transform an IO JSON file into a list of outputs.`,
	Run: func(_ *cobra.Command, _ []string) {
		if _, runErr := VectorizeOutputs(ioPath, toolCid, outputDir); runErr != nil {
			fmt.Println("Error:", runErr)
			os.Exit(1)
		}
	},
}

// VectorizeOutputs reads a list of IO entries and, for every entry whose tool
// CID equals toolCid, downloads each file output and groups the resulting
// local file paths and CIDs by output name.
//
// ioPath is either an IPFS CID, in which case the IO JSON is first downloaded
// into the working directory as "io.json", or a path to a local IO JSON file.
// outputDir is the directory in which a fresh UUID-named working directory is
// created (missing parents are created too); when it is empty the current
// working directory is used.
//
// The grouped outputs are also written to "output-vectors.json" inside the
// working directory and that location is printed to stdout. A nil map is
// returned together with any error.
func VectorizeOutputs(ioPath string, toolCid string, outputDir string) (map[string]ipwl.OutputValues, error) {
	// Resolve the directory that will contain this run's working directory.
	var baseDir string
	var err error
	if outputDir == "" {
		baseDir, err = os.Getwd()
	} else {
		baseDir, err = filepath.Abs(outputDir)
	}
	if err != nil {
		return nil, err
	}

	// MkdirAll rather than Mkdir so that an outputDir that does not exist yet
	// is created instead of causing a failure.
	workDirPath := filepath.Join(baseDir, uuid.New().String())
	if err := os.MkdirAll(workDirPath, 0755); err != nil {
		return nil, err
	}

	var ioJSONPath string
	if ipfs.IsValidCID(ioPath) {
		ioJSONPath = filepath.Join(workDirPath, "io.json")
		if err := ipfs.DownloadFileContents(ioPath, ioJSONPath); err != nil {
			return nil, err
		}
	} else {
		ioJSONPath, err = filepath.Abs(ioPath)
		if err != nil {
			return nil, err
		}
	}

	ioJSON, err := ioutil.ReadFile(ioJSONPath)
	if err != nil {
		return nil, err
	}

	var ios []ipwl.IO
	if err := json.Unmarshal(ioJSON, &ios); err != nil {
		return nil, fmt.Errorf("parsing IO JSON %s: %w", ioJSONPath, err)
	}

	outputMap := make(map[string]ipwl.OutputValues)
	for i, entry := range ios {
		if entry.Tool.IPFS != toolCid {
			continue
		}
		for key, output := range entry.Outputs {
			fileOutput, ok := output.(ipwl.FileOutput)
			if !ok {
				// Only file outputs are vectorized.
				continue
			}

			// Build the slash-separated layout first, then convert it to an
			// OS-specific path below the working directory.
			// NOTE(review): fileOutput.FilePath comes straight from the IO JSON
			// and is joined unchecked; confirm it cannot contain ".." segments.
			relPath := path.Join(fmt.Sprintf("entry-%d", i), "outputs", fileOutput.FilePath)
			absoluteFilePath := filepath.Join(workDirPath, filepath.FromSlash(relPath))

			// Download the file from IPFS unless it is already present.
			if _, statErr := os.Stat(absoluteFilePath); os.IsNotExist(statErr) {
				if err := ipfs.UnwrapAndDownloadFileContents(fileOutput.IPFS, absoluteFilePath); err != nil {
					return nil, err
				}
			}

			ov := outputMap[key]
			ov.FilePaths = append(ov.FilePaths, absoluteFilePath)
			ov.CIDs = append(ov.CIDs, fileOutput.IPFS)
			outputMap[key] = ov
		}
	}

	// Save the output map to a JSON file. WriteFile reports write and close
	// failures, so a truncated file is never announced as saved.
	jsonData, err := json.MarshalIndent(outputMap, "", " ")
	if err != nil {
		return nil, err
	}
	outputVectorsPath := filepath.Join(workDirPath, "output-vectors.json")
	if err := ioutil.WriteFile(outputVectorsPath, jsonData, 0644); err != nil {
		return nil, err
	}

	// Exact text is used by Python SDK, do not modify
	fmt.Println("Output Vectors were saved at:", outputVectorsPath)

	return outputMap, nil
}

// init binds the vectorize flags to their package-level variables and
// registers the command on the root command.
func init() {
	vectorizeCmd.Flags().StringVarP(&ioPath, "ioPath", "i", "", "CID or file path of IO JSON")
	vectorizeCmd.Flags().StringVarP(&toolCid, "toolCid", "t", "", "Only vectorize outputs of IO entries that used this tool CID")
	vectorizeCmd.Flags().StringVarP(&outputDir, "outputDir", "o", "", "Directory in which the working directory is created (defaults to the current directory)")

	rootCmd.AddCommand(vectorizeCmd)
}
71 changes: 71 additions & 0 deletions internal/ipfs/ipfs.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,77 @@ func DownloadToTempDir(cid string) (string, error) {
return downloadPath, nil
}

// UnwrapAndDownloadFileContents downloads the content of cid into a temporary
// directory, requires that directory to directly contain exactly one file,
// and copies that file to outputFilePath, creating the parent directories of
// outputFilePath as needed. The temporary directory is removed on return.
//
// An error is returned if the download fails, if the downloaded content does
// not hold exactly one top-level file, or if the copy cannot be completed.
func UnwrapAndDownloadFileContents(cid, outputFilePath string) error {
	// First download the CID content to a temporary directory.
	tempDirPath, err := DownloadToTempDir(cid)
	if err != nil {
		return err
	}

	// Ensure that the temporary directory is deleted after we are done.
	defer os.RemoveAll(tempDirPath)

	single, tempFilePath, err := onlyOneFile(tempDirPath)
	if err != nil {
		return err
	}
	if !single {
		// Covers both the empty case and the several-files case.
		return fmt.Errorf("expected exactly one file in the CID %s content", cid)
	}

	inputFile, err := os.Open(tempFilePath)
	if err != nil {
		return err
	}
	defer inputFile.Close()

	// MkdirAll succeeds without doing anything when the directory exists.
	if err := os.MkdirAll(filepath.Dir(outputFilePath), 0755); err != nil {
		return err
	}

	outputFile, err := os.Create(outputFilePath)
	if err != nil {
		return err
	}

	if _, err := io.Copy(outputFile, inputFile); err != nil {
		outputFile.Close()
		return err
	}

	// Report the Close error: a failed flush means the copy is incomplete.
	return outputFile.Close()
}

// onlyOneFile reports whether dirPath directly contains exactly one
// non-directory entry. When it does, the second result is that entry's path
// (dirPath joined with the entry name); otherwise it is the empty string.
// Subdirectories are neither counted nor descended into. A non-nil error is
// returned only when dirPath cannot be read.
func onlyOneFile(dirPath string) (bool, string, error) {
	entries, err := ioutil.ReadDir(dirPath)
	if err != nil {
		return false, "", err
	}

	var names []string
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		names = append(names, entry.Name())
	}

	if len(names) != 1 {
		return false, "", nil
	}
	return true, filepath.Join(dirPath, names[0]), nil
}

func DownloadFileContents(cid, filepath string) error {
ipfsNodeUrl, err := DeriveIpfsNodeUrl()
if err != nil {
Expand Down
5 changes: 5 additions & 0 deletions internal/ipwl/io.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,3 +153,8 @@ func PrintIOGraphStatus(ioList []IO) {
fmt.Printf("IOs in %s state: %d\n", state, count)
}
}

// OutputValues groups the values collected for one output name: the local
// file paths and the matching IPFS CIDs. It serializes to JSON with the keys
// "filePaths" and "cids".
type OutputValues struct {
	// FilePaths holds the local paths of the collected files.
	FilePaths []string `json:"filePaths"`
	// CIDs holds the IPFS CIDs of the collected files.
	CIDs []string `json:"cids"`
}
83 changes: 76 additions & 7 deletions python/dev/dev_start_here.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 144,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"<module 'plex.sdk' from '/Users/mcmenemy/code/plex/python/plex/sdk.py'>"
]
},
"execution_count": 144,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Run this code to setup the Python PLEX module\n",
"\n",
Expand Down Expand Up @@ -59,7 +70,11 @@
"protein_paths = [f\"{PLEX_DIR}/testdata/binding/abl/7n9g.pdb\"]\n",
"small_molecule_paths = [f\"{PLEX_DIR}/testdata/binding/pdbbind_processed_size2/6d08/6d08_ligand.sdf\"]\n",
"\n",
"initial_io_json_cid = plex_init(CoreTools.EQUIBIND.value, plex_path=\"./plex\", protein=protein_paths, small_molecule=small_molecule_paths)\n",
"initial_io_json_cid = plex_init(CoreTools.EQUIBIND.value,\n",
" plex_path=\"./plex\",\n",
" protein=protein_paths,\n",
" small_molecule=small_molecule_paths)\n",
"\n",
"print(f\"Initial IO JSON CID: {initial_io_json_cid}\")\n"
]
},
Expand All @@ -70,9 +85,63 @@
"outputs": [],
"source": [
"\n",
"completed_io_json_cid, io_json_local_filepath = plex_run(initial_io_json_cid, plex_path=\"./plex\")\n",
"print(f\"Completed IO JSON CID: {completed_io_json_cid}\")\n",
"print(f\"IO JSON Local Filepath: {io_json_local_filepath}\")\n"
"CACHE = True\n",
"if CACHE:\n",
" completed_io_json_cid = \"QmTAFuQTLHrS6dmC4BcpxSRPaFxeWHkpVBFUJRk7vwNGRa\"\n",
" io_json_file_path = \"./jobs/e8091127-37f2-4aa5-b468-1180d603c9bf/io.json\"\n",
"else:\n",
" completed_io_json_cid, io_json_file_path = plex_run(initial_io_json_cid, plex_path=\"./plex\")\n",
"\n",
"print(f\"Completed IO JSON CID: {completed_io_json_cid}\")\n"
]
},
{
"cell_type": "code",
"execution_count": 143,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Plex version (v0.8.0) up to date.\n",
"Output Vectors were saved at: /Users/mcmenemy/code/plex/jobs/a945bbed-a680-4179-bcb2-bcc4e84a8f76/output-vectors.json\n",
"{}\n"
]
}
],
"source": [
"from plex.sdk import plex_vectorize\n",
"\n",
"output_vectors = plex_vectorize(io_json_file_path, CoreTools.EQUIBIND.value, outputDir=\"./jobs\", plex_path=\"./plex\")\n",
"print(output_vectors)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"initial_io_json_cid = plex_init(\n",
" CoreTools.ODDT.value,\n",
" plex_path=\"./plex\",\n",
" protein=output_vectors[\"protein\"][\"filePaths\"],\n",
" small_molecule=output_vectors[\"best_docked_small_molecule\"][\"filePaths\"])\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"CACHE = False\n",
"if CACHE:\n",
" completed_io_json_cid = \"QmV7rBLVQZGRqkg81pd9sMFEtmHKsJBazTdVvMbiDcpAKe\"\n",
" io_json_file_path = \"./jobs/62d74918-8fea-484c-a0f7-856d77dfa247/io.json\"\n",
"else:\n",
" completed_io_json_cid, io_json_file_path = plex_run(initial_io_json_cid, plex_path=\"./plex\")\n"
]
},
{
Expand All @@ -84,7 +153,7 @@
"from plex.sdk import plex_mint\n",
"\n",
"print(completed_io_json_cid)\n",
"plex_mint(completed_io_json_cid)"
"plex_mint(completed_io_json_cid, plex_path=\"./plex\")"
]
},
{
Expand Down
19 changes: 19 additions & 0 deletions python/plex/sdk.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import os
import subprocess
import tempfile

from enum import Enum
from typing import Dict, List
Expand Down Expand Up @@ -47,6 +48,24 @@ def plex_init(toolpath: str, scatteringMethod="dotProduct", plex_path="plex", **
return io_json_cid


def plex_vectorize(io_path: str, tool_cid: str, outputDir="", plex_path="plex"):
    """Run ``plex vectorize`` and return the output vectors it produced.

    The command is executed in ``PLEX_WORK_DIR`` when that environment
    variable is set, otherwise two directory levels above the current working
    directory. Every line the command prints is echoed to stdout.

    Args:
        io_path: CID or file path of the IO JSON, passed as ``-i``.
        tool_cid: Tool CID passed as ``-t``.
        outputDir: Directory passed as ``-o``.
        plex_path: Path of the plex executable.

    Returns:
        The parsed contents of the output-vectors JSON file reported by the
        command (the file is deleted after it is read), or ``""`` when the
        command never reported one.
    """
    cwd = os.getcwd()
    plex_work_dir = os.environ.get("PLEX_WORK_DIR", os.path.dirname(os.path.dirname(cwd)))

    # Must match the text printed by the Go command.
    marker = "Output Vectors were saved at:"

    cmd = [plex_path, "vectorize", "-i", io_path, "-t", tool_cid, "-o", outputDir]
    with subprocess.Popen(cmd, stdout=subprocess.PIPE, bufsize=1, universal_newlines=True, cwd=plex_work_dir) as p:
        outvects = ""
        for line in p.stdout:
            if marker in line:
                # Take everything after the marker instead of the last
                # whitespace-separated token, so paths containing spaces
                # are not truncated.
                io_vector_outpath = line.split(marker, 1)[1].strip()
                with open(io_vector_outpath, 'r') as f:
                    outvects = json.load(f)
                os.remove(io_vector_outpath)
            print(line, end='')
    return outvects


def plex_upload(filePath: str, plex_path="plex"):
cmd = [plex_path, "upload", "-p", filePath]

Expand Down
2 changes: 1 addition & 1 deletion upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import (
)

const (
CurrentPlexVersion = "v0.8.0"
CurrentPlexVersion = "v0.8.1"
ReleaseURL = "https://api.github.com/repos/labdao/plex/releases/latest"
ToolsURL = "https://api.github.com/repos/labdao/plex/contents/tools?ref=main"
)
Expand Down

0 comments on commit ebaea50

Please sign in to comment.