Skip to content

Commit

Permalink
Testing running kb-sdk kb_quast module with JAWS
Browse files Browse the repository at this point in the history
  • Loading branch information
MrCreosote committed Oct 6, 2024
1 parent 14a8810 commit 9d564c2
Show file tree
Hide file tree
Showing 7 changed files with 255 additions and 35 deletions.
57 changes: 57 additions & 0 deletions .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Builds the module's Docker image and pushes it to the GitHub Container Registry.
name: Docker

# Runs on manual dispatch, pushes to the listed branches or semver-style tags,
# pull requests targeting the listed branches, and published releases.
on:
workflow_dispatch:
push:
branches: [ "main", "master", "develop" ]
# Publish semver tags as releases.
tags:
- 'v[0-9]+.[0-9]+.[0-9]+'
- '[0-9]+.[0-9]+.[0-9]+'
- '[0-9]+.[0-9]+.[0-9]+-*'
pull_request:
branches: [ "main", "master", "develop", "jaws" ]
release:
types: [published]

# The image is named after the repository and hosted on ghcr.io.
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}

jobs:
build:
runs-on: ubuntu-latest
# packages: write is what allows the job's GITHUB_TOKEN to push to the registry.
permissions:
contents: read
packages: write

steps:
# Authenticate against the registry as the triggering user with the job's GITHUB_TOKEN.
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Checkout repository
uses: actions/checkout@v4

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

# Computes the image tags and labels consumed by the build step below.
- name: Extract Docker metadata
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}

# NOTE(review): push is unconditional, so pull_request runs publish an image as well.
# Presumably intentional for testing PR images - confirm before merging to a main branch.
- name: Build and push Docker image
id: build-and-push
uses: docker/build-push-action@v5
with:
context: .
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
# Reuse and populate the GitHub Actions layer cache between runs.
cache-from: type=gha
cache-to: type=gha,mode=max
2 changes: 1 addition & 1 deletion kb_quast.html

Large diffs are not rendered by default.

31 changes: 29 additions & 2 deletions kb_quast.spec
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ module kb_quast {
} Handle;

/* A local FASTA file.
path - the path to the FASTA file.
path - the in-container path to the FASTA file.
label - the label to use for the file in the QUAST output. If missing, the file name will
be used.
*/
Expand Down Expand Up @@ -68,6 +68,33 @@ module kb_quast {
/* Run QUAST and save a KBaseReport with the output. */
funcdef run_QUAST_app(QUASTAppParams params) returns(QUASTAppOutput output)
authentication required;

/* Input for the run_quast_local function.

files - the list of FASTA files upon which QUAST will be run.
quast_path - the in-container path where the QUAST output should be stored.

Optional arguments:
force_glimmer - runs the '--glimmer' option regardless of file/assembly object size if true
min_contig_length - set the minimum size of contigs to process. Defaults to 500,
minimum allowed is 50.
*/
typedef structure {
list<FASTAFile> files;
string quast_path;
boolean force_glimmer;
int min_contig_length;
} QUASTLocalParams;

/* Output of the run_quast_local function.
quast_path - the directory containing the quast output.
*/
typedef structure {
string quast_path;
} QUASTLocalOutput;

/* Run QUAST entirely locally. Unlike run_QUAST_app, this function does not declare
'authentication required'.
*/
funcdef run_QUAST_local(QUASTLocalParams params) returns(QUASTLocalOutput output);

/* Input for running QUAST.
assemblies - the list of assemblies upon which QUAST will be run.
Expand All @@ -88,7 +115,7 @@ module kb_quast {
int min_contig_length;
} QUASTParams;

/* Ouput of the run_quast function.
/* Output of the run_quast function.
shock_id - the id of the shock node where the zipped QUAST output is stored.
handle - the new handle for the shock node, if created.
node_file_name - the name of the file stored in Shock.
Expand Down
119 changes: 90 additions & 29 deletions lib/kb_quast/kb_quastImpl.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,25 @@ def __init__(self, obj_info):
self.size = obj_info[9]
self.meta = obj_info[10]
self.ref = str(self.wsid) + '/' + str(self.id) + '/' + str(self.version)


def _setup_files(files):
if not files:
raise ValueError('The files argument is required')
if type(files) != list:
raise ValueError('files must be a list')
labels, filepaths = [], []
for i, lp in enumerate(files):
l = lp.get('label')
p = lp.get('path')
if not _os.path.isfile(p):
raise ValueError('File entry {}, {}, is not a file'.format(i + 1, p))
l = l if l else _os.path.basename(p)
filepaths.append(p)
labels.append(l)
return labels, filepaths


#END_HEADER


Expand All @@ -55,7 +74,7 @@ class kb_quast:
######################################### noqa
VERSION = "1.1.0"
GIT_URL = "https://github.com/kbaseapps/kb_quast"
GIT_COMMIT_HASH = "f6be7c27bbf44a0d65b0250dbdb8079b5df9d7ae"
GIT_COMMIT_HASH = "14a88101fbe5639aeaa7b0e8e759070374a7d540"

#BEGIN_CLASS_HEADER

Expand Down Expand Up @@ -127,7 +146,14 @@ def get_assembly_object_info(self, assemblies, token):
raise ValueError('Duplicate objects detected in input') # could list objs later
return info

def run_quast_exec(self, outdir, filepaths, labels, min_contig_length, skip_glimmer=False):
def run_quast_exec(self, outdir, filepaths, labels, min_contig_length, force_glimmer):
if force_glimmer:
skip_glimmer = False
else:
skip_glimmer = self.check_large_input(filepaths)

# TODO check for name duplicates in labels and do something about it

threads = psutil.cpu_count() * self.THREADS_PER_CORE
# DO NOT use genemark instead of glimmer, not open source
# DO NOT use metaQUAST, uses SILVA DB which is not open source
Expand Down Expand Up @@ -250,6 +276,57 @@ def run_QUAST_app(self, ctx, params):
# return the results
return [output]

def run_QUAST_local(self, ctx, params):
"""
Run QUAST entirely locally.
:param params: instance of type "QUASTLocalParams" (Input for the
run_quast_local function. files - the list of FASTA files upon
which QUAST will be run. quast_path - the in-container path where
the QUAST output should be stored. Optional arguments:
force_glimmer - runs the '--glimmer' option regardless of
file/assembly object size if true min_contig_length - set the
minimum size of contigs to process. Defaults to 500, minimum
allowed is 50.) -> structure: parameter "files" of list of type
"FASTAFile" (A local FASTA file. path - the in-container path to
the FASTA file. label - the label to use for the file in the QUAST
output. If missing, the file name will be used.) -> structure:
parameter "path" of String, parameter "label" of String, parameter
"quast_path" of String, parameter "force_glimmer" of type
"boolean" (A boolean - 0 for false, 1 for true. @range (0, 1)),
parameter "min_contig_length" of Long
:returns: instance of type "QUASTLocalOutput" (Output of the
run_quast_local function. quast_path - the directory containing
the quast output.) -> structure: parameter "quast_path" of String
"""
# ctx is the context object
# return variables are: output
#BEGIN run_QUAST_local

self.log('Starting QUAST local run. Parameters:')
self.log(str(params))
files = params.get('files')
min_contig_length = self.get_min_contig_length(params) # fail early if param is bad
outdir = params.get("quast_path")
# Deliberately coded to not use scratch since it's hardcoded in deploy.cfg
# and changing that would break the module for standard SDK runs
# Don't need to do that if we have output mounting
if not outdir or not outdir.strip():
raise ValueError("The quast_path argument is required")
self.mkdir_p(outdir)
# Validates the file entries and derives a label for each path
labels, filepaths = _setup_files(files)

# force_glimmer may be absent from params, in which case None is passed through
self.run_quast_exec(
outdir, filepaths, labels, min_contig_length, params.get("force_glimmer"))
# The output is written in place; only its location is returned
output = {"quast_path": outdir}
#END run_QUAST_local

# At some point might do deeper type checking...
if not isinstance(output, dict):
raise ValueError('Method run_QUAST_local return value ' +
'output is not type dict as required.')
# return the results
return [output]

def run_QUAST(self, ctx, params):
"""
Run QUAST and return a shock node containing the zipped QUAST output.
Expand All @@ -265,14 +342,14 @@ def run_QUAST(self, ctx, params):
workspace object containing an assembly, either a
KBaseGenomes.ContigSet or KBaseGenomeAnnotations.Assembly.),
parameter "files" of list of type "FASTAFile" (A local FASTA file.
path - the path to the FASTA file. label - the label to use for
the file in the QUAST output. If missing, the file name will be
used.) -> structure: parameter "path" of String, parameter "label"
of String, parameter "make_handle" of type "boolean" (A boolean -
0 for false, 1 for true. @range (0, 1)), parameter "force_glimmer"
of type "boolean" (A boolean - 0 for false, 1 for true. @range (0,
1)), parameter "min_contig_length" of Long
:returns: instance of type "QUASTOutput" (Ouput of the run_quast
path - the in-container path to the FASTA file. label - the label
to use for the file in the QUAST output. If missing, the file name
will be used.) -> structure: parameter "path" of String, parameter
"label" of String, parameter "make_handle" of type "boolean" (A
boolean - 0 for false, 1 for true. @range (0, 1)), parameter
"force_glimmer" of type "boolean" (A boolean - 0 for false, 1 for
true. @range (0, 1)), parameter "min_contig_length" of Long
:returns: instance of type "QUASTOutput" (Output of the run_quast
function. shock_id - the id of the shock node where the zipped
QUAST output is stored. handle - the new handle for the shock
node, if created. node_file_name - the name of the file stored in
Expand Down Expand Up @@ -311,27 +388,11 @@ def run_QUAST(self, ctx, params):
filepaths = self.get_assemblies(tdir, info)
labels = [i.name for i in info]
else:
if type(files) != list:
raise ValueError('files must be a list')
filepaths = []
labels = []
for i, lp in enumerate(files):
l = lp.get('label')
p = lp.get('path')
if not _os.path.isfile(p):
raise ValueError('File entry {}, {}, is not a file'.format(i + 1, p))
l = l if l else _os.path.basename(p)
filepaths.append(p)
labels.append(l)

if params.get('force_glimmer'):
skip_glimmer = False
else:
skip_glimmer = self.check_large_input(filepaths)
labels, filepaths = _setup_files(files)

out = _os.path.join(tdir, 'quast_results')
# TODO check for name duplicates in labels and do something about it
self.run_quast_exec(out, filepaths, labels, min_contig_length, skip_glimmer)

self.run_quast_exec(out, filepaths, labels, min_contig_length, params.get("force_glimmer"))
dfu = _DFUClient(self.callback_url)
try:
mh = params.get('make_handle')
Expand Down
4 changes: 4 additions & 0 deletions lib/kb_quast/kb_quastServer.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,10 @@ def __init__(self):
name='kb_quast.run_QUAST_app',
types=[dict])
self.method_authentication['kb_quast.run_QUAST_app'] = 'required' # noqa
self.rpc_service.add(impl_kb_quast.run_QUAST_local,
name='kb_quast.run_QUAST_local',
types=[dict])
self.method_authentication['kb_quast.run_QUAST_local'] = 'none' # noqa
self.rpc_service.add(impl_kb_quast.run_QUAST,
name='kb_quast.run_QUAST',
types=[dict])
Expand Down
67 changes: 67 additions & 0 deletions quast.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
version 1.0

# Test workflow: passes the input FASTA files straight through to the quast task.
workflow sdk_quast_test {
input {
Array[File] files
}

call quast {
input:
files = files
}
}

# Runs the kb_quast module's async entrypoint on the given FASTA files and collects
# everything written to the __output__ directory.
task quast {
  input {
    Array[File] files
  }

  # Heredoc command form: WDL interpolates only ~{...} in here, so every $VAR, ${VAR}
  # and $(...) below is left untouched for the shell.
  command <<<
    # Not calling any services so no config file needed
    export KBASE_ENDPOINT="http://fakeendpointthatdoesntexist.com"

    # Hack to make the code not write in /kb/module/work, mounting output to work would work here
    mkdir work
    export WD="$(pwd)"
    echo "WD=$WD"

    # make a directory for output. Ideally we'd mount this to /kb/module/work
    mkdir __output__

    # This is an insane hack to make the quast input JSON. It's as minimal as possible here,
    # but this isn't workable in general - we need input/output mounting so we can predict the file
    # paths and create the JSON serverside at submit time
    # NOTE: paths are word-split by the shell and are not JSON-escaped, so this only works for
    # paths without whitespace, double quotes or backslashes.
    {
      echo '{"files": ['
      sep=""
      for file in ~{sep=" " files}; do
        # Emit the separating comma before every entry except the first
        printf '%s  {"path": "%s", "label": "%s"}\n' "$sep" "$file" "$(basename "$file")"
        sep=","
      done
      echo '  ],'
      printf '  "quast_path": "%s"\n' "$(pwd)/__output__"
      echo '}'
    } > input.json

    /kb/module/scripts/entrypoint.sh async
    EC=$?

    echo "Entrypoint exit code: $EC"

    # List the outputs even when the entrypoint failed, then propagate its exit code
    find __output__ -type f > ./output_files.txt

    if [ $EC -ne 0 ]; then
      exit $EC
    fi
  >>>

  output {
    Array[File] output_files = read_lines("output_files.txt")
    File stdout = "stdout"
    File stderr = "stderr"
  }

  runtime {
    docker: "ghcr.io/kbaseapps/kb_quast:pr-35"
    runtime_minutes: 20
    memory: "100 GB"
    cpu: 4
  }
}
10 changes: 7 additions & 3 deletions scripts/run_async.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
script_dir=$(dirname "$(readlink -f "$0")")
export KB_DEPLOYMENT_CONFIG=$script_dir/../deploy.cfg
WD=/kb/module/work
# Would need input mounting to make this work in JAWS, allowing setting it for now
# WD=/kb/module/work
if [ -f $WD/token ]; then
cat $WD/token | xargs sh $script_dir/../bin/run_kb_quast_async_job.sh $WD/input.json $WD/output.json
else
echo "File $WD/token doesn't exist, aborting."
exit 1
sh $script_dir/../bin/run_kb_quast_async_job.sh $WD/input.json $WD/output.json
# Another option would be to require the token but set up an auth endpoint, either in the
# service or nginx, that just returned a fake username and provide a fake token
# echo "File $WD/token doesn't exist, aborting."
# exit 1
fi

0 comments on commit 9d564c2

Please sign in to comment.