DO NOT MERGE: Testing running kb-sdk kb_quast module with JAWS #36

Closed · wants to merge 8 commits
57 changes: 57 additions & 0 deletions .github/workflows/docker-publish.yml
@@ -0,0 +1,57 @@
name: Docker

on:
workflow_dispatch:
push:
branches: [ "main", "master", "develop" ]
# Publish semver tags as releases.
tags:
- 'v[0-9]+.[0-9]+.[0-9]+'
- '[0-9]+.[0-9]+.[0-9]+'
- '[0-9]+.[0-9]+.[0-9]+-*'
pull_request:
branches: [ "main", "master", "develop", "jaws" ]
release:
types: [published]

env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}

jobs:
build:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write

steps:
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Checkout repository
uses: actions/checkout@v4

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Extract Docker metadata
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}

- name: Build and push Docker image
id: build-and-push
uses: docker/build-push-action@v5
with:
context: .
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max
2 changes: 1 addition & 1 deletion kb_quast.html

Large diffs are not rendered by default.

31 changes: 29 additions & 2 deletions kb_quast.spec
@@ -33,7 +33,7 @@ module kb_quast {
} Handle;

/* A local FASTA file.
path - the path to the FASTA file.
path - the in-container path to the FASTA file.
label - the label to use for the file in the QUAST output. If missing, the file name will
be used.
*/
@@ -68,6 +68,33 @@ module kb_quast {
/* Run QUAST and save a KBaseReport with the output. */
funcdef run_QUAST_app(QUASTAppParams params) returns(QUASTAppOutput output)
authentication required;

/* Input for the run_quast_local function.

files - the list of FASTA files upon which QUAST will be run.
quast_path - the in-container path where the QUAST output should be stored.

Optional arguments:
force_glimmer - runs the '--glimmer' option regardless of file/assembly object size if true
min_contig_length - set the minimum size of contigs to process. Defaults to 500,
minimum allowed is 50.
*/
typedef structure {
list<FASTAFile> files;
string quast_path;
boolean force_glimmer;
int min_contig_length;
} QUASTLocalParams;

/* Output of the run_quast_local function.
quast_path - the directory containing the quast output.
*/
typedef structure {
string quast_path;
} QUASTLocalOutput;

/* Run QUAST entirely locally. */
funcdef run_QUAST_local(QUASTLocalParams params) returns(QUASTLocalOutput output);

/* Input for running QUAST.
assemblies - the list of assemblies upon which QUAST will be run.
@@ -88,7 +115,7 @@
int min_contig_length;
} QUASTParams;

/* Ouput of the run_quast function.
/* Output of the run_quast function.
shock_id - the id of the shock node where the zipped QUAST output is stored.
handle - the new handle for the shock node, if created.
node_file_name - the name of the file stored in Shock.
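As a side note on the new run_QUAST_local function added to the spec above, a minimal sketch of the params structure it expects is shown below. The in-container paths are hypothetical; per the spec, files and quast_path are required, while force_glimmer and min_contig_length are optional.

# Illustrative only; the in-container paths here are hypothetical and not part of this PR.
params = {
    "files": [
        {"path": "/kb/module/work/tmp/assembly_a.fa", "label": "assembly_a"},
        {"path": "/kb/module/work/tmp/assembly_b.fa"},  # no label: the file name is used
    ],
    "quast_path": "/kb/module/work/tmp/quast_results",  # where the QUAST output is written
    "force_glimmer": 0,         # optional; 1 runs --glimmer regardless of input size
    "min_contig_length": 500,   # optional; defaults to 500, minimum allowed is 50
}
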
119 changes: 90 additions & 29 deletions lib/kb_quast/kb_quastImpl.py
@@ -34,6 +34,25 @@ def __init__(self, obj_info):
self.size = obj_info[9]
self.meta = obj_info[10]
self.ref = str(self.wsid) + '/' + str(self.id) + '/' + str(self.version)


def _setup_files(files):
if not files:
raise ValueError('The files argument is required')
if type(files) != list:
raise ValueError('files must be a list')
labels, filepaths = [], []
for i, lp in enumerate(files):
l = lp.get('label')
p = lp.get('path')
if not _os.path.isfile(p):
raise ValueError('File entry {}, {}, is not a file'.format(i + 1, p))
l = l if l else _os.path.basename(p)
filepaths.append(p)
labels.append(l)
return labels, filepaths


#END_HEADER


@@ -55,7 +74,7 @@ class kb_quast:
######################################### noqa
VERSION = "1.1.0"
GIT_URL = "https://github.com/kbaseapps/kb_quast"
GIT_COMMIT_HASH = "f6be7c27bbf44a0d65b0250dbdb8079b5df9d7ae"
GIT_COMMIT_HASH = "14a88101fbe5639aeaa7b0e8e759070374a7d540"

#BEGIN_CLASS_HEADER

@@ -127,7 +146,14 @@ def get_assembly_object_info(self, assemblies, token):
raise ValueError('Duplicate objects detected in input') # could list objs later
return info

def run_quast_exec(self, outdir, filepaths, labels, min_contig_length, skip_glimmer=False):
def run_quast_exec(self, outdir, filepaths, labels, min_contig_length, force_glimmer):
if force_glimmer:
skip_glimmer = False
else:
skip_glimmer = self.check_large_input(filepaths)

# TODO check for name duplicates in labels and do something about it

threads = psutil.cpu_count() * self.THREADS_PER_CORE
# DO NOT use genemark instead of glimmer, not open source
# DO NOT use metaQUAST, uses SILVA DB which is not open source
@@ -250,6 +276,57 @@ def run_QUAST_app(self, ctx, params):
# return the results
return [output]

def run_QUAST_local(self, ctx, params):
"""
Run QUAST entirely locally.
:param params: instance of type "QUASTLocalParams" (Input for the
run_quast_local function. files - the list of FASTA files upon
which QUAST will be run. quast_path - the in-container path where
the QUAST output should be stored. Optional arguments:
force_glimmer - runs the '--glimmer' option regardless of
file/assembly object size if true min_contig_length - set the
minimum size of contigs to process. Defaults to 500, minimum
allowed is 50.) -> structure: parameter "files" of list of type
"FASTAFile" (A local FASTA file. path - the in-container path to
the FASTA file. label - the label to use for the file in the QUAST
output. If missing, the file name will be used.) -> structure:
parameter "path" of String, parameter "label" of String, parameter
"quast_path" of String, parameter "force_glimmer" of type
"boolean" (A boolean - 0 for false, 1 for true. @range (0, 1)),
parameter "min_contig_length" of Long
:returns: instance of type "QUASTLocalOutput" (Output of the
run_quast_local function. quast_path - the directory containing
the quast output.) -> structure: parameter "quast_path" of String
"""
# ctx is the context object
# return variables are: output
#BEGIN run_QUAST_local

self.log('Starting QUAST local run. Parameters:')
self.log(str(params))
files = params.get('files')
min_contig_length = self.get_min_contig_length(params) # fail early if param is bad
outdir = params.get("quast_path")
# Deliberately coded to not use scratch since it's hardcoded in deploy.cfg
# and changing that would break the module for standard SDK runs
# Don't need to do that if we have output mounting
if not outdir or not outdir.strip():
raise ValueError("The quast_path argument is required")
self.mkdir_p(outdir)
labels, filepaths = _setup_files(files)

self.run_quast_exec(
outdir, filepaths, labels, min_contig_length, params.get("force_glimmer"))
output = {"quast_path": outdir}
#END run_QUAST_local

# At some point might do deeper type checking...
if not isinstance(output, dict):
raise ValueError('Method run_QUAST_local return value ' +
'output is not type dict as required.')
# return the results
return [output]

def run_QUAST(self, ctx, params):
"""
Run QUAST and return a shock node containing the zipped QUAST output.
@@ -265,14 +342,14 @@ def run_QUAST(self, ctx, params):
workspace object containing an assembly, either a
KBaseGenomes.ContigSet or KBaseGenomeAnnotations.Assembly.),
parameter "files" of list of type "FASTAFile" (A local FASTA file.
path - the path to the FASTA file. label - the label to use for
the file in the QUAST output. If missing, the file name will be
used.) -> structure: parameter "path" of String, parameter "label"
of String, parameter "make_handle" of type "boolean" (A boolean -
0 for false, 1 for true. @range (0, 1)), parameter "force_glimmer"
of type "boolean" (A boolean - 0 for false, 1 for true. @range (0,
1)), parameter "min_contig_length" of Long
:returns: instance of type "QUASTOutput" (Ouput of the run_quast
path - the in-container path to the FASTA file. label - the label
to use for the file in the QUAST output. If missing, the file name
will be used.) -> structure: parameter "path" of String, parameter
"label" of String, parameter "make_handle" of type "boolean" (A
boolean - 0 for false, 1 for true. @range (0, 1)), parameter
"force_glimmer" of type "boolean" (A boolean - 0 for false, 1 for
true. @range (0, 1)), parameter "min_contig_length" of Long
:returns: instance of type "QUASTOutput" (Output of the run_quast
function. shock_id - the id of the shock node where the zipped
QUAST output is stored. handle - the new handle for the shock
node, if created. node_file_name - the name of the file stored in
@@ -311,27 +388,11 @@ def run_QUAST(self, ctx, params):
filepaths = self.get_assemblies(tdir, info)
labels = [i.name for i in info]
else:
if type(files) != list:
raise ValueError('files must be a list')
filepaths = []
labels = []
for i, lp in enumerate(files):
l = lp.get('label')
p = lp.get('path')
if not _os.path.isfile(p):
raise ValueError('File entry {}, {}, is not a file'.format(i + 1, p))
l = l if l else _os.path.basename(p)
filepaths.append(p)
labels.append(l)

if params.get('force_glimmer'):
skip_glimmer = False
else:
skip_glimmer = self.check_large_input(filepaths)
labels, filepaths = _setup_files(files)

out = _os.path.join(tdir, 'quast_results')
# TODO check for name duplicates in labels and do something about it
self.run_quast_exec(out, filepaths, labels, min_contig_length, skip_glimmer)

self.run_quast_exec(out, filepaths, labels, min_contig_length, params.get("force_glimmer"))
dfu = _DFUClient(self.callback_url)
try:
mh = params.get('make_handle')
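As a quick illustration of the shared _setup_files helper factored out above (a sketch only; the paths are hypothetical and must exist, since the helper checks each one with os.path.isfile):

# Hypothetical example of _setup_files behaviour; the paths must point at real files.
files = [
    {"path": "/tmp/asm_a.fa", "label": "Assembly A"},
    {"path": "/tmp/asm_b.fa"},  # no label, so the base name of the file is used
]
labels, filepaths = _setup_files(files)
# labels    == ["Assembly A", "asm_b.fa"]
# filepaths == ["/tmp/asm_a.fa", "/tmp/asm_b.fa"]
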
4 changes: 4 additions & 0 deletions lib/kb_quast/kb_quastServer.py
@@ -342,6 +342,10 @@ def __init__(self):
name='kb_quast.run_QUAST_app',
types=[dict])
self.method_authentication['kb_quast.run_QUAST_app'] = 'required' # noqa
self.rpc_service.add(impl_kb_quast.run_QUAST_local,
name='kb_quast.run_QUAST_local',
types=[dict])
self.method_authentication['kb_quast.run_QUAST_local'] = 'none' # noqa
self.rpc_service.add(impl_kb_quast.run_QUAST,
name='kb_quast.run_QUAST',
types=[dict])
96 changes: 96 additions & 0 deletions quast.wdl
@@ -0,0 +1,96 @@
version 1.0

workflow sdk_quast_test {
input {
Array[File] files
}

call quast {
input:
files = files
}
}

task quast {
input {
Array[File] files
Int total = length(files)
}

command <<<
# No module callbacks
export SDK_CALLBACK_URL="http://fakeendpointthatdoesntexist.com"

# Not calling any services so no config file needed
export KBASE_ENDPOINT="http://fakeendpointthatdoesntexist.com"

# Hack to allow the code to run scripts, mounting output could fix this
cp -R /kb/module/scripts scripts
cp -R /kb/module/bin bin
cp -R /kb/module/lib lib
cp /kb/module/deploy.cfg deploy.cfg

# Hack to make the code not write in /kb/module/work, mounting output
# to work would work here
mkdir work
export WD=$(pwd)
echo "WD=$WD"

# make a directory for output. Ideally we'd mount this to /kb/module/work
mkdir __output__

# This is an insane hack to make the quast input JSON. It's as minimal
# as possible here, but this isn't workable in general - we need
# input/output mounting so we can predict the file paths and create
# the JSON serverside at submit time
FILE=~{files[0]}
FILENAME=$(basename $FILE)
echo "{" > input.json
echo " \"method\": \"kb_quast.run_QUAST_local\"," >> input.json
echo " \"params\": [" >> input.json
echo " {" >> input.json
echo " \"files\": [" >> input.json
echo -n " {\"path\": \"$FILE\", \"label\": \"$FILENAME\"}" >> input.json

FILES=('~{sep="' '" files}')
for (( c = 1; c < ~{total}; c++ )); do
FILE=${FILES[$c]}
FILENAME=$(basename $FILE)
echo , >> input.json
echo -n " {\"path\": \"$FILE\", \"label\": \"$FILENAME\"}" >> input.json
done

echo "" >> input.json
echo " ]," >> input.json
echo " \"quast_path\": \"$(pwd)/__output__\"" >> input.json
echo " }" >> input.json
echo " ]" >> input.json
echo "}" >> input.json

# hack to use the copied scripts dir rather than the linked one.
# if work can be mounted as writeable to output I think this isn't needed
./scripts/entrypoint.sh async
EC=$?

echo "Entrypoint exit code: $EC"

find __output__ -type f > ./output_files.txt

if [ $EC -ne 0 ]; then
exit $EC
fi
>>>

output {
Array[File] output_files = read_lines("output_files.txt")
File stdout = "stdout"
File stderr = "stderr"
}

runtime {
docker: "ghcr.io/kbaseapps/kb_quast:pr-36@sha256:5bb0d1bcf15de1fbf8596d3c4351ff6610a85ec1a0676fc18b224294c505edd0"
runtime_minutes: 20
memory: "100 GB"
cpu: 4
}
}
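
For readability, the input.json that the echo commands in the task above assemble could equally be generated with a short Python snippet. This is only a sketch of the target structure under the same assumptions as the WDL task (hypothetical file paths, output directory __output__ under the working directory); it is not part of the PR.

import json
import os

# Sketch of the input.json consumed by the async entrypoint (kb_quast.run_QUAST_local).
files = ["/path/to/assembly_a.fa", "/path/to/assembly_b.fa"]  # hypothetical inputs
payload = {
    "method": "kb_quast.run_QUAST_local",
    "params": [{
        "files": [{"path": f, "label": os.path.basename(f)} for f in files],
        "quast_path": os.path.join(os.getcwd(), "__output__"),
    }],
}
with open("input.json", "w") as fh:
    json.dump(payload, fh, indent=2)

The workflow itself would then be launched with an inputs file mapping sdk_quast_test.files to the FASTA files to test.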