Skip to content

Commit

Permalink
Merge pull request #613 from legumeinfo/genefamily_pangene_linkouts
Browse files Browse the repository at this point in the history
added linkout services for gene families and pan gene sets.
  • Loading branch information
adf-ncgr authored Jan 9, 2024
2 parents e7b8049 + d3ee452 commit 584bde4
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 2 deletions.
2 changes: 1 addition & 1 deletion linkouts/compose.prod.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
services:
linkouts:
image: ghcr.io/legumeinfo/microservices-linkouts:1.0.0
image: ghcr.io/legumeinfo/microservices-linkouts:1.0.1
ports:
- "${PORT:-8080}:8080"
volumes:
Expand Down
64 changes: 64 additions & 0 deletions linkouts/linkouts/http_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,12 @@

# URL paths on which the linkout services are registered (see the
# app.router.add_get/add_post calls in run_http_server)
GENES_PATH = "/gene_linkouts"
GENOMIC_REGIONS_PATH = "/genomic_region_linkouts"
GENE_FAMILIES_PATH = "/gene_family_linkouts"
PAN_GENE_SETS_PATH = "/pan_gene_set_linkouts"
# name of the request field that carries the identifiers: the query-string
# parameter for GET requests and the JSON body key for POST requests
# NOTE(review): this is what the gene family / pan gene set handlers do;
# presumably the gene and genomic region handlers use theirs the same way
GENES_QUERY = "genes"
GENOMIC_REGIONS_QUERY = "genomic_regions"
GENE_FAMILIES_QUERY = "gene_families"
PAN_GENE_SETS_QUERY = "pan_gene_sets"


async def http_genes_get_handler(request):
Expand Down Expand Up @@ -60,6 +64,58 @@ async def http_genomic_regions_post_handler(request):
return web.json_response(linkouts)


async def http_gene_families_get_handler(request):
    """Handle a GET request for gene family linkouts.

    The identifiers are read from the GENE_FAMILIES_QUERY query-string
    parameter as a single comma-separated value and passed to the request
    handler stored on the application under "handler".

    Raises:
        web.HTTPBadRequest: if the query-string parameter is absent.
    """
    query_params = request.rel_url.query
    if GENE_FAMILIES_QUERY not in query_params:
        raise web.HTTPBadRequest(text="No " + GENE_FAMILIES_QUERY + " supplied")

    # one comma-separated parameter value carries all of the identifiers
    family_ids = query_params[GENE_FAMILIES_QUERY].split(",")
    request_handler = request.app["handler"]
    return web.json_response(request_handler.process_gene_families(family_ids))


async def http_gene_families_post_handler(request):
    """Handle a POST request for gene family linkouts.

    The request body must be a JSON object whose GENE_FAMILIES_QUERY key
    holds a non-empty list of identifier strings. The identifiers are passed
    to the request handler stored on the application under "handler".

    Raises:
        web.HTTPBadRequest: if the body is not valid JSON, is not a JSON
            object, or the identifiers are missing, empty, not a list, or
            not all strings.
    """
    # parse the query from the request POST data
    try:
        data = await request.json()
    except ValueError:
        # json.JSONDecodeError (and UnicodeDecodeError) subclass ValueError;
        # a malformed body is a client error, not a server failure
        raise web.HTTPBadRequest(text="Request body must be valid JSON") from None
    if not isinstance(data, dict):
        # e.g. a bare JSON list or string has no .get()
        raise web.HTTPBadRequest(text="Request body must be a JSON object")

    ids = data.get(GENE_FAMILIES_QUERY, [])
    if not isinstance(ids, list):
        raise web.HTTPBadRequest(text=GENE_FAMILIES_QUERY + " must be given as list")
    if not ids:
        raise web.HTTPBadRequest(text="No " + GENE_FAMILIES_QUERY + " supplied")
    # the request handler splits each identifier on ".", so every item must be
    # a string
    if not all(isinstance(family_id, str) for family_id in ids):
        raise web.HTTPBadRequest(
            text=GENE_FAMILIES_QUERY + " must be given as list of strings"
        )

    handler = request.app["handler"]
    linkouts = handler.process_gene_families(ids)
    return web.json_response(linkouts)


async def http_pan_gene_sets_get_handler(request):
    """Handle a GET request for pan gene set linkouts.

    The identifiers are read from the PAN_GENE_SETS_QUERY query-string
    parameter as a single comma-separated value and passed to the request
    handler stored on the application under "handler".

    Raises:
        web.HTTPBadRequest: if the query-string parameter is absent.
    """
    query_params = request.rel_url.query
    if PAN_GENE_SETS_QUERY not in query_params:
        raise web.HTTPBadRequest(text="No " + PAN_GENE_SETS_QUERY + " supplied")

    # one comma-separated parameter value carries all of the identifiers
    set_ids = query_params[PAN_GENE_SETS_QUERY].split(",")
    request_handler = request.app["handler"]
    return web.json_response(request_handler.process_pan_gene_sets(set_ids))


async def http_pan_gene_sets_post_handler(request):
    """Handle a POST request for pan gene set linkouts.

    The request body must be a JSON object whose PAN_GENE_SETS_QUERY key
    holds a non-empty list of identifier strings. The identifiers are passed
    to the request handler stored on the application under "handler".

    Raises:
        web.HTTPBadRequest: if the body is not valid JSON, is not a JSON
            object, or the identifiers are missing, empty, not a list, or
            not all strings.
    """
    # parse the query from the request POST data
    try:
        data = await request.json()
    except ValueError:
        # json.JSONDecodeError (and UnicodeDecodeError) subclass ValueError;
        # a malformed body is a client error, not a server failure
        raise web.HTTPBadRequest(text="Request body must be valid JSON") from None
    if not isinstance(data, dict):
        # e.g. a bare JSON list or string has no .get()
        raise web.HTTPBadRequest(text="Request body must be a JSON object")

    ids = data.get(PAN_GENE_SETS_QUERY, [])
    if not isinstance(ids, list):
        raise web.HTTPBadRequest(text=PAN_GENE_SETS_QUERY + " must be given as list")
    if not ids:
        raise web.HTTPBadRequest(text="No " + PAN_GENE_SETS_QUERY + " supplied")
    # the request handler splits each identifier on ".", so every item must be
    # a string
    if not all(isinstance(set_id, str) for set_id in ids):
        raise web.HTTPBadRequest(
            text=PAN_GENE_SETS_QUERY + " must be given as list of strings"
        )

    handler = request.app["handler"]
    linkouts = handler.process_pan_gene_sets(ids)
    return web.json_response(linkouts)


def run_http_server(host, port, handler):
# make the app
app = web.Application()
Expand All @@ -83,6 +139,14 @@ def run_http_server(host, port, handler):
cors.add(route)
route = app.router.add_get(GENOMIC_REGIONS_PATH, http_genomic_regions_get_handler)
cors.add(route)
route = app.router.add_post(GENE_FAMILIES_PATH, http_gene_families_post_handler)
cors.add(route)
route = app.router.add_get(GENE_FAMILIES_PATH, http_gene_families_get_handler)
cors.add(route)
route = app.router.add_post(PAN_GENE_SETS_PATH, http_pan_gene_sets_post_handler)
cors.add(route)
route = app.router.add_get(PAN_GENE_SETS_PATH, http_pan_gene_sets_get_handler)
cors.add(route)
# run the app
web.run_app(app)
# TODO: what about teardown? runner.cleanup()
63 changes: 62 additions & 1 deletion linkouts/linkouts/request_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

# key of the gene linkout section in a linkouts YAML file (see _read_linkouts)
GENE_LINKOUTS = "gene_linkouts"
# template placeholder replaced with the full gene identifier
GENE_REGEX = re.compile("{GENE_ID}")
# template placeholder replaced with the gene identifier minus its first four
# dot-separated components (see process_genes)
UNPREFIXED_GENE_REGEX = re.compile("{UNPREFIXED_GENE_ID}")

GENOMIC_REGION_LINKOUTS = "genomic_region_linkouts"
GENOMIC_REGION_SPECIFIER_REGEX = re.compile(r"^([^:]+):(\d+)(-|\.\.)(\d+)$")
Expand All @@ -18,6 +19,12 @@
GENOMIC_REGION_START_REGEX = re.compile("{GENOMIC_REGION_START}")
GENOMIC_REGION_END_REGEX = re.compile("{GENOMIC_REGION_END}")

# key of the gene family linkout section in a linkouts YAML file and in
# RequestHandler.linkout_lookup
GENE_FAMILY_LINKOUTS = "gene_family_linkouts"
# template placeholder replaced with the gene family identifier in a linkout's
# href and text; the braces match literally because "{GENE_FAMILY_ID}" is not
# a valid regex repetition
GENE_FAMILY_REGEX = re.compile("{GENE_FAMILY_ID}")

# key of the pan gene set linkout section in a linkouts YAML file and in
# RequestHandler.linkout_lookup
PAN_GENE_SET_LINKOUTS = "pan_gene_set_linkouts"
# template placeholder replaced with the pan gene set identifier in a linkout's
# href and text; the braces match literally, as above
PAN_GENE_SET_REGEX = re.compile("{PAN_GENE_SET_ID}")


class RequestHandler:
def __init__(self, lglob_root):
Expand All @@ -36,7 +43,12 @@ def _read_linkouts(self, lfile):
yml = yaml.load(f.read(), Loader=yaml.FullLoader)
prefix = yml["prefix"]

for linkable_type in [GENE_LINKOUTS, GENOMIC_REGION_LINKOUTS]:
for linkable_type in [
GENE_LINKOUTS,
GENOMIC_REGION_LINKOUTS,
GENE_FAMILY_LINKOUTS,
PAN_GENE_SET_LINKOUTS,
]:
if yml.get(linkable_type) is not None:
for linkout in yml[linkable_type]:
if self.linkout_lookup.get(linkable_type) is None:
Expand All @@ -55,6 +67,7 @@ def process_genes(self, ids):

for id in ids:
prefix = ".".join(id.split(".")[0:4])
unprefixed_id = ".".join(id.split(".")[4:])
if type_lookup.get(prefix) is not None:
templates = type_lookup[prefix]
for template in templates:
Expand All @@ -63,7 +76,13 @@ def process_genes(self, ids):
# TODO: if method is POST, we probably need to do something with the
# request body content
linkout[HREF] = GENE_REGEX.sub(id, template[HREF])
linkout[HREF] = UNPREFIXED_GENE_REGEX.sub(
unprefixed_id, linkout[HREF]
)
linkout[TEXT] = GENE_REGEX.sub(id, template[TEXT])
linkout[TEXT] = UNPREFIXED_GENE_REGEX.sub(
unprefixed_id, linkout[TEXT]
)
linkouts.append(linkout)
return linkouts

Expand Down Expand Up @@ -98,3 +117,45 @@ def process_genomic_regions(self, ids):
linkout[TEXT] = GENOMIC_REGION_REGEX.sub(id, template[TEXT])
linkouts.append(linkout)
return linkouts

def process_gene_families(self, ids):
    """Build the linkouts for the given gene family identifiers.

    Each identifier is matched to linkout templates by its first
    dot-separated component. Every matching template yields one linkout in
    which the gene family placeholder in the template's href and text is
    replaced with the full identifier. Identifiers without matching
    templates are skipped.

    Args:
        ids: iterable of gene family identifier strings.

    Returns:
        A list of dicts with METHOD, HREF and TEXT entries; empty if no gene
        family linkouts are configured or nothing matched.
    """
    type_lookup = self.linkout_lookup.get(GENE_FAMILY_LINKOUTS)
    if type_lookup is None:
        return []

    linkouts = []
    for family_id in ids:
        # templates are keyed by the first dot-separated component of the id
        prefix = family_id.split(".", 1)[0]
        templates = type_lookup.get(prefix)
        if templates is None:
            continue

        # Use a callable replacement so the id is inserted verbatim. A string
        # replacement is parsed by re.sub for backslash escapes and group
        # references, so an id containing "\" would raise re.error or be
        # silently altered.
        def insert_id(_match, literal=family_id):
            return literal

        for template in templates:
            # TODO: if method is POST, we probably need to do something with the
            # request body content
            linkouts.append(
                {
                    METHOD: template[METHOD],
                    HREF: GENE_FAMILY_REGEX.sub(insert_id, template[HREF]),
                    TEXT: GENE_FAMILY_REGEX.sub(insert_id, template[TEXT]),
                }
            )
    return linkouts

def process_pan_gene_sets(self, ids):
    """Build the linkouts for the given pan gene set identifiers.

    Each identifier is matched to linkout templates by its first two
    dot-separated components. Every matching template yields one linkout in
    which the pan gene set placeholder in the template's href and text is
    replaced with the full identifier. Identifiers without matching
    templates are skipped.

    Args:
        ids: iterable of pan gene set identifier strings.

    Returns:
        A list of dicts with METHOD, HREF and TEXT entries; empty if no pan
        gene set linkouts are configured or nothing matched.
    """
    type_lookup = self.linkout_lookup.get(PAN_GENE_SET_LINKOUTS)
    if type_lookup is None:
        return []

    linkouts = []
    for set_id in ids:
        # templates are keyed by the first two dot-separated components of the id
        prefix = ".".join(set_id.split(".")[:2])
        templates = type_lookup.get(prefix)
        if templates is None:
            continue

        # Use a callable replacement so the id is inserted verbatim. A string
        # replacement is parsed by re.sub for backslash escapes and group
        # references, so an id containing "\" would raise re.error or be
        # silently altered.
        def insert_id(_match, literal=set_id):
            return literal

        for template in templates:
            # TODO: if method is POST, we probably need to do something with the
            # request body content
            linkouts.append(
                {
                    METHOD: template[METHOD],
                    HREF: PAN_GENE_SET_REGEX.sub(insert_id, template[HREF]),
                    TEXT: PAN_GENE_SET_REGEX.sub(insert_id, template[TEXT]),
                }
            )
    return linkouts

0 comments on commit 584bde4

Please sign in to comment.