Skip to content

Commit

Permalink
Cache genome build regardless of how it is accessed
Browse files Browse the repository at this point in the history
  • Loading branch information
TheMadBug committed Sep 5, 2023
1 parent 65023e6 commit 46b4668
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 5 deletions.
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ dependencies:
- django-postgres-extra==2.0.5
- django_recaptcha==3.0.0
- django-registration-redux==2.9
- django_termsandconditions==2.0.9
- django_termsandconditions==2.0.12
- django-reversion-compare==0.15.0
- django_threadlocals==0.10
- djangorestframework==3.14.0
Expand Down
60 changes: 60 additions & 0 deletions library/django_utils/django_object_managers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from dataclasses import dataclass
from typing import Dict, Any, Optional

from django.db.models import Manager, QuerySet, Q
from frozendict import frozendict


@dataclass(frozen=True)
class CachedQuery:
    """
    Immutable description of a single ``get()`` call, used as the key of ``_CACHED_QUERIES``.

    Being a frozen dataclass, equality and hashing are derived from the three fields, so an
    instance is only hashable when every field value is itself hashable.
    """

    # Model class the lookup was issued against.
    model: Any
    # Positional arguments that were passed to get().
    args: Any
    # Keyword arguments that were passed to get() in a hashable form, or None when there were none.
    kwargs: Any


# Module-level cache shared by every QuerySetCaching instance: maps the CachedQuery describing a
# get() call to the object that call returned. Nothing in this module removes entries.
_CACHED_QUERIES: Dict[CachedQuery, Any] = {}
# TODO: could make this an LRU cache so it doesn't grow without bound for larger models


class QuerySetCaching(QuerySet):
    """
    QuerySet whose ``get()`` remembers its result in the module-level ``_CACHED_QUERIES`` dict.

    Cached objects are never invalidated, so this is only suitable for models whose rows are
    read but not modified while the process is running. Only ``get()`` is cached.
    """

    def __init__(self, model=None, query=None, using=None, hints=None):
        super().__init__(model=model, query=query, using=using, hints=hints)

    def _cache_key(self, args, kwargs) -> Optional[CachedQuery]:
        """ Return a hashable key for this get() call, or None if the call must not be cached. """
        # The key only describes the arguments of get(). If this queryset has already been
        # narrowed (e.g. ``.filter(...).get()``), two different querysets would share one key
        # and the second would be handed the first one's object, so don't cache those.
        if self.query.where:
            return None

        try:
            # kwargs are frozen so they can take part in the hash; no kwargs is stored as None
            key = CachedQuery(
                model=self.model,
                args=args,
                kwargs=frozendict(kwargs) if kwargs else None,
            )
            # Building the key never hashes it, so hash it here: this is where an unhashable
            # argument (e.g. a list given to an ``__in`` lookup) actually raises TypeError.
            hash(key)
        except TypeError:
            return None
        return key

    def get(self, *args, **kwargs):
        """
        Return the single object matching the lookup, reusing a previously fetched object when
        the same lookup has been made before on an unfiltered queryset.

        Lookups that cannot be used as a cache key (unhashable arguments, or a queryset that
        already has filters applied) go straight to ``QuerySet.get()``. Exceptions raised by
        ``QuerySet.get()`` propagate unchanged and nothing is cached for them.
        """
        key = self._cache_key(args, kwargs)
        if key is None:
            return super().get(*args, **kwargs)

        # Test for presence rather than truthiness so a cached object that happens to be
        # falsy is not re-queried on every call.
        try:
            return _CACHED_QUERIES[key]
        except KeyError:
            pass

        result = super().get(*args, **kwargs)
        _CACHED_QUERIES[key] = result
        return result


class CachingObjectManager(Manager):
    """
    Manager that builds its querysets with QuerySetCaching, so results of get(...) are cached.

    Best installed as the model's default ``objects`` manager and also named in
    ``Meta.base_manager_name = 'objects'``.

    Only use this on models that are read from but never written to. The cache could be
    invalidated when save() is called, but that would only help if the save happened in the
    same instance that holds the cached values.

    Note that currently only the results of get(...) are cached.
    """

    def __init__(self) -> None:
        super().__init__()
        # Swap in the caching queryset class for querysets created by this manager.
        self._queryset_class = QuerySetCaching
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ django_model_utils==4.2.0
django-postgres-extra==2.0.5
django_recaptcha==3.0.0
django-registration-redux==2.9
django_termsandconditions==2.0.9
django_termsandconditions==2.0.12
django-reversion-compare==0.15.0
djangorestframework==3.14.0
easy_thumbnails==2.8.1
Expand Down
7 changes: 4 additions & 3 deletions snpdb/models/models_genome.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from genes.models_enums import AnnotationConsortium
from library.cache import timed_cache
from library.django_utils import SortMetaOrderingMixin
from library.django_utils.django_object_managers import CachingObjectManager
from library.genomics.fasta_wrapper import FastaFileWrapper
from library.utils import invert_dict
from snpdb.genome.fasta_index import load_genome_fasta_index
Expand All @@ -28,6 +29,8 @@ class GenomeBuild(models.Model, SortMetaOrderingMixin):
Build & Contig are populated via migration snpdb 0006 """

objects = CachingObjectManager()

name = models.TextField(primary_key=True)
accession = models.TextField(null=True)
alias = models.TextField(null=True, unique=True)
Expand All @@ -36,22 +39,20 @@ class GenomeBuild(models.Model, SortMetaOrderingMixin):

class Meta:
ordering = ["name"]
base_manager_name = 'objects'

def is_version(self, version: int) -> bool:
return str(version) in self.name

@classmethod
@timed_cache(ttl=60)
def grch37(cls) -> 'GenomeBuild':
return cls.objects.get(pk='GRCh37')

@classmethod
@timed_cache(ttl=60)
def grch38(cls) -> 'GenomeBuild':
return cls.objects.get(pk='GRCh38')

@classmethod
@timed_cache(ttl=60)
def legacy_build(cls) -> 'GenomeBuild':
""" Use this for hacks - makes it easy to find / fix later """
return cls.objects.get(pk='GRCh37')
Expand Down

0 comments on commit 46b4668

Please sign in to comment.