Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Topic landing pools sql #2110

Open
wants to merge 26 commits into
base: topic-landing-pools
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
6bea38e
feat(topics): add topic_pool_link model
nsantacruz Nov 11, 2024
ac1fa70
refactor(topics): modify random topic api to use topic pool link model
nsantacruz Nov 11, 2024
110b051
refactor(topics): change pool to 'promoted'
nsantacruz Nov 11, 2024
cce7daf
refactor(topics): move management of pools to use TopicLinkPool model
nsantacruz Nov 12, 2024
94dee44
chore(topics): add uniqueness constraint on topicpoollink
nsantacruz Nov 12, 2024
129529b
chore(topics): add migrate_good_to_promote_to_topic_pools.py
nsantacruz Nov 12, 2024
1827d9d
refactor(topics): Refactor to use two models, Topic and TopicPool to …
nsantacruz Nov 13, 2024
ad18ba1
feat(topics): admin interface for topics and topic pools
nsantacruz Nov 14, 2024
9725b61
feat(topics): only show library topics in topic admin view
nsantacruz Nov 14, 2024
544df75
chore(topics): update pools migration to fully migrate
nsantacruz Nov 14, 2024
eed87c6
feat(topics): add filters and boolean columns
nsantacruz Nov 14, 2024
e85ecf1
refactor(topics): refactor sefaria functions to use new django models
nsantacruz Nov 14, 2024
b483714
chore(topics): add topic migrations
nsantacruz Nov 14, 2024
fb18fcd
chore(topics): add PoolType to model export
nsantacruz Nov 14, 2024
f67db08
refactor(topics): rename pools
nsantacruz Nov 14, 2024
67dec73
feat(topics): add utility funcs to topic model
nsantacruz Nov 14, 2024
86804eb
fix(topics): remove pools from mongo topics model
nsantacruz Nov 14, 2024
17c6a31
fix(topics): fix query
nsantacruz Nov 14, 2024
b268246
refactor(topics): import and pool name
nsantacruz Nov 14, 2024
30736ee
chore(topics): update django topic model on mongo topic save
nsantacruz Nov 14, 2024
53affe9
chore(topics): update django topic when mongo topic slug changes
nsantacruz Nov 14, 2024
f754481
chore(topics): remove extra newline
nsantacruz Nov 14, 2024
d787bf6
refactor(topics): move delete to Topic delete dependency
nsantacruz Nov 14, 2024
af9f31d
test(topics): add tests to make sure django topic remains in sync wit…
nsantacruz Nov 14, 2024
c9a0c43
fix(topics): cast queryset to list
nsantacruz Nov 14, 2024
c99c0e5
chore(topics): remove unused imports
nsantacruz Nov 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion reader/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -4229,8 +4229,9 @@ def random_by_topic_api(request):
"""
Returns Texts API data for a random text taken from popular topic tags
"""
from topics.models import PoolType
cb = request.GET.get("callback", None)
random_topic = get_random_topic('torahtab')
random_topic = get_random_topic(PoolType.TORAH_TAB.value)
if random_topic is None:
return random_by_topic_api(request)
random_source = get_random_topic_source(random_topic)
Expand Down
81 changes: 81 additions & 0 deletions scripts/migrations/migrate_good_to_promote_to_topic_pools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import django
from django.db import IntegrityError

django.setup()
from sefaria.model import TopicSet, RefTopicLinkSet
from topics.models.topic import Topic
from topics.models.pool import TopicPool, PoolType


def add_to_torah_tab_pool():
    """
    Add every Mongo topic flagged `good_to_promote` to the torah-tab pool.

    A topic with no matching Django `Topic` row is reported and skipped instead of
    raising, which is how `add_to_library_pool` and `add_to_sheets_pool` treat missing
    rows. Without this, a single topic that failed to be created in `add_topics`
    would abort the rest of the migration.
    """
    print('Adding topics to torah tab pool')
    pool = TopicPool.objects.get(name=PoolType.TORAH_TAB.value)
    for topic in TopicSet({'good_to_promote': True}):
        try:
            t = Topic.objects.get(slug=topic.slug)
        except Topic.DoesNotExist:
            print('Could not find topic with slug {}'.format(topic.slug))
            continue
        t.pools.add(pool)


def add_to_library_pool():
    """
    Add topics to the library pool.

    A topic qualifies if it is an author topic (`subclass == 'author'`) or if it is the
    target of at least one non-sheet ref-topic link of type 'about'.

    Slugs with no matching Django `Topic` row are reported and skipped. The original
    author loop called `Topic.objects.get` unguarded, so a single missing row aborted
    the migration even though the link loop right below it tolerated missing rows.
    """
    print('Adding topics to library pool')
    pool = TopicPool.objects.get(name=PoolType.LIBRARY.value)
    author_slugs = {topic.slug for topic in TopicSet({'subclass': 'author'})}
    links = RefTopicLinkSet({'is_sheet': False, 'linkType': 'about'})
    linked_slugs = {link.toTopic for link in links}
    # The union means an author that also has source links is only looked up once.
    for slug in author_slugs | linked_slugs:
        try:
            t = Topic.objects.get(slug=slug)
        except Topic.DoesNotExist:
            print('Could not find topic with slug {}'.format(slug))
            continue
        t.pools.add(pool)


def add_to_sheets_pool():
    """
    Add to the sheets pool every topic that is the target of a sheet 'about' link.

    Slugs that have no Django `Topic` row are reported on stdout and skipped.
    """
    print('Adding topics to sheets pool')
    sheets_pool = TopicPool.objects.get(name=PoolType.SHEETS.value)
    sheet_links = RefTopicLinkSet({'is_sheet': True, 'linkType': 'about'})

    # Collapse the links to the distinct set of topics they point at.
    linked_slugs = set()
    for sheet_link in sheet_links:
        linked_slugs.add(sheet_link.toTopic)

    for linked_slug in linked_slugs:
        try:
            django_topic = Topic.objects.get(slug=linked_slug)
        except Topic.DoesNotExist:
            print('Could not find topic with slug {}'.format(linked_slug))
        else:
            django_topic.pools.add(sheets_pool)


def delete_all_data():
    """
    Wipe all Django topic data so the migration starts from a clean slate.

    Rows are removed in this order: topic-to-pool link rows, topics, then pools.
    """
    print("Delete data")
    managers_in_delete_order = (
        Topic.pools.through.objects,  # the many-to-many link table
        Topic.objects,
        TopicPool.objects,
    )
    for manager in managers_in_delete_order:
        manager.all().delete()


def add_topics():
    """
    Create one Django `Topic` row per Mongo topic, copying the slug and primary titles.

    An `IntegrityError` on insert is reported as a duplicate and the topic is skipped.
    """
    print('Adding topics')
    for mongo_topic in TopicSet({}):
        row_fields = {
            'slug': mongo_topic.slug,
            'en_title': mongo_topic.get_primary_title('en'),
            'he_title': mongo_topic.get_primary_title('he'),
        }
        try:
            Topic.objects.create(**row_fields)
        except IntegrityError:
            print('Duplicate topic', mongo_topic.slug)


def add_pools():
    """Create the library, sheets, general and torah-tab `TopicPool` rows, in that order."""
    print('Adding pools')
    pool_types = (
        PoolType.LIBRARY,
        PoolType.SHEETS,
        PoolType.GENERAL,
        PoolType.TORAH_TAB,
    )
    for pool_type in pool_types:
        TopicPool.objects.create(name=pool_type.value)


def run():
    """
    Run the full migration: clear existing Django topic data, recreate topics and
    pools, then populate the torah-tab, library and sheets pools.
    """
    # Order matters: topics and pools must exist before memberships are added.
    migration_steps = (
        delete_all_data,
        add_topics,
        add_pools,
        add_to_torah_tab_pool,
        add_to_library_pool,
        add_to_sheets_pool,
    )
    for step in migration_steps:
        step()


if __name__ == "__main__":
    run()
11 changes: 4 additions & 7 deletions sefaria/helper/topic.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,15 +285,12 @@ def get_random_topic(pool=None) -> Optional[Topic]:
:param pool: name of the pool from which to select the topic. If `None`, all topics are considered.
:return: Returns a random topic from the database. If you provide `pool`, then the selection is limited to topics in that pool.
"""
query = {"pools": pool} if pool else {}
random_topic_dict = list(db.topics.aggregate([
{"$match": query},
{"$sample": {"size": 1}}
]))
if len(random_topic_dict) == 0:
from topics.models import Topic as DjangoTopic
random_topic_slugs = DjangoTopic.objects.sample_topic_slugs('random', pool, limit=1)
if len(random_topic_slugs) == 0:
return None

return Topic(random_topic_dict[0])
return Topic.init(random_topic_slugs[0])


def get_random_topic_source(topic:Topic) -> Optional[Ref]:
Expand Down
17 changes: 17 additions & 0 deletions sefaria/model/tests/topic_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from sefaria.model.text import Ref
from sefaria.system.database import db
from sefaria.system.exceptions import SluggedMongoRecordMissingError
from topics.models import Topic as DjangoTopic
from sefaria.helper.topic import update_topic


Expand Down Expand Up @@ -155,6 +156,22 @@ def test_merge(self, topic_graph_to_merge):
{"slug": '30', 'asTyped': 'thirty'}
]

t40 = Topic.init('40')
assert t40 is None
DjangoTopic.objects.get(slug='20')
with pytest.raises(DjangoTopic.DoesNotExist):
DjangoTopic.objects.get(slug='40')

def test_change_title(self, topic_graph):
    """The Django topic's English title matches the Mongo primary title before and after a title change is saved."""
    mongo_topic = topic_graph['topics']['1']

    django_before = DjangoTopic.objects.get(slug=mongo_topic.slug)
    assert django_before.en_title == mongo_topic.get_primary_title('en')

    mongo_topic.title_group.add_title('new title', 'en', True, True)
    mongo_topic.save()

    # Re-fetch so the assertion sees what was actually persisted.
    django_after = DjangoTopic.objects.get(slug=mongo_topic.slug)
    assert django_after.en_title == mongo_topic.get_primary_title('en')


def test_sanitize(self):
t = Topic()
t.slug = "sdfsdg<script/>"
Expand Down
60 changes: 28 additions & 32 deletions sefaria/model/topic.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from enum import Enum
from typing import Union, Optional
from . import abstract as abst
from .schema import AbstractTitledObject, TitleGroup
from .text import Ref, IndexSet, AbstractTextRecord, Index, Term
from .category import Category
from topics.models import Topic as DjangoTopic
from topics.models import TopicPool, PoolType
from sefaria.system.exceptions import InputError, DuplicateRecordError
from sefaria.model.timeperiod import TimePeriod, LifePeriod
from sefaria.system.validators import validate_url
Expand Down Expand Up @@ -121,11 +122,6 @@ def __hash__(self):
return hash((self.collective_title, self.base_cat_path))


class Pool(Enum):
TEXTUAL = "textual"
SHEETS = "sheets"


class Topic(abst.SluggedAbstractMongoRecord, AbstractTitledObject):
collection = 'topics'
history_noun = 'topic'
Expand Down Expand Up @@ -160,11 +156,8 @@ class Topic(abst.SluggedAbstractMongoRecord, AbstractTitledObject):
"data_source", #any topic edited manually should display automatically in the TOC and this flag ensures this
'image',
"portal_slug", # slug to relevant Portal object
'pools', # list of strings, any of them represents a pool that this topic is member of
]

allowed_pools = [pool.value for pool in Pool] + ['torahtab']

attr_schemas = {
"image": {
'type': 'dict',
Expand All @@ -176,14 +169,7 @@ class Topic(abst.SluggedAbstractMongoRecord, AbstractTitledObject):
'schema': {'en': {'type': 'string', 'required': True},
'he': {'type': 'string', 'required': True}}}}
},
'pools': {
'type': 'list',
'schema': {
'type': 'string',
'allowed': allowed_pools
}
}
}
}

ROOT = "Main Menu" # the root of topic TOC is not a topic, so this is a fake slug. we know it's fake because it's not in normal form
# this constant is helpful in the topic editor tool functions in this file
Expand All @@ -200,10 +186,18 @@ def load(self, query, proj=None):

def _set_derived_attributes(self):
    """
    Populate attributes that are computed rather than stored on the Mongo record:
    the title group, the list of pools (read from the Django topic model by slug),
    and, for subclass instances that lack one, the `subclass` marker.
    """
    self.set_titles(getattr(self, "titles", None))

    own_slug = getattr(self, "slug", None)
    self.pools = list(DjangoTopic.objects.get_pools_by_topic_slug(own_slug))

    is_subclass_instance = self.__class__ != Topic
    if is_subclass_instance and not getattr(self, "subclass", False):
        # in a subclass. set appropriate "subclass" attribute
        self.subclass = self.reverse_subclass_map[self.__class__.__name__]

def _pre_save(self):
    """
    Mirror this topic into the Django `Topic` table before it is saved.

    Creates the Django row for `self.slug` if it does not exist, and in either case
    sets its English and Hebrew titles to this topic's current primary titles.
    """
    super()._pre_save()
    # update_or_create writes the titles in the same step that creates the row, so a
    # new row is never inserted without its titles and no unused `created` flag is left.
    # NOTE(review): this runs in the pre-save hook; if the subsequent Mongo save fails,
    # the Django row may already reflect the new titles - confirm this is acceptable.
    DjangoTopic.objects.update_or_create(
        slug=self.slug,
        defaults={
            'en_title': self.get_primary_title('en'),
            'he_title': self.get_primary_title('he'),
        },
    )

def _validate(self):
super(Topic, self)._validate()
if getattr(self, 'subclass', False):
Expand All @@ -224,10 +218,6 @@ def _normalize(self):
displays_under_link = IntraTopicLink().load({"fromTopic": slug, "linkType": "displays-under"})
if getattr(displays_under_link, "toTopic", "") == "authors":
self.subclass = "author"
if self.get_pools():
self.pools = sorted(set(self.get_pools()))
elif hasattr(self, 'pools'):
delattr(self, 'pools')

def _sanitize(self):
super()._sanitize()
Expand All @@ -237,19 +227,23 @@ def _sanitize(self):
p[k] = bleach.clean(v, tags=[], strip=True)
setattr(self, attr, p)

def get_pools(self):
def get_pools(self) -> list[str]:
return getattr(self, 'pools', [])

def has_pool(self, pool):
def has_pool(self, pool: str) -> bool:
return pool in self.get_pools()

def add_pool(self, pool): #does not save!
def add_pool(self, pool_name: str) -> None:
pool = TopicPool.objects.get(name=pool_name)
DjangoTopic.objects.get(slug=self.slug).pools.add(pool)
self.pools = self.get_pools()
self.pools.append(pool)
self.pools.append(pool_name)

def remove_pool(self, pool): #does not save!
def remove_pool(self, pool_name) -> None:
pool = TopicPool.objects.get(name=pool_name)
DjangoTopic.objects.get(slug=self.slug).pools.remove(pool)
pools = self.get_pools()
pools.remove(pool)
pools.remove(pool_name)

def set_titles(self, titles):
self.title_group = TitleGroup(titles)
Expand Down Expand Up @@ -393,10 +387,10 @@ def set_slug(self, new_slug) -> None:
old_slug = getattr(self, slug_field)
setattr(self, slug_field, new_slug)
setattr(self, slug_field, self.normalize_slug_field(slug_field))
DjangoTopic.objects.filter(slug=old_slug).update(slug=new_slug)
self.save() # so that topic with this slug exists when saving links to it
self.merge(old_slug)


def merge(self, other: Union['Topic', str]) -> None:
"""
Merge `other` into `self`. This means that all data from `other` will be merged into self.
Expand Down Expand Up @@ -498,8 +492,6 @@ def get_ref_links(self, is_sheet, query_kwargs=None, **kwargs):
def contents(self, **kwargs):
mini = kwargs.get('minify', False)
d = {'slug': self.slug} if mini else super(Topic, self).contents(**kwargs)
if kwargs.get('remove_pools', True):
d.pop('pools', None)
d['primaryTitle'] = {}
for lang in ('en', 'he'):
d['primaryTitle'][lang] = self.get_primary_title(lang=lang, with_disambiguation=kwargs.get('with_disambiguation', True))
Expand Down Expand Up @@ -565,7 +557,7 @@ def update_after_link_change(self, pool):
updating the pools 'sheets' or 'textual' according to the existence of links and the numSources
:param pool: 'sheets' or 'textual'
"""
links = self.get_ref_links(pool == Pool.SHEETS.value)
links = self.get_ref_links(pool == PoolType.SHEETS.value)
if self.has_pool(pool) and not links:
self.remove_pool(pool)
elif not self.has_pool(pool) and links:
Expand Down Expand Up @@ -970,7 +962,7 @@ def set_description(self, lang, title, prompt):
return self

def get_related_pool(self):
return Pool.SHEETS.value if self.is_sheet else Pool.TEXTUAL.value
return PoolType.SHEETS.value if self.is_sheet else PoolType.LIBRARY.value

def get_topic(self):
    """Load and return the `Topic` whose slug equals this link's `toTopic`."""
    target = Topic()
    return target.load({'slug': self.toTopic})
Expand Down Expand Up @@ -1175,6 +1167,10 @@ def process_topic_delete(topic):
for sheet in db.sheets.find({"topics.slug": topic.slug}):
sheet["topics"] = [t for t in sheet["topics"] if t["slug"] != topic.slug]
db.sheets.save(sheet)
try:
DjangoTopic.objects.get(slug=topic.slug).delete()
except DjangoTopic.DoesNotExist:
print('Topic {} does not exist in django'.format(topic.slug))

def process_topic_description_change(topic, **kwargs):
"""
Expand Down
1 change: 1 addition & 0 deletions sefaria/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@
'reader',
'sourcesheets',
'sefaria.gauth',
'topics',
'captcha',
'django.contrib.admin',
'anymail',
Expand Down
Empty file added topics/__init__.py
Empty file.
Loading
Loading