Skip to content

Commit

Permalink
chore(linker): merge master
Browse files Browse the repository at this point in the history
  • Loading branch information
nsantacruz committed Oct 28, 2024
2 parents 69d9221 + aa79a2f commit 048d505
Show file tree
Hide file tree
Showing 27 changed files with 1,214 additions and 282 deletions.
1 change: 1 addition & 0 deletions .github/workflows/integration-testing.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ name: Run Integration Tests
on:
merge_group:
pull_request:
workflow_dispatch:

concurrency: integration_environment

Expand Down
2 changes: 1 addition & 1 deletion build/linker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ RUN apt-get update -qq \
&& rm -rf /var/lib/apt/lists/*

# fix issues with shared objects
RUN ls /usr/local/cuda-11.6/targets/x86_64-linux/lib/* | xargs -I{} ln -s {} /usr/lib/x86_64-linux-gnu/ \
RUN ls /usr/local/cuda-11.4/targets/x86_64-linux/lib/* | xargs -I{} ln -s {} /usr/lib/x86_64-linux-gnu/ \
&& ln -s libcuda.so /usr/lib/x86_64-linux-gnu/libcuda.so.1 \
&& ln -s libnvidia-ml.so /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1

Expand Down
2 changes: 1 addition & 1 deletion reader/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -3046,7 +3046,7 @@ def topic_page(request, topic, test_version=None):
props = {
"initialMenu": "topics",
"initialTopic": topic,
"initialTab": urllib.parse.unquote(request.GET.get('tab', 'sources')),
"initialTab": urllib.parse.unquote(request.GET.get('tab', 'notable-sources')),
"initialTopicSort": urllib.parse.unquote(request.GET.get('sort', 'Relevance')),
"initialTopicTitle": {
"en": topic_obj.get_primary_title('en'),
Expand Down
2 changes: 1 addition & 1 deletion sefaria/helper/linker.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def _add_webpage_hit_for_url(url):

@django_cache(cache_type="persistent")
def _make_find_refs_response_with_cache(request_text: _FindRefsText, options: _FindRefsTextOptions, meta_data: dict) -> dict:
if request_text.lang == 'he':
if request_text.lang == 'he' or True:
response = _make_find_refs_response_linker_v3(request_text, options)
else:
response = _make_find_refs_response_linker_v2(request_text, options)
Expand Down
7 changes: 5 additions & 2 deletions sefaria/helper/llm/topic_prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,11 @@ def _get_context_ref(segment_oref: Ref) -> Optional[Ref]:
if segment_oref.primary_category == "Tanakh":
return segment_oref.section_ref()
elif segment_oref.index.get_primary_corpus() == "Bavli":
passage = Passage.containing_segment(segment_oref)
return passage.ref()
try:
passage = Passage.containing_segment(segment_oref)
return passage.ref()
except:
return None
return None


Expand Down
21 changes: 20 additions & 1 deletion sefaria/model/linker/ref_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,12 +145,31 @@ def contains(self, other: 'ResolvedRef') -> bool:
"""
Does `self` contain `other`? If `self.ref` and `other.ref` aren't None, this is just ref comparison.
Otherwise, see if the schema/altstruct node that backs `self` contains `other`'s node.
Note this function is a bit confusing. It works like this:
- If `self.ref` and `other.ref` are None, we compare the nodes themselves to see if self is an ancestor of other
- If `self.ref` is None and `other.ref` isn't, we check that `other.ref` is contained in at least one of `self`'s children (`self` may be an AltStructNode in which case it has no Ref)
- If `self.ref` isn't None and `other.ref` is None, we check that `self.ref` contains all of `other`'s children (`other` may be an AltStructNode in which case it has no Ref)
- If `self.ref` and `other.ref` are both defined, we can use Ref.contains()
@param other:
@return:
"""
if not other.node or not self.node:
return False
return self.node.contains(other.node, self.ref, other.ref)
if other.ref and self.ref:
return self.ref.contains(other.ref)
try:
if other.ref is None:
if self.ref is None:
return self.node.is_ancestor_of(other.node)
# other is alt struct and self has a ref
# check that every leaf node is contained by self's ref
return all([self.ref.contains(leaf_ref) for leaf_ref in other.node.leaf_refs()])
# self is alt struct and other has a ref
# check if any leaf node contains other's ref
return any([leaf_ref.contains(other.ref) for leaf_ref in self.node.leaf_refs()])
except NotImplementedError:
return False


@property
def order_key(self):
Expand Down
89 changes: 41 additions & 48 deletions sefaria/model/linker/referenceable_book_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,47 +85,29 @@ def is_default(self) -> bool:
def referenceable(self) -> bool:
return True

def contains(self, other: 'ReferenceableBookNode', self_ref: Optional[text.Ref], other_ref: Optional[text.Ref]) -> bool:
def is_ancestor_of(self, other: 'ReferenceableBookNode') -> bool:
other_node = other._get_titled_tree_node()
self_node = self._get_titled_tree_node()
return self_node.is_ancestor_of(other_node)

def _get_titled_tree_node(self) -> schema.TitledTreeNode:
raise NotImplementedError

def leaf_refs(self) -> list[text.Ref]:
"""
Does `self` contain `other`. If `self_ref` and `other_ref` aren't None, this is just ref comparison.
Otherwise, see if the schema/altstruct node that back `self` contains `other`'s node.
Note this function is a bit confusing. It works like this:
- If `self_ref` and `other_ref` are None, we compare the nodes themselves to see if self is an ancestor of other
- If `self_ref` is None and `other_ref` isn't, we check that `other_ref` is contained in at least one of `self`'s children (`self` may be an AltStructNode in which case it has no Ref)
- If `self_ref` isn't None and `other_ref` is None, we check that `self_ref` contains all of `other`'s children (`other` may be an AltStructNode in which case it has no Ref)
- If `self_ref` and `other_ref` are both defined, we can use Ref.contains()
@param other:
@param self_ref: although `self` has a ref (if it's backed by a schemaNode) this ref doesn't include sections. For this reason, we need to be able to pass `self_ref`.
@param other_ref: see `self_ref` for docs.
Get the Refs for the ReferenceableBookNode leaf nodes from `self`
@return:
"""
if other_ref and self_ref:
return self_ref.contains(other_ref)
try:
if other_ref is None:
other_node = other._get_titled_tree_node()
if self_ref is None:
return self._get_titled_tree_node().is_ancestor_of(other_node)
# other is alt struct and self has a ref
# check that every leaf node is contained by self's ref
return all([self_ref.contains(child.ref()) for child in other_node.get_leaf_nodes()])
# self is alt struct and other has a ref
# check if any leaf node contains other's ref
return any([child.ref().contains(other_ref) for child in self._get_titled_tree_node().get_leaf_nodes()])
except NotImplementedError:
return False

def _get_titled_tree_node(self) -> schema.TitledTreeNode:
raise NotImplementedError


class NamedReferenceableBookNode(ReferenceableBookNode):
class IndexNodeReferenceableBookNode(ReferenceableBookNode):
"""
ReferenceableBookNode backed by node in an Index (either SchemaNode or AltStructNode)
"""

def __init__(self, titled_tree_node_or_index: Union[schema.TitledTreeNode, text.Index]):
self._titled_tree_node_or_index = titled_tree_node_or_index
self._titled_tree_node = titled_tree_node_or_index
if isinstance(titled_tree_node_or_index, text.Index):
self._titled_tree_node = titled_tree_node_or_index.nodes
def __init__(self, titled_tree_node: schema.TitledTreeNode):
self._titled_tree_node = titled_tree_node

@property
def referenceable(self):
Expand All @@ -135,10 +117,7 @@ def _get_titled_tree_node(self) -> schema.TitledTreeNode:
return self._titled_tree_node

def is_default(self):
return self._titled_tree_node.is_default()

def get_numeric_equivalent(self):
return getattr(self._titled_tree_node, "numeric_equivalent", None)
return self._titled_tree_node.is_default() and self._titled_tree_node.parent is not None

def ref(self) -> text.Ref:
return self._titled_tree_node.ref()
Expand All @@ -156,6 +135,19 @@ def ref_order_id(self) -> str:
return approx_ref.order_id()
return self.ref().order_id()


class NamedReferenceableBookNode(IndexNodeReferenceableBookNode):

def __init__(self, titled_tree_node_or_index: Union[schema.TitledTreeNode, text.Index]):
self._titled_tree_node_or_index = titled_tree_node_or_index
titled_tree_node = titled_tree_node_or_index
if isinstance(titled_tree_node_or_index, text.Index):
titled_tree_node = titled_tree_node_or_index.nodes
super().__init__(titled_tree_node)

def get_numeric_equivalent(self):
return getattr(self._titled_tree_node, "numeric_equivalent", None)

@staticmethod
def _is_array_map_referenceable(node: schema.ArrayMapNode) -> bool:
if not getattr(node, "isMapReferenceable", True):
Expand Down Expand Up @@ -217,24 +209,22 @@ def get_children(self, *args, **kwargs) -> List[ReferenceableBookNode]:
def ref_part_title_trie(self, *args, **kwargs):
return self._titled_tree_node.get_match_template_trie(*args, **kwargs)

def leaf_refs(self) -> list[text.Ref]:
return [n.ref() for n in self._get_titled_tree_node().get_leaf_nodes()]

class NumberedReferenceableBookNode(ReferenceableBookNode):

class NumberedReferenceableBookNode(IndexNodeReferenceableBookNode):

def __init__(self, ja_node: schema.NumberedTitledTreeNode):
self._ja_node = ja_node
super().__init__(ja_node)
self._ja_node: schema.NumberedTitledTreeNode = ja_node

@property
def referenceable(self):
return getattr(self._ja_node, 'referenceable', True)

def _get_titled_tree_node(self) -> schema.TitledTreeNode:
return self._ja_node

def is_default(self):
return self._ja_node.is_default() and self._ja_node.parent is not None

def ref(self):
return self._ja_node.ref()
def leaf_refs(self) -> list[text.Ref]:
return [self.ref()]

def possible_subrefs(self, lang: str, initial_ref: text.Ref, section_str: str, fromSections=None) -> Tuple[List[text.Ref], List[bool]]:
try:
Expand Down Expand Up @@ -379,6 +369,9 @@ def __get_section_with_offset(self, i: int, node: schema.ArrayMapNode) -> int:
def ref(self):
raise NotImplementedError(f'{self.__class__} does not have a single ref.')

def leaf_refs(self) -> list[text.Ref]:
return list(self._section_ref_map.values())

def possible_subrefs(self, lang: str, initial_ref: text.Ref, section_str: str, fromSections=None) -> Tuple[List[text.Ref], List[bool]]:
try:
possible_sections, possible_to_sections, addr_classes = self._address_class.\
Expand Down
4 changes: 2 additions & 2 deletions sefaria/model/linker/tests/linker_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ class TestResolveRawRef:
[None, """גמ' שמזונותן עליך. עיין ביצה (דף טו ע"ב רש"י ד"ה שמא יפשע:)""", 'he', ("Rashi on Beitzah 15b:8:1",), ['ביצה (דף טו ע"ב רש"י ד"ה שמא יפשע:)']],
[None, """שם אלא ביתך ל"ל. ע' מנחות מד ע"א תד"ה טלית:""", 'he', ("Tosafot on Menachot 44a:12:1",), ['מנחות מד ע"א תד"ה טלית']],
[None, """גמ' במה מחנכין. עי' מנחות דף עח ע"א תוס' ד"ה אחת:""", 'he',("Tosafot on Menachot 78a:10:1",), ['''מנחות דף עח ע"א תוס' ד"ה אחת''']],
[None, """cf. Ex. 9:6,5""", 'en', ("Exodus 9:6", "Exodus 9:5"), ['Ex. 9:6', '5']],
[None, """cf. Ex. 9:6,12:8""", 'en', ("Exodus 9:6", "Exodus 12:8"), ['Ex. 9:6', '12:8']],
["Gilyon HaShas on Berakhot 25b:1", 'רש"י תמורה כח ע"ב ד"ה נעבד שהוא מותר. זה רש"י מאוד יפה.', 'he', ("Rashi on Temurah 28b:4:2",), ['רש"י תמורה כח ע"ב ד"ה נעבד שהוא מותר']],
[None, "See Genesis 1:1. It says in the Torah, \"Don't steal\". It also says in 1:3 \"Let there be light\".", "en", ("Genesis 1:1", "Genesis 1:3"), ("Genesis 1:1", "1:3")],
])
Expand All @@ -309,12 +309,12 @@ def test_full_pipeline_ref_resolver(context_tref, input_str, lang, expected_tref
linker = library.get_linker(lang)
doc = linker.link(input_str, context_oref, type_filter='citation')
resolved = doc.resolved_refs
assert len(resolved) == len(expected_trefs)
resolved_orefs = sorted(reduce(lambda a, b: a + b, [[match.ref] if not match.is_ambiguous else [inner_match.ref for inner_match in match.resolved_raw_refs] for match in resolved], []), key=lambda x: x.normal())
if len(expected_trefs) != len(resolved_orefs):
print(f"Found {len(resolved_orefs)} refs instead of {len(expected_trefs)}")
for matched_oref in resolved_orefs:
print("-", matched_oref.normal())
assert len(resolved) == len(expected_trefs)
for expected_tref, matched_oref in zip(sorted(expected_trefs, key=lambda x: x), resolved_orefs):
assert matched_oref == Ref(expected_tref)
for match, expected_pretty_text in zip(resolved, expected_pretty_texts):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest

from scripts.sefer_hayashar_restructure import add_alt_struct
from sefaria.model.linker.referenceable_book_node import *
from unittest.mock import Mock
from sefaria.model.linker.referenceable_book_node import ReferenceableBookNode, NumberedReferenceableBookNode, NamedReferenceableBookNode, MapReferenceableBookNode
from sefaria.model.linker.ref_resolver import ResolvedRef
from sefaria.model.text import Ref


Expand Down Expand Up @@ -31,6 +31,10 @@ def make_named_node(title: str, node_path: list[str], is_alt_struct_path: bool):
return NamedReferenceableBookNode(node)


zohar_volume1_intro_node = MapReferenceableBookNode(Ref('Zohar').index.get_alt_structure("Daf").children[0].children[0])
zohar_first_daf_node = zohar_volume1_intro_node.get_children()[0]


@pytest.mark.parametrize(('node_a', 'node_b', 'self_tref', 'other_tref', 'is_contained'), [
[make_num_node(Ref('Genesis')), make_num_node(Ref('Genesis'), 1), None, None, True], # Generic pasuk node is contained in generic perek node
[make_num_node(Ref('Genesis')), make_num_node(Ref('Genesis'), 1), "Genesis 1", "Genesis 1:2", True], # Specific pasuk ref is contained in specific perek ref
Expand All @@ -39,8 +43,12 @@ def make_named_node(title: str, node_path: list[str], is_alt_struct_path: bool):
[make_num_node(Ref("Sefer HaChinukh")), make_named_node('Sefer HaChinukh', ['Parasha', 'Lech Lecha'], True), "Sefer HaChinukh, 2", None, True], # alt struct node with only one ref is contained in that ref
[make_num_node(Ref("Sefer HaChinukh")), make_named_node('Sefer HaChinukh', ['Parasha', 'Bo'], True), "Sefer HaChinukh, 4", None, False], # alt struct node with multiple refs is not contained in a single one of refs
[make_named_node('Sefer HaChinukh', ['Parasha', 'Lech Lecha'], True), make_num_node(Ref("Sefer HaChinukh")), None, "Sefer HaChinukh, 3", False], # ref outside of alt struct node isn't contained in it
[zohar_volume1_intro_node, zohar_first_daf_node, None, 'Zohar, Volume I, Introduction 1b', True], # zohar altStruct ref
[zohar_first_daf_node, zohar_volume1_intro_node, 'Zohar, Volume I, Introduction 1b', None, False], # zohar altStruct ref
])
def test_contains(node_a: ReferenceableBookNode, node_b: ReferenceableBookNode, self_tref: str, other_tref: str, is_contained: bool):
self_oref = self_tref and Ref(self_tref)
other_oref = other_tref and Ref(other_tref)
assert node_a.contains(node_b, self_oref, other_oref) == is_contained
rr_a = ResolvedRef(Mock(), Mock(), node_a, self_oref)
rr_b = ResolvedRef(Mock(), Mock(), node_b, other_oref)
assert rr_a.contains(rr_b) == is_contained
8 changes: 5 additions & 3 deletions sefaria/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,10 @@ def generic_subscribe_to_newsletter_api(request, org, email):
"steinsaltz": subscribe_steinsaltz,
}
body = json.loads(request.body)
first_name = body.get("firstName", None)
last_name = body.get("lastName", None)
first_name = body.get("firstName")
last_name = body.get("lastName")
if not first_name or not last_name:
return jsonResponse({"error": "You must provide first and last name."})
try:
subscribe = org_subscribe_fn_map.get(org)
if not subscribe:
Expand Down Expand Up @@ -417,7 +419,7 @@ def bundle_many_texts(refs, useTextFamily=False, as_sized_string=False, min_char
'url': oref.url()
}
else:
he_tc = model.TextChunk(oref, "he", actual_lang=translation_language_preference, vtitle=hebrew_version)
he_tc = model.TextChunk(oref, "he", vtitle=hebrew_version)
en_tc = model.TextChunk(oref, "en", actual_lang=translation_language_preference, vtitle=english_version)
if hebrew_version and he_tc.is_empty():
raise NoVersionFoundError(f"{oref.normal()} does not have the Hebrew version: {hebrew_version}")
Expand Down
Loading

0 comments on commit 048d505

Please sign in to comment.