Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/replace-get-highlighted-text #313

Open
wants to merge 42 commits into
base: v4
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
663b04e
Add JIT highlighting of morphs
RobHelgeson Nov 27, 2024
05ff0b6
Add Rubification of sting - built in filters cannot run after custom …
RobHelgeson Nov 27, 2024
2df17f9
Allow filter to run after built-in filter
RobHelgeson Nov 27, 2024
9056db7
Add documentation
RobHelgeson Nov 27, 2024
d84799b
Fix am-highlight-morphs parse html issue
RobHelgeson Nov 27, 2024
1117111
Allow cards where you want to use am-highlight-morphs to not have the…
RobHelgeson Nov 27, 2024
3401040
Address PR comments
RobHelgeson Nov 28, 2024
bd73b95
Update documentation
RobHelgeson Nov 28, 2024
d2e2cf4
Update documentation
RobHelgeson Nov 28, 2024
0e2573a
Updates for code review
RobHelgeson Nov 29, 2024
fdeb76e
Fix bad morph highlighting in ruby html
RobHelgeson Nov 29, 2024
832a255
Fix bad morph highlighting in ruby html - WIP
RobHelgeson Nov 29, 2024
b23ebd8
experemental
RobHelgeson Nov 30, 2024
005f2dd
Reimpl of highlight text using regexes.
RobHelgeson Dec 2, 2024
eb34b3a
Update highlight comments.
RobHelgeson Dec 2, 2024
ea14846
Update documentation.
RobHelgeson Dec 2, 2024
6072576
WIP - WIP
RobHelgeson Dec 4, 2024
fb5ce61
Update jit formatter with changes from experimental branch
RobHelgeson Dec 9, 2024
7189712
Checkpoint commit - WIP
RobHelgeson Dec 10, 2024
7a3ae5f
Checkpoint speeeeedy! - WIP
RobHelgeson Dec 10, 2024
f7841ee
Checkpoint - WIP
RobHelgeson Dec 10, 2024
c8e8883
Checkpoint - WIP
RobHelgeson Dec 10, 2024
994355f
Checkpoint - WIP
RobHelgeson Dec 10, 2024
fdc6a07
Checkpoint - WIP
RobHelgeson Dec 10, 2024
c16ba70
Checkpoint - feature complete
RobHelgeson Dec 11, 2024
78aee2e
Add JIT highlighting
RobHelgeson Dec 11, 2024
eb57cba
Documentation and dead code
RobHelgeson Dec 11, 2024
f2b702f
Cleanup
RobHelgeson Dec 11, 2024
47f7c14
Replace get_highlighted_text
RobHelgeson Dec 11, 2024
b5af09f
Rename some variables for clarity
RobHelgeson Dec 12, 2024
20336c1
Formatting per PR request.
RobHelgeson Dec 12, 2024
da5eb66
Renamed some symbols for clarity.
RobHelgeson Dec 13, 2024
4b4be12
Renamed some symbols for clarity.
RobHelgeson Dec 13, 2024
b530660
updated guide
mortii Dec 14, 2024
f8c56d1
fixed broken test
mortii Dec 14, 2024
d569b52
deleted empty file
mortii Dec 14, 2024
e58e9d7
Address text parsing bug where rubies were allowed to overlap
RobHelgeson Dec 21, 2024
af3449b
Support text based rubies
RobHelgeson Jan 3, 2025
0d897a4
Pr cleanup
RobHelgeson Jan 6, 2025
bda97d0
Implement jit formatting for furigana, kanji and kana.
RobHelgeson Jan 6, 2025
c4eb14d
added temp verbose dev logging
mortii Jan 7, 2025
648054c
Create subclasses for all text formatting options
RobHelgeson Jan 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion ankimorphs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from typing import Literal

import aqt
from anki import hooks
from anki.cards import Card
from anki.collection import OpChangesAfterUndo
from aqt import gui_hooks, mw
Expand All @@ -39,7 +40,7 @@
from . import ankimorphs_globals as am_globals
from . import (
browser_utils,
debugging_utils,
debug_utils,
message_box_utils,
name_file_utils,
reviewing_utils,
Expand All @@ -52,6 +53,7 @@
from .extra_settings import ankimorphs_extra_settings, extra_settings_keys
from .extra_settings.ankimorphs_extra_settings import AnkiMorphsExtraSettings
from .generators.generators_window import GeneratorWindow
from .highlight_morphs_jit import highlight_morphs_jit
from .known_morphs_exporter import KnownMorphsExporterDialog
from .progression.progression_window import ProgressionWindow
from .recalc import recalc_main
Expand Down Expand Up @@ -97,6 +99,8 @@ def main() -> None:

gui_hooks.profile_will_close.append(cleanup_profile_session)

hooks.field_filter.append(highlight_morphs_jit)


def init_toolbar_items(links: list[str], toolbar: Toolbar) -> None:
# Adds the 'L: I:' and 'Recalc' to the toolbar
Expand Down
12 changes: 12 additions & 0 deletions ankimorphs/ankimorphs_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -816,6 +816,18 @@ def get_modify_enabled_filters() -> list[AnkiMorphsConfigFilter]:
return modify_filters


def get_matching_filter(note: Note) -> AnkiMorphsConfigFilter | None:
    """Return the first configured filter whose note type matches ``note``.

    Every filter returned by ``AnkiMorphsConfig().get_config_filters()`` is
    checked in order: its ``note_type`` name is resolved to a note type id
    through the collection and compared against ``note.mid``. ``None`` is
    returned when no filter matches.
    """
    assert mw is not None
    all_filters = AnkiMorphsConfig().get_config_filters()
    assert isinstance(all_filters, list)

    # The generator is lazy, so name lookups stop at the first matching filter.
    return next(
        (
            candidate
            for candidate in all_filters
            if mw.col.models.id_for_name(candidate.note_type) == note.mid
        ),
        None,
    )


def get_matching_modify_filter(note: Note) -> AnkiMorphsConfigFilter | None:
assert mw is not None
modify_filters: list[AnkiMorphsConfigFilter] = get_modify_enabled_filters()
Expand Down
2 changes: 1 addition & 1 deletion ankimorphs/ankimorphs_globals.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# Semantic Versioning https://semver.org/
__version__ = "3.3.0"

DEV_MODE: bool = False
DEV_MODE: bool = True

PROFILE_SETTINGS_FILE_NAME = "ankimorphs_profile_settings.json"
NAMES_TXT_FILE_NAME = "names.txt"
Expand Down
14 changes: 14 additions & 0 deletions ankimorphs/debugging_utils.py → ankimorphs/debug_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
from pathlib import Path
from typing import Any

from . import ankimorphs_globals
from .morpheme import Morpheme


def print_stacktrace() -> None:
stacktrace = ""
Expand Down Expand Up @@ -72,3 +75,14 @@ def print_directory_tree(root_dir: str, indent: str = "") -> None:
print_directory_tree(item_path, indent + " ")
else:
print(new_indent + item)


def dev_print(message: str) -> None:
    """Print ``message`` only when ``ankimorphs_globals.DEV_MODE`` is truthy."""
    if not ankimorphs_globals.DEV_MODE:
        return
    print(message)


def dev_print_morphs(morphs: list[Morpheme]) -> None:
    """Print the inflection of every morph, one per line, when DEV_MODE is on.

    Does nothing when ``ankimorphs_globals.DEV_MODE`` is falsy.
    """
    if not ankimorphs_globals.DEV_MODE:
        return

    for entry in morphs:
        print(f"morph: {entry.inflection}")
175 changes: 175 additions & 0 deletions ankimorphs/highlight_morphs_jit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
from __future__ import annotations

import re

import anki
from anki.template import TemplateRenderContext

from . import (
ankimorphs_config,
ankimorphs_globals,
debug_utils,
text_highlighting,
text_preprocessing,
)
from .ankimorphs_config import AnkiMorphsConfig, AnkiMorphsConfigFilter
from .ankimorphs_db import AnkiMorphsDB
from .morpheme import Morpheme
from .morphemizers import morphemizer as morphemizer_module
from .morphemizers import spacy_wrapper
from .morphemizers.morphemizer import Morphemizer, SpacyMorphemizer


def highlight_morphs_jit(
    field_text: str,
    field_name: str,
    filter_name: str,
    context: TemplateRenderContext,
) -> str:
    """Highlight the morphs of a field at render time (template field filter).

    The text is morphemized with the morphemizer configured for the note's
    type, each morph is annotated with its learning interval, and the result
    of ``text_highlighting.get_highlighted_text`` is wrapped in a
    ``<span class='{filter_name}'>`` so the card template can style it.

    Args:
        field_text: Current text of the field (possibly already transformed by
            an earlier filter in the template's filter chain).
        field_name: Name of the field being rendered.
        filter_name: Name of the template filter being applied.
        context: Render context; only ``context.note()`` is used.

    Returns:
        ``field_text`` unchanged when any of the following holds:
          * ``filter_name`` is not one of the ``am-highlight*`` filters,
          * the field is the extra "highlighted" field (it already holds
            formatted data, so highlighting it again would be wasted work),
          * no AnkiMorphs config filter matches the note's type,
          * no morphemizer matches the filter's morphemizer description,
          * no morphs were found in the text.
        Otherwise the highlighted text wrapped in the span described above.
    """
    supported_filters = (
        "am-highlight",
        "am-highlight-furigana",
        "am-highlight-kanji",
        "am-highlight-kana",
    )

    # Perf: this hook sees every filter on every rendered field, so bail out
    # before doing any config or database work when the call is not for us.
    if (
        filter_name not in supported_filters
        or field_name == ankimorphs_globals.EXTRA_FIELD_HIGHLIGHTED
    ):
        return field_text

    am_config_filter: AnkiMorphsConfigFilter | None = (
        ankimorphs_config.get_matching_filter(context.note())
    )

    if am_config_filter is None:
        return field_text

    morphemizer: Morphemizer | None = morphemizer_module.get_morphemizer_by_description(
        am_config_filter.morphemizer_description
    )

    if not morphemizer:
        return field_text

    am_config = AnkiMorphsConfig()

    card_morphs: list[Morpheme] = _get_morph_meta_for_text(
        morphemizer, field_text, am_config
    )

    if not card_morphs:
        return field_text

    debug_utils.dev_print(f"filter name: {filter_name}")

    # Only html rubies are converted to the bracket shorthand here; the
    # remaining markup is kept so it survives into the rendered card.
    highlighted_jit_text = (
        f"<span class='{filter_name}'>"
        + text_highlighting.get_highlighted_text(
            am_config=am_config,
            morphemes=card_morphs,
            text=_dehtml(field_text),
            ruby_type=_get_ruby_type(filter_name),
        )
        + "</span>"
    )

    debug_utils.dev_print(f"highlighted_jit_text: {highlighted_jit_text}")

    return highlighted_jit_text


def _get_morph_meta_for_text(
    morphemizer: Morphemizer,
    field_text: str,
    am_config: AnkiMorphsConfig,
) -> list[Morpheme]:
    """Morphemize ``field_text`` and attach learning intervals to the morphs.

    The text is first cleaned with ``_dehtml`` (rubies converted, html
    stripped, config preprocessing applied), then split into morphs by either
    the spaCy pipeline or the regular morphemizer. Duplicates are removed via
    a ``set``, so the order of the returned list is not the order of
    appearance in the text.

    For every remaining morph the highest learning interval is read from the
    AnkiMorphs database and stored on the morph: the inflection interval when
    ``am_config.evaluate_morph_inflection`` is truthy, the lemma interval
    otherwise. A falsy database result is stored as ``0``.

    Returns an empty list when no morphs were found.
    """

    # The input may have been piped through the built-in `furigana` filter or
    # may contain html from the source data, so it is cleansed first.
    cleaned = _dehtml(field_text, am_config, True)

    if not isinstance(morphemizer, SpacyMorphemizer):
        found = text_preprocessing.get_processed_morphemizer_morphs(
            morphemizer, cleaned, am_config
        )
    else:
        model_name = morphemizer.get_description().removeprefix("spaCy: ")
        nlp = spacy_wrapper.get_nlp(model_name)
        doc = next(nlp.pipe([cleaned]))
        found = text_preprocessing.get_processed_spacy_morphs(am_config, doc)

    unique_morphs = list(set(found))

    if not unique_morphs:
        return []

    with AnkiMorphsDB() as am_db:
        for morph in unique_morphs:
            if not am_config.evaluate_morph_inflection:
                interval = am_db.get_highest_lemma_learning_interval(morph)
                morph.highest_lemma_learning_interval = interval or 0
                continue

            interval = am_db.get_highest_inflection_learning_interval(morph)
            morph.highest_inflection_learning_interval = interval or 0

    return unique_morphs


def _dehtml(
text: str,
am_config: AnkiMorphsConfig | None = None,
clean_html: bool = False,
) -> str:
"""Prepare a string to be passed to a morphemizer. Specially process <ruby><rt> tags to extract
ruby to reconstruct base/ruby ruby shorthand. Remove all html from the input string.
"""

# Capture html ruby ruby. The built in furigana filter will turn X[yz] into
# <ruby><rb>X</rb><rt>yz</rt></ruby>, and if we blindly strip out all html we will loose
# information on the ruby. Find <rt> tags and capture all text between them in a capture
# group called ruby, allow for any attributes or other decorations on the <rt> tag by
# non-eagerly capturing all chars up to '>', so that the whole element can just be dropped.
# non-eagerly capture one or more characters into the capture group named ruby.
# Samples:
# <ruby><rb>X</rb><rt>yz</rt></ruby> = ` X[yz]`
# <ruby>X<rt>yz</rt></ruby> = ` X[yz]`
# <ruby>X<rt class='foo'>234</rt>sdf</ruby> = ` X[234]sdf`
# <ruby>X<rt >>234</rt>sdf</ruby> = ` X[>234]sdf`
# <ruby>X<rt></rt></ruby> = Will not match
ruby_longhand = r"(?:<ruby[^<]*>)(?:<rb[^>]*>|.{0})(?P<base>.*?)(?:</rb>|.{0})<rt[^>]*>(?P<ruby>.+?)</rt>(?P<after>.*?)(?:</ruby>)"

# Emit the captured ruby into square brackets, thus reconstructing the ruby shorthand "X[yz]".
# Pad with a leading space so that we can retain the base/ruby relationship
ruby_shorthand = r" \g<base>[\g<ruby>]\g<after>"

text = re.sub(ruby_longhand, ruby_shorthand, text, flags=re.IGNORECASE).strip()

if clean_html:
text = anki.utils.strip_html(text)

return text_preprocessing.get_processed_text(am_config, text) if am_config else text


def _get_ruby_type(filter_name: str) -> str:
"""Get local styles for this run, based on the filter name."""

if filter_name == "am-highlight-furigana":
return "furigana"
if filter_name == "am-highlight-kanji":
return "kanji"
if filter_name == "am-highlight-kana":
return "kana"

return "text"
Loading