Skip to content

Commit

Permalink
Add vader_sentiment support (#218)
Browse files Browse the repository at this point in the history
  • Loading branch information
jamie256 authored Jan 22, 2024
1 parent c58c0e9 commit 20cac62
Show file tree
Hide file tree
Showing 4 changed files with 1,578 additions and 1,551 deletions.
64 changes: 64 additions & 0 deletions langkit/tests/test_vader_sentiment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from logging import getLogger
import pandas as pd
import whylogs as why
from whylogs.experimental.core.udf_schema import udf_schema

TEST_LOGGER = getLogger(__name__)


def test_vader_sentiment():
    """Check that the vader_sentiment UDFs produce sane distribution metrics.

    Logs a small prompt/response frame through a schema built by
    ``udf_schema()`` and, for both columns, asserts that the
    ``<column>.vader_sentiment`` distribution metric exposes a mean and that
    its average, minimum and maximum fall in the expected ranges.
    """
    # Imported inside the test so the module's UDF registration happens as
    # part of this test rather than at collection time.
    from langkit import vader_sentiment

    vader_sentiment.init()
    df = pd.DataFrame(
        {
            "prompt": [
                "timely text propagated prolifically",
                "articulate artichokes aggravated allergically",
                "So amazing!! ABSOLUTELY fantastic :-) Da BOMB!!",
            ],
            "response": [
                "I neither approve or disapprove.",
                "strawberries are not true fruits, and they smell",
                "this is not my response",
            ],
        }
    )
    schema = udf_schema()
    view = why.log(df, schema=schema).view()
    # Use the test logger (not print) so diagnostics follow the logging
    # configuration, matching the other test in this file.
    TEST_LOGGER.debug("profiled columns: %s", view.get_columns().keys())
    for column in ["prompt", "response"]:
        dist = view.get_column(f"{column}.vader_sentiment").get_metric("distribution")
        # Build the summary once; it is both asserted on and logged.
        summary = dist.to_summary_dict()
        assert "mean" in summary
        TEST_LOGGER.debug("%s.vader_sentiment has %s", column, summary)
        assert dist.avg > -0.4
        assert dist.min < 0
        assert dist.max >= 0


def test_vader_sentiment_with_llm_metrics():
    """Check vader_sentiment metrics appear when logging with the llm_metrics schema.

    Initializes vader_sentiment, takes the schema returned by
    ``llm_metrics.init()``, logs a small prompt/response frame, and asserts
    that each ``<column>.vader_sentiment`` distribution has a mean and a
    negative minimum.
    """
    from langkit import llm_metrics
    from langkit import vader_sentiment

    vader_sentiment.init()
    schema = llm_metrics.init()

    prompts = [
        "timely text propagated prolifically",
        "articulate artichokes aggravated allergically",
        "So amazing!! ABSOLUTELY fantastic :-) Da BOMB!!",
    ]
    responses = [
        "I neither approve or disapprove.",
        "strawberries are not true fruits, and they smell",
        "this is not my response",
    ]
    df = pd.DataFrame({"prompt": prompts, "response": responses})

    profile_view = why.log(df, schema=schema).view()
    TEST_LOGGER.debug(profile_view.get_columns().keys())
    for column in ["prompt", "response"]:
        sentiment_column = profile_view.get_column(f"{column}.vader_sentiment")
        distribution = sentiment_column.get_metric("distribution")
        assert "mean" in distribution.to_summary_dict()
        assert distribution.min < 0
37 changes: 37 additions & 0 deletions langkit/vader_sentiment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from logging import getLogger
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

from whylogs.experimental.core.udf_schema import register_dataset_udf
from langkit import prompt_column, response_column


# Module-level logger used for diagnostic messages from this module.
diagnostic_logger = getLogger(__name__)

# Shared analyzer instance; stays None until init() runs (vader_sentiment()
# calls init() itself if it finds this still unset).
_vader_sentiment_analyzer = None

# Column names the sentiment UDFs are registered against.
_prompt = prompt_column
_response = response_column


def vader_sentiment(text: str) -> float:
    """Return the "compound" polarity score of *text*.

    Uses the module-level analyzer. If it has not been created yet, logs an
    informational message and creates it via ``init()`` first.

    Args:
        text: The string to score.

    Returns:
        The ``"compound"`` entry of the analyzer's ``polarity_scores`` result.
    """
    global _vader_sentiment_analyzer
    analyzer = _vader_sentiment_analyzer
    if analyzer is None:
        diagnostic_logger.info(
            "vader_sentiment called before init, using default initialization."
        )
        # Keep the module-level reference in sync with the local alias.
        analyzer = _vader_sentiment_analyzer = init()
    scores = analyzer.polarity_scores(text)
    return scores["compound"]


@register_dataset_udf([_prompt], udf_name=f"{_prompt}.vader_sentiment")
def prompt_sentiment(text):
    """Score every entry of the prompt column with ``vader_sentiment``.

    ``text`` is indexed by the prompt column name and the result is iterated;
    presumably it is the column mapping/DataFrame supplied by whylogs to
    dataset UDFs (confirm against the whylogs UDF contract).

    Returns:
        A list with one sentiment score per prompt entry, in order.
    """
    prompts = text[_prompt]
    return [vader_sentiment(entry) for entry in prompts]


@register_dataset_udf([_response], udf_name=f"{_response}.vader_sentiment")
def response_sentiment(text):
    """Score every entry of the response column with ``vader_sentiment``.

    ``text`` is indexed by the response column name and the result is
    iterated; presumably it is the column mapping/DataFrame supplied by
    whylogs to dataset UDFs (confirm against the whylogs UDF contract).

    Returns:
        A list with one sentiment score per response entry, in order.
    """
    responses = text[_response]
    return [vader_sentiment(entry) for entry in responses]


def init() -> SentimentIntensityAnalyzer:
    """Create a fresh analyzer, store it as the module-level instance, and return it.

    Each call replaces any previously stored analyzer with a new
    ``SentimentIntensityAnalyzer``.

    Returns:
        The newly created analyzer.
    """
    global _vader_sentiment_analyzer
    analyzer = SentimentIntensityAnalyzer()
    _vader_sentiment_analyzer = analyzer
    return analyzer
Loading

0 comments on commit 20cac62

Please sign in to comment.