OpenVoiceOS · NeonDaniel · Sep 17, 2024 · Sep 17, 2024 · Sep 17, 2024 · Sep 17, 2024
diff --git a/ovos_workshop/skills/common_query_skill.py b/ovos_workshop/skills/common_query_skill.py
@@ -13,7 +13,7 @@
 from abc import abstractmethod
 from enum import IntEnum
 from os.path import dirname
-from typing import List, Optional, Tuple
+from typing import List, Optional, Tuple, Union
 
 from ovos_bus_client import Message
 from ovos_utils.file_utils import resolve_resource_file
@@ -26,14 +26,16 @@
 class CQSMatchLevel(IntEnum):
     EXACT = 1  # Skill could find a specific answer for the question
     CATEGORY = 2  # Skill could find an answer from a category in the query
-    GENERAL = 3  # The query could be processed as a general quer
+    GENERAL = 3  # The query could be processed as a general query
 
 
 # Copy of CQSMatchLevel to use if the skill returns visual media
 CQSVisualMatchLevel = IntEnum('CQSVisualMatchLevel',
                               [e.name for e in CQSMatchLevel])
 
 """these are for the confidence calculation"""
+# TODO: TOPIC_MATCH_RELEVANCE and RELEVANCE_MULTIPLIER stack on the same count of
+#   "relevant" words. This adds too much artificial confidence (>100%)
 # how much each topic word is worth
 # when found in the answer
 TOPIC_MATCH_RELEVANCE = 5
@@ -60,12 +62,18 @@ class CommonQuerySkill(OVOSSkill):
     """
 
     def __init__(self, *args, **kwargs):
-        # these should probably be configurable
+        # Confidence calculation numbers may be configured per-skill
         self.level_confidence = {
             CQSMatchLevel.EXACT: 0.9,
             CQSMatchLevel.CATEGORY: 0.6,
             CQSMatchLevel.GENERAL: 0.5
         }
+        self.relevance_multiplier = TOPIC_MATCH_RELEVANCE * RELEVANCE_MULTIPLIER
+        self.input_consumed_multiplier = 0.1
+        # TODO: The below defaults of 0.1 add ~25% for a 2-sentence response which is too much
+        self.response_sentences_multiplier = 0.1
+        self.response_words_multiplier = 1 / WORD_COUNT_DIVISOR
+
         super().__init__(*args, **kwargs)
 
         noise_words_filepath = f"text/{self.lang}/noise_words.list"
@@ -142,7 +150,16 @@ def __handle_question_query(self, message: Message):
             level = result[1]
             answer = result[2]
             callback = result[3] if len(result) > 3 else {}
-            confidence = self.__calc_confidence(match, search_phrase, level, answer)
+            if isinstance(level, float):
+                LOG.debug(f"Confidence directly reported by skill")
+                confidence = level
+            else:
+                LOG.info(f"Calculating confidence for level {level}")
+                confidence = self.__calc_confidence(match, search_phrase, level,
+                                                    answer)
+            if confidence > 1.0:
+                LOG.warning(f"Calculated confidence {confidence} > 1.0")
+                confidence = 1.0
             callback["answer"] = answer  # ensure we get it back in CQS_action
             self.bus.emit(message.response({"phrase": search_phrase,
                                             "skill_id": self.skill_id,
@@ -156,8 +173,8 @@ def __handle_question_query(self, message: Message):
                                             "skill_id": self.skill_id,
                                             "searching": False}))
 
-    def __get_cq(self, search_phrase: str) -> (str, CQSMatchLevel, str,
-                                               Optional[dict]):
+    def __get_cq(self, search_phrase: str) -> (str, Union[CQSMatchLevel, float],
+                                               str, Optional[dict]):
         """
         Invoke the CQS handler to let the skill perform its search
         @param search_phrase: parsed question to get an answer for
@@ -201,36 +218,52 @@ def __calc_confidence(self, match: str, phrase: str, level: CQSMatchLevel,
         consumed_pct = len(match.split()) / len(phrase.split())
         if consumed_pct > 1.0:
             consumed_pct = 1.0
-        consumed_pct /= 10
 
-        # bonus for more sentences
-        num_sentences = float(float(len(answer.split("."))) / float(10))
+        # Approximate the number of sentences in the answer. A trailing `.` will
+        # split, so reduce length by 1. If no `.` is present, ensure we count
+        # any response as at least 1 sentence
+        num_sentences = min(len(answer.split(".")) - 1, 1)
 
-        # extract topic
+        # Remove articles and question words to approximate the meaningful part
+        # of what the skill extracted from the user input
         topic = self.remove_noise(match)
 
-        # calculate relevance
+        # Determine how many relevant words from the input are present in the
+        # answer
+        # TODO: Strip SSML from the answer here
         answer = answer.lower()
         matches = 0
         for word in topic.split(' '):
             if answer.find(word) > -1:
-                matches += TOPIC_MATCH_RELEVANCE
-
+                matches += 1
+        LOG.debug(f"Answer matched {matches} words")
         answer_size = len(answer.split(" "))
-        answer_size = min(MAX_ANSWER_LEN_FOR_CONFIDENCE, answer_size)
 
+        # Calculate relevance as the percentage of relevant input words divided
+        # by the length of the response. This means that an answer that only
+        # contains the input will have a relevance value of 1
         relevance = 0.0
         if answer_size > 0:
             relevance = float(float(matches) / float(answer_size))
 
-        relevance = relevance * RELEVANCE_MULTIPLIER
+        # extra credit for more words up to a point. By default, 50 words is
+        # considered optimal
+        answer_size = min(MAX_ANSWER_LEN_FOR_CONFIDENCE, answer_size)
 
-        # extra credit for more words up to a point
-        wc_mod = float(float(answer_size) / float(WORD_COUNT_DIVISOR)) * 2
+        # Calculate bonuses based on calculated values and configured weights
+        consumed_pct_bonus = consumed_pct * self.input_consumed_multiplier
+        num_sentences_bonus = num_sentences * self.response_sentences_multiplier
+        relevance_bonus = relevance * self.relevance_multiplier
+        word_count_bonus = answer_size * self.response_words_multiplier
 
+        LOG.debug(f"consumed_pct_bonus={consumed_pct_bonus}|num_sentence_bonus="
+                  f"{num_sentences_bonus}|relevance_bonus={relevance_bonus}|"
+                  f"word_count_bonus={word_count_bonus}")
         confidence = self.level_confidence[level] + \
-                     consumed_pct + num_sentences + relevance + wc_mod
-
+                     consumed_pct_bonus + num_sentences_bonus + relevance_bonus + word_count_bonus
+        if confidence > 1:
+            LOG.warning(f"Calculated confidence > 1.0: {confidence}")
+            return 1.0
         return confidence
 
     def __handle_query_classic(self, message):
@@ -270,7 +303,7 @@ def __handle_query_action(self, message: Message):
 
     @abstractmethod
     def CQS_match_query_phrase(self, phrase: str) -> \
-            Optional[Tuple[str, CQSMatchLevel, Optional[dict]]]:
+            Optional[Tuple[str, Union[CQSMatchLevel, float], Optional[dict]]]:
         """
         Determine an answer to the input phrase and return match information, or
         `None` if no answer can be determined.

diff --git a/test/unittests/skills/test_common_query_skill.py b/test/unittests/skills/test_common_query_skill.py
@@ -1,4 +1,5 @@
 from unittest import TestCase
+from unittest.mock import Mock
 
 from ovos_utils.messagebus import FakeBus
 from ovos_workshop.skills.base import BaseSkill
@@ -36,16 +37,39 @@ def test_handle_question_query(self):
         pass
 
     def test_get_cq(self):
-        # TODO
-        pass
+        test_phrase = "test"
+        mock_return = Mock()
+        self.skill.CQS_match_query_phrase = Mock(return_value=mock_return)
+        result = self.skill._CommonQuerySkill__get_cq(test_phrase)
+        self.skill.CQS_match_query_phrase.assert_called_once_with(test_phrase)
+        self.assertEqual(result, mock_return)
+
+        self.skill.CQS_match_query_phrase.side_effect = Exception()
+        result = self.skill._CommonQuerySkill__get_cq(test_phrase)
+        self.assertIsNone(result)
 
     def test_remove_noise(self):
-        # TODO
-        pass
+        noisy_match = "what is a computer"
+        normalized = "computer"
+        self.assertEqual(self.skill.remove_noise(noisy_match), normalized)
 
     def test_calc_confidence(self):
-        # TODO
-        pass
+        generic_q = "what is coca cola"
+        specific_q = "how much caffeine is in coca cola"
+        specific_q_2 = "what is the stock price for coca cola"
+        cw_answer = ("The drink diet coke has 32 milligrams of caffeine in "
+                     "250 milliliters.</speak> Provided by CaffeineWiz.")
+
+        generic_conf = self.skill._CommonQuerySkill__calc_confidence(
+            "coca cola", generic_q, CQSMatchLevel.GENERAL, cw_answer)
+        exact_conf = self.skill._CommonQuerySkill__calc_confidence(
+            "coca cola", specific_q, CQSMatchLevel.EXACT, cw_answer)
+        low_conf = self.skill._CommonQuerySkill__calc_confidence(
+            "coca cola", specific_q_2, CQSMatchLevel.GENERAL, cw_answer)
+
+        self.assertEqual(exact_conf, 1.0)
+        self.assertLess(generic_conf, exact_conf)
+        self.assertLess(low_conf, generic_conf)
 
     def test_handle_query_action(self):
         # TODO