diff --git a/openvino_xai/metrics/adcc.py b/openvino_xai/metrics/adcc.py
deleted file mode 100644
index 33d9244d..00000000
--- a/openvino_xai/metrics/adcc.py
+++ /dev/null
@@ -1,104 +0,0 @@
-import numpy as np
-from scipy import stats as STS
-
-from openvino_xai import Task
-from openvino_xai.explainer.explainer import Explainer, ExplainMode
-
-
-class ADCC:
-    def __init__(self, model, compiled_model, preprocess_fn, postprocess_fn):
-        self.preprocess_fn = preprocess_fn
-        self.postprocess_fn = postprocess_fn
-        self.compiled_model = compiled_model
-        self.explainer = Explainer(
-            model=model,
-            task=Task.CLASSIFICATION,
-            preprocess_fn=self.preprocess_fn,
-            explain_mode=ExplainMode.WHITEBOX,
-        )
-
-    def predict(self, input) -> int:
-        logits = self.compiled_model([self.preprocess_fn(input)])[0]
-        logits = self.postprocess_fn(logits)
-        return logits
-
-    def average_drop(self, image, saliency_map, model_output, class_idx=None):
-        """It measures the average percentage drop in
-        confidence for the target class c when the model sees only
-        the explanation map, instead of the full image."""
-
-        masked_image = image * saliency_map[:, :, None]
-
-        # if masked_image.ndim == 2:
-        #     masked_image = masked_image[:, :, None]
-
-        confidence_on_input = np.max(model_output)
-        if class_idx is None:
-            class_idx = np.argmax(model_output)
-
-        prediction_on_saliency_map = self.predict(masked_image)
-        confidence_on_saliency_map = prediction_on_saliency_map[class_idx]
-
-        return max(0.0, confidence_on_input - confidence_on_saliency_map) / confidence_on_input
-
-    def complexity(self, saliency_map):
-        """
-        Saliency map has to be as less complex as possible, i.e., it must contain the minimum set of pixels that explains the prediction.
-        Defined as L1 norm of the saliency map. Complexity is minimized when the number of pixels highlighted by the attribution method is low.
-
-        """
-        return abs(saliency_map).sum() / (saliency_map.shape[-1] * saliency_map.shape[-2])
-
-    def coherency(self, image, saliency_map, model_output, class_idx=None):
-        """Maximum Coherency. The CAM should contain all the
-        relevant features that explain a prediction and should remove useless features in a coherent way.
-        As a consequence, given an input image x and a class of interest c, the CAM
-        of x should not change when conditioning x on the CAM
-        itself"""
-        if not (np.max(saliency_map) <= 1 and np.min(saliency_map) >= 0):
-            saliency_map = saliency_map / 255  # Normalize to [0, 1]
-
-        assert (
-            np.max(saliency_map) <= 1 and np.min(saliency_map) >= 0
-        ), "Saliency map should be normalized between 0 and 1"
-
-        masked_image = image * saliency_map[:, :, None]
-
-        if class_idx is None:
-            class_idx = np.argmax(model_output)
-
-        saliency_map_mapped_image = self.explainer(
-            masked_image,
-            targets=[class_idx],
-            colormap=False,
-        )
-
-        # Find a way to return not scaled salinecy map [0, 1]
-        saliency_map_mapped_image = saliency_map_mapped_image.saliency_map[class_idx]
-        if not (np.max(saliency_map_mapped_image) <= 1 and np.min(saliency_map_mapped_image) >= 0):
-            saliency_map_mapped_image = saliency_map_mapped_image / 255  # Normalize to [0, 1]
-
-        A, B = saliency_map, saliency_map_mapped_image
-
-        """
-        # Pearson correlation coefficient
-        # """
-        Asq, Bsq = A.flatten(), B.flatten()
-
-        y, _ = STS.pearsonr(Asq, Bsq)
-        y = (y + 1) / 2
-
-        return y
-
-    def adcc(self, image, saliency_map, target_class_idx=None):
-        # TODO test target_class_idx
-
-        model_output = self.predict(image)
-
-        avgdrop = self.average_drop(image, saliency_map, model_output, class_idx=target_class_idx)
-        coh = self.coherency(image, saliency_map, model_output, class_idx=target_class_idx)
-        com = self.complexity(saliency_map)
-
-        adcc = 3 / (1 / coh + 1 / (1 - com) + 1 / (1 - avgdrop))
-
-        return adcc
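Note: the `adcc()` method removed above combines the three sub-metrics with a harmonic mean. A minimal standalone sketch of that combination, with an illustrative helper name (`adcc_score`) and toy values that are not part of the library:

```python
def adcc_score(coherency: float, complexity: float, average_drop: float) -> float:
    # Harmonic mean of coherency, (1 - complexity), and (1 - average_drop),
    # mirroring the formula in the deleted ADCC.adcc() above.
    return 3.0 / (1.0 / coherency + 1.0 / (1.0 - complexity) + 1.0 / (1.0 - average_drop))


# A coherent, sparse explanation with little confidence drop scores close to 1.
print(adcc_score(coherency=0.95, complexity=0.1, average_drop=0.05))  # ~0.93
```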
- """ - baseline = np.full_like(input_image, 0) - sorted_indices = np.argsort(-saliency_map.flatten()) # Sort pixels by importance (descending) - sorted_indices = np.unravel_index(sorted_indices, saliency_map.shape) - - _, pred_class = self.predict(input_image) - - scores = [] - for i in range(steps + 1): - temp_image = baseline.copy() - num_pixels_to_insert = int(i * len(sorted_indices[0]) / steps) - temp_image[ - sorted_indices[0][:num_pixels_to_insert], sorted_indices[1][:num_pixels_to_insert] - ] = input_image[sorted_indices[0][:num_pixels_to_insert], sorted_indices[1][:num_pixels_to_insert]] - - # Predict and record the score - # cv2.imshow("temp_image", temp_image) - temp_logits, _ = self.predict(temp_image) # Model expects batch dimension - scores.append(temp_logits[pred_class]) - # cv2.waitKey(0) - # cv2.destroyAllWindows() - - insertion_auc_score = auc(np.array(scores)) - return insertion_auc_score - - def deletion_auc_image(self, input_image, saliency_map, steps=100): - """ - Calculate the Deletion AUC metric for images. - - Parameters: - - model: the model to evaluate. - - input_image: the input image to the model (H, W, C). - - saliency_map: importance scores for each pixel (H, W). - - steps: number of steps for deleting pixels. - - Returns: - - deletion_auc_score: the calculated AUC for deletion. - """ - sorted_indices = np.argsort(-saliency_map.flatten()) # Sort pixels by importance (descending) - sorted_indices = np.unravel_index(sorted_indices, saliency_map.shape) - - _, pred_class = self.predict(input_image) - - scores = [] - for i in range(steps + 1): - temp_image = input_image.copy() - num_pixels_to_delete = int(i * len(sorted_indices[0]) / steps) - temp_image[ - sorted_indices[0][:num_pixels_to_delete], sorted_indices[1][:num_pixels_to_delete] - ] = 0 # Remove important pixels - - # Predict and record the score - # cv2.imshow("temp_image", temp_image) - temp_logits, _ = self.predict(temp_image) # Model expects batch dimension - scores.append(temp_logits[pred_class]) - # cv2.waitKey(0) - # cv2.destroyAllWindows() - - deletion_auc_score = auc(np.array(scores)) - return deletion_auc_score - - def evaluate(self, input_images, saliency_maps, steps): - insertions, deletions = [], [] - for input_image, saliency_map in zip(input_images, saliency_maps): - insertion = self.insertion_auc_image(input_image, saliency_map, steps) - deletion = self.deletion_auc_image(input_image, saliency_map, steps) - - insertions.append(insertion) - deletions.append(deletion) - - insertion = np.mean(np.array(insertions)) - deletion = np.mean(np.array(deletion)) - delta = insertion - deletion - - return insertion, deletion, delta diff --git a/openvino_xai/metrics/pointing_game.py b/openvino_xai/metrics/pointing_game.py index 0f92042c..3b02b60a 100644 --- a/openvino_xai/metrics/pointing_game.py +++ b/openvino_xai/metrics/pointing_game.py @@ -7,15 +7,15 @@ class PointingGame: @staticmethod def pointing_game(saliency_map: np.ndarray, gt_bbox: Tuple[int, int, int, int]) -> bool: """ - Implements the Pointing Game metric using bounding boxes. + Implements the Pointing Game metric using bounding boxes. Returns a boolean indicating + if any of the most salient point falls within the ground truth bounding box. - Parameters: - - saliency_map: A 2D numpy array representing the saliency map for the image. - - gt_bbox: A tuple (x, y, w, h) representing the bounding box of the ground truth object. 
diff --git a/openvino_xai/metrics/pointing_game.py b/openvino_xai/metrics/pointing_game.py
index 0f92042c..3b02b60a 100644
--- a/openvino_xai/metrics/pointing_game.py
+++ b/openvino_xai/metrics/pointing_game.py
@@ -7,15 +7,15 @@ class PointingGame:
     @staticmethod
     def pointing_game(saliency_map: np.ndarray, gt_bbox: Tuple[int, int, int, int]) -> bool:
         """
-        Implements the Pointing Game metric using bounding boxes.
+        Implements the Pointing Game metric using bounding boxes. Returns a boolean indicating
+        whether any of the most salient points falls within the ground truth bounding box.
 
-        Parameters:
-        - saliency_map: A 2D numpy array representing the saliency map for the image.
-        - gt_bbox: A tuple (x, y, w, h) representing the bounding box of the ground truth object.
-
-        Returns:
-        - hit: A boolean indicating if any of the most salient point falls within the ground truth bounding box.
+        :param saliency_map: A 2D numpy array representing the saliency map for the image.
+        :type saliency_map: np.ndarray
+        :param gt_bbox: A tuple (x, y, w, h) representing the bounding box of the ground truth object.
+        :type gt_bbox: Tuple[int, int, int, int]
         """
+        # TODO: Support a case with multiple bounding boxes for one image
        x, y, w, h = gt_bbox
 
         # Find the most salient points in the saliency map
@@ -31,17 +31,17 @@ def evaluate(self, saliency_maps: List[np.ndarray], gt_bboxes: List[Tuple[int, i
         """
         Evaluates the Pointing Game metric over a set of images.
 
-        Parameters:
-        - saliency_maps: A list of 2D numpy arrays representing the saliency maps.
-        - ground_truth_bbs: A list of bounding box of the ground truth object.
-
-        Returns:
-        - score: The Pointing Game accuracy score over the dataset.
+        :param saliency_maps: A list of 2D numpy arrays representing the saliency maps.
+        :type saliency_maps: List[np.ndarray]
+        :param gt_bboxes: A list of ground truth bounding boxes, one per image.
+        :type gt_bboxes: List[Tuple[int, int, int, int]]
         """
         assert len(saliency_maps) == len(
             gt_bboxes
         ), "Number of saliency maps and ground truth bounding boxes must match."
-        hits = sum([self.pointing_game(s_map, gt_map) for s_map, gt_map in zip(saliency_maps, gt_bboxes)])
+        hits = sum(
+            [self.pointing_game(s_map, image_gt_bboxes) for s_map, image_gt_bboxes in zip(saliency_maps, gt_bboxes)]
+        )
         score = hits / len(saliency_maps)
         return score
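The retained `PointingGame` can be smoke-tested without a model. A minimal usage sketch, assuming the `(x, y, w, h)` box convention from the docstrings above and the `evaluate` signature shown in this diff:

```python
import numpy as np

from openvino_xai.metrics.pointing_game import PointingGame

# Single most-salient pixel at (100, 100); the point lies inside the box under
# either row/column or x/y ordering, so the check is convention-agnostic.
saliency_map = np.zeros((224, 224), dtype=np.float32)
saliency_map[100, 100] = 1.0

score = PointingGame().evaluate([saliency_map], [(90, 90, 20, 20)])
assert score == 1.0
```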
diff --git a/tests/regression/test_regression.py b/tests/regression/test_regression.py
index cd412235..431836c1 100644
--- a/tests/regression/test_regression.py
+++ b/tests/regression/test_regression.py
@@ -2,7 +2,6 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import json
-from typing import Callable, List, Mapping
 
 import cv2
 import openvino as ov
@@ -11,21 +10,13 @@
 from openvino_xai import Task
 from openvino_xai.common.utils import retrieve_otx_model
 from openvino_xai.explainer.explainer import Explainer, ExplainMode
-from openvino_xai.explainer.utils import get_postprocess_fn, get_preprocess_fn, sigmoid
-from openvino_xai.methods.black_box.base import Preset
-from openvino_xai.metrics.adcc import ADCC
-from openvino_xai.metrics.insertion_deletion_auc import InsertionDeletionAUC
+from openvino_xai.explainer.utils import get_preprocess_fn
 from openvino_xai.metrics.pointing_game import PointingGame
 from tests.unit.explanation.test_explanation_utils import VOC_NAMES
 
 MODEL_NAME = "mlc_mobilenetv3_large_voc"
 
 
-def postprocess_fn(x: Mapping):
-    x = sigmoid(x)
-    return x[0]
-
-
 def load_gt_bboxes(class_name="person"):
     with open("tests/assets/cheetah_person_coco.json", "r") as f:
         coco_anns = json.load(f)
@@ -39,11 +30,6 @@ def load_gt_bboxes(class_name="person"):
     return category_gt_bboxes
 
 
-def postprocess_fn(x: Mapping):
-    x = sigmoid(x)
-    return x[0]
-
-
 class TestDummyRegression:
     image = cv2.imread("tests/assets/cheetah_person.jpg")
@@ -59,15 +45,10 @@ class TestDummyRegression:
 
     @pytest.fixture(autouse=True)
     def setup(self, fxt_data_root):
-        self.data_dir = fxt_data_root
-        retrieve_otx_model(self.data_dir, MODEL_NAME)
-        model_path = self.data_dir / "otx_models" / (MODEL_NAME + ".xml")
-        core = ov.Core()
-        model = core.read_model(model_path)
-        compiled_model = core.compile_model(model=model, device_name="AUTO")
-
-        self.auc = InsertionDeletionAUC(compiled_model, self.preprocess_fn, postprocess_fn)
-        self.adcc = ADCC(model, compiled_model, self.preprocess_fn, postprocess_fn)
+        data_dir = fxt_data_root
+        retrieve_otx_model(data_dir, MODEL_NAME)
+        model_path = data_dir / "otx_models" / (MODEL_NAME + ".xml")
+        model = ov.Core().read_model(model_path)
 
         self.explainer = Explainer(
             model=model,
@@ -91,16 +72,6 @@ def test_explainer_image(self):
         score = self.pointing_game.evaluate(saliency_maps, self.gt_bboxes)
         assert score > 0.5
 
-        insertion_auc_score = self.auc.insertion_auc_image(self.image, saliency_maps[0], self.steps)
-        assert insertion_auc_score >= 0.9
-
-        deletion_auc_score = self.auc.deletion_auc_image(self.image, saliency_maps[0], self.steps)
-        assert deletion_auc_score >= 0.2
-
-        adcc_score = self.adcc.adcc(self.image, saliency_maps[0])
-        # Why metric for real image and detector is worse then for a random image?
-        assert adcc_score >= 0.1
-
     def test_explainer_images(self):
         # TODO support multiple classes
         images = [self.image, self.image]
@@ -115,10 +86,5 @@ def test_explainer_images(self):
             saliency_map = list(explanation.saliency_map.values())[0]
             saliency_maps.append(saliency_map)
 
-        score = self.pointing_game.evaluate(saliency_maps, [self.gt_bboxes[0], self.gt_bboxes[0]])
+        score = self.pointing_game.evaluate(saliency_maps, self.gt_bboxes * 2)
         assert score > 0.5
-
-        insertion, deletion, delta = self.auc.evaluate(images, saliency_maps, self.steps)
-        assert insertion >= 0.9
-        assert deletion >= 0.2
-        assert delta >= 0.7
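Note: the module-level `postprocess_fn` removed above had been defined twice in this test module. For anyone re-running the deleted ADCC/AUC checks locally, it was a sigmoid over the raw logits followed by dropping the batch dimension:

```python
from typing import Mapping

from openvino_xai.explainer.utils import sigmoid


def postprocess_fn(x: Mapping):
    # Multi-label head: sigmoid over logits, then take the first batch element.
    x = sigmoid(x)
    return x[0]
```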
diff --git a/tests/unit/metrics/test_adcc.py b/tests/unit/metrics/test_adcc.py
deleted file mode 100644
index 6ba1a67b..00000000
--- a/tests/unit/metrics/test_adcc.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import json
-from typing import Callable, List, Mapping
-
-import cv2
-import numpy as np
-import openvino as ov
-import pytest
-
-from openvino_xai import Task
-from openvino_xai.common.utils import retrieve_otx_model
-from openvino_xai.explainer.explainer import Explainer, ExplainMode
-from openvino_xai.explainer.utils import get_postprocess_fn, get_preprocess_fn, sigmoid
-from openvino_xai.methods.black_box.base import Preset
-from openvino_xai.metrics.adcc import ADCC
-from openvino_xai.metrics.insertion_deletion_auc import InsertionDeletionAUC
-from openvino_xai.metrics.pointing_game import PointingGame
-from tests.unit.explanation.test_explanation_utils import VOC_NAMES
-
-MODEL_NAME = "mlc_mobilenetv3_large_voc"
-
-
-def postprocess_fn(x: Mapping):
-    x = sigmoid(x)
-    return x[0]
-
-
-class TestADCC:
-    image = cv2.imread("tests/assets/cheetah_person.jpg")
-    preprocess_fn = get_preprocess_fn(
-        change_channel_order=True,
-        input_size=(224, 224),
-        hwc_to_chw=True,
-    )
-
-    @pytest.fixture(autouse=True)
-    def setup(self, fxt_data_root):
-        self.data_dir = fxt_data_root
-        retrieve_otx_model(self.data_dir, MODEL_NAME)
-        model_path = self.data_dir / "otx_models" / (MODEL_NAME + ".xml")
-        core = ov.Core()
-        model = core.read_model(model_path)
-        compiled_model = core.compile_model(model=model, device_name="AUTO")
-        self.adcc = ADCC(model, compiled_model, self.preprocess_fn, postprocess_fn)
-
-        # self.explainer = Explainer(
-        #     model=model,
-        #     task=Task.CLASSIFICATION,
-        #     preprocess_fn=self.preprocess_fn,
-        #     explain_mode=ExplainMode.WHITEBOX,
-        # )
-
-    def test_adcc_random_image(self):
-        input_image = np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8)
-        saliency_map = np.random.rand(224, 224)
-
-        complexity_score = self.adcc.complexity(saliency_map)
-        assert complexity_score >= 0.2
-
-        model_output = self.adcc.predict(input_image)
-
-        average_drop_score = self.adcc.average_drop(input_image, saliency_map, model_output)
-        assert average_drop_score >= 0.2
-
-        coherency_score = self.adcc.coherency(input_image, saliency_map, model_output)
-        assert coherency_score >= 0.2
-
-        adcc_score = self.adcc.adcc(input_image, saliency_map)
-        assert adcc_score >= 0.5
diff --git a/tests/unit/metrics/test_auc.py b/tests/unit/metrics/test_auc.py
deleted file mode 100644
index 81ef941f..00000000
--- a/tests/unit/metrics/test_auc.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import json
-from typing import Callable, List, Mapping
-
-import cv2
-import numpy as np
-import openvino as ov
-import pytest
-
-from openvino_xai import Task
-from openvino_xai.common.utils import retrieve_otx_model
-from openvino_xai.explainer.explainer import Explainer, ExplainMode
-from openvino_xai.explainer.utils import get_postprocess_fn, get_preprocess_fn, sigmoid
-from openvino_xai.methods.black_box.base import Preset
-from openvino_xai.metrics.insertion_deletion_auc import InsertionDeletionAUC
-from openvino_xai.metrics.pointing_game import PointingGame
-from tests.unit.explanation.test_explanation_utils import VOC_NAMES
-
-MODEL_NAME = "mlc_mobilenetv3_large_voc"
-
-
-def postprocess_fn(x: Mapping):
-    x = sigmoid(x)
-    return x[0]
-
-
-class TestAUC:
-    image = cv2.imread("tests/assets/cheetah_person.jpg")
-    preprocess_fn = get_preprocess_fn(
-        change_channel_order=True,
-        input_size=(224, 224),
-        hwc_to_chw=True,
-    )
-    steps = 10
-
-    @pytest.fixture(autouse=True)
-    def setup(self, fxt_data_root):
-        self.data_dir = fxt_data_root
-        retrieve_otx_model(self.data_dir, MODEL_NAME)
-        model_path = self.data_dir / "otx_models" / (MODEL_NAME + ".xml")
-        core = ov.Core()
-        model = core.read_model(model_path)
-        compiled_model = core.compile_model(model=model, device_name="AUTO")
-        self.auc = InsertionDeletionAUC(compiled_model, self.preprocess_fn, postprocess_fn)
-
-        self.explainer = Explainer(
-            model=model,
-            task=Task.CLASSIFICATION,
-            preprocess_fn=self.preprocess_fn,
-            explain_mode=ExplainMode.WHITEBOX,
-        )
-
-    def test_auc_random_image(self):
-        input_image = np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8)
-        saliency_map = np.random.rand(224, 224)
-
-        insertion_auc_score = self.auc.insertion_auc_image(input_image, saliency_map, self.steps)
-        assert insertion_auc_score >= 0.2
-
-        deletion_auc_score = self.auc.deletion_auc_image(input_image, saliency_map, self.steps)
-        assert deletion_auc_score >= 0.2
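The deleted unit tests exercised each metric on random inputs as a cheap smoke test. The same pattern still applies to the retained metric; a hypothetical pytest sketch (not part of this diff):

```python
import numpy as np

from openvino_xai.metrics.pointing_game import PointingGame


def test_pointing_game_random_map():
    # A box covering the full image always contains the most salient point,
    # whatever the random map looks like, so the score must be 1.0.
    saliency_map = np.random.rand(224, 224)
    score = PointingGame().evaluate([saliency_map], [(0, 0, 224, 224)])
    assert score == 1.0
```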