diff --git a/demos/multi_camera/Dockerfile b/demos/multi_camera/Dockerfile new file mode 100644 index 00000000..04b97ef3 --- /dev/null +++ b/demos/multi_camera/Dockerfile @@ -0,0 +1,6 @@ +FROM ultralytics/yolov5:v6.2 + +# Install Norfair +RUN pip install git+https://github.com/tryolabs/norfair.git@master#egg=norfair + +WORKDIR /demo/src/ diff --git a/demos/multi_camera/README.md b/demos/multi_camera/README.md new file mode 100644 index 00000000..883728cd --- /dev/null +++ b/demos/multi_camera/README.md @@ -0,0 +1,64 @@ +# Multi-Camera Demo + +In this example, we show how to associate trackers from different synchronized videos in Norfair. + +Why would we want that? + +- When subjects that are being tracked go out of frame in one video, you might still be able to track them and recognize them as the same individuals if they are still visible in other videos. +- Map footage from one or many videos to a common reference frame. For example, if you are watching a soccer match, you might want to combine the information from different cameras and show the position of the players from a top-down view. + +## Example 1: Associating different videos + +This method will allow you to associate trackers from different footage of the same scene. You can use as many videos as you want. + +```bash +python3 demo.py video1.mp4 video2.mp4 video3.mp4 +``` + +A UI will appear to associate points in `video1.mp4` with points in the other videos, to set `video1.mp4` as a common frame of reference. + +If the cameras in the videos move, you should also use the `--use-motion-estimator-footage` flag to account for camera movement. + +## Example 2: Creating a new perspective + +This method will allow you to associate trackers from different footage of the same scene, and create a new perspective of the scene that didn't exist in those videos. You can use as many videos as you want, and you also need to provide one reference (either an image or video) corresponding to the new perspective. In the soccer example, the reference could be a top-down view of a soccer field. + +```bash +python3 demo.py video1.mp4 video2.mp4 video3.mp4 --reference path_to_reference_file +``` + +As before, you will have to use the UI. + +If the videos in which you are tracking have camera movement, you should also use the `--use-motion-estimator-footage` flag to account for camera movement in those videos. + +If you are using a video for the reference file, and the camera moves in the reference, then you should use the `--use-motion-estimator-reference` flag. + + +For additional settings, you may display the instructions using `python demo.py --help`. + + +## UI usage + +The UI has the purpose of annotating points that match in the reference and the footage (either images or videos), to estimate a transformation. + +To add a point, just click a pair of points (one from the footage window, and another from the reference window) and select `"Add"`. +To remove a point, just select the corresponding point at the bottom-left corner, and select `"Remove"`. +You can also ignore points, by clicking them and selecting `"Ignore"`. The transformation will not use ignored points. +To unignore points that have been previously ignored, just click them and select `"Unignore"`. + +To resize the footage or the reference image, you can use the `"+"` and `"-"` buttons in the `'Resize footage'` and `'Resize reference'` sections of the menu. + +If either the footage or the reference is a video, you can jump to future frames to pick points that match.
+For example, to jump 215 frames in the footage, just write that number next to `'Frames to skip (footage)'`, and select `"Skip frames"`. + +You can go back to the first frame of the video (in either footage or reference) by selecting "Reset video". + +Once a transformation has been estimated (you will know because the `"Finished"` button turns green), you can test it: +To test your transformation, select the `"Test"` mode, pick a point in either the reference or the footage, and see the associated point in the other window. +You can go back to the `"Annotate"` mode and keep adding more associated points until you are satisfied with the estimated transformation. + +You can also save the state (points and transformation you have) to a `.pkl` file using the `"Save"` button, so that you can later load that state from the UI with the `"Load"` button. + +You can swap the reference points with the footage points (inverting the transformation) with the `"Invert"` button. This is particularly useful if you have previously saved a state in which the reference was the current footage, and the footage was the current reference. + +Once you are happy with the transformation, just click on `"Finished"`. \ No newline at end of file diff --git a/demos/multi_camera/requirements.txt b/demos/multi_camera/requirements.txt new file mode 100644 index 00000000..b87b5972 --- /dev/null +++ b/demos/multi_camera/requirements.txt @@ -0,0 +1 @@ +yolov5==6.1.8 diff --git a/demos/multi_camera/run_gpu.sh b/demos/multi_camera/run_gpu.sh new file mode 100755 index 00000000..4175ff7d --- /dev/null +++ b/demos/multi_camera/run_gpu.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env -S bash -e +docker build . -t norfair-multicamera +docker run -it --rm \ + --gpus all \ + --shm-size=1gb \ + -v `realpath .`:/demo \ + norfair-multicamera \ + bash diff --git a/demos/multi_camera/src/demo.py b/demos/multi_camera/src/demo.py new file mode 100644 index 00000000..4c7e8c1c --- /dev/null +++ b/demos/multi_camera/src/demo.py @@ -0,0 +1,677 @@ +import argparse +import os +import pickle +from logging import warning + +import cv2 +import numpy as np +import torch + +from norfair import Palette, Video, get_cutout, mean_manhattan +from norfair.camera_motion import HomographyTransformation, MotionEstimator +from norfair.common_reference_ui import set_reference +from norfair.drawing.drawer import Drawer +from norfair.multi_camera import MultiCameraClusterizer +from norfair.tracker import Detection, Tracker + + +def embedding_distance_detection_and_tracker(detection, tracker): + embedding_correlations = [] + past_detections = tracker.past_detections + [tracker.last_detection] + for past_det in past_detections: + if past_det.embedding is None: + continue + + embedding_correlations.append( + 1 + - cv2.compareHist( + past_det.embedding, detection.embedding, cv2.HISTCMP_CORREL + ) + ) + + if len(embedding_correlations) > 0: + return np.mean(embedding_correlations) + else: + return 2 + + +def embedding_distance(tracker1, tracker2): + embedding_correlations = [] + past_detections_1 = tracker1.past_detections + [tracker1.last_detection] + past_detections_2 = tracker2.past_detections + [tracker2.last_detection] + + for past_det_1 in past_detections_1: + if past_det_1.embedding is None: + continue + for past_det_2 in past_detections_2: + if past_det_2.embedding is None: + continue + + embedding_correlations.append( + 1 + - cv2.compareHist( + past_det_1.embedding, past_det_2.embedding, cv2.HISTCMP_CORREL + ) + ) + + if len(embedding_correlations) > 0: + return
np.mean(embedding_correlations) + else: + return 2 + + +def get_hist(image): + hist = cv2.calcHist( + [cv2.cvtColor(image, cv2.COLOR_BGR2Lab)], + [0, 1], + None, + [128, 128], + [0, 256, 0, 256], + ) + return cv2.normalize(hist, hist).flatten() + + +def draw_feet( + frame, + clusters, + initial_transformations, + reference_motion_transformation=None, + thickness=None, + radius=None, + text_size=None, + text_thickness=None, + draw_cluster_ids=True, +): + if thickness is None: + thickness = -1 + if radius is None: + radius = int(round(max(max(frame.shape) * 0.005, 1))) + + for cluster in clusters: + color = Palette.choose_color(cluster.id) + cluster_center = 0 + cluster_is_alive = False + for tracked_object in cluster.tracked_objects.values(): + if tracked_object.live_points.any(): + cluster_is_alive = True + point = get_absolute_feet( + tracked_object, initial_transformations[tracked_object.camera_name] + ) + if reference_motion_transformation is not None: + point = reference_motion_transformation.abs_to_rel( + np.array([point]) + )[0] + + cluster_center += point + frame = Drawer.circle( + frame, + tuple(point.astype(int)), + radius=radius, + color=color, + thickness=thickness, + ) + + if draw_cluster_ids and cluster_is_alive: + cluster_center /= len(cluster.tracked_objects) + frame = Drawer.text( + frame, + f"{cluster.id}", + tuple(cluster_center.astype(int)), + size=text_size, + color=color, + thickness=text_thickness, + ) + return frame + + +def get_embedding(bbox, frame): + t_shirt_bbox_x = 0.7 * bbox[:, 0] + 0.3 * bbox[::-1, 0] + top = np.min(bbox[:, 1]) + bottom = np.max(bbox[:, 1]) + t_shirt_bbox_y = np.array([top * 0.9 + bottom * 0.1, top * 0.5 + bottom * 0.5]) + + bbox[:, 0] = t_shirt_bbox_x + bbox[:, 1] = t_shirt_bbox_y + + cut = get_cutout(bbox, frame) + if cut.shape[0] > 0 and cut.shape[1] > 0: + return get_hist(cut) + else: + return None + + +def draw_cluster_bboxes( + images, + clusters, + draw_cluster_ids=True, + thickness=None, + text_thickness=None, + text_size=None, +): + for cluster in clusters: + color = Palette.choose_color(cluster.id) + for path, tracked_object in cluster.tracked_objects.items(): + if tracked_object.live_points.any(): + frame = images[path] + + if thickness is None: + current_thickness = max(int(max(frame.shape) / 500), 1) + else: + current_thickness = thickness + + # draw the bbox + points = tracked_object.estimate.astype(int) + frame = Drawer.rectangle( + frame, + tuple(points), + color=color, + thickness=current_thickness, + ) + + if draw_cluster_ids: + text = f"{cluster.id}" + + # the anchor will become the bottom-left of the text, + # we select-top left of the bbox compensating for the thickness of the box + text_anchor = ( + points[0, 0] - current_thickness // 2, + points[0, 1] - current_thickness // 2 - 1, + ) + + frame = Drawer.text( + frame, + text, + position=text_anchor, + size=text_size, + color=color, + thickness=text_thickness, + ) + images[path] = frame + return images + + +def get_mask_from_boxes(frame, boxes): + # create a mask of ones + mask = np.ones(frame.shape[:2], frame.dtype) + # set to 0 all detections + for b in boxes: + i = b.astype(int) + mask[i[0, 1] : i[1, 1], i[0, 0] : i[1, 0]] = 0 + return mask + + +def yolo_detections_to_norfair_detections(yolo_detections, frame): + norfair_detections = [] + boxes = [] + detections_as_xyxy = yolo_detections.xyxy[0] + for detection_as_xyxy in detections_as_xyxy: + detection_as_xyxy = detection_as_xyxy.cpu().numpy() + bbox = np.array( + [ + [detection_as_xyxy[0].item(), 
detection_as_xyxy[1].item()], + [detection_as_xyxy[2].item(), detection_as_xyxy[3].item()], + ] + ) + boxes.append(bbox) + points = bbox.copy() + scores = np.array([detection_as_xyxy[4], detection_as_xyxy[4]]) + embedding = get_embedding(bbox, frame) + + norfair_detections.append( + Detection( + points=points, + scores=scores, + label=detection_as_xyxy[-1].item(), + embedding=embedding, + ) + ) + + return norfair_detections, boxes + + +def get_absolute_feet(tracked_object, initial_transformation): + bbox_relative = tracked_object.estimate + feet = np.array([[bbox_relative[:, 0].mean(), bbox_relative[:, 1].max()]]) + try: + return initial_transformation.rel_to_abs(tracked_object.rel_to_abs(feet))[0] + except AttributeError: + return initial_transformation.rel_to_abs(feet)[0] + + +def run(): + parser = argparse.ArgumentParser(description="Track objects in a video.") + parser.add_argument("files", type=str, nargs="+", help="Video files to process") + parser.add_argument( + "--reference", type=str, default=None, help="Image or Video for reference" + ) + parser.add_argument( + "--use-motion-estimator-reference", + action="store_true", + help="If your reference is a video where the camera might move, you should use a motion estimator.", + ) + parser.add_argument( + "--resolution", + type=int, + nargs=2, + default=[360, 288], + help="Output resolution for each subblock", + ) + parser.add_argument( + "--ui-width", + type=int, + default=None, + help="Image width in the UI", + ) + parser.add_argument( + "--ui-height", + type=int, + default=None, + help="Image height in the UI", + ) + parser.add_argument( + "--use-motion-estimator-footage", + action="store_true", + help="If your footage are a video where the camera might move, you should use a motion estimator. This argument will apply the motion estimator for all your videos indifferently.", + ) + parser.add_argument( + "--model", + type=str, + default="yolov5n", + help="YOLO model to use, possible values are yolov5n, yolov5s, yolov5m, yolov5l, yolov5x", + ) + parser.add_argument( + "--confidence-threshold", + type=float, + help="Confidence threshold of detections", + default=0.2, + ) + parser.add_argument( + "--foot-distance-threshold", + type=float, + default=0.2, + help="Maximum spatial distance that two tracked objects of different videos can have in order to match", + ) + parser.add_argument( + "--reid-embedding-correlation-threshold", + type=float, + default=0.3, + help="Threshold for embedding match during a reid phase after object has been lost. (The 1-correlation distance we use is bounded in [0, 2])", + ) + parser.add_argument( + "--embedding-correlation-threshold", + type=float, + default=1, + help="Threshold for embedding match. (The 1-correlation distance we use is bounded in [0, 2]", + ) + parser.add_argument( + "--max-votes-grow", + type=int, + default=3, + help="Amount of votes we need before increasing the size of a cluster", + ) + parser.add_argument( + "--max-votes-split", + type=int, + default=15, + help="Amount of votes we need before decreasing the size of a cluster", + ) + parser.add_argument( + "--memory", + type=int, + default=3, + help="How long into the past should we consider past clusters", + ) + parser.add_argument( + "--joined-distance", + type=str, + default="mean", + help="How a distance between clusters is done when associating trackers from different videos. 
Either 'mean' or 'max'", + ) + parser.add_argument( + "--keep-id-criteria", + type=str, + default="hit_counter", + help="When splitting a cluster, we have to choose which subcluster will keep the id of the old cluster. Either 'age' or 'hit_counter'", + ) + parser.add_argument( + "--initialization-delay", + type=float, + default=19, + help="Min detections needed to start the tracked object", + ) + parser.add_argument( + "--clusterizer-initialization-delay", + type=int, + default=15, + help="Minimum age of a cluster (or it's objects) to be returned", + ) + parser.add_argument( + "--maximum-time-since-last-update", + type=int, + default=1, + help="Filter tracked objects that were not detected recently to not be considered in the distance function of the clusterizer", + ) + parser.add_argument( + "--hit-counter-max", + type=int, + default=20, + help="Max iteration the tracked object is kept after when there are no detections", + ) + parser.add_argument( + "--reid-hit-counter-max", + type=int, + default=150, + help="Maximum amount of frames trying to reidentify the object. (Use a value >=0)", + ) + parser.add_argument( + "--nms-threshold", type=float, help="Iou threshold for detector", default=0.15 + ) + parser.add_argument( + "--image-size", type=int, help="Size of the images for detector", default=480 + ) + parser.add_argument( + "--classes", type=int, nargs="+", default=[0], help="Classes to track" + ) + parser.add_argument( + "--max-points", + type=int, + default=500, + help="Max points sampled to calculate camera motion", + ) + parser.add_argument( + "--min-distance", + type=float, + default=7, + help="Min distance between points sampled to calculate camera motion", + ) + parser.add_argument( + "--no-mask-detections", + dest="mask_detections", + action="store_false", + default=True, + help="By default we don't sample regions where objects were detected when estimating camera motion. 
Pass this flag to disable this behavior", + ) + parser.add_argument( + "--output-name", + default=None, + help="Name of the output file", + ) + + args = parser.parse_args() + + model = torch.hub.load("ultralytics/yolov5", args.model) + model.conf_threshold = 0 + model.iou_threshold = args.nms_threshold + model.image_size = args.image_size + model.classes = args.classes + + if args.mask_detections: + + def mask_generator(frame): + detections = model(frame) + detections, boxes = yolo_detections_to_norfair_detections(detections, frame) + return get_mask_from_boxes(frame, boxes) + + else: + mask_generator = None + + videos = {} + trackers = {} + initial_transformations = {} + tracked_objects = {} + motion_estimators = {} + images = {} + + motion_estimator = MotionEstimator( + max_points=args.max_points, + min_distance=args.min_distance, + ) + + motion_estimator_reference = None + motion_estimator_footage = None + + first_video_is_reference = args.reference is None + if args.use_motion_estimator_footage: + motion_estimator_footage = motion_estimator + for path in args.files: + motion_estimators[path] = motion_estimator + + if args.use_motion_estimator_reference or ( + args.use_motion_estimator_footage and first_video_is_reference + ): + motion_estimator_reference = motion_estimator + + # set the initial transformation for all the videos (common reference) + if first_video_is_reference: + reference_path = args.files[0] + initial_transformations[reference_path] = HomographyTransformation(np.eye(3)) + else: + reference_path = args.reference + for path in args.files[first_video_is_reference:]: + + initial_transformations[path] = set_reference( + reference_path, + path, + motion_estimator_footage=motion_estimator_footage, + motion_estimator_reference=motion_estimator_reference, + mask_generator=mask_generator, + image_width=args.ui_width, + image_height=args.ui_height, + ) + + # initialize the reference if it exists + reference = {"video": None, "image": None, "motion_estimator": None} + image_reference = None + if not first_video_is_reference: + # if failing to read it as an image, try to read it as a video + image_reference = cv2.imread(args.reference) + reference["image"] = image_reference + if image_reference is None: + video = Video(input_path=args.reference) + image_reference = next(video.__iter__()) + reference["video"] = video + reference["motion_estimator"] = motion_estimator_reference + + # now initialize the videos and their trackers + fps = None + total_frames = None + distance_functions = {} + + def get_distance_function_from_threshold(threshold): + # compare embeddings if spatial distance is small enough + def conditional_embedding_to_spatial(detection, tracked_object): + if mean_manhattan(detection, tracked_object) < threshold: + return embedding_distance_detection_and_tracker( + detection, tracked_object + ) + else: + return 2 + + return conditional_embedding_to_spatial + + for path in args.files: + extension = os.path.splitext(path)[1] + if args.output_name is None: + output_path = f"output_multi_camera_demo{extension}" + else: + output_path = args.output_name + + video = Video(input_path=path, output_path=output_path) + + # check the fps and total frames + if fps is None: + fps = video.output_fps + total_frames = int(video.video_capture.get(cv2.CAP_PROP_FRAME_COUNT)) + else: + current_fps = video.output_fps + current_total_frames = int( + video.video_capture.get(cv2.CAP_PROP_FRAME_COUNT) + ) + if current_fps != fps: + warning( + f"{args.files[0]} is at {fps} FPS, but {path} is at
{current_fps} FPS." + ) + if total_frames != current_total_frames: + warning( + f"{args.files[0]} has {total_frames} frames, but {path} has {current_total_frames} frames." + ) + if image_reference is None: + image_reference = next(video.__iter__()) + height = image_reference.shape[0] + else: + height = next(video.__iter__()).shape[0] + + videos[path] = video + distance_functions[path] = get_distance_function_from_threshold(height / 10) + trackers[path] = Tracker( + distance_function=distance_functions[path], + detection_threshold=args.confidence_threshold, + distance_threshold=args.embedding_correlation_threshold, + initialization_delay=args.initialization_delay, + hit_counter_max=args.hit_counter_max, + camera_name=path, + past_detections_length=10, + reid_distance_function=embedding_distance, + reid_distance_threshold=args.reid_embedding_correlation_threshold, + reid_hit_counter_max=args.reid_hit_counter_max, + ) + tracked_objects[path] = [] + + big_black_frame = np.zeros( + tuple( + [ + args.resolution[1] + * ((len(args.files) + (not first_video_is_reference) + 1) // 2), + args.resolution[0] * 2, + 3, + ] + ), + dtype=np.uint8, + ) + + height_reference = image_reference.shape[0] + + def normalized_foot_distance(tracker1, tracker2): + return ( + np.linalg.norm( + get_absolute_feet( + tracker1, initial_transformations[tracker1.camera_name] + ) + - get_absolute_feet( + tracker2, initial_transformations[tracker2.camera_name] + ) + ) + / height_reference + ) + + def clusterizer_distance(tracker1, tracker2): + # if the foot distance is small + # then compare the embeddings + if normalized_foot_distance(tracker1, tracker2) < args.foot_distance_threshold: + return embedding_distance(tracker1, tracker2) + else: + return 2 + + multicamera_clusterizer = MultiCameraClusterizer( + clusterizer_distance, + args.embedding_correlation_threshold, + join_distance_by=args.joined_distance, + max_votes_grow=args.max_votes_grow, + max_votes_split=args.max_votes_split, + memory=args.memory, + initialization_delay=args.clusterizer_initialization_delay, + reid_hit_counter_max=args.reid_hit_counter_max, + maximum_time_since_last_update=args.maximum_time_since_last_update, + keep_id_criteria=args.keep_id_criteria, + ) + + while True: + try: + big_black_frame_copy = big_black_frame.copy() + for path in args.files: + + frame = next(videos[path].__iter__()) + images[path] = frame + + detections = model(frame) + detections, boxes = yolo_detections_to_norfair_detections( + detections, + frame, + ) + if args.mask_detections: + mask = get_mask_from_boxes(frame, boxes) + else: + mask = None + + if args.use_motion_estimator_footage: + coord_transformations = motion_estimators[path].update(frame, mask) + else: + coord_transformations = None + + tracked_objects[path] = trackers[path].update( + detections=detections, coord_transformations=coord_transformations + ) + + clusters = multicamera_clusterizer.update(list(tracked_objects.values())) + + images = draw_cluster_bboxes(images, clusters) + + # fit images to single image + for n, path in enumerate(args.files): + row = n // 2 + column = n % 2 + frame = images[path] + frame = cv2.resize( + frame, tuple(args.resolution), interpolation=cv2.INTER_AREA + ) + + height, width, channels = frame.shape + + big_black_frame_copy[ + row * height : (row + 1) * height, + column * width : (column + 1) * width, + ] = frame + + if not first_video_is_reference: + coord_transformations = None + if reference["video"] is not None: + frame = next(reference["video"].__iter__()) + + if
reference["motion_estimator"] is not None: + if args.mask_detections: + mask = mask_generator(frame) + else: + mask = None + coord_transformations = reference["motion_estimator"].update( + frame, mask + ) + else: + frame = reference["image"].copy() + + frame = draw_feet( + frame, clusters, initial_transformations, coord_transformations + ) + + frame = cv2.resize( + frame, tuple(args.resolution), interpolation=cv2.INTER_AREA + ) + + height, width, channels = frame.shape + + row = len(args.files) // 2 + is_at_center = bool((len(args.files) + 1) % 2) + + if is_at_center: + x0 = args.resolution[0] // 2 + else: + x0 = args.resolution[0] + + big_black_frame_copy[row * height :, x0 : x0 + width] = frame + + videos[args.files[0]].write(big_black_frame_copy) + except StopIteration: + break + + +if __name__ == "__main__": + run() diff --git a/norfair/camera_motion.py b/norfair/camera_motion.py index cdf6a8ba..66fdfd59 100644 --- a/norfair/camera_motion.py +++ b/norfair/camera_motion.py @@ -99,7 +99,7 @@ def __init__( ) -> None: self.bin_size = bin_size self.proportion_points_used_threshold = proportion_points_used_threshold - self.data = None + self.transformation = TranslationTransformation(0) def __call__( self, curr_pts: np.ndarray, prev_pts: np.ndarray ) -> @@ -119,14 +119,15 @@ def __call__( flow_mode = unique_flows[max_index] try: - flow_mode += self.data + flow_mode += self.transformation.movement_vector except TypeError: pass + current_transformation = TranslationTransformation(flow_mode) if update_prvs: - self.data = flow_mode + self.transformation = current_transformation - return update_prvs, TranslationTransformation(flow_mode) + return update_prvs, current_transformation # @@ -204,7 +205,8 @@ def __init__( confidence: float = 0.995, proportion_points_used_threshold: float = 0.9, ) -> None: - self.data = None + + self.transformation = HomographyTransformation(np.eye(3)) if method is None: method = cv2.RANSAC self.method = method @@ -227,10 +229,7 @@ def __call__( "The homography couldn't be computed in this frame " "due to low amount of points" ) - if isinstance(self.data, np.ndarray): - return True, HomographyTransformation(self.data) - else: - return True, None + return True, self.transformation homography_matrix, points_used = cv2.findHomography( prev_pts, @@ -246,14 +245,18 @@ def __call__( update_prvs = proportion_points_used < self.proportion_points_used_threshold try: - homography_matrix = homography_matrix @ self.data + homography_matrix = ( + homography_matrix @ self.transformation.homography_matrix + ) except (TypeError, ValueError): pass + current_transformation = HomographyTransformation(homography_matrix) + if update_prvs: - self.data = homography_matrix + self.transformation = current_transformation - return update_prvs, HomographyTransformation(homography_matrix) + return update_prvs, current_transformation # diff --git a/norfair/common_reference_ui.py b/norfair/common_reference_ui.py new file mode 100644 index 00000000..8b5e0da2 --- /dev/null +++ b/norfair/common_reference_ui.py @@ -0,0 +1,1369 @@ +import os +import pickle +import tkinter as tk +import tkinter.filedialog +from copy import deepcopy + +import cv2 +import numpy as np +from PIL import Image, ImageTk, UnidentifiedImageError + +from norfair import Video +from norfair.camera_motion import HomographyTransformationGetter, TransformationGetter + + +def resize_image(image, desired_width=None, desired_height=None): + aspect_ratio = image.height / image.width + + if (desired_width is None) and (desired_height is not
None): + desired_width = int(desired_height / aspect_ratio) + elif (desired_width is not None) and (desired_height is None): + desired_height = int(aspect_ratio * desired_width) + + return image.resize((desired_width, desired_height), Image.LANCZOS) + + +def set_reference( + reference: str, + footage: str, + transformation_getter: TransformationGetter = None, + mask_generator=None, + image_width=None, + image_height=None, + motion_estimator_footage=None, + motion_estimator_reference=None, +): + """ + Get a transformation to relate the coordinate transformations between footage absolute frame (first image in footage) and reference absolute frame (first image in reference). + + UI usage: + + The UI has the purpose of annotating points that match in the reference and the footage (either images or videos), to estimate a transformation. + + To add a point, just click a pair of points (one from the footage window, and another from the reference window) and select `"Add"`. + To remove a point, just select the corresponding point at the bottom-left corner, and select `"Remove"`. + You can also ignore points, by clicking them and selecting `"Ignore"`. The transformation will not use ignored points. + To unignore points that have been previously ignored, just click them and select `"Unignore"`. + + To resize the footage or the reference image, you can use the `"+"` and `"-"` buttons in the `'Resize footage'` and `'Resize reference'` sections of the menu. + + If either the footage or the reference is a video, you can jump to future frames to pick points that match. + For example, to jump 215 frames in the footage, just write that number next to `'Frames to skip (footage)'`, and select `"Skip frames"`. + + You can go back to the first frame of the video (in either footage or reference) by selecting "Reset video". + + Once a transformation has been estimated (you will know because the `"Finished"` button turns green), you can test it: + To test your transformation, select the `"Test"` mode, pick a point in either the reference or the footage, and see the associated point in the other window. + You can go back to the `"Annotate"` mode and keep adding more associated points until you are satisfied with the estimated transformation. + + You can also save the state (points and transformation you have) to a `.pkl` file using the `"Save"` button, so that you can later load that state from the UI with the `"Load"` button. + + You can swap the reference points with the footage points (inverting the transformation) with the `"Invert"` button. This is particularly useful if you have previously saved a state in which the reference was the current footage, and the footage was the current reference. + + Once you are happy with the transformation, just click on `"Finished"`. + + Arguments: + - reference: str + Path to the reference image or video + + - footage: str + Path to the footage image or video + + - transformation_getter: TransformationGetter, optional + TransformationGetter defining the type of transformation you want to fix between reference and footage. + Since the transformation can be really far from identity (given that the perspectives in footage and reference can be immensely different), + and also knowing that outliers shouldn't be common given that a human is picking the points, it is recommended to use a high ransac_reproj_threshold (~ 1000) + + - mask_generator: optional function that creates a mask (np.ndarray) from a PIL image.
This mask is then provided to the corresponding MotionEstimator to avoid + sampling points within the mask. + + - image_width: int, optional + Width of the image of the UI. If the height is not provided, then it will be calculated so that the aspect ratio is preserved. + + - image_height: int, optional + Height of the image of the UI. If the width is not provided, then it will be calculated so that the aspect ratio is preserved. + + - motion_estimator_footage: MotionEstimator, optional + When using videos for the footage, you can provide a MotionEstimator to relate the coordinates in all the frames in the video. + The motion estimator is only useful if the camera in the video of the footage can move. Otherwise, avoid using it. + + - motion_estimator_reference: MotionEstimator, optional + When using videos the reference, you can provide a MotionEstimator to relate the coordinates in all the frames in the video. + The motion estimator is only useful if the camera in the video of the reference can move. Otherwise, avoid using it. + + returns: CoordinatesTransformation instance + The provided transformation_getter will fit a transformation from the reference (as 'absolute') to the footage (as 'relative'). + CoordinatesTransformation.abs_to_rel will give the transformation from the first frame in the reference to the first frame in the footage. + CoordinatesTransformation.rel_to_abs will give the transformation from the first frame in the footage to the first frame in the reference. + """ + + global window + + global transformation + + global button_finish + global button_says_ignore + global button_ignore + + global reference_point_canvas + global footage_point_canvas + + global canvas_reference + global canvas_footage + + global reference_original_size + global reference_canvas_size + global footage_original_size + global footage_canvas_size + + global footage_point + global reference_point + + global skipper + + global points + global points_sampled + + global mode_annotate + + global frame_options_annotations + global handling_mark_functions + global handle_mark_annotation + + if transformation_getter is None: + transformation_getter = HomographyTransformationGetter( + method=cv2.RANSAC, + ransac_reproj_threshold=1000, + max_iters=2000, + confidence=0.995, + proportion_points_used_threshold=0.9, + ) + + skipper = {} + + radius = None + if (image_width is None) and (image_height is None): + image_height = 450 + elif (image_width is not None) and (image_height is None): + radius = max(int(image_width / 100), 1) + if radius is None: + radius = max(int(image_height / 100), 1) + + points = {} + points_sampled = len(points) + + transformation = None + + window = tk.Tk() + window.title("Norfair - Set Reference Coordinates") + window.configure(bg="LightSkyBlue1") + + frame_options = tk.Frame() + frame_images = tk.Frame() + frame_options_annotations = tk.Frame(master=frame_options) + + # utilities + + def estimate_transformation(points): + global button_finish + prev_pts = np.array( + [point["reference"] for point in points.values() if not point["ignore"]] + ) # use current points as reference points + curr_pts = np.array( + [point["footage"] for point in points.values() if not point["ignore"]] + ) # use previous points as footage points (to deduce reference -> footage) + + button_finish.configure(fg="black", highlightbackground="green") + try: + transformation = transformation_getter(curr_pts, prev_pts)[1] + except: + transformation = None + + if transformation is not None: + 
button_finish.configure(fg="black", highlightbackground="green") + else: + button_finish.configure(fg="grey", highlightbackground="SystemButtonFace") + return transformation + + def test_transformation( + change_of_coordinates, + canvas, + point, + original_size, + canvas_size, + motion_transformation=None, + ): + point_in_new_coordinates = change_of_coordinates(np.array([point]))[0] + + try: + point_in_new_coordinates = motion_transformation.rel_to_abs( + np.array([point_in_new_coordinates]) + )[0] + except AttributeError: + pass + + point_in_canvas_coordinates = np.multiply( + point_in_new_coordinates, + np.array( + [canvas_size[0] / original_size[0], canvas_size[1] / original_size[1]] + ), + ).astype(int) + + draw_point_in_canvas(canvas, point_in_canvas_coordinates, "blue") + + def remove_drawings_in_canvas(canvas): + if len(canvas.find_withtag("myPoint")) > 0: + canvas.delete("myPoint") + + def draw_point_in_canvas(canvas, point, color="green"): + remove_drawings_in_canvas(canvas) + canvas.create_oval( + point[0] - radius, + point[1] - radius, + point[0] + radius, + point[1] + radius, + fill=color, + tags="myPoint", + ) + + ######### MAKE SUBBLOCK TO FINISH + + frame_options_finish = tk.Frame(master=frame_options) + + space = tk.Label( + master=frame_options_finish, + text="", + foreground="white", + width=40, + height=1, + ) + button_finish = tk.Button( + master=frame_options_finish, + text="Finished!", + width=30, + height=1, + bg="blue", + fg="gray", + command=lambda: handle_finish(), + ) + + def handle_finish(): + global window + global transformation + + if transformation is not None: + window.destroy() + for info in skipper.values(): + if info["video"] is not None: + info["video"].video_capture.release() + cv2.destroyAllWindows() + return transformation + else: + print("Can't leave without estimating the transformation.") + + space.pack(side=tk.TOP) + button_finish.pack(side=tk.TOP) + frame_options_finish.pack(side=tk.BOTTOM) + + ###### MAKE SUBBLOCK TO SEE POINTS AND CHOOSE THEM + def handle_mark_annotation(key): + def handle_annotation(event): + global skipper + global reference_original_size + global reference_canvas_size + global footage_original_size + global footage_canvas_size + global button_says_ignore + global button_ignore + global points + + points[key]["marked"] = not points[key]["marked"] + + marked_points = [ + point["ignore"] for point in points.values() if point["marked"] + ] + + if (len(marked_points) > 0) and (all(marked_points)): + button_says_ignore = False + button_ignore.configure(text="Unignore") + else: + button_says_ignore = True + button_ignore.configure(text="Ignore") + + if points[key]["marked"]: + points[key]["button"].configure(fg="black", highlightbackground="red") + + try: + footage_point_in_rel_coords = skipper["footage"][ + "motion_transformation" + ].rel_to_abs(np.array([points[key]["footage"]]))[0] + except AttributeError: + footage_point_in_rel_coords = points[key]["footage"] + footage_point_in_rel_coords = np.multiply( + footage_point_in_rel_coords, + np.array( + [ + footage_canvas_size[0] / footage_original_size[0], + footage_canvas_size[1] / footage_original_size[1], + ] + ), + ).astype(int) + + try: + reference_point_in_rel_coords = skipper["reference"][ + "motion_transformation" + ].rel_to_abs(np.array([points[key]["reference"]]))[0] + except AttributeError: + reference_point_in_rel_coords = points[key]["reference"] + + reference_point_in_rel_coords = np.multiply( + reference_point_in_rel_coords, + np.array( + [ + 
reference_canvas_size[0] / reference_original_size[0], + reference_canvas_size[1] / reference_original_size[1], + ] + ), + ).astype(int) + + if points[key]["ignore"]: + color = "gray" + else: + color = "red" + + draw_point_in_canvas( + canvas_footage, footage_point_in_rel_coords, color=color + ) + draw_point_in_canvas( + canvas_reference, reference_point_in_rel_coords, color=color + ) + else: + if points[key]["ignore"]: + points[key]["button"].configure( + fg="gray", highlightbackground="gray" + ) + else: + points[key]["button"].configure( + fg="black", highlightbackground="SystemButtonFace" + ) + + points[key]["button"].configure( + fg="black", highlightbackground="SystemButtonFace" + ) + canvas_footage.delete("myPoint") + canvas_reference.delete("myPoint") + + return handle_annotation + + handling_mark_functions = {} + for key, couple in points.items(): + + handling_mark_functions[key] = handle_mark_annotation(key) + + new_button = tk.Button( + master=frame_options_annotations, + text=f"{key}: reference ({couple['reference'][0]}, {couple['reference'][1]}) <-> footage ({couple['footage'][0]}, {couple['footage'][1]})", + width=35, + height=1, + bg="blue", + fg="black", + highlightbackground="SystemButtonFace", + ) + + new_button.bind("