From c1347d4ac540f4869759aa62735fecbbbedb81c5 Mon Sep 17 00:00:00 2001
From: Danglewood <85772166+deeleeramone@users.noreply.github.com>
Date: Wed, 22 Jan 2025 08:14:41 -0800
Subject: [PATCH] Handle more edge cases when parsing MD&A tables

---
 .../models/management_discussion_analysis.py  | 80 +++++++++++++------
 1 file changed, 56 insertions(+), 24 deletions(-)

diff --git a/openbb_platform/providers/sec/openbb_sec/models/management_discussion_analysis.py b/openbb_platform/providers/sec/openbb_sec/models/management_discussion_analysis.py
index d59f3be1369..85d962cdf4d 100644
--- a/openbb_platform/providers/sec/openbb_sec/models/management_discussion_analysis.py
+++ b/openbb_platform/providers/sec/openbb_sec/models/management_discussion_analysis.py
@@ -206,8 +206,9 @@ def transform_data(  # noqa: PLR0912
             filing_str,
             include_tables=True,
             include_comments=True,
-            include_formatting=False,
+            include_formatting=True,
             include_images=True,
+            include_links=True,
         )
 
         if not extracted_text:
@@ -216,16 +217,21 @@ def transform_data(  # noqa: PLR0912
         def is_table_header(line: str) -> bool:
             """Check if line is a table header"""
             return (
-                all(
-                    not char.isnumeric()
-                    for char in line.replace("(", "")
-                    .replace(")", "")
-                    .replace(",", "")
-                    .replace(" ", "")
-                    .replace("|", "")
+                (
+                    all(
+                        not char.isnumeric()
+                        for char in line.replace("(", "")
+                        .replace(")", "")
+                        .replace(",", "")
+                        .replace(" ", "")
+                        .replace("|", "")
+                        .replace("/", "")
+                    )
+                    and line.replace("|", "").replace("-", "").strip() != ""
                 )
-                and line.replace("|", "").replace("-", "").strip() != ""
-            ) or line.replace("|", "").replace(" ", "").endswith(":")
+                or line.replace("|", "").replace(" ", "").endswith(":")
+                or line.replace("|", "").isupper()
+            )
 
         def insert_cell_dividers(line):
             cells = line.strip().split("|")
@@ -246,17 +252,25 @@ def insert_cell_dividers(line):
                 ):
                     # Remove the dash and insert a divider before it
                     new_cell = re.sub(r"[—\-–]+$", "", new_cell).strip() + " | —"
+
                 elif (
-                    re.search("[A-Za-z]", new_cell)
-                    and re.search("[0-9]", new_cell)
-                    and re.search(r"[A-Za-z]\s+[0-9]", new_cell)
+                    re.search(r"[A-Za-z]", new_cell)
+                    and re.search(
+                        r"[\d\(\)]", new_cell
+                    )  # cell contains a digit or a parenthesis
+                    and re.search(
+                        r"[A-Za-z]\s+(\(?\d|\(\d+\))", new_cell
+                    )  # letter, whitespace, then a digit optionally preceded by "("
                     and "thru" not in new_cell.lower()
                     and "through" not in new_cell.lower()
                     and "outstanding" not in new_cell.lower()
                 ):
-                    # Handle cases with spaces between letters and numbers
-                    new_cell = re.sub(r"(?<=[A-Za-z])\s+(?=[0-9])", " |", new_cell)
-                    new_cell = re.sub(r"(?<=[A-Za-z])(?=[0-9])", "|", new_cell)
+                    # Handle cases with spaces between letters and numbers/parenthesized numbers
+                    new_cell = re.sub(
+                        r"([A-Za-z])\s+(\(?\d|\(\d+\))", r"\1 |\2", new_cell
+                    )
+                    new_cell = re.sub(r"([A-Za-z])(\(?\d|\(\d+\))", r"\1|\2", new_cell)
+
                 # Insert divider between consecutive numbers
                 if (
                     re.search(
@@ -275,10 +289,10 @@ def insert_cell_dividers(line):
             return "|".join(new_cells)
 
         new_lines: list = []
-        starting_line = "Item 2."
-        annual_start = "Item 7."
+        starting_line = "Item 2"
+        annual_start = "Item 7"
         ending_line = "Item 6"
-        annual_end = "Item 8. "
+        annual_end = "Item 8"
         found_start = False
         at_end = False
         is_quarterly = data.get("report_type", "").endswith("Q")
@@ -386,9 +400,20 @@ def insert_cell_dividers(line):
                         continue
 
                     if "$" in line:
-                        line = line.replace("$ |", "").replace("| |", "|")  # noqa
-                    elif "%" in line:
-                        line = line.replace("% |", "").replace("| |", "|")  # noqa
+                        line = (  # noqa
+                            line.replace("($) |", "")
+                            .replace("$ |", "")
+                            .replace("$", "")  # drop any remaining bare symbols
+                            .replace("| |", "|")
+                        )
+                    if "%" in line:
+                        line = (  # noqa
+                            line.replace("(%) |", "")
+                            .replace("% |", "")
+                            .replace("%", "")  # drop any remaining bare symbols
+                            .replace("| |", "|")
+                        )
+                    line = line.replace("|)", ")").replace("(|", "(")  # noqa
 
                     if "|" not in previous_line and all(
                         char == "|" for char in line.replace(" ", "")
@@ -416,12 +441,18 @@ def insert_cell_dividers(line):
                                 line.replace(" | | ", " | ")
                                 .replace(" | |", " | ")
                                 .replace(" |  |", "")
+                                .replace("|)", ")")
                             )
                             if is_header:
                                 line = "| " + line  # noqa
                         else:
-                            line = line.replace("| $ | ", "").replace(  # noqa
-                                "| % |", ""
+                            line = (  # noqa
+                                line.replace("| ($) |", "")
+                                .replace("| $ | ", "")
+                                .replace("|$", "|")
+                                .replace("| (%) |", "")
+                                .replace("| % |", "")
+                                .replace("|%", "")
                             )
                             if not line.strip().startswith("|"):
                                 line = "| " + line  # noqa
@@ -710,6 +741,7 @@ def process_document(document: list[str]) -> list[str]:
                         )
 
                     elif "|" in current_line:
+                        current_line = current_line.replace("|)", ")")
                         if (
                             current_line in ("|  |", "| |", "|")
                             or "form 10-k" in current_line.replace("|", "").lower()