From 81b832465e611bfe360951e6ddd6d2b5985f741f Mon Sep 17 00:00:00 2001
From: Fabian Utech <f.utech@gmx.net>
Date: Tue, 5 Dec 2023 21:52:48 +0100
Subject: [PATCH] Automatic creation of simplified reports issue #81

Co-authored-by: Ahmed Sheta <ahmed.sheta@fau.de>
Signed-off-by: Fabian Utech <f.utech@gmx.net>
---
 .gitignore    |   1 +
 src/report.py | 251 ++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 182 insertions(+), 70 deletions(-)

diff --git a/.gitignore b/.gitignore
index cf14e17..5790c84 100644
--- a/.gitignore
+++ b/.gitignore
@@ -52,6 +52,7 @@ bin/
 **/data/*.geojson
 !**/data/merged_geo.geojson
 **/data/reviews/*.json
+**/data/reports/*.pdf
 
 # Env files
 *.env
diff --git a/src/report.py b/src/report.py
index 4ae8f7a..614e8b1 100644
--- a/src/report.py
+++ b/src/report.py
@@ -1,97 +1,208 @@
 # SPDX-License-Identifier: MIT
 # SPDX-FileCopyrightText: 2023 Fabian-Paul Utech <f.utech@gmx.net>
+# SPDX-FileCopyrightText: 2023 Ahmed Sheta <ahmed.sheta@fau.de>
 
-from reportlab.lib.pagesizes import A4
+import argparse
+import os
+
+import pandas as pd
 from reportlab.lib import colors
-from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Spacer, Paragraph
+from reportlab.lib.pagesizes import A4
 from reportlab.lib.styles import getSampleStyleSheet
-from reportlab.pdfbase import pdfmetrics
-from reportlab.lib.styles import ParagraphStyle
+from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer, Table, TableStyle
+
+standard_group_format = {
+    # 1 pdf per lead (1 row in .csv)
+    "Contact": ["Last Name", "First Name", "Company / Account", "Phone", "Email"],
+    "Reviews": [
+        "google_places_user_ratings_total",
+        "google_places_rating",
+        "google_places_price_level",
+        "reviews_sentiment_score",
+    ],
+    # 'Region': [] is not defined yet (columns starting with regional_atlas)
+    # Regarding column names: if there is more than one '_', take the part after the second '_'
+}
+
+
+def process_lead(lead):
+    # Input: search string (one lead), list of search strings, or a .csv path
+    # Output: pd.DataFrame holding the matching row(s) of the lead table
+    # A path is recognised by its directory part existing on disk (the
+    # default argument is the dummy dataset); that file is then used as-is.
+    if isinstance(lead, str) and os.path.exists(os.path.dirname(lead)):
+        return pd.read_csv(lead, delimiter=",")
+    if not isinstance(lead, (list, str)):
+        raise ValueError(
+            "Invalid type for 'lead'. It should be a single string, a list of strings, or a file path."
+        )
+
+    # Only name lookups need the enriched table, so it is loaded lazily here
+    try:
+        df = pd.read_csv("src/data/leads_enriched.csv", delimiter=",")
+    except FileNotFoundError as err:
+        raise FileNotFoundError("File not found.") from err
+
+    if isinstance(lead, list):  # A specified group of leads
+        # `Series in list` is ambiguous; isin() builds the row mask instead
+        return df[df["Company / Account"].isin(lead)]
+    # One specified lead
+    return df[df["Company / Account"] == lead]
+
+
+def process_format(fmt):
+    # Returns {group title: [column names]}; a list of columns is grouped by
+    # the standard group containing each column ("Others" if none does).
+    if isinstance(fmt, list):  # Transform list to dictionary
+        new_fmt = {}
+        for value in fmt:
+            key = "Others"
+            for group, columns in standard_group_format.items():
+                if value in columns:
+                    key = group
+                    break
+            # append() returns None, so its result must not be stored back
+            new_fmt.setdefault(key, []).append(str(value))

+        return new_fmt
+    elif isinstance(fmt, dict):
+        return fmt
+    elif fmt is None:
+        return standard_group_format
+    else:
+        raise ValueError(
+            "Invalid type for 'format'. It should be either a list or a dictionary."
+        )
 
-def create_pdf(output_filename, groups):
+
+def create_pdf(lead, format):
     """
-    Input: str:output_filename, str[]:groups
-           groups: [{'group_title': 'group_title', 'key1': 'value1', 'key2': 'value2','key3 (dictionary)': {'i_key1':'element1','i_key2':'element2'}},...]
-    Description: Function is there to create reports. 
-                 A report consists of tables of features.                    
-    Output: 'output_filename'.pdf
+    Input: lead: pd.series
+           format: dict
+    Description: Function to create reports.
+                 A report consists of tables of grouped features.
+    Output: '...'.pdf
     """
-    
-    if not isinstance(groups,list):
-        groups = [groups]
-        
-    doc = SimpleDocTemplate(output_filename, pagesize=A4)
+    doc = SimpleDocTemplate(
+        f"src/data/reports/{lead['Company / Account']}.pdf", pagesize=A4
+    )
+
+    # Creating a Paragraph with a large font size and left alignment (alignment = 0)
+    headline_style = getSampleStyleSheet()["Title"]
+    headline_style.fontSize = 32
+    headline_style.alignment = 0
+
+    headline_paragraph = Paragraph(lead["Company / Account"], headline_style)
 
     # List for the 'Flowable' objects
-    elements = []
+    elements = [headline_paragraph]
+    elements.append(Spacer(1, 50))
 
     # Styles for tables and paragraphs
     styles = getSampleStyleSheet()
 
-    for data in groups:  
-        
-        title_paragraph = Paragraph(data['group_title'], styles['Title'])
-        elements.append(title_paragraph)  
-        
-        tmp_data = data.copy()
-        del tmp_data['group_title']
-        
-        # Create a table with the given data        
-        table_data = [[key for key in tmp_data.keys()]]  # Header row
-        row = [Paragraph(str(value), styles['Normal']) for value in tmp_data.values()]
-        table_data.append(row)
-        
-        for k,v in tmp_data.items():
+    groups = format.keys()
+
+    for group in groups:
+        title_paragraph = Paragraph(group, styles["Title"])
+        elements.append(title_paragraph)
+
+        col_names = format[group]
+
+        # Chunk the columns (slice width must equal the step, or columns drop)
+        split_col = [col_names[i : i + 4] for i in range(0, len(col_names), 4)]
+
+        # Cell styling shared by every table of this group
+        table_style = TableStyle(
+            [
+                ("ALIGN", (0, 0), (-1, -1), "CENTER"),  # center the text
+                (
+                    "VALIGN",
+                    (0, 0),
+                    (-1, -1),
+                    "MIDDLE",
+                ),  # put the text in the middle of the cell
+                ("TEXTCOLOR", (0, 0), (-1, 0), colors.black),
+                ("GRID", (0, 0), (-1, -1), 1, colors.black),
+                (
+                    "SPLITBYROWS",
+                    (0, 0),
+                    (-1, -1),
+                    True,
+                ),  # Ensure rows are not split between pages
+                ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
+            ]
+        )
+
+        for group_columns in split_col:
+            header_row = group_columns
+            data_row = []
+            for column in group_columns:
+                # Missing values are float NaN (never the text "nan") and a
+                # column absent from the lead raises KeyError; both print blank
+                try:
+                    value = lead[column]
+                except KeyError:
+                    value = None
+                data_row.append("" if pd.isna(value) else str(value))
+
+            table = [header_row, data_row]
+
+            pdf_table = Table(table)
+            pdf_table.setStyle(table_style)
+
+            # Add the table to the elements
+            elements.append(pdf_table)
+
+            # Add an empty line between tables
+            elements.append(Spacer(1, 25))
+
+        """for k,v in tmp_data.items():
             if isinstance(v, dict):
-                
+
                 ul_items=[]
-                for key,val in v.items():  
+                for key,val in v.items():
                     bolded_text = f'<b>{key}:</b>{val}'
                     ul_items.append(Paragraph(bolded_text,styles['Normal']))
-                
+
                 col_index = list(tmp_data.keys()).index(k)
-                table_data[1][col_index] = ul_items
-                
-        table = Table(table_data)#[doc.width / len(table_data[0])]*len(table_data[0])
-        
-        # Center the table on the page
-        table_style = TableStyle([
-            ('ALIGN', (0, 0), (-1, -1), 'CENTER'), # center the text
-            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'), # put the text in the middle of the cell
-            ('TEXTCOLOR', (0, 0), (-1, 0), colors.black),
-            ('GRID', (0, 0), (-1, -1), 1, colors.black),
-            ('SPLITBYROWS', (0, 0), (-1, -1), True),  # Ensure rows are not split between pages
-        ])
-
-        # Set left alignment for all non-header cells
+                table_data[1][col_index] = ul_items"""
+
+        """# Set left alignment for all non-header cells
         for col in range(len(table_data[0])):
             table_style.add('FONTNAME', (col, 0), (col, 0), 'Helvetica-Bold')
-            table_style.add('ALIGN', (col, 1), (col, -1), 'LEFT')
+            table_style.add('ALIGN', (col, 1), (col, -1), 'LEFT')"""
 
-        table.setStyle(table_style)
+    # Build the PDF document
+    doc.build(elements)
 
-        # Add the table to the elements
-        elements.append(table)
 
-        # Add an empty line between tables
-        elements.append(Spacer(1, 25))
+def main():
+    parser = argparse.ArgumentParser(description="Process lead and format arguments.")
+    parser.add_argument(
+        "--lead",
+        default="src/data/dummy_leads_email.csv",
+        help="Lead argument: a single search-string, a list of strings, or a file path.",
+    )
+    parser.add_argument(
+        "--format", nargs="+", help="Format argument: a list or a dictionary."
+    )
+
+    args = parser.parse_args()
+
+    # Process lead argument (result: either specific row(/s) or a table)
+    # Choose lead with --lead (defaults to the dummy dataset)
+    processed_lead = process_lead(args.lead)
+    print(processed_lead)
+
+    # Process format argument (result: format that is a dictionary)
+    processed_format = process_format(args.format)
+
+    # Generate report for every lead; the output folder may not exist yet
+    os.makedirs("src/data/reports", exist_ok=True)
+    for _, lead in processed_lead.iterrows():
+        create_pdf(lead, processed_format)
 
-    # Build the PDF document
-    doc.build(elements)
 
-#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXAMPLE
-output_filename = "report.pdf"
-
-groups = [
-    {'group_title': 'group_title', 'key1': 'value1', 'key2': 'value2','key3 (dictionary)': {'i_key1':'element1','i_key2':'element2'}},
-    {'group_title': 'Contact', 'contact_date': '17.11.2023', 'contact_name': 'Christopher M. Bauder',
-     'contact_email': 'info@darkmatter.berlin', 'contact_number': '+49 30 123123', 'contact_number_valid': False,
-     'contact_timezone': 'UTC +1'},
-    {'group_title': 'Another Group', 'contact_date': '18.11.2023', 'contact_name': 'Lorem ipsum dolor ipsum dolor sit amet.',
-     'contact_email': 'john.doe@example.com', 'contact_number': '+1 123 456789', 'contact_number_valid': True,
-     'contact_timezone': {'1':'UTC -5','2':'UTC -5','3':'UTC -5'}}
-]
-
-# Call the function to generate the PDF
-create_pdf(output_filename, [groups[0],groups[1],groups[2],groups[1],groups[2],groups[1],groups[1],groups[2]])
\ No newline at end of file
+if __name__ == "__main__":
+    main()