# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Fabian-Paul Utech
# SPDX-FileCopyrightText: 2023 Ahmed Sheta

"""Create one simplified PDF report per lead from the enriched leads data."""

import argparse
import os
from xml.sax.saxutils import escape

import pandas as pd

# Default data locations (relative to the repository root).
LEADS_FILE = "src/data/leads_enriched.csv"
DUMMY_LEADS_FILE = "src/data/dummy_leads_email.csv"
REPORTS_DIR = "src/data/reports"

# Column that identifies a lead; also used as report headline and file name.
COMPANY_COLUMN = "Company / Account"

# Group title used for requested columns that belong to no standard group.
FALLBACK_GROUP = "Others"

# Maximum number of columns rendered side by side in one table.
COLUMNS_PER_TABLE = 4

standard_group_format = {
    # 1 pdf per lead (1 row in .csv)
    "Contact": ["Last Name", "First Name", "Company / Account", "Phone", "Email"],
    "Reviews": [
        "google_places_user_ratings_total",
        "google_places_rating",
        "google_places_price_level",
        "reviews_sentiment_score",
    ],
    # 'Region': [] starts with regional_atlas
    # Regarding column names: if there is more than one '_', take the split
    # after the second '_'.
}


def process_lead(lead, leads_file=LEADS_FILE):
    """Resolve the ``lead`` argument to the rows a report is generated for.

    Args:
        lead: Path to a CSV file (every row is used), a single company name,
            or a list of company names.
        leads_file: CSV file searched when ``lead`` is one or more company
            names.

    Returns:
        A ``pandas.DataFrame`` with one row per selected lead (possibly empty
        when no company name matches).

    Raises:
        FileNotFoundError: If the CSV file that has to be read does not exist.
        ValueError: If ``lead`` is neither a string nor a list.
    """
    if isinstance(lead, str) and (
        os.path.isfile(lead) or lead.lower().endswith(".csv")
    ):
        # A file path was given: use exactly that file. A non-existing
        # "*.csv" is still treated as a path so the caller gets a
        # FileNotFoundError instead of a silently empty result.
        return pd.read_csv(lead, delimiter=",")

    if not isinstance(lead, (str, list)):
        raise ValueError(
            "Invalid type for 'lead'. It should be a single string, a list of strings, or a file path."
        )

    # Only name lookups need the enriched leads file, so it is loaded lazily.
    try:
        df = pd.read_csv(leads_file, delimiter=",")
    except FileNotFoundError as err:
        raise FileNotFoundError(f"File not found: {leads_file}") from err

    if isinstance(lead, list):  # A specified group of leads
        return df[df[COMPANY_COLUMN].isin(lead)]
    return df[df[COMPANY_COLUMN] == lead]  # One specified lead


def process_format(fmt):
    """Normalise the ``format`` argument to a ``{group title: [columns]}`` dict.

    Args:
        fmt: ``None`` (use ``standard_group_format``), a dict that is already
            in the target shape, or a list of column names.

    Returns:
        A dict mapping each group title to the list of column names shown in
        that group. Columns of a list are sorted into the standard group they
        belong to; unknown columns end up under ``"Others"``.

    Raises:
        ValueError: If ``fmt`` is neither ``None``, a list nor a dict.
    """
    if fmt is None:
        return standard_group_format
    if isinstance(fmt, dict):
        return fmt
    if isinstance(fmt, list):
        # Reverse lookup: which standard group does a column belong to?
        column_to_group = {
            column: group
            for group, columns in standard_group_format.items()
            for column in columns
        }
        grouped = {}
        for value in fmt:
            column = str(value)
            group = column_to_group.get(column, FALLBACK_GROUP)
            grouped.setdefault(group, []).append(column)
        return grouped
    raise ValueError(
        "Invalid type for 'format'. It should be either a list or a dictionary."
    )


def _chunk(items, size):
    """Split ``items`` into consecutive lists of at most ``size`` elements."""
    return [items[i : i + size] for i in range(0, len(items), size)]


def _cell_text(lead, column):
    """Return the printable value of ``column`` for ``lead`` ('' if missing)."""
    try:
        value = lead[column]
    except KeyError:
        # The requested column does not exist for this lead.
        return ""
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return ""
    if isinstance(value, str) and value == "nan":
        return ""
    return str(value)


def _safe_filename(name):
    """Replace path separators so ``name`` cannot escape the reports folder."""
    return name.replace("/", "_").replace("\\", "_")


def create_pdf(lead, format):
    """Write the PDF report of a single lead.

    The report starts with the company name as headline, followed by one
    titled section per group. The columns of a group are rendered as small
    two-row tables (header row and value row) of at most
    ``COLUMNS_PER_TABLE`` columns each.

    Args:
        lead: One lead, indexable by column name (a ``pandas.Series`` row).
        format: Dict mapping group titles to lists of column names.

    Output:
        ``src/data/reports/<company name>.pdf``
    """
    # Imported here so that the lead/format helpers of this module stay
    # usable when reportlab is not installed.
    from reportlab.lib import colors
    from reportlab.lib.pagesizes import A4
    from reportlab.lib.styles import getSampleStyleSheet
    from reportlab.platypus import (
        Paragraph,
        SimpleDocTemplate,
        Spacer,
        Table,
        TableStyle,
    )

    company = str(lead[COMPANY_COLUMN])

    os.makedirs(REPORTS_DIR, exist_ok=True)
    doc = SimpleDocTemplate(
        f"{REPORTS_DIR}/{_safe_filename(company)}.pdf", pagesize=A4
    )

    # Headline: large font, alignment 0 (left-aligned).
    headline_style = getSampleStyleSheet()["Title"]
    headline_style.fontSize = 32
    headline_style.alignment = 0

    # Paragraph text is parsed as markup, so literal '&', '<' and '>' are escaped.
    headline_paragraph = Paragraph(escape(company), headline_style)

    # List for the 'Flowable' objects
    elements = [headline_paragraph, Spacer(1, 50)]

    # Styles for tables and paragraphs
    styles = getSampleStyleSheet()

    # The same style is applied to every table, so it is built only once.
    table_style = TableStyle(
        [
            ("ALIGN", (0, 0), (-1, -1), "CENTER"),  # center the text
            ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),  # vertically centered cells
            ("TEXTCOLOR", (0, 0), (-1, 0), colors.black),
            ("GRID", (0, 0), (-1, -1), 1, colors.black),
            # Intended to keep rows from being split between pages.
            ("SPLITBYROWS", (0, 0), (-1, -1), True),
            ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),  # bold header row
        ]
    )

    for group, col_names in format.items():
        elements.append(Paragraph(escape(str(group)), styles["Title"]))

        for header_row in _chunk(list(col_names), COLUMNS_PER_TABLE):
            data_row = [_cell_text(lead, column) for column in header_row]

            pdf_table = Table([header_row, data_row])
            pdf_table.setStyle(table_style)

            # Add the table to the elements
            elements.append(pdf_table)

            # Add an empty line between tables
            elements.append(Spacer(1, 25))

    # Build the PDF document
    doc.build(elements)


def main():
    """Parse the command line and create a report for every selected lead."""
    parser = argparse.ArgumentParser(description="Process lead and format arguments.")
    parser.add_argument(
        "--lead",
        nargs="+",
        default=DUMMY_LEADS_FILE,
        help="Lead argument: a single search-string, a list of strings, or a file path.",
    )
    parser.add_argument(
        "--format", nargs="+", help="Format argument: a list or a dictionary."
    )

    args = parser.parse_args()

    # A single value is passed on as a plain string (search string or path);
    # several values are passed on as a list of company names.
    lead_arg = args.lead
    if isinstance(lead_arg, list) and len(lead_arg) == 1:
        lead_arg = lead_arg[0]

    # Process lead argument (result: either specific row(/s) or a table)
    processed_lead = process_lead(lead_arg)
    print(processed_lead)

    # Process format argument (result: format that is a dictionary)
    processed_format = process_format(args.format)

    # Generate report for every lead
    for _, lead_row in processed_lead.iterrows():
        create_pdf(lead_row, processed_format)


if __name__ == "__main__":
    main()