# Creates a CSV report of tasks completed (successfully or not)
# during last month (from the 1st to the last day of the month)

import csv
import datetime
import logging
import os
from pathlib import Path

import requests
from dateutil.relativedelta import relativedelta

logging.basicConfig(
    level=logging.DEBUG, format="[%(asctime)s: %(levelname)s] %(message)s"
)
logger = logging.getLogger(__name__)

# url of the zimfarm API to request
url = os.getenv("ZF_URI", "https://api.farm.zimit.kiwix.org/v1")

# prefix of csv file to create: {prefix}{year}-{month}.csv
file_prefix = os.getenv("FILE_PREFIX", "zimfarm_tasks_")

# list of offliners to include ("all" disables the filter); entries are
# stripped so that "zimit, ted" behaves like "zimit,ted"
offliners = [name.strip() for name in os.getenv("OFFLINERS", "zimit").split(",")]

# seconds to wait for the API before giving up, so a stalled connection
# cannot hang the script forever
REQUEST_TIMEOUT_SECONDS = 30


def _parse_timestamp(value):
    """Parse an ISO 8601 timestamp string into a timezone-aware datetime."""
    parsed = datetime.datetime.fromisoformat(value)
    if parsed.tzinfo is None:
        # NOTE(review): assumes naive timestamps from the API are UTC — confirm.
        # Without this, comparing against the aware month bounds raises TypeError.
        parsed = parsed.replace(tzinfo=datetime.UTC)
    return parsed


def main():
    """Write a CSV report of the tasks that finished during the previous month.

    Pages through the ``failed``, ``canceled`` and ``succeeded`` tasks of the
    API at ``url``, keeps those whose ``updated_at`` falls within the previous
    calendar month (UTC) and whose offliner is listed in ``offliners`` (or all
    of them when ``"all"`` is listed), and writes one row per task to
    ``{file_prefix}{year}-{month}.csv`` in the current working directory.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        requests.RequestException: on connection failure or timeout.
    """

    now = datetime.datetime.now(datetime.UTC)
    start_of_this_month = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
    start_of_last_month = start_of_this_month - relativedelta(months=1)

    nb_per_page = 100
    current_page = 0
    nb_rows = 0

    report_path = Path(
        f"{file_prefix}{start_of_last_month.year}-{start_of_last_month.month}.csv"
    )
    logger.info(
        "Reporting tasks updated in [%s, %s) to %s",
        start_of_last_month.isoformat(),
        start_of_this_month.isoformat(),
        report_path,
    )

    last_task_found = False
    # a single session reuses the HTTP connection across the many per-task calls
    with requests.Session() as session:
        # newline="" is required by the csv module to get correct line endings
        with open(report_path, "w", newline="", encoding="utf-8") as fh:
            csvwriter = csv.writer(fh)
            csvwriter.writerow(
                ["ID", "URL", "Status", "Requested", "Started", "Completed"]
            )
            while not last_task_found:
                response = session.get(
                    f"{url}/tasks/",
                    params={
                        "skip": current_page * nb_per_page,
                        "limit": nb_per_page,
                        "status": ["failed", "canceled", "succeeded"],
                    },
                    timeout=REQUEST_TIMEOUT_SECONDS,
                )
                response.raise_for_status()
                items = response.json()["items"]
                if not items:
                    # no more tasks on the server: without this check the loop
                    # would never end when no task predates the report period
                    break
                for item in items:
                    item_last_modification = _parse_timestamp(item["updated_at"])
                    if item_last_modification >= start_of_this_month:
                        # task is too recent (it will be pushed to next report),
                        # ignore it
                        continue
                    if item_last_modification < start_of_last_month:
                        # we found a task which is older than current period, we
                        # stop (presumes the API lists most recently updated
                        # tasks first — TODO confirm)
                        last_task_found = True
                        break

                    # the listing does not carry every field needed for the
                    # report, so fetch the task's full details
                    task_response = session.get(
                        f'{url}/tasks/{item["_id"]}',
                        timeout=REQUEST_TIMEOUT_SECONDS,
                    )
                    task_response.raise_for_status()
                    task = task_response.json()
                    offliner = task["config"]["task_name"]
                    if "all" not in offliners and offliner not in offliners:
                        continue
                    csvwriter.writerow(
                        [
                            task["_id"],
                            # not every task has a "url" flag; do not crash
                            # the whole report on those
                            task["config"]["flags"].get("url", "-"),
                            task["status"],
                            task["timestamp"]["requested"],
                            task["timestamp"].get("started", "-"),
                            task["updated_at"],
                        ]
                    )
                    nb_rows += 1
                current_page += 1
                # persist each page so a later failure keeps what was collected
                fh.flush()

    logger.info("Wrote %d task(s) to %s", nb_rows, report_path)


if __name__ == "__main__":
    main()