From a0b67e9b23a6fc0a61ff0ad353aa2502081514b6 Mon Sep 17 00:00:00 2001 From: tkalir Date: Thu, 2 Jan 2025 18:23:12 +0200 Subject: [PATCH 1/4] added comment to publish_notification --- anyway/telegram_accident_notifications.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/anyway/telegram_accident_notifications.py b/anyway/telegram_accident_notifications.py index abecd469..b5040b53 100644 --- a/anyway/telegram_accident_notifications.py +++ b/anyway/telegram_accident_notifications.py @@ -65,6 +65,8 @@ def send_after_infographics_message(bot, message_id_in_group, newsflash_id, link return bot.send_message(linked_group, message, reply_to_message_id=message_id_in_group) +#this function sends the "root" message for the newsflash in telegram. +#the flow continues when the telegram server sends a request to our /api/telegram/webhook def publish_notification(newsflash_id, chat_id=TELEGRAM_CHANNEL_CHAT_ID): accident_text = create_accident_text(newsflash_id) bot = telebot.TeleBot(secrets.get("BOT_TOKEN")) From 7d5bff28b52cc4eafc53f144f150387495ecd7c6 Mon Sep 17 00:00:00 2001 From: tkalir Date: Thu, 2 Jan 2025 18:24:41 +0200 Subject: [PATCH 2/4] added delay in generate_infographics_in_selenium_container after scrolling to infographic, to prevent map infographics from being blurry --- anyway/infographic_image_generator.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/anyway/infographic_image_generator.py b/anyway/infographic_image_generator.py index 31ce6ce0..36f4e72d 100644 --- a/anyway/infographic_image_generator.py +++ b/anyway/infographic_image_generator.py @@ -18,7 +18,7 @@ selenium_hub_url = f"https://{selenium_hub_url}/wd/hub" selenium_remote_results_url = f"https://{selenium_url}/tempdata" CHROME_PARTIALLY_DOWNLOADED_FILE_EXTENSION = "crdownload" - +SLEEP_DURATION_FOR_MAP_TO_HAVE_FOCUS = 5 def create_chrome_browser_session(newsflash_id): options = webdriver.ChromeOptions() @@ -94,7 +94,9 @@ def 
generate_infographics_in_selenium_container(browser, newsflash_id): logging.debug(f"found {buttons_found} buttons") if buttons_found > 0: for element in elements: - ActionChains(browser).move_to_element(element).click().perform() + ActionChains(browser).move_to_element(element).perform() + time.sleep(SLEEP_DURATION_FOR_MAP_TO_HAVE_FOCUS) #without sleep map infographic may be blurry + element.click() time.sleep(1) #prevents click arriving before the last finished is_download_done, generated_images_names = wait_for_folder_to_contain_all_files(newsflash_id, buttons_found, timeout=60) From c3b466df5b617db678111ea15301d0b11d6bf810 Mon Sep 17 00:00:00 2001 From: tkalir Date: Thu, 2 Jan 2025 23:30:55 +0200 Subject: [PATCH 3/4] extracted get_items_for_send from send_infographics_to_telegram --- anyway/telegram_accident_notifications.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/anyway/telegram_accident_notifications.py b/anyway/telegram_accident_notifications.py index b5040b53..bc285f1a 100644 --- a/anyway/telegram_accident_notifications.py +++ b/anyway/telegram_accident_notifications.py @@ -79,20 +79,28 @@ def publish_notification(newsflash_id, chat_id=TELEGRAM_CHANNEL_CHAT_ID): db.session.commit() +def get_items_for_send(newsflash_id): + items = [] + transcription_by_widget_name = fetch_transcription_by_widget_name(newsflash_id) + urls_by_infographic_name = create_public_urls_for_infographics_images(str(newsflash_id)) + for infographic_name, url in urls_by_infographic_name.items(): + text = transcription_by_widget_name[infographic_name] \ + if infographic_name in transcription_by_widget_name else None + items.append((url, text)) + return items + + def send_infographics_to_telegram(root_message_id, newsflash_id, channel_of_initial_message): #every message in the channel is automatically forwarded to the linked discussion group. 
#to create a comment on the channel message, we need to send a reply to the #forwareded message in the discussion group. bot = telebot.TeleBot(secrets.get("BOT_TOKEN")) - transcription_by_widget_name = fetch_transcription_by_widget_name(newsflash_id) - urls_by_infographic_name = create_public_urls_for_infographics_images(str(newsflash_id)) - linked_group = telegram_linked_group_by_channel[channel_of_initial_message] - for infographic_name, url in urls_by_infographic_name.items(): - text = transcription_by_widget_name[infographic_name] \ - if infographic_name in transcription_by_widget_name else None + items_for_send = get_items_for_send(newsflash_id) + for url, text in items_for_send: bot.send_photo(linked_group, url, reply_to_message_id=root_message_id, caption=text) + send_after_infographics_message(bot, root_message_id, newsflash_id, linked_group) logging.info("notification send done") From 214cf20b15e638196d5df1ad8af68726cb9bb3d7 Mon Sep 17 00:00:00 2001 From: tkalir Date: Sun, 5 Jan 2025 16:27:39 +0200 Subject: [PATCH 4/4] send newsflashes to telegram in the order of appearance on website, added retries to widget fetching --- anyway/telegram_accident_notifications.py | 46 ++++++++++++++++++----- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/anyway/telegram_accident_notifications.py b/anyway/telegram_accident_notifications.py index bc285f1a..7376c731 100644 --- a/anyway/telegram_accident_notifications.py +++ b/anyway/telegram_accident_notifications.py @@ -50,11 +50,9 @@ def create_accident_text(newsflash_id): return f"{first_line}\n\n{newsflash['description']}" -def fetch_transcription_by_widget_name(newsflash_id): - widgets_url = f"{ANYWAY_BASE_API_URL}/infographics-data?lang=he&news_flash_id={newsflash_id}&years_ago=5" - widgets_json = requests.get(widgets_url).json() +def get_transcription_by_widget_name(widgets): transcription_by_widget_name = {widget["name"]: widget["data"]["text"]["transcription"] - for widget in widgets_json["widgets"] 
+ for widget in widgets if "transcription" in widget["data"]["text"]} return transcription_by_widget_name @@ -79,14 +77,41 @@ def publish_notification(newsflash_id, chat_id=TELEGRAM_CHANNEL_CHAT_ID): db.session.commit() +def fetch_widgets_with_retries(newsflash_id, wait_times): + url = f"https://www.anyway.co.il/api/infographics-data?news_flash_id={newsflash_id}" + for attempt, wait_time in enumerate(wait_times): + try: + logging.debug(f"Attempt {attempt + 1}: Fetching data widgets for newsflash {newsflash_id}") + response = requests.get(url) + if response.ok: + response_json = response.json() + widgets = response_json.get("widgets", []) + if len(widgets) > 0: + return widgets + except requests.exceptions.RequestException as e: + logging.debug(e) + time.sleep(wait_time) + raise RuntimeError(f"Failed to fetch data from {url}") + + +def fetch_widgets(newsflash_id): + retry_timeouts = [10, 20, 30, 60] + widgets = fetch_widgets_with_retries(newsflash_id, retry_timeouts) + return [widget.get("name") for widget in widgets] + + def get_items_for_send(newsflash_id): items = [] - transcription_by_widget_name = fetch_transcription_by_widget_name(newsflash_id) + retry_timeouts = [10, 20, 30, 60] + widgets = fetch_widgets_with_retries(newsflash_id, retry_timeouts) + transcription_by_widget_name = get_transcription_by_widget_name(widgets) urls_by_infographic_name = create_public_urls_for_infographics_images(str(newsflash_id)) - for infographic_name, url in urls_by_infographic_name.items(): - text = transcription_by_widget_name[infographic_name] \ - if infographic_name in transcription_by_widget_name else None - items.append((url, text)) + for widget in widgets: + name = widget.get("name") + if name in urls_by_infographic_name: + url = urls_by_infographic_name.get(name) + text = transcription_by_widget_name.get(name) + items.append((url, text)) return items @@ -104,6 +129,7 @@ def send_infographics_to_telegram(root_message_id, newsflash_id, channel_of_init 
send_after_infographics_message(bot, root_message_id, newsflash_id, linked_group) logging.info("notification send done") + def extract_infographic_name_from_s3_object(s3_object_name): left = s3_object_name.rindex("/") right = s3_object_name.rindex(".") @@ -131,4 +157,4 @@ def trigger_generate_infographics_and_send_to_telegram(newsflash_id, pre_verific dag_conf = {"news_flash_id": newsflash_id} dag_conf["chat_id"] = TELEGRAM_CHANNEL_CHAT_ID if pre_verification_chat \ else TELEGRAM_POST_VERIFICATION_CHANNEL_CHAT_ID - trigger_airflow_dag("generate-and-send-infographics-images", dag_conf) + trigger_airflow_dag("generate-and-send-infographics-images", dag_conf) \ No newline at end of file