From 06e7ccb0f479aca57c392e647a6f663b54748ab8 Mon Sep 17 00:00:00 2001 From: ablakley-r7 <96182471+ablakley-r7@users.noreply.github.com> Date: Tue, 21 Jan 2025 13:04:11 +0000 Subject: [PATCH] [SOAR-18584] Proofpoint TAP (#3051) (#3061) * Initial commit * Initial commit | Update state and api limit handling in task * Validators | Bump SDK * Fix test | Remove test which logic no longer exists * Add new unit test * Fix import * Fix import * Fix unused argument --- plugins/proofpoint_tap/.CHECKSUM | 8 +- plugins/proofpoint_tap/Dockerfile | 2 +- .../proofpoint_tap/bin/komand_proofpoint_tap | 2 +- plugins/proofpoint_tap/help.md | 3 +- .../tasks/monitor_events/schema.py | 382 +++++++++++++++++- .../tasks/monitor_events/task.py | 190 ++++----- plugins/proofpoint_tap/plugin.spec.yaml | 5 +- plugins/proofpoint_tap/setup.py | 2 +- .../monitor_events_bad_request.json.exp | 3 +- .../monitor_events_server_error.json.exp | 3 +- .../unit_test/test_monitor_events.py | 137 ++----- plugins/proofpoint_tap/unit_test/test_util.py | 8 +- 12 files changed, 528 insertions(+), 217 deletions(-) diff --git a/plugins/proofpoint_tap/.CHECKSUM b/plugins/proofpoint_tap/.CHECKSUM index dac4ce6fe4..3af82fcf0c 100644 --- a/plugins/proofpoint_tap/.CHECKSUM +++ b/plugins/proofpoint_tap/.CHECKSUM @@ -1,7 +1,7 @@ { - "spec": "2fb04d208580295eb2130af367993059", - "manifest": "28e4d2cb40027a13b74ccd2156ed9798", - "setup": "c7621059ff603f9c8aa42bbbe2ce39e8", + "spec": "e3b6dd712f77e68c4db2906763ea4984", + "manifest": "14b35211c0ebf40e6fad3e569f26aeae", + "setup": "97182fc26a28ec7fc9c4cd98d2ce7d83", "schemas": [ { "identifier": "fetch_forensics/schema.py", @@ -45,7 +45,7 @@ }, { "identifier": "monitor_events/schema.py", - "hash": "b871e3cb1925e5738a83b666dc74268a" + "hash": "e2ac0558ed03896ab84d1c123e43b868" } ] } \ No newline at end of file diff --git a/plugins/proofpoint_tap/Dockerfile b/plugins/proofpoint_tap/Dockerfile index 872c201cb3..cbe61f233c 100755 --- a/plugins/proofpoint_tap/Dockerfile +++ b/plugins/proofpoint_tap/Dockerfile @@ -1,4 +1,4 @@ -FROM --platform=linux/amd64 rapid7/insightconnect-python-3-slim-plugin:6.1.0 +FROM --platform=linux/amd64 rapid7/insightconnect-python-3-slim-plugin:6.2.3 LABEL organization=rapid7 LABEL sdk=python diff --git a/plugins/proofpoint_tap/bin/komand_proofpoint_tap b/plugins/proofpoint_tap/bin/komand_proofpoint_tap index 6e0251503b..c5772fa46a 100755 --- a/plugins/proofpoint_tap/bin/komand_proofpoint_tap +++ b/plugins/proofpoint_tap/bin/komand_proofpoint_tap @@ -6,7 +6,7 @@ from sys import argv Name = "Proofpoint TAP" Vendor = "rapid7" -Version = "4.1.10" +Version = "4.1.11" Description = "[Proofpoint Targeted Attack Protection](https://www.proofpoint.com/us/products/ransomware-and-targeted-attack-protection) (TAP) helps you stay ahead of attackers with an innovative approach that detects, analyzes and blocks advanced threats before they reach your inbox. This plugin enables users to parse TAP alerts" diff --git a/plugins/proofpoint_tap/help.md b/plugins/proofpoint_tap/help.md index 2d11857f12..5f59737b64 100644 --- a/plugins/proofpoint_tap/help.md +++ b/plugins/proofpoint_tap/help.md @@ -1175,7 +1175,8 @@ Example output: # Version History -* 4.1.10 - Update `Parse Tap Alert` to utilise BeautifulSoup resolving vulnerabilities | SDK Bump to 6.1.0 | Task Connection test added +* 4.1.11 - SDK Bump to 6.2.3 | `Monitor Events` task updated to handle requests outside of Proofpoint TAP API limits +* 4.1.10 - SDK Bump to 6.1.0 | Task Connection test added Update `Parse Tap Alert` to utilise BeautifulSoup resolving vulnerabilities * 4.1.9 - Update connection fields to be required. * 4.1.8 - Include SDK 5.4.9 | Task - Use cutoff of 7 days for first query, use cutoff of 1 hours for subsequent queries * 4.1.7 - Include SDK 5.4.5 | Task - enforce query cutoff based on Proofpoint API max lookback | Task - toggle pagination when backfilling | Task - only store previous page of hashes. diff --git a/plugins/proofpoint_tap/komand_proofpoint_tap/tasks/monitor_events/schema.py b/plugins/proofpoint_tap/komand_proofpoint_tap/tasks/monitor_events/schema.py index f3d497f24a..1c6d4f09c3 100755 --- a/plugins/proofpoint_tap/komand_proofpoint_tap/tasks/monitor_events/schema.py +++ b/plugins/proofpoint_tap/komand_proofpoint_tap/tasks/monitor_events/schema.py @@ -43,13 +43,387 @@ class MonitorEventsOutput(insightconnect_plugin_runtime.Output): "type": "array", "title": "Events", "description": "List of all events", - "items": { - "type": "object" - }, + "items": {}, "required": [ "events" ], - "definitions": {} + "definitions": { + "event": { + "type": "object", + "title": "event", + "properties": { + "eventType": { + "type": "string", + "title": "Event Type", + "description": "The type of event logged", + "order": 1 + }, + "ccAddresses": { + "type": "array", + "title": "CC Addresses", + "description": "A list of email addresses contained within the CC", + "items": { + "type": "string" + }, + "order": 2 + }, + "cluster": { + "type": "string", + "title": "Cluster", + "description": "The name of the PPS cluster which processed the message", + "order": 3 + }, + "completelyRewritten": { + "type": "boolean", + "title": "Completely Rewritten", + "description": "The rewrite status of the message", + "order": 4 + }, + "fromAddress": { + "type": "array", + "title": "From Address", + "description": "The email address contained in the From", + "items": { + "type": "string" + }, + "order": 5 + }, + "GUID": { + "type": "string", + "title": "GUID", + "description": "The ID of the message within PPS", + "order": 6 + }, + "headerFrom": { + "type": "string", + "title": "Header From", + "description": "The full content of the From", + "order": 7 + }, + "headerReplyTo": { + "type": "string", + "title": "Header Reply To", + "description": "If present, the full content of the Reply-To", + "order": 8 + }, + "impostorScore": { + "type": "integer", + "title": "Impostor Score", + "description": "The impostor score of the message. Higher scores indicate higher certainty", + "order": 9 + }, + "malwareScore": { + "type": "integer", + "title": "Malware Score", + "description": "The malware score of the message. Higher scores indicate higher certainty", + "order": 10 + }, + "messageID": { + "type": "string", + "title": "Message ID", + "description": "Message-ID extracted from the headers of the email message", + "order": 11 + }, + "messageParts": { + "type": "array", + "title": "Message Parts", + "description": "Details about parts of the message, including both message bodies and attachments", + "items": { + "$ref": "#/definitions/messageParts" + }, + "order": 12 + }, + "messageSize": { + "type": "integer", + "title": "Message Size", + "description": "The size in bytes of the message, including headers and attachments", + "order": 13 + }, + "messageTime": { + "type": "string", + "title": "Message Time", + "description": "When the message was delivered to the user or quarantined by PPS", + "order": 14 + }, + "modulesRun": { + "type": "array", + "title": "Modules Run", + "description": "The list of PPS modules which processed the message", + "items": { + "type": "string" + }, + "order": 15 + }, + "phishScore": { + "type": "integer", + "title": "Phish Score", + "description": "The phish score of the message. Higher scores indicate higher certainty", + "order": 16 + }, + "policyRoutes": { + "type": "array", + "title": "Policy Routes", + "description": "The policy routes that the message matched during processing by PPS", + "items": { + "type": "string" + }, + "order": 17 + }, + "QID": { + "type": "string", + "title": "QID", + "description": "The queue ID of the message within PPS", + "order": 18 + }, + "quarantineFolder": { + "type": "string", + "title": "Quarantine Folder", + "description": "The name of the folder which contains the quarantined message", + "order": 19 + }, + "quarantineRule": { + "type": "string", + "title": "Quarantine Rule", + "description": "The name of the rule which quarantined the message", + "order": 20 + }, + "recipient": { + "type": "array", + "title": "Recipient", + "description": "An array containing the email addresses of the SMTP (envelope) recipients", + "items": { + "type": "string" + }, + "order": 21 + }, + "replyToAddress": { + "type": "array", + "title": "Reply To Address", + "description": "The email address contained in the Reply-To", + "items": { + "type": "string" + }, + "order": 22 + }, + "sender": { + "type": "string", + "title": "Sender", + "description": "The email address of the SMTP (envelope) sender. The user-part is hashed. The domain-part is cleartext", + "order": 23 + }, + "senderIP": { + "type": "string", + "title": "Sender IP", + "description": "The IP address of the sender", + "order": 24 + }, + "spamScore": { + "type": "integer", + "title": "Spam Score", + "description": "The spam score of the message. Higher scores indicate higher certainty", + "order": 25 + }, + "subject": { + "type": "string", + "title": "Subject", + "description": "The subject line of the message, if available", + "order": 26 + }, + "threatsInfoMap": { + "type": "array", + "title": "Threats Info Map", + "description": "Details about detected threats within the message", + "items": { + "$ref": "#/definitions/threatsInfoMap" + }, + "order": 27 + }, + "toAddresses": { + "type": "array", + "title": "To Address", + "description": "A list of email addresses contained within the To", + "items": { + "type": "string" + }, + "order": 28 + }, + "xmailer": { + "type": "string", + "title": "X-Mailer", + "description": "The content of the X-Mailer", + "order": 29 + }, + "campaignId": { + "type": "string", + "title": "Campaign ID", + "description": "An identifier for the campaign of which the threat is a member", + "order": 30 + }, + "classification": { + "type": "string", + "title": "Classification", + "description": "The threat category of the malicious URL", + "order": 31 + }, + "clickIP": { + "type": "string", + "title": "Click IP", + "description": "The external IP address of the user who clicked on the link", + "order": 32 + }, + "clickTime": { + "type": "string", + "title": "Click Time", + "description": "The time the user clicked on the URL", + "order": 33 + }, + "id": { + "type": "string", + "title": "ID", + "description": "The unique id of the click", + "order": 34 + }, + "threatId": { + "type": "string", + "title": "Threat ID", + "description": "The unique identifier associated with this threat", + "order": 35 + }, + "threatStatus": { + "type": "string", + "title": "Threat Status", + "description": "The current state of the threat", + "order": 36 + }, + "threatTime": { + "type": "string", + "title": "Threat Time", + "description": "Proofpoint identified the URL as a threat at this time", + "order": 37 + }, + "threatUrl": { + "type": "string", + "title": "Threat URL", + "description": "A link to the entry on the TAP Dashboard for the particular threat", + "order": 38 + }, + "url": { + "type": "string", + "title": "URL", + "description": "The malicious URL which was clicked", + "order": 39 + }, + "userAgent": { + "type": "string", + "title": "User Agent", + "description": "The User-Agent header from the clicker's HTTP request", + "order": 40 + } + } + }, + "messageParts": { + "type": "object", + "title": "messageParts", + "properties": { + "contentType": { + "type": "string", + "title": "Content Type", + "description": "The true, detected Content-Type of the messagePart", + "order": 1 + }, + "disposition": { + "type": "string", + "title": "Disposition", + "description": "If the value is 'inline', the messagePart is a message body. If the value is 'attached', the messagePart is an attachment", + "order": 2 + }, + "filename": { + "type": "string", + "title": "Filename", + "description": "The filename of the messagePart", + "order": 3 + }, + "md5": { + "type": "string", + "title": "MD5", + "description": "The MD5 hash of the messagePart contents", + "order": 4 + }, + "oContentType": { + "type": "string", + "title": "Declared Content Type", + "description": "The declared Content-Type of the messagePart", + "order": 5 + }, + "sandboxStatus": { + "type": "string", + "title": "Sandbox Status", + "description": "The verdict returned by the sandbox during the scanning process", + "order": 6 + }, + "sha256": { + "type": "string", + "title": "SHA256", + "description": "The SHA256 hash of the messagePart contents", + "order": 7 + } + } + }, + "threatsInfoMap": { + "type": "object", + "title": "threatsInfoMap", + "properties": { + "campaignId": { + "type": "string", + "title": "Campaign ID", + "description": "An identifier for the campaign of which the threat is a member", + "order": 1 + }, + "classification": { + "type": "string", + "title": "Classification", + "description": "The category of threat found in the message", + "order": 2 + }, + "threat": { + "type": "string", + "title": "Threat", + "description": "The artifact which was condemned by Proofpoint. The malicious URL, hash of the attachment threat, or email address of the impostor sender", + "order": 3 + }, + "threatID": { + "type": "string", + "title": "Threat ID", + "description": "The unique identifier associated with this threat", + "order": 4 + }, + "threatStatus": { + "type": "string", + "title": "Threat Status", + "description": "The current state of the threat", + "order": 5 + }, + "threatTime": { + "type": "string", + "title": "Threat Time", + "description": "Proofpoint assigned the threatStatus at this time", + "order": 6 + }, + "threatType": { + "type": "string", + "title": "Threat Type", + "description": "Whether the threat was an attachment, URL, or message type", + "order": 7 + }, + "threatUrl": { + "type": "string", + "title": "Threat URL", + "description": "A link to the entry about the threat on the TAP Dashboard", + "order": 8 + } + } + } + } } """) diff --git a/plugins/proofpoint_tap/komand_proofpoint_tap/tasks/monitor_events/task.py b/plugins/proofpoint_tap/komand_proofpoint_tap/tasks/monitor_events/task.py index a9822b7694..b3a7406d45 100755 --- a/plugins/proofpoint_tap/komand_proofpoint_tap/tasks/monitor_events/task.py +++ b/plugins/proofpoint_tap/komand_proofpoint_tap/tasks/monitor_events/task.py @@ -1,7 +1,5 @@ from datetime import datetime, timedelta, timezone from hashlib import sha1 -from json import loads -from os import getenv from typing import Dict import insightconnect_plugin_runtime @@ -15,7 +13,6 @@ INITIAL_LOOKBACK_HOURS = 24 # Lookback time in hours for first run SUBSEQUENT_LOOKBACK_HOURS = 24 * 7 # Lookback time in hours for subsequent runs API_MAX_LOOKBACK = 24 * 7 # API limits to 7 days ago -SPECIFIC_DATE = getenv("SPECIFIC_DATE") class MonitorEvents(insightconnect_plugin_runtime.Task): @@ -34,83 +31,44 @@ def __init__(self): state=MonitorEventsState(), ) - def run(self, params={}, state={}, custom_config={}): # noqa: MC0001 + def run(self, params={}, state={}, custom_config={}): # pylint: disable=unused-argument + existing_state = state.copy() self.connection.client.toggle_rate_limiting = False has_more_pages = False try: + now = self.get_current_time() - timedelta(minutes=1) last_collection_date = state.get(self.LAST_COLLECTION_DATE) - self.logger.info(f"Last collection date retrieved: {last_collection_date}") + if last_collection_date: + last_collection_date = datetime.fromisoformat(last_collection_date) next_page_index = state.get(self.NEXT_PAGE_INDEX) - now = self.get_current_time() - timedelta(minutes=1) - - first_run = not state or not last_collection_date - max_allowed_lookback, backfill_date = self._apply_custom_timings(custom_config, now, first_run) + previous_logs_hashes = state.get(self.PREVIOUS_LOGS_HASHES, []) - # [PLGN-727] skip comparison check if first run of a backfill, next run should specify lookback to match. - if not backfill_date: - # Don't allow collection to go back further than <*>_LOOKBACK_HOURS > (24) hours max - # Unless otherwise defined externally and passed in via custom_config parameter. - if last_collection_date and datetime.fromisoformat(last_collection_date) < max_allowed_lookback: - last_collection_date = max_allowed_lookback.isoformat() - if next_page_index: - next_page_index = None - state.pop(self.NEXT_PAGE_INDEX) - self.logger.info(f"Last collection date reset to max lookback allowed: {last_collection_date}") + first_run = not state + is_paginating = (not first_run) and next_page_index - previous_logs_hashes = state.get(self.PREVIOUS_LOGS_HASHES, []) + api_limit = self._get_api_limit_date_time(is_paginating, API_MAX_LOOKBACK, now) + start_time = self._determine_start_time(now, first_run, is_paginating, last_collection_date) + if custom_config and first_run: + custom_api_limit, start_time = self._apply_custom_config( + now, custom_config, API_MAX_LOOKBACK, start_time + ) + self.logger.info( + f"Attempting to use custom value of {start_time} for start time and {custom_api_limit} for API limit" + ) + api_limit, _ = self._apply_api_limit(api_limit, custom_api_limit, 0, "custom_api_limit") + start_time, next_page_index = self._apply_api_limit(api_limit, start_time, next_page_index, "start_time") + end_time = self._check_end_time((start_time + timedelta(hours=1)), now).isoformat() + start_time = start_time.isoformat() query_params = {"format": "JSON"} + parameters = SiemUtils.prepare_time_range(start_time, end_time, query_params) + self.logger.info(f"Using following parameters in query: {parameters}") - if first_run: - task_start = "First run... " - first_time = now - timedelta(hours=1) - if backfill_date: - first_time = backfill_date # PLGN-727: allow backfill - task_start += f"Using custom value of {backfill_date}" - self.logger.info(task_start) - last_time = first_time + timedelta(hours=1) - state[self.LAST_COLLECTION_DATE] = last_time.isoformat() - parameters = SiemUtils.prepare_time_range(first_time.isoformat(), last_time.isoformat(), query_params) - else: - if next_page_index: - state.pop(self.NEXT_PAGE_INDEX) - self.logger.info(f"Getting the next page (page index {next_page_index}) of results...") - parameters = SiemUtils.prepare_time_range( - (datetime.fromisoformat(last_collection_date) - timedelta(hours=1)).isoformat(), - last_collection_date, - query_params, - ) - else: - self.logger.info("Subsequent run") - start_time = last_collection_date - end_time = (datetime.fromisoformat(last_collection_date) + timedelta(hours=1)).isoformat() - if end_time > now.isoformat(): - self.logger.info( - f"End time for lookup reset from {end_time} to {now.isoformat()} to avoid going out of range" - ) - end_time = now.isoformat() - - # If the resulting time interval is invalid, do not query API this time around - query_delta = datetime.fromisoformat(end_time) - datetime.fromisoformat(start_time) - if start_time >= end_time or query_delta < timedelta(minutes=1): - self.logger.info(f"Query delta: {query_delta} Time delta allowed: {timedelta(minutes=1)}") - self.logger.info( - f"Start time > End time or Insufficient interval between start time {start_time} and end time {end_time} to avoid going out of query range" - ) - self.logger.info("Do not query API this time round") - return [], state, has_more_pages, 200, None - else: - state[self.LAST_COLLECTION_DATE] = end_time - parameters = SiemUtils.prepare_time_range(start_time, end_time, query_params) - - self.logger.info(f"Parameters used to query endpoint: {parameters}") try: parsed_logs = self.parse_logs( self.connection.client.siem_action(Endpoint.get_all_threats(), parameters) ) self.logger.info(f"Retrieved {len(parsed_logs)} total parsed events in time interval") - state, has_more_pages = self.determine_page_and_state_values( - next_page_index, parsed_logs, has_more_pages, state, parameters, now - ) + current_page_index = next_page_index if next_page_index else 0 # Send back a maximum of SPLIT_SIZE events at a time (use page index to track this in state) new_unique_logs, new_logs_hashes = self.compare_hashes( @@ -118,6 +76,10 @@ def run(self, params={}, state={}, custom_config={}): # noqa: MC0001 parsed_logs[current_page_index * self.SPLIT_SIZE : (current_page_index + 1) * self.SPLIT_SIZE], ) + state, has_more_pages = self.determine_page_and_state_values( + next_page_index, parsed_logs, has_more_pages, state, parameters, now + ) + # PLGN-811: reduce the number of pages of hashes we store to prevent hitting DynamoDB limits state[self.PREVIOUS_LOGS_HASHES] = ( [*previous_logs_hashes[-self.SPLIT_SIZE :], *new_logs_hashes] @@ -125,17 +87,31 @@ def run(self, params={}, state={}, custom_config={}): # noqa: MC0001 else new_logs_hashes ) + state[self.LAST_COLLECTION_DATE] = end_time + self.logger.info(f"Retrieved {len(new_unique_logs)} events. Returning has_more_pages={has_more_pages}") return new_unique_logs, state, has_more_pages, 200, None except ApiException as error: + if "The requested interval is too short" in error.data: + self.logger.info("The requested interval is too short. Retrying in next run.") + return [], existing_state, False, 200, None + if "The requested start time is too far into the past." in error.data: + self.logger.info("The requested start time is too far into the past. Resetting state.") + return [], {}, False, 200, None + self.logger.info(f"API Exception occurred: status_code: {error.status_code}, error: {error}") state[self.PREVIOUS_LOGS_HASHES] = [] - return [], state, False, error.status_code, error + return [], existing_state, False, error.status_code, error except Exception as error: self.logger.info(f"Exception occurred in monitor events task: {error}", exc_info=True) - state[self.PREVIOUS_LOGS_HASHES] = [] - return [], state, has_more_pages, 500, PluginException(preset=PluginException.Preset.UNKNOWN, data=error) + return ( + [], + existing_state, + has_more_pages, + 500, + PluginException(preset=PluginException.Preset.UNKNOWN, data=error), + ) def determine_page_and_state_values( self, @@ -157,10 +133,14 @@ def determine_page_and_state_values( ) else: end_str, now_str = query_params.get("interval").split("/")[1], now.isoformat().replace("z", "") + if now_str != end_str: # we want to force more pages if the end query time is not 'now' self.logger.info("Setting has more pages to True as interval params is not querying until now") + if state.get(self.NEXT_PAGE_INDEX): + state.pop(self.NEXT_PAGE_INDEX) has_more_pages = True - + if not has_more_pages and state.get(self.NEXT_PAGE_INDEX): + state.pop(self.NEXT_PAGE_INDEX) return state, has_more_pages @staticmethod @@ -191,7 +171,7 @@ def prepare_log(self, log: dict, value: str) -> dict: @staticmethod def sha1(log: dict) -> str: - hash_ = sha1() # nosec B303 + hash_ = sha1(usedforsecurity=False) # nosec B303 for key, value in log.items(): hash_.update(f"{key}{value}".encode("utf-8")) return hash_.hexdigest() @@ -210,44 +190,50 @@ def compare_hashes(self, previous_logs_hashes: list, new_logs: list): ) return logs_to_return, new_logs_hashes - def _apply_custom_timings( - self, custom_config: Dict[str, Dict], now: datetime, first_run: bool - ) -> (datetime, datetime): - """ - If a custom_config is supplied to the plugin we can modify our timing logic. - Lookback is applied for the first run with no state. - Default is applied to the max look back hours being enforced on the parameters. - """ - if first_run: - default_lookback_hours = INITIAL_LOOKBACK_HOURS + def _check_end_time(self, end_time, now): + end_time = min(end_time, now) + return end_time + + def _get_api_limit_date_time(self, is_paginating, limit_delta_hours, now): + if is_paginating: + api_limit_date_time = now - timedelta(hours=limit_delta_hours) + timedelta(minutes=5) else: - default_lookback_hours = SUBSEQUENT_LOOKBACK_HOURS + api_limit_date_time = now - timedelta(hours=limit_delta_hours) + timedelta(minutes=15) + return api_limit_date_time + + def _apply_api_limit(self, api_limit, time_to_check, next_page_index, time_name): + if api_limit >= time_to_check: + self.logger.info(f"Supplied a {time_name} further than allowed. Moving this to {api_limit}") + time_to_check = api_limit + if time_to_check == "start_time": + next_page_index = 0 + return time_to_check, next_page_index + + def _apply_custom_config(self, now, custom_config, default_lookback_hours, start_time): cutoff_values = custom_config.get("cutoff", {"hours": default_lookback_hours}) cutoff_date, cutoff_hours = cutoff_values.get("date", {}), cutoff_values.get("hours") if cutoff_date: - max_lookback = datetime(**cutoff_date, tzinfo=timezone.utc) + cutoff_date_time = datetime(**cutoff_date, tzinfo=timezone.utc) else: - max_lookback = now - timedelta(hours=cutoff_hours) + cutoff_date_time = now - timedelta(hours=cutoff_hours) # if using env var we need to convert to dict from string, CPS API will return us a dict. - env_var_date = loads(SPECIFIC_DATE) if SPECIFIC_DATE else None - specific_date = custom_config.get("lookback", env_var_date) - self.logger.info( - "Plugin task execution received the following date values. " - f"Max lookback={max_lookback}, specific date={specific_date}" - ) - - # Ensure we never pass values that exceed the API limit of Proofpoint. - # But allow 5 minutes latency window as we actually have now = now - 1 minute - api_limit_date = (now - timedelta(hours=API_MAX_LOOKBACK)) + timedelta(minutes=5) - if api_limit_date > max_lookback: - max_lookback = api_limit_date - self.logger.info(f"**Supplied a max lookback further than allowed. Moving this to {api_limit_date}") - + specific_date = custom_config.get("lookback") if specific_date: specific_date = datetime(**specific_date, tzinfo=timezone.utc) - if api_limit_date > specific_date: - self.logger.info(f"**Supplied a specific date further than allowed. Moving this to {api_limit_date}") - specific_date = api_limit_date + else: + specific_date = start_time + return cutoff_date_time, specific_date - return max_lookback, specific_date + def _determine_start_time(self, now, first_run, is_paginating, last_collection_date) -> int: + if first_run: + integration_status = "First run" + start_time = now - timedelta(hours=1) + elif is_paginating: + integration_status = "Pagination run" + start_time = last_collection_date - timedelta(hours=1) + else: + integration_status = "Subsequent run" + start_time = last_collection_date + self.logger.info(f"Integration status: {integration_status}") + return start_time diff --git a/plugins/proofpoint_tap/plugin.spec.yaml b/plugins/proofpoint_tap/plugin.spec.yaml index 141dfe4e2c..519ab69ed1 100644 --- a/plugins/proofpoint_tap/plugin.spec.yaml +++ b/plugins/proofpoint_tap/plugin.spec.yaml @@ -6,12 +6,12 @@ title: Proofpoint TAP description: "[Proofpoint Targeted Attack Protection](https://www.proofpoint.com/us/products/ransomware-and-targeted-attack-protection) (TAP) helps you stay ahead of attackers with an innovative approach that detects, analyzes and blocks advanced threats before they reach your inbox. This plugin enables users to parse TAP alerts" -version: 4.1.10 +version: 4.1.11 connection_version: 4 supported_versions: ["Proofpoint TAP API v2", "Tested on 2024-06-04"] sdk: type: slim - version: 6.1.0 + version: 6.2.3 user: nobody vendor: rapid7 support: community @@ -46,6 +46,7 @@ links: references: - "[Proofpoint TAP](https://www.proofpoint.com/us/products/ransomware-and-targeted-attack-protection)" version_history: + - "4.1.11 - SDK Bump to 6.2.3 | `Monitor Events` task updated to handle requests outside of Proofpoint TAP API limits" - "4.1.10 - SDK Bump to 6.1.0 | Task Connection test added Update `Parse Tap Alert` to utilise BeautifulSoup resolving vulnerabilities" - "4.1.9 - Update connection fields to be required." - "4.1.8 - Include SDK 5.4.9 | Task - Use cutoff of 7 days for first query, use cutoff of 1 hours for subsequent queries" diff --git a/plugins/proofpoint_tap/setup.py b/plugins/proofpoint_tap/setup.py index 19b7c4abce..655476e42d 100755 --- a/plugins/proofpoint_tap/setup.py +++ b/plugins/proofpoint_tap/setup.py @@ -3,7 +3,7 @@ setup(name="proofpoint_tap-rapid7-plugin", - version="4.1.10", + version="4.1.11", description="[Proofpoint Targeted Attack Protection](https://www.proofpoint.com/us/products/ransomware-and-targeted-attack-protection) (TAP) helps you stay ahead of attackers with an innovative approach that detects, analyzes and blocks advanced threats before they reach your inbox. This plugin enables users to parse TAP alerts", author="rapid7", author_email="", diff --git a/plugins/proofpoint_tap/unit_test/expected/monitor_events_bad_request.json.exp b/plugins/proofpoint_tap/unit_test/expected/monitor_events_bad_request.json.exp index 42a941ab3a..2f67bae487 100644 --- a/plugins/proofpoint_tap/unit_test/expected/monitor_events_bad_request.json.exp +++ b/plugins/proofpoint_tap/unit_test/expected/monitor_events_bad_request.json.exp @@ -2,7 +2,8 @@ "events": [], "state": { "last_collection_date": "2023-04-03T07:59:00+00:00", - "previous_logs_hashes": [] + "next_page_index": 2, + "previous_logs_hashes": ["df05d563ebc4c2044954cd88b4debf63d5845b78", "8cebe23f0ce9d4bf19d2a2f700b372108d98b45d"] }, "has_more_pages": false, "status_code": 400 diff --git a/plugins/proofpoint_tap/unit_test/expected/monitor_events_server_error.json.exp b/plugins/proofpoint_tap/unit_test/expected/monitor_events_server_error.json.exp index 65ce3f9fcf..8741ae9ee1 100644 --- a/plugins/proofpoint_tap/unit_test/expected/monitor_events_server_error.json.exp +++ b/plugins/proofpoint_tap/unit_test/expected/monitor_events_server_error.json.exp @@ -2,7 +2,8 @@ "events": [], "state": { "last_collection_date": "2023-04-04T05:00:00+00:00", - "previous_logs_hashes": [] + "next_page_index": 2, + "previous_logs_hashes": ["df05d563ebc4c2044954cd88b4debf63d5845b78", "8cebe23f0ce9d4bf19d2a2f700b372108d98b45d"] }, "has_more_pages": false, "status_code": 500 diff --git a/plugins/proofpoint_tap/unit_test/test_monitor_events.py b/plugins/proofpoint_tap/unit_test/test_monitor_events.py index 8ca546ddd2..b281b6417f 100644 --- a/plugins/proofpoint_tap/unit_test/test_monitor_events.py +++ b/plugins/proofpoint_tap/unit_test/test_monitor_events.py @@ -1,18 +1,18 @@ import sys import os +sys.path.append(os.path.abspath("../")) + + from unittest.mock import patch from komand_proofpoint_tap.tasks import MonitorEvents from test_util import Util from unittest import TestCase from parameterized import parameterized -from datetime import datetime, timezone, timedelta -from json import loads +from datetime import datetime, timezone from jsonschema import validate -sys.path.append(os.path.abspath("../")) - ENV_VALUE = '{"year": 2024, "month": 1, "day": 27, "hour": 0, "minute": 0, "second": 0}' ENV_VALUE_2 = '{"year": 2024, "month": 2, "day": 20, "hour": 12, "minute": 0, "second": 0}' TEST_PAGE_SIZE = 2 @@ -73,9 +73,29 @@ def test_monitor_events(self, _mock_request, _mock_get_time, _test_name, current state=current_state, custom_config={} ) validate(actual, self.action.output.schema) - self.assertEqual(actual, expected.get("events")) - self.assertEqual(actual_state, expected.get("state")) - self.assertEqual(has_more_pages, expected.get("has_more_pages")) + self.assertEqual(expected.get("events"), actual) + self.assertEqual(expected.get("state"), actual_state) + self.assertEqual(expected.get("has_more_pages"), has_more_pages) + + @parameterized.expand( + [ + [ + "sub_30_seconds", + {"last_collection_date": "2023-04-03T05:59:00+00:00"}, + {"state": {"last_collection_date": "2023-04-03T05:59:00+00:00"}, "has_more_pages": False}, + ], + [ + "beyond_api_limit", + {"last_collection_date": "2023-04-03T04:59:00+00:00"}, + {"state": {}, "has_more_pages": False}, + ], + ] + ) + def test_monitor_events_handle_api_error(self, _mock_request, _mock_get_time, _test_name, state, expected): + actual, actual_state, has_more_pages, status_code, error = self.action.run(state=state, custom_config={}) + self.assertEqual([], actual) + self.assertEqual(expected.get("state"), actual_state) + self.assertEqual(expected.get("has_more_pages"), has_more_pages) def test_monitor_events_last_page_not_queried_to_now(self, _mock_request, mock_time): """ @@ -92,113 +112,37 @@ def test_monitor_events_last_page_not_queried_to_now(self, _mock_request, mock_t ) actual, actual_state, has_more_pages, status_code, error = self.action.run(state=current_state) - self.assertEqual(actual, expected.get("events")) - self.assertEqual(actual_state, expected.get("state")) - self.assertEqual(has_more_pages, True) # this is different to enforce another call to the third party API + self.assertEqual(expected.get("events"), actual) + self.assertEqual(expected.get("state"), actual_state) + self.assertEqual(True, has_more_pages) # this is different to enforce another call to the third party API # add in extra failsafe that the `.exp' file has not changed this has_more_pages to True - self.assertNotEqual(has_more_pages, expected.get("has_more_pages")) - - @patch("komand_proofpoint_tap.tasks.monitor_events.task.SPECIFIC_DATE", new=ENV_VALUE) - @patch("logging.Logger.info") - def test_monitor_events_with_env_variable_empty_state(self, mock_logger, _mock_request, _mock_time): - # When we inject the env variable into the pod we should change the time we retrieve events from. - response, state, has_more_pages, status_code, _error = self.action.run(state={}, custom_config={}) - - # we should have logged using env var - self.assertIn("Using custom value of", mock_logger.call_args_list[2][0][0]) - - # state last time should be injected env var + 1 hour - self.assertEqual("2024-01-27T01:00:00+00:00", state["last_collection_date"]) - - # Split the logs being returned by the SPLIT_SIZE variable - self.assertEqual(TEST_PAGE_SIZE, len(response)) - - # We should expect next_page = True because we cut the results and NEXT_PAGE to be set - self.assertTrue(has_more_pages) - self.assertEqual(1, state["next_page_index"]) - - @patch("komand_proofpoint_tap.tasks.monitor_events.task.SPECIFIC_DATE", new=ENV_VALUE) - def test_monitor_events_with_env_variable_existing_state(self, _mock_request, _mock_time): - # Although we inject the env var because we have a state saved we can ignore and continue our usual logic - # use this date to avoid cut off logic from mocked current time. - existing_state = {"last_collection_date": "2023-04-04T01:00:00+00:00"} - response, state, has_more_pages, status_code, _error = self.action.run(state=existing_state, custom_config={}) - - # state last time should be last_collected_date + 1 hour - self.assertEqual("2023-04-04T02:00:00+00:00", state["last_collection_date"]) - - # Split the logs being returned by the SPLIT_SIZE variable - self.assertEqual(TEST_PAGE_SIZE, len(response)) - - @patch("komand_proofpoint_tap.tasks.monitor_events.task.SPECIFIC_DATE", new=ENV_VALUE_2) - @patch("logging.Logger.info") - def test_monitor_events_uses_custom_config_data(self, mock_logger, _mock_request, _mock_time): - # Even though we have an environment variable specified we use the custom_config passed from CPS - full_lookback_value = loads(ENV_VALUE) - cps_config = {"lookback": full_lookback_value} # pass this as a dict as our API will supply as it like this - response, state, has_more_pages, status_code, _error = self.action.run({}, {}, cps_config) - - # In this example we have an env var specified but this is ignored to use the value in the custom_config - date = datetime(**full_lookback_value) - self.assertIn(f"Using custom value of {date}", mock_logger.call_args_list[2][0][0]) - - # state last time should be custom config value + 1 hour - self.assertEqual("2024-01-27T01:00:00+00:00", state["last_collection_date"]) - self.assertEqual(1, state["next_page_index"]) - - # second time we call we will then pass no lookback from SDK level and now use the cutoff date value - with patch("komand_proofpoint_tap.tasks.monitor_events.task.SPECIFIC_DATE", new=""): # clear env var - cps_config = {"cutoff": {"date": full_lookback_value}} - _resp, state_2, _more_pages, _status_code, _error = self.action.run({}, state.copy(), cps_config) - - # Due to pagination the last collection date will not change in our state - self.assertEqual(state["last_collection_date"], state_2["last_collection_date"]) - self.assertEqual(2, state_2["next_page_index"]) # however we should be on the second page - - # Third time we call - we increase page size to return more_pages=False from the task - with patch("komand_proofpoint_tap.tasks.monitor_events.task.MonitorEvents.SPLIT_SIZE", new=40000): - _resp, state_3, more_pages, _status_code, _error = self.action.run({}, state_2.copy(), cps_config) - self.assertEqual(state_2["last_collection_date"], state_3["last_collection_date"]) - self.assertEqual(True, more_pages) # finished 'pages' between time A-B, we haven't caught up to now - - # Fourth time task is called - custom_config is 'normal' and we do normal time calculations / cut off. - new_now = datetime.strptime("2024-03-25T08:00:00", "%Y-%m-%dT%H:%M:%S").replace(tzinfo=timezone.utc) - with patch( - "komand_proofpoint_tap.tasks.monitor_events.task.MonitorEvents.get_current_time", return_value=new_now - ): - mock_logger.reset_mock() - config = {"cutoff": {"hours": 24}} # config reverted to normal behaviour - _resp, _state_4, _more_pages, _status_code, _error = self.action.run({}, state_3.copy(), config) - # get lookback time as task.py does with using now - (lookback + 1 minute latency delay) - lookback = new_now - timedelta(hours=config["cutoff"]["hours"], minutes=1) - lookback_msg = f"Last collection date reset to max lookback allowed: {lookback.isoformat()}" - self.assertEqual(lookback_msg, mock_logger.call_args_list[2][0][0]) + self.assertNotEqual(expected.get("has_more_pages"), has_more_pages) @parameterized.expand( [ [ "enforced_during_backfill_config", [{}, {"lookback": {"year": 2023, "month": 3, "day": 4}}], - ["Supplied a specific date further than allowed. Moving this to 2023-03-28 08:04:00", ""], + ["Supplied a start_time further than allowed. Moving this to 2023-03-28 08:14:00", ""], ], [ "enforced_during_lookback_config_date", [{}, {"cutoff": {"date": {"year": 2023, "month": 3, "day": 4}}}], - ["Supplied a max lookback further than allowed. Moving this to 2023-03-28 08:04:00", ""], + ["Supplied a custom_api_limit further than allowed. Moving this to 2023-03-28 08:14:00", ""], ], [ "enforced_during_lookback_config_hours", [{}, {"cutoff": {"hours": 24 * 10}}], # 10 days lookback - ["Supplied a max lookback further than allowed. Moving this to 2023-03-28 08:04:00", ""], + ["Supplied a custom_api_limit further than allowed. Moving this to 2023-03-28 08:14:00", ""], ], [ - "enforced_during_a_state_passed_value", + "enforced_both_lookback_date_and_cutoff_hours_config", # customer has been paused since 29th March, and have an override cutoff hours to 8 - [{"last_collection_date": "2023-03-28T07:00:00.406053+00:00"}, {"cutoff": {"hours": 24 * 8}}], + [{}, {"lookback": {"year": 2023, "month": 3, "day": 4}, "cutoff": {"hours": 24 * 8}}], [ - "Supplied a max lookback further than allowed. Moving this to 2023-03-28 08:04:00", - "Last collection date reset to max lookback allowed", + "Supplied a custom_api_limit further than allowed. Moving this to 2023-03-28 08:14:00", + "Supplied a start_time further than allowed. Moving this to 2023-03-28 08:14:00", ], ], ] @@ -213,7 +157,6 @@ def test_api_max_looback_enforced( custom_config_logger, saved_state_logger = logger_msgs _resp, _state, _has_more_pages, _status_code, _error = self.action.run({}, state, config) - self.assertIn(custom_config_logger, mock_logger.call_args_list[2][0][0]) + self.assertTrue(any(custom_config_logger in call[0][0] for call in mock_logger.call_args_list[0:])) - if saved_state_logger: - self.assertIn(saved_state_logger, mock_logger.call_args_list[3][0][0]) + self.assertTrue(any(saved_state_logger in call[0][0] for call in mock_logger.call_args_list[0:])) diff --git a/plugins/proofpoint_tap/unit_test/test_util.py b/plugins/proofpoint_tap/unit_test/test_util.py index 9e3436da92..889815b5a4 100644 --- a/plugins/proofpoint_tap/unit_test/test_util.py +++ b/plugins/proofpoint_tap/unit_test/test_util.py @@ -40,10 +40,10 @@ def read_file_to_dict(filename: str) -> dict: @staticmethod def mocked_requests_get(*args, **kwargs): class MockResponse: - def __init__(self, filename, status_code): + def __init__(self, filename, status_code, text="This is some error text"): self.filename = filename self.status_code = status_code - self.text = "This is some error text" + self.text = text def json(self): if self.filename == "error": @@ -152,6 +152,10 @@ def json(self): return MockResponse("blocked_clicks_without_time_start_end", 200) if "siem/all" in url: + if interval == "2023-04-03T05:59:00+00:00/2023-04-03T06:59:00+00:00": + return MockResponse("", 400, "The requested interval is too short.") + if interval == "2023-04-03T04:59:00+00:00/2023-04-03T05:59:00+00:00": + return MockResponse("", 400, "The requested start time is too far into the past.") if interval == "2023-04-03T06:59:00+00:00/2023-04-03T07:59:00+00:00": return MockResponse("", 400) # input state is 07:59 but we lookback an hour because of page state if interval == "2023-04-04T04:00:00+00:00/2023-04-04T05:00:00+00:00":