Timezone-aware datetime objects, disable_metrics_collection bugfix (#…

…408) * Minor updates * Update CHANGELOG.rst * Timezone-aware datetime objects
4dn-dcic · Jul 16, 2024 · d54b2aa · d54b2aa
1 parent e27ca07
commit d54b2aa
Show file tree

Hide file tree

Showing 10 changed files with 40 additions and 32 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -3,6 +3,14 @@
 Change Log
 ==========
 
+5.4.1
+=====
+
+* Disable idle instance check when `disable_metrics_collection` is active
+* Switch to timezone-aware datetime objects everywhere. In particular, replace the deprecated `datetime.utcnow()` with `datetime.now(timezone.utc)`.
+* Update docs
+
+
 5.4.0
 =====
 

diff --git a/docs/execution_json.rst b/docs/execution_json.rst
@@ -572,9 +572,10 @@ The ``config`` field describes execution configuration.
     - type of EBS (e.g. ``gp3``, ``gp2``, ``io1``, ``io2``)
     - optional (default: gp3 (version >= ``1.0.0``) or gp2 (version < ``1.0.0``))
 
-:disable_metrics_collection:
+:disable_metrics_collection (**Not recommended**):
     - <true|false>, default: false
     - If true, the cloudwatch agent is not installed on the EC2 and CPU/memory/storage metrics won't be collected and sent to AWS CloudWatch. Disabling metrics collection can reduce CloudWatch associated costs.
+    - If true, Tibanna's check for idle or stalled instances will be disabled. Please monitor your runs accordingly.
 
 :cloudwatch_dashboard:
     - **This option is now deprecated.**

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "tibanna"
-version = "5.4.0"
+version = "5.4.1"
 description = "Tibanna runs portable pipelines (in CWL/WDL) on the AWS Cloud."
 authors = ["4DN-DCIC Team <[email protected]>"]
 license = "MIT"

diff --git a/tibanna/check_task.py b/tibanna/check_task.py
@@ -152,6 +152,11 @@ def run(self):
         raise StillRunningException("job %s still running" % jobid)
 
     def terminate_idle_instance(self, jobid, instance_id, cpu, ebs_read):
+
+        # Don't check for idle instance if we don't collect any metrics
+        if self.input_json['config'].get('disable_metrics_collection'):
+            return
+
         if not cpu or cpu < 1.0:
             # the instance wasn't terminated - otherwise it would have been captured in the previous error.
             if not ebs_read or ebs_read < 1000:  # minimum 1kb
@@ -161,18 +166,15 @@ def terminate_idle_instance(self, jobid, instance_id, cpu, ebs_read):
                     public_postrun_json = self.input_json['config'].get('public_postrun_json', False)
                     self.handle_postrun_json(bucket_name, jobid, self.input_json, public_read=public_postrun_json) # We need to record the end time
                     boto3.client('ec2').terminate_instances(InstanceIds=[instance_id])
-                    errmsg = (
-                        "Nothing has been running for the past hour for job %s,"
-                        "(CPU utilization %s and EBS read %s bytes)."
-                    ) %  (jobid, str(cpu), str(ebs_read))
-                    raise EC2IdleException(errmsg)
                 except Exception as e:
-                    errmsg = (
-                        "Nothing has been running for the past hour for job %s,"
-                        "but cannot terminate the instance - cpu utilization (%s) : %s"
-                    ) %  (jobid, str(cpu), str(e))
+                    errmsg = (f"Nothing has been running for the past hour for job {jobid}",
+                              f", but instance could not be terminated. Error: {str(e)}")
                     logger.error(errmsg)
                     raise EC2IdleException(errmsg)
+
+                errmsg = (f"Nothing has been running for the past hour for job {jobid},",
+                        f"(CPU utilization {str(cpu)} and EBS read {str(ebs_read)} bytes).")
+                raise EC2IdleException(errmsg)
 
     def handle_postrun_json(self, bucket_name, jobid, input_json, public_read=False):
         postrunjson = "%s.postrun.json" % jobid

diff --git a/tibanna/core.py b/tibanna/core.py
@@ -9,7 +9,7 @@
 import shutil
 import subprocess
 import webbrowser
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from dateutil.tz import tzutc
 from uuid import uuid4, UUID
 from types import ModuleType
@@ -624,7 +624,7 @@ def rerun_many(self, sfn=None, stopdate='13Feb2018', stophour=13,
             sfn = self.default_stepfunction_name
         stophour = stophour + offset
         stoptime = stopdate + ' ' + str(stophour) + ':' + str(stopminute)
-        stoptime_in_datetime = datetime.strptime(stoptime, '%d%b%Y %H:%M')
+        stoptime_in_datetime = datetime.strptime(stoptime, '%d%b%Y %H:%M').replace(tzinfo=timezone.utc)
         client = boto3.client('stepfunctions')
         sflist = client.list_executions(stateMachineArn=STEP_FUNCTION_ARN(sfn), statusFilter=status)
         k = 0
@@ -1018,7 +1018,7 @@ def plot_metrics(self, job_id, sfn=None, directory='.', open_browser=True, force
             if hasattr(job, 'end_time_as_datetime') and job.end_time_as_datetime:
                 endtime = job.end_time_as_datetime
             else:
-                endtime = datetime.utcnow()
+                endtime = datetime.now(timezone.utc)
         if hasattr(job, 'filesystem') and job.filesystem:
             filesystem = job.filesystem
         else:
@@ -1049,7 +1049,7 @@ def plot_metrics(self, job_id, sfn=None, directory='.', open_browser=True, force
                             job_complete = False  # still running
                     else:
                         # wait at least 5 min after the start time to be sure the instance has finished starting
-                        if (datetime.utcnow() - starttime) / timedelta(minutes=1) < 5:
+                        if (datetime.now(timezone.utc) - starttime) / timedelta(minutes=1) < 5:
                             raise Exception("the instance is still setting up. " +
                                             "Wait a few seconds/minutes and try again.")
                         else:

diff --git a/tibanna/cw_utils.py b/tibanna/cw_utils.py
@@ -11,8 +11,7 @@
     METRICS_COLLECTION_INTERVAL,
     S3_ENCRYT_KEY_ID
 )
-from datetime import datetime
-from datetime import timedelta
+from datetime import datetime, timezone, timedelta
 import json, math
 
 
@@ -29,9 +28,9 @@ class TibannaResource(object):
 
     @classmethod
     def convert_timestamp_to_datetime(cls, timestamp):
-        return datetime.strptime(timestamp, cls.timestamp_format)
+        return datetime.strptime(timestamp, cls.timestamp_format).replace(tzinfo=timezone.utc)
 
-    def __init__(self, instance_id, filesystem, starttime, endtime=datetime.utcnow(), cost_estimate = 0.0, cost_estimate_type = "NA"):
+    def __init__(self, instance_id, filesystem, starttime, endtime=datetime.now(timezone.utc), cost_estimate = 0.0, cost_estimate_type = "NA"):
         """All the Cloudwatch metrics are retrieved and stored at the initialization.
         :param instance_id: e.g. 'i-0167a6c2d25ce5822'
         :param filesystem: e.g. "/dev/xvdb", "/dev/nvme1n1"

diff --git a/tibanna/job.py b/tibanna/job.py
@@ -1,6 +1,6 @@
 import boto3
 import json
-from datetime import datetime
+from datetime import datetime, timezone
 from . import create_logger
 from tibanna import dd_utils
 from .vars import (
@@ -280,7 +280,7 @@ def get_info_from_dd(ddres):
 
     @staticmethod
     def add_to_dd(job_id, execution_name, sfn, logbucket, verbose=True):
-        time_stamp = datetime.strftime(datetime.utcnow(), '%Y%m%d-%H:%M:%S-UTC')
+        time_stamp = datetime.strftime(datetime.now(timezone.utc), '%Y%m%d-%H:%M:%S-UTC')
         dydb = boto3.client('dynamodb', region_name=AWS_REGION)
         try:
             # first check the table exists

diff --git a/tibanna/pricing_utils.py b/tibanna/pricing_utils.py
@@ -7,7 +7,7 @@
 import botocore
 import re
 from . import create_logger
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from .utils import (
     does_key_exist,
     read_s3,
@@ -30,14 +30,14 @@ def get_cost(postrunjson, job_id):
     job = postrunjson.Job
 
     def reformat_time(t, delta):
-        d = datetime.strptime(t, '%Y%m%d-%H:%M:%S-UTC') + timedelta(days=delta)
+        d = datetime.strptime(t, '%Y%m%d-%H:%M:%S-UTC').replace(tzinfo=timezone.utc) + timedelta(days=delta)
         return d.strftime("%Y-%m-%d")
 
     start_time = reformat_time(job.start_time, -1)  # give more room
     if(job.end_time != None):
         end_time = reformat_time(job.end_time, 1)
     else:
-        end_time = datetime.utcnow() + timedelta(days=1) # give more room
+        end_time = datetime.now(timezone.utc) + timedelta(days=1) # give more room
         end_time = end_time.strftime("%Y-%m-%d")
 
     billing_args = {'Filter': {'Tags': {'Key': 'Name', 'Values': ['awsem-' + job_id]}},
@@ -73,8 +73,8 @@ def get_cost_estimate(postrunjson, ebs_root_type = "gp3", aws_price_overwrite =
         logger.warning("job.end_time not available. Cannot calculate estimated cost.")
         return 0.0, "NA"
 
-    job_start = datetime.strptime(job.start_time, '%Y%m%d-%H:%M:%S-UTC')
-    job_end = datetime.strptime(job.end_time, '%Y%m%d-%H:%M:%S-UTC')
+    job_start = datetime.strptime(job.start_time, '%Y%m%d-%H:%M:%S-UTC').replace(tzinfo=timezone.utc)
+    job_end = datetime.strptime(job.end_time, '%Y%m%d-%H:%M:%S-UTC').replace(tzinfo=timezone.utc)
     job_duration = (job_end - job_start).seconds / 3600.0 # in hours
 
     if(not job.instance_type):
@@ -378,7 +378,7 @@ def get_cost_estimate(postrunjson, ebs_root_type = "gp3", aws_price_overwrite =
                 ) * job_duration / (24.0*30.0)
             estimated_cost = estimated_cost + ebs_iops_cost
 
-        time_since_run = (datetime.utcnow() - job_end).total_seconds() / (3600 * 24) # days
+        time_since_run = (datetime.now(timezone.utc) - job_end).total_seconds() / (3600 * 24) # days
         estimation_type = "retrospective estimate" if time_since_run > 10 else "immediate estimate"
 
         return estimated_cost, estimation_type

diff --git a/tibanna/top.py b/tibanna/top.py
@@ -320,7 +320,7 @@ def timestamps_as_minutes(self, timestamp_start):
 
     @classmethod
     def as_datetime(cls, timestamp):
-        return datetime.datetime.strptime(timestamp, cls.timestamp_format)
+        return datetime.datetime.strptime(timestamp, cls.timestamp_format).replace(tzinfo=datetime.timezone.utc)
 
     @staticmethod
     def wrap_in_double_quotes(string):

diff --git a/tibanna/vars.py b/tibanna/vars.py
@@ -1,8 +1,7 @@
 import os
 import boto3
 import sys
-from datetime import datetime
-from dateutil.tz import tzutc
+from datetime import datetime, timezone
 from ._version import __version__
 from . import create_logger
 
@@ -137,8 +136,7 @@
 
 
 def PARSE_AWSEM_TIME(t_str):
-  t = datetime.strptime(t_str, AWSEM_TIME_STAMP_FORMAT)
-  return t.replace(tzinfo=tzutc())
+  return datetime.strptime(t_str, AWSEM_TIME_STAMP_FORMAT).replace(tzinfo=timezone.utc)
 
 
 # EBS mount path for cloudwatch metric collection