forked from DIRACGrid/DIRAC
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
sweep: DIRACGrid#7289 fix: getting batch system info from local cfg
- Loading branch information
Showing
15 changed files
with
368 additions
and
286 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
106 changes: 106 additions & 0 deletions
106
src/DIRAC/Resources/Computing/BatchSystems/TimeLeft/MJFResourceUsage.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
""" The Machine/Job Features TimeLeft utility interrogates the MJF values | ||
for the current CPU and Wallclock consumed, as well as their limits. | ||
""" | ||
import os | ||
import time | ||
from urllib.request import urlopen | ||
|
||
from DIRAC import S_OK, S_ERROR | ||
from DIRAC.Resources.Computing.BatchSystems.TimeLeft.ResourceUsage import ResourceUsage | ||
|
||
|
||
class MJFResourceUsage(ResourceUsage): | ||
""" | ||
This is the MJF plugin of the TimeLeft Utility | ||
""" | ||
|
||
############################################################################# | ||
def __init__(self, jobID, parameters): | ||
"""Standard constructor""" | ||
super().__init__("MJF", jobID, parameters) | ||
|
||
self.log.verbose(f"jobID={self.jobID}, queue={self.queue}") | ||
self.startTime = time.time() | ||
|
||
############################################################################# | ||
def getResourceUsage(self): | ||
"""Returns S_OK with a dictionary containing the entries CPU, CPULimit, | ||
WallClock, WallClockLimit, and Unit for current slot. | ||
""" | ||
|
||
cpuLimit = None | ||
wallClockLimit = None | ||
wallClock = None | ||
jobStartSecs = None | ||
|
||
jobFeaturesPath = None | ||
machineFeaturesPath = None | ||
|
||
# Getting info from JOBFEATURES | ||
try: | ||
# We are not called from TimeLeft.py if these are not set | ||
jobFeaturesPath = os.environ["JOBFEATURES"] | ||
except KeyError: | ||
self.log.warn("$JOBFEATURES is not set") | ||
|
||
if jobFeaturesPath: | ||
try: | ||
wallClockLimit = int(urlopen(jobFeaturesPath + "/wall_limit_secs").read()) | ||
self.log.verbose("wallClockLimit from JF = %d" % wallClockLimit) | ||
except ValueError: | ||
self.log.warn("/wall_limit_secs is unreadable") | ||
except OSError as e: | ||
self.log.exception("Issue with $JOBFEATURES/wall_limit_secs", lException=e) | ||
self.log.warn("Could not determine cpu limit from $JOBFEATURES/wall_limit_secs") | ||
|
||
try: | ||
jobStartSecs = int(urlopen(jobFeaturesPath + "/jobstart_secs").read()) | ||
self.log.verbose("jobStartSecs from JF = %d" % jobStartSecs) | ||
except ValueError: | ||
self.log.warn("/jobstart_secs is unreadable, setting a default") | ||
jobStartSecs = self.startTime | ||
except OSError as e: | ||
self.log.exception("Issue with $JOBFEATURES/jobstart_secs", lException=e) | ||
self.log.warn("Can't open jobstart_secs, setting a default") | ||
jobStartSecs = self.startTime | ||
|
||
try: | ||
cpuLimit = int(urlopen(jobFeaturesPath + "/cpu_limit_secs").read()) | ||
self.log.verbose("cpuLimit from JF = %d" % cpuLimit) | ||
except ValueError: | ||
self.log.warn("/cpu_limit_secs is unreadable") | ||
except OSError as e: | ||
self.log.exception("Issue with $JOBFEATURES/cpu_limit_secs", lException=e) | ||
self.log.warn("Could not determine cpu limit from $JOBFEATURES/cpu_limit_secs") | ||
|
||
wallClock = int(time.time()) - jobStartSecs | ||
|
||
# Getting info from MACHINEFEATURES | ||
try: | ||
# We are not called from TimeLeft.py if these are not set | ||
machineFeaturesPath = os.environ["MACHINEFEATURES"] | ||
except KeyError: | ||
self.log.warn("$MACHINEFEATURES is not set") | ||
|
||
if machineFeaturesPath and jobStartSecs: | ||
try: | ||
shutdownTime = int(urlopen(machineFeaturesPath + "/shutdowntime").read()) | ||
self.log.verbose("shutdownTime from MF = %d" % shutdownTime) | ||
if int(time.time()) + wallClockLimit > shutdownTime: | ||
# reduce wallClockLimit if would overrun shutdownTime | ||
wallClockLimit = shutdownTime - jobStartSecs | ||
except ValueError: | ||
self.log.warn("/shutdowntime is unreadable") | ||
except OSError as e: | ||
self.log.warn("Issue with $MACHINEFEATURES/shutdowntime", repr(e)) | ||
self.log.warn("Could not determine a shutdowntime value from $MACHINEFEATURES/shutdowntime") | ||
|
||
# Reporting | ||
consumed = {"CPU": None, "CPULimit": cpuLimit, "WallClock": wallClock, "WallClockLimit": wallClockLimit} | ||
if cpuLimit and wallClock and wallClockLimit: | ||
self.log.verbose(f"MJF consumed: {str(consumed)}") | ||
return S_OK(consumed) | ||
self.log.info("Could not determine some parameters") | ||
retVal = S_ERROR("Could not determine some parameters") | ||
retVal["Value"] = consumed | ||
return retVal |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
38 changes: 38 additions & 0 deletions
38
src/DIRAC/Resources/Computing/BatchSystems/TimeLeft/test/Test_HTCondorResourceUsage.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
""" Test class for SGEResourceUsage utility | ||
""" | ||
|
||
import pytest | ||
|
||
from DIRAC import S_OK | ||
from DIRAC.Resources.Computing.BatchSystems.TimeLeft.HTCondorResourceUsage import HTCondorResourceUsage | ||
|
||
|
||
HTCONDOR_OUT_0 = "86400 3600" | ||
HTCONDOR_OUT_1 = "undefined 3600" | ||
HTCONDOR_OUT_2 = "" | ||
|
||
|
||
def test_getResourceUsage(mocker): | ||
mocker.patch( | ||
"DIRAC.Resources.Computing.BatchSystems.TimeLeft.HTCondorResourceUsage.runCommand", | ||
side_effect=[S_OK(HTCONDOR_OUT_0), S_OK(HTCONDOR_OUT_1), S_OK(HTCONDOR_OUT_2)], | ||
) | ||
|
||
# First test: everything is fine | ||
htcondorResourceUsage = HTCondorResourceUsage("1234", {"Queue": "Test", "InfoPath": "/path/to/condor_ad"}) | ||
res = htcondorResourceUsage.getResourceUsage() | ||
assert res["OK"], res["Message"] | ||
assert res["Value"]["WallClock"] == 3600 | ||
assert res["Value"]["WallClockLimit"] == 86400 | ||
|
||
# Second test: MaxRuntime is undefined | ||
htcondorResourceUsage = HTCondorResourceUsage("1234", {"Queue": "Test", "InfoPath": "/path/to/condor_ad"}) | ||
res = htcondorResourceUsage.getResourceUsage() | ||
assert not res["OK"] | ||
assert res["Message"] == "Current batch system is not supported" | ||
|
||
# Third test: empty output | ||
htcondorResourceUsage = HTCondorResourceUsage("1234", {"Queue": "Test", "InfoPath": "/path/to/condor_ad"}) | ||
res = htcondorResourceUsage.getResourceUsage() | ||
assert not res["OK"] | ||
assert res["Message"] == "Current batch system is not supported" |
Oops, something went wrong.