Skip to content

Commit

Permalink
FileStorageObserver: additional flags for saving sources and copying …
Browse files Browse the repository at this point in the history
…resources (#806)

* added no_duplicate and should_save_sources options

* added tests; fixed no_duplicate not working

* added documentation

* bugfix for not handling relative vs absolute paths correctly

* Renamed no_duplicate->copy_artifacts

* should_save_sources->copy_sources

Co-authored-by: Patrick Kidger <[email protected]>
  • Loading branch information
patrick-kidger and Patrick Kidger authored Feb 25, 2021
1 parent 4e299ed commit f6191d1
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 17 deletions.
7 changes: 7 additions & 0 deletions docs/observers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,13 @@ The FileStorageObserver also stores a snapshot of the source-code in a separate
Their filenames are stored in the ``run.json`` file such that the corresponding
files can be easily linked to their respective run.

Storing source-code in this way can be disabled by passing
``copy_sources=False`` when creating the FileStorageObserver. Copying any
:ref:`resources` that are already present in `my_runs/`, but not present in
`my_runs/_resources/` (for example, a resource that is the output of another
run), can be disabled my passing ``copy_artifacts=False`` when creating the
FileStorageObserver.

Template Rendering
------------------
In addition to these basic files, the FileStorageObserver can also generate a
Expand Down
63 changes: 46 additions & 17 deletions sacred/observers/file_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from typing import Optional
import warnings

from shutil import copyfile
from shutil import copyfile, SameFileError

from sacred.commandline_options import cli_option
from sacred.dependencies import get_digest
Expand Down Expand Up @@ -40,6 +40,8 @@ def __init__(
source_dir: Optional[PathType] = None,
template: Optional[PathType] = None,
priority: int = DEFAULT_FILE_STORAGE_PRIORITY,
copy_artifacts: bool = True,
copy_sources: bool = True,
):
basedir = Path(basedir)
resource_dir = resource_dir or basedir / "_resources"
Expand All @@ -53,7 +55,15 @@ def __init__(
template = basedir / "template.html"
if not template.exists():
template = None
self.initialize(basedir, resource_dir, source_dir, template, priority)
self.initialize(
basedir,
resource_dir,
source_dir,
template,
priority,
copy_artifacts,
copy_sources,
)

def initialize(
self,
Expand All @@ -62,12 +72,16 @@ def initialize(
source_dir,
template,
priority=DEFAULT_FILE_STORAGE_PRIORITY,
copy_artifacts=True,
copy_sources=True,
):
self.basedir = str(basedir)
self.resource_dir = resource_dir
self.source_dir = source_dir
self.template = template
self.priority = priority
self.copy_artifacts = copy_artifacts
self.copy_sources = copy_sources
self.dir = None
self.run_entry = None
self.config = None
Expand Down Expand Up @@ -134,18 +148,21 @@ def queued_event(
self.save_json(self.run_entry, "run.json")
self.save_json(self.config, "config.json")

for s, m in ex_info["sources"]:
self.save_file(s)
if self.copy_sources:
for s, _ in ex_info["sources"]:
self.save_file(s)

return os.path.relpath(self.dir, self.basedir) if _id is None else _id

def save_sources(self, ex_info):
base_dir = ex_info["base_dir"]
source_info = []
for s, m in ex_info["sources"]:
for s, _ in ex_info["sources"]:
abspath = os.path.join(base_dir, s)
store_path, md5sum = self.find_or_save(abspath, self.source_dir)
# assert m == md5sum
if self.copy_sources:
store_path = self.find_or_save(abspath, self.source_dir)
else:
store_path = abspath
relative_source = os.path.relpath(str(store_path), self.basedir)
source_info.append([s, relative_source])
return source_info
Expand Down Expand Up @@ -180,14 +197,23 @@ def started_event(
return os.path.relpath(self.dir, self.basedir) if _id is None else _id

def find_or_save(self, filename, store_dir: Path):
os.makedirs(str(store_dir), exist_ok=True)
source_name, ext = os.path.splitext(os.path.basename(filename))
md5sum = get_digest(filename)
store_name = source_name + "_" + md5sum + ext
store_path = store_dir / store_name
if not store_path.exists():
copyfile(filename, str(store_path))
return store_path, md5sum
try:
Path(filename).resolve().relative_to(Path(self.basedir).resolve())
is_relative_to = True
except ValueError:
is_relative_to = False

if is_relative_to and not self.copy_artifacts:
return filename
else:
store_dir.mkdir(parents=True, exist_ok=True)
source_name, ext = os.path.splitext(os.path.basename(filename))
md5sum = get_digest(filename)
store_name = source_name + "_" + md5sum + ext
store_path = store_dir / store_name
if not store_path.exists():
copyfile(filename, str(store_path))
return store_path

def save_json(self, obj, filename):
with open(os.path.join(self.dir, filename), "w") as f:
Expand All @@ -205,7 +231,10 @@ def save_file(self, filename, target_name=None):
"FileStorageObserver. "
"The list of blacklisted files is: {}".format(blacklist)
)
copyfile(filename, dest_file)
try:
copyfile(filename, dest_file)
except SameFileError:
pass

def save_cout(self):
with open(os.path.join(self.dir, "cout.txt"), "ab") as f:
Expand Down Expand Up @@ -260,7 +289,7 @@ def failed_event(self, fail_time, fail_trace):
self.render_template()

def resource_event(self, filename):
store_path, md5sum = self.find_or_save(filename, self.resource_dir)
store_path = self.find_or_save(filename, self.resource_dir)
self.run_entry["resources"].append([filename, str(store_path)])
self.save_json(self.run_entry, "run.json")

Expand Down
32 changes: 32 additions & 0 deletions tests/test_observers/test_file_storage_observer.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,3 +413,35 @@ def test_blacklist_paths(tmpdir, dir_obs, sample_run):
other_file.touch()
with pytest.raises(FileExistsError):
obs.save_file(str(other_file), "cout.txt")


def test_no_duplicate(tmpdir, sample_run):
obs = FileStorageObserver(tmpdir, copy_artifacts=False)
file = Path(str(tmpdir / "koko.txt"))
file.touch()
obs.started_event(**sample_run)
obs.resource_event(str(file))
assert not os.path.exists(tmpdir / "_resources")

# Test the test: that the resource would otherwise have been created.
obs = FileStorageObserver(tmpdir, copy_artifacts=True)
sample_run["_id"] = sample_run["_id"] + "_2"
obs.started_event(**sample_run)
obs.resource_event(str(file))
assert os.path.exists(tmpdir / "_resources")
assert any(x.startswith("koko") for x in os.listdir(tmpdir / "_resources"))


def test_no_sources(tmpdir, tmpfile, sample_run):
obs = FileStorageObserver(tmpdir, copy_sources=False)
sample_run["ex_info"]["sources"] = [[tmpfile.name, tmpfile.md5sum]]
obs.started_event(**sample_run)
assert not os.path.exists(tmpdir / "_sources")

# Test the test: that the source would otherwise have been created.
obs = FileStorageObserver(tmpdir, copy_sources=True)
sample_run["_id"] = sample_run["_id"] + "_2"
obs.started_event(**sample_run)
name, _ = os.path.splitext(os.path.basename(tmpfile.name))
assert os.path.exists(tmpdir / "_sources")
assert any(x.startswith(name) for x in os.listdir(tmpdir / "_sources"))

0 comments on commit f6191d1

Please sign in to comment.