Skip to content

Commit

Permalink
Enable fmu_context="case_symlink_realization"
Browse files Browse the repository at this point in the history
This makes it possible to write files at case level while symlinking
the same files at realization level, a pattern that is used in many
FMU setups.
  • Loading branch information
jcrivenaes authored Sep 8, 2022
1 parent d969279 commit 3561860
Show file tree
Hide file tree
Showing 9 changed files with 169 additions and 33 deletions.
62 changes: 47 additions & 15 deletions src/fmu/dataio/_filedata_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""

import logging
from copy import deepcopy
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Optional
Expand Down Expand Up @@ -35,7 +36,9 @@ class _FileDataProvider:

# storing results in these variables
relative_path: Optional[str] = field(default="", init=False)
relative_path_symlink: Optional[str] = field(default="", init=False)
absolute_path: Optional[str] = field(default="", init=False)
absolute_path_symlink: Optional[str] = field(default="", init=False)
checksum_md5: Optional[str] = field(default="", init=False)

def __post_init__(self):
Expand Down Expand Up @@ -63,10 +66,23 @@ def __post_init__(self):
logger.info("Initialize %s", __class__)

def derive_filedata(self):
    """Derive and store the file paths, including optional symlink paths.

    The folder(s) come from ``_get_path()``, which returns a pair: the folder
    for the actual file and a folder for a symlink (or a falsy value when no
    symlink is requested). Each folder is converted to a (relative, absolute)
    pair of file paths by ``_derive_filedata_generic()``.

    Results are stored on the instance as ``relative_path`` and
    ``absolute_path``; ``relative_path_symlink`` and ``absolute_path_symlink``
    are only updated when a symlink folder is given.
    """
    main_folder, link_folder = self._get_path()

    # paths for the physical file
    self.relative_path, self.absolute_path = self._derive_filedata_generic(
        main_folder
    )

    # paths for the symlink; left untouched when no link folder is returned
    if link_folder:
        (
            self.relative_path_symlink,
            self.absolute_path_symlink,
        ) = self._derive_filedata_generic(link_folder)

    logger.info("Derived filedata")

def _derive_filedata_generic(self, inrelpath):
"""This works with both normal data and symlinks."""
stem = self._get_filestem()
relpath = self._get_path()

path = Path(relpath) / stem.lower()
path = Path(inrelpath) / stem.lower()
path = path.with_suffix(path.suffix + self.extension)

# resolve() will fix ".." e.g. change '/some/path/../other' to '/some/other'
Expand Down Expand Up @@ -95,10 +111,8 @@ def derive_filedata(self):
else:
relpath = path.relative_to(self.rootpath)

self.relative_path = str(relpath)
self.absolute_path = str(abspath)

logger.info("Derived filedata")
return str(relpath), str(abspath)

def _get_filestem(self):
"""Construct the file"""
Expand Down Expand Up @@ -139,26 +153,39 @@ def _get_filestem(self):
return stem

def _get_path(self):
"""Construct and get the folder path and verify."""
"""Construct and get the folder path(s)."""
dest = None
linkdest = None

dest = self._get_path_generic(mode=self.fmu_context, allow_forcefolder=True)

if self.fmu_context == "case_symlink_realization":
linkdest = self._get_path_generic(
mode="realization", allow_forcefolder=False, info=self.fmu_context
)

outroot = self.rootpath
return dest, linkdest

logger.info("FMU context is %s", self.fmu_context)
if self.fmu_context == "realization":
def _get_path_generic(self, mode="realization", allow_forcefolder=True, info=""):
"""Generically construct and get the folder path and verify."""
dest = None

outroot = deepcopy(self.rootpath)

logger.info("FMU context is %s", mode)
if mode == "realization":
if self.realname:
outroot = outroot / self.realname
outroot = outroot / self.realname # TODO: if missing self.realname?

if self.itername:
outroot = outroot / self.itername

if self.fmu_context == "case_symlink_realization":
raise NotImplementedError("Symlinking not there yet...")

outroot = outroot / "share"

if self.fmu_context == "preprocessed":
if mode == "preprocessed":
outroot = outroot / "preprocessed"
else:

if mode != "preprocessed":
if self.dataio.is_observation:
outroot = outroot / "observations"
else:
Expand All @@ -181,6 +208,11 @@ def _get_path(self):
dest = dest.absolute()
self.forcefolder_is_absolute = True

if not allow_forcefolder:
raise RuntimeError(
f"You cannot use forcefolder in combination with fmucontext={info}"
)

if self.dataio.subfolder:
dest = dest / self.dataio.subfolder

Expand Down
9 changes: 8 additions & 1 deletion src/fmu/dataio/_fmu_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def __post_init__(self):
logger.info("Initialize %s", __class__)

def detect_provider(self):
"""First order method to detect provider."""
"""First order method to detect provider, and also check fmu_context."""
if self._detect_ert2provider():
logger.info("Detecting FMU provider as ERT2")
self.provider = "ERT2"
Expand All @@ -84,6 +84,13 @@ def detect_provider(self):
self.dataio._usecontext = None # e.g. an interactive RMS run
if self.dataio.fmu_context == "preprocessed":
self.dataio._usecontext = self.dataio.fmu_context
if self.dataio.fmu_context != self.dataio._usecontext:
warn(
f"Requested fmu_context is <{self.dataio.fmu_context}> but since "
"this is detected as a non FMU run, the actual context "
f"is set to <{self.dataio._usecontext}>",
UserWarning,
)

def _detect_ert2provider(self) -> bool:
"""Detect if ERT2 is provider and set itername, casename, etc."""
Expand Down
7 changes: 7 additions & 0 deletions src/fmu/dataio/_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,10 @@ def _populate_meta_file(self):
- relative_path, seen from rootpath
- absolute_path, as above but full path
- checksum_md5, if required (a bit special treatment of this)
In addition, _optional_ symlink addresses
- relative_path_symlink, seen from rootpath
- absolute_path_symlink, as above but full path
"""

fdata = _FileDataProvider(
Expand All @@ -225,6 +229,9 @@ def _populate_meta_file(self):

self.meta_file["relative_path"] = fdata.relative_path
self.meta_file["absolute_path"] = fdata.absolute_path
if fdata.absolute_path_symlink:
self.meta_file["relative_path_symlink"] = fdata.relative_path_symlink
self.meta_file["absolute_path_symlink"] = fdata.absolute_path_symlink

if self.compute_md5:
logger.info("Compute MD5 sum for tmp file...")
Expand Down
18 changes: 18 additions & 0 deletions src/fmu/dataio/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,24 @@ def export_file_compute_checksum_md5(obj, filename, extension, flag=None, tmp=Fa
return usefile, checksum


def create_symlink(source, target):
    """Create a symbolic link at ``target`` pointing to ``source``, with checks.

    An existing symlink at ``target`` (including a dangling one) is replaced, so
    repeated calls with the same arguments succeed. Anything else that already
    exists at ``target`` is never overwritten.

    Args:
        source: Path to the existing file that the link shall point to.
        target: Path where the symlink shall be created.

    Raises:
        IOError: If ``source`` does not exist, if ``target`` already exists as
            something other than a symlink, or if the link is not in place
            after creation.
    """
    thesource = Path(source)
    if not thesource.exists():
        raise IOError(f"Cannot symlink: Source file {thesource} does not exist.")

    thetarget = Path(target)

    if thetarget.is_symlink():
        # os.symlink() refuses to overwrite, so remove the old link first;
        # is_symlink() is also True for dangling links, which exists() misses.
        thetarget.unlink()
    elif thetarget.exists():
        raise IOError(f"Target file {thetarget} exists already as a normal file.")

    os.symlink(source, target)

    if not (thetarget.exists() and thetarget.is_symlink()):
        raise IOError(f"Target file {thetarget} does not exist or is not a symlink.")


def size(fname):
    """Return the size in bytes of the file at ``fname``."""
    file_stat = Path(fname).stat()
    return file_stat.st_size

Expand Down
25 changes: 22 additions & 3 deletions src/fmu/dataio/dataio.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from . import _metadata
from ._definitions import ALLOWED_CONTENTS, ALLOWED_FMU_CONTEXTS, CONTENTS_REQUIRED
from ._utils import (
create_symlink,
detect_inside_rms,
drop_nones,
export_file_compute_checksum_md5,
Expand Down Expand Up @@ -504,12 +505,14 @@ def __post_init__(self):
self.config = some_config_from_env(GLOBAL_ENVNAME)

self._validate_content_key()
logger.info("Validate FMU context which is %s", self.fmu_context)
self._validate_fmucontext_key()
self._update_globalconfig_from_settings()
_check_global_config(self.config, strict=True)
self._establish_pwd_rootpath()

self._show_deprecations_or_notimplemented()
logger.info("FMU context is %s", self.fmu_context)
logger.info("Ran __post_init__")

def _show_deprecations_or_notimplemented(self):
Expand Down Expand Up @@ -571,6 +574,7 @@ def _update_check_settings(self, newsettings: dict) -> None:
self._show_deprecations_or_notimplemented()
self._validate_content_key()
self._validate_fmucontext_key()
logger.info("Validate FMU context which is now %s", self.fmu_context)

def _update_globalconfig_from_settings(self):
"""A few user settings may update/append the global config directly."""
Expand Down Expand Up @@ -672,6 +676,7 @@ def generate_metadata(self, obj: Any, compute_md5: bool = True, **kwargs) -> dic
a temporary file, which may be time-consuming if the file is large.
"""
logger.info("Generate metadata...")
logger.info("KW args %s", kwargs)

self._update_check_settings(kwargs)
self._update_globalconfig_from_settings()
Expand All @@ -692,7 +697,7 @@ def generate_metadata(self, obj: Any, compute_md5: bool = True, **kwargs) -> dic

return deepcopy(self._metadata)

def export(self, obj, **kwargs) -> str:
def export(self, obj, return_symlink=False, **kwargs) -> str:
"""Export data objects of 'known' type to FMU storage solution with metadata.
This function will also collect the data-specific class metadata. For "classic"
Expand All @@ -704,6 +709,9 @@ def export(self, obj, **kwargs) -> str:
Args:
obj: XTGeo instance, a Pandas Dataframe instance or other supported object.
return_symlink: If fmu_context is 'case_symlink_realization' then the link
address will be returned if this is True; otherwise the physical file
path will be returned.
**kwargs: For other arguments, see ExportData() input keys. If they
exist both places, this function will override!
Expand All @@ -727,17 +735,28 @@ def export(self, obj, **kwargs) -> str:
outfile, md5 = export_file_compute_checksum_md5(
obj, outfile, outfile.suffix, flag=useflag
)

# inject md5 checksum in metadata
metadata["file"]["checksum_md5"] = md5

export_metadata_file(metafile, metadata, savefmt=self.meta_format)
logger.info("Actual file is: %s", outfile)
logger.info("Metadata file is: %s", metafile)

# generate symlink if requested
outfile_target = None
if metadata["file"].get("absolute_path_symlink"):
outfile_target = Path(metadata["file"]["absolute_path_symlink"])
outfile_source = Path(metadata["file"]["absolute_path"])
create_symlink(outfile_source, outfile_target)
metafile_target = outfile_target.parent / ("." + str(outfile.name) + ".yml")
create_symlink(metafile, metafile_target)

self._metadata = metadata

return str(outfile)
if return_symlink and outfile_target:
return str(outfile_target)
else:
return str(outfile)


# ######################################################################################
Expand Down
2 changes: 2 additions & 0 deletions tests/test_units/test_dataio.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,8 @@ def test_forcefolder(tmp_path, globalconfig2, regsurf):
ExportData._inside_rms = True
edata = ExportData(config=globalconfig2, forcefolder="whatever")
meta = edata.generate_metadata(regsurf)
logger.info("RMS PATH %s", rmspath)
logger.info("\n %s", prettyprint_dict(meta))
assert meta["file"]["relative_path"].startswith("share/results/whatever/")
ExportData._inside_rms = False # reset

Expand Down
5 changes: 3 additions & 2 deletions tests/test_units/test_filedataprovider_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,9 @@ def test_get_paths_path_exists_already(regsurf, edataobj1, tmp_path):

fdata = _FileDataProvider(edataobj1, objdata)

path = fdata._get_path()
path, linkpath = fdata._get_path()
assert str(path) == "share/results/efolder"
assert linkpath is None


def test_get_paths_not_exists_so_create(regsurf, edataobj1, tmp_path):
Expand All @@ -165,7 +166,7 @@ def test_get_paths_not_exists_so_create(regsurf, edataobj1, tmp_path):

fdata = _FileDataProvider(cfg, objdata)

path = fdata._get_path()
path, _ = fdata._get_path()
assert str(path) == "share/results/efolder"


Expand Down
23 changes: 11 additions & 12 deletions tests/test_units/test_prerealization_surfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
"""
import logging
import os
from pathlib import Path

import pytest
from conftest import inside_rms

import fmu.dataio.dataio as dataio
Expand Down Expand Up @@ -50,7 +50,7 @@ def test_regsurf_case_observation(fmurun_w_casemetadata, rmsglobalconfig, regsur
def test_regsurf_case_observation_w_symlinks(
fmurun_w_casemetadata, rmsglobalconfig, regsurf
):
"""Generating case level surface, with symlinks on realization folders."""
"""Generating case level surface, with symlinks in realization folders."""
logger.info("Active folder is %s", fmurun_w_casemetadata)

os.chdir(fmurun_w_casemetadata)
Expand All @@ -61,17 +61,16 @@ def test_regsurf_case_observation_w_symlinks(
name="mymap",
is_observation=True,
)
metadata = edata.generate_metadata(regsurf)
logger.info("\n%s", utils.prettyprint_dict(metadata))
assert (
"realization-0/iter-0/share/observations/maps/mymap.gri"
in metadata["file"]["relative_path_symlink"]
)

with pytest.raises(NotImplementedError):
metadata = edata.generate_metadata(regsurf)
logger.debug("\n%s", utils.prettyprint_dict(metadata))
assert (
"ertrun1/share/observations/maps/mymap.gri"
in metadata["file"]["absolute_path"]
)

exp = edata.export(regsurf)
assert "ertrun1/share/observations/maps/mymap.gri" in exp
exp = edata.export(regsurf, return_symlink=True)
myfile = Path(exp)
assert myfile.is_symlink() is True


def test_regsurf_preprocessed_observation(
Expand Down
Loading

0 comments on commit 3561860

Please sign in to comment.