Skip to content

Commit

Permalink
Implement auto ingesting from NetCDF format
Browse files Browse the repository at this point in the history
  • Loading branch information
erick-otenyo committed Nov 14, 2023
1 parent 1eaa2e0 commit 25f134f
Show file tree
Hide file tree
Showing 12 changed files with 175 additions and 101 deletions.
4 changes: 4 additions & 0 deletions geomanager/admin/raster_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@


class RasterFileLayerCreateView(CreateView):
form_view_extra_js = ["geomanager/js/raster-file-conditional.js"]

def get_form(self):
form = super().get_form()
form.fields["dataset"].queryset = Dataset.objects.filter(layer_type="raster_file")
Expand Down Expand Up @@ -118,6 +120,8 @@ class RasterFileLayerModelAdmin(BaseModelAdmin, ModelAdminCanHide):
create_view_class = RasterFileLayerCreateView
edit_view_class = RasterFileLayerEditView

form_view_extra_js = ["geomanager/js/raster-file-conditional.js"]

def __init__(self, parent=None):
super().__init__(parent)
self.list_display = ["thumbnail_url"] + (list(self.list_display) or []) + ["dataset_link", "upload_files",
Expand Down
1 change: 0 additions & 1 deletion geomanager/admin/raster_style.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from django import forms
from django.core.exceptions import ObjectDoesNotExist
from django.utils.safestring import mark_safe
from django.utils.translation import gettext_lazy as _
from wagtail_modeladmin.views import CreateView
Expand Down
8 changes: 5 additions & 3 deletions geomanager/management/commands/ingest_geomanager_raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,19 @@ def handle(self, *args, **options):
overwrite = options['overwrite']
clip = options['clip']

logger.debug('[GEOMANAGER_INGEST] Starting auto ingest execution...')
logger.info('[GEOMANAGER_AUTO_INGEST]: Starting auto ingest execution...')

logger.info(f'[GEOMANAGER_INGEST] Event Type: {event_type}')
logger.info(f'[GEOMANAGER_AUTO_INGEST]: Event Type: {event_type}')

# Check if event type is allowed
if event_type not in ALLOWED_FILE_EVENTS:
logger.warning(f'[GEOMANAGER_INGEST] Event Type: {event_type} not in allowed file events.')
logger.warning(f'[GEOMANAGER_AUTO_INGEST]: Event Type: {event_type} not in allowed file events.')
return

# If event type is moved, use destination path
if event_type == "moved" and dst_path is not None:
src_path = dst_path

ingest_raster_file(src_path, overwrite, clip)

logger.info(f'[GEOMANAGER_AUTO_INGEST]: {src_path} done...')
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

# NOTE(review): not referenced in the visible part of this command — confirm it is still needed here.
ALLOWED_FILE_EVENTS = ["created", "moved"]

# File extensions (with leading dot) that are globbed for, in this order,
# when scanning a layer's auto-ingest directory.
RASTER_FILE_EXTENSIONS = ['.nc', '.tif']


class Command(BaseCommand):
help = 'Process a raster file layer directory'
Expand All @@ -28,27 +30,30 @@ def handle(self, *args, **options):
auto_ingest_raster_data_dir = geomanager_settings.get("auto_ingest_raster_data_dir", None)

if not auto_ingest_raster_data_dir:
logger.error("Auto ingest raster data directory not configured.")
logger.error("[GEOMANAGER_AUTO_INGEST]: Auto ingest raster data directory not configured.")
return

if not is_valid_uuid(layer_id):
logger.error(f"Layer ID: {layer_id} is not a valid UUID.")
logger.error(f"[GEOMANAGER_AUTO_INGEST]: Layer ID: {layer_id} is not a valid UUID.")
return

logger.debug('[GEOMANAGER_INGEST] Starting auto ingest execution...')
logger.debug('[GEOMANAGER_AUTO_INGEST]: Starting directory processing...')

directory = os.path.join(auto_ingest_raster_data_dir, layer_id)

if not os.path.isdir(directory):
logger.error(f"Directory: {directory} does not exist.")
logger.error(f"[GEOMANAGER_AUTO_INGEST]: Directory: {directory} does not exist.")
return

geotiff_files = glob.glob(directory + "/*.tif")
for ext in RASTER_FILE_EXTENSIONS:
files = glob.glob(directory + f"/*{ext}")

if not geotiff_files:
logger.error(f"No GeoTIFF files found in directory: {directory}")
return
if not files:
logger.error(
f"[GEOMANAGER_AUTO_INGEST]: No files found in directory: {directory} with extension: {ext}")
return

for file in geotiff_files:
logger.info(f'[GEOMANAGER_INGEST] Processing file: {file}')
ingest_raster_file(file, overwrite, clip)
for file in files:
logger.info(f'[GEOMANAGER_AUTO_INGEST]: Processing file: {file}')
ingest_raster_file(file, overwrite, clip)
logger.info(f'[GEOMANAGER_AUTO_INGEST]: {file} done...')
2 changes: 1 addition & 1 deletion geomanager/management/commands/watchmedo_sample.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
watchmedo shell-command --patterns="*.tif" --ignore-directories --recursive \
watchmedo shell-command --patterns="*.nc;*.tif" --ignore-directories --recursive \
--command='python manage.py ingest_geomanager_raster "${watch_event_type}" "${watch_src_path}" --dst "${watch_dest_path}" --overwrite' \
/path/to/direcory/to/watch
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 4.1.10 on 2023-11-14 10:38

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the optional ``auto_ingest_nc_data_variable`` field to ``rasterfilelayer``."""

    dependencies = [
        (
            'geomanager',
            '0031_remove_geomanagersettings_cap_auto_refresh_interval_and_more',
        ),
    ]

    operations = [
        migrations.AddField(
            model_name='rasterfilelayer',
            name='auto_ingest_nc_data_variable',
            field=models.CharField(
                blank=True,
                max_length=100,
                null=True,
                verbose_name='Data variable for netCDF data auto ingest',
            ),
        ),
    ]
5 changes: 5 additions & 0 deletions geomanager/models/raster_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ class RasterFileLayer(TimeStampedModel, BaseLayer):
style = models.ForeignKey("RasterStyle", null=True, blank=True, on_delete=models.SET_NULL, verbose_name=_("style"))

auto_ingest_from_directory = models.BooleanField(default=False, verbose_name=_("Auto ingest from directory"))
auto_ingest_nc_data_variable = models.CharField(max_length=100, blank=True, null=True,
verbose_name=_("Data variable for netCDF data auto ingest"),
help_text=_("The name of the data variable to use, "
"if ingesting from netCDF files"))

analysis = StreamField([
('point_analysis', FileLayerPointAnalysisBlock(label=_("Point Analysis")),),
Expand All @@ -57,6 +61,7 @@ class Meta:
FieldPanel("date_format"),
FieldPanel("style"),
FieldPanel("auto_ingest_from_directory"),
FieldPanel("auto_ingest_nc_data_variable"),
FieldPanel("analysis"),
]

Expand Down
20 changes: 20 additions & 0 deletions geomanager/static/geomanager/js/raster-file-conditional.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
$(function () {
    // Checkbox that switches directory auto-ingestion on or off
    const $autoIngestToggle = $('#id_auto_ingest_from_directory');
    // Form panel wrapping the NetCDF data-variable field
    const $dataVariablePanel = $('#panel-auto_ingest_nc_data_variable-section');

    // Show the data-variable panel only while the auto-ingest checkbox is ticked
    const syncPanelVisibility = function () {
        $dataVariablePanel.toggle($autoIngestToggle.is(':checked'));
    };

    // Apply the initial state, then keep it in sync with user changes
    syncPanelVisibility();
    $autoIngestToggle.on('change', syncPanelVisibility);
});
136 changes: 95 additions & 41 deletions geomanager/utils/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
import re
import tempfile
import uuid
from datetime import datetime
from os.path import splitext, isfile

import pytz
from adminboundarymanager.models import AdminBoundarySettings, AdminBoundary
from dateutil.parser import isoparse
from django.core.files import File
Expand All @@ -13,12 +15,25 @@
from wagtail.models import Site

from geomanager.models import RasterUpload, LayerRasterFile, RasterFileLayer
from geomanager.utils.raster_utils import create_layer_raster_file, read_raster_info, bounds_to_polygon, \
check_raster_bounds_with_boundary, clip_netcdf, clip_geotiff
from geomanager.utils.raster_utils import (
create_layer_raster_file,
read_raster_info,
bounds_to_polygon,
check_raster_bounds_with_boundary,
clip_netcdf,
clip_geotiff
)

# Logger for the ingest utilities; its level is set to INFO explicitly here.
logger = logging.getLogger("geomanager.ingest")
logger.setLevel(logging.INFO)

# Lower-case file extensions (with leading dot) accepted for ingestion;
# a source file's extension is lower-cased before being checked against this list.
ALLOWED_RASTER_FILE_EXTENSIONS = ['.tif', '.nc']


class IngestException(Exception):
    """Raised when a raster file cannot be ingested.

    Args:
        message: Human-readable description of why ingestion failed. It is
            available both as ``exc.message`` and through ``str(exc)``.
    """

    def __init__(self, message):
        # Forward the message to Exception so that ``args``/``str()`` are
        # populated even when the exception is built with ``message=...``.
        super().__init__(message)
        self.message = message


def is_valid_uuid(val):
try:
Expand All @@ -28,13 +43,15 @@ def is_valid_uuid(val):
return False


def extract_iso_date(file_name):
def extract_iso_date_from_filename(file_name):
pattern = r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$" # Pattern 'YYYY-MM-DDTHH:MM:SS.sssZ'
match = re.search(pattern, file_name) # Search for the pattern at the end of the file name

if match:
iso_date = match.group() # Extract the matched ISO format string
return isoparse(iso_date) # Convert the string to datetime object
tz_unaware_date = isoparse(iso_date) # Convert the string to datetime object
tz_aware_date = tz_unaware_date.replace(tzinfo=pytz.UTC) # Make the datetime object timezone aware
return tz_aware_date
else:
return None # Return None if the file name doesn't end with the specified format

Expand Down Expand Up @@ -114,7 +131,28 @@ def clip_raster_upload_to_boundary(upload, request=None):
return upload


def raw_raster_file_to_layer_raster_file(layer_obj, time, file_path, overwrite=False, clip_to_boundary=False):
def create_raster(layer_obj, upload, time, overwrite=False, band_index=None, data_variable=None):
    """Create the ``LayerRasterFile`` for ``layer_obj`` at ``time`` from ``upload``.

    Args:
        layer_obj: Layer the raster file belongs to.
        upload: Raster upload passed on to ``create_layer_raster_file``.
        time: Timestamp identifying the raster file within the layer.
        overwrite: When an entry for ``(layer_obj, time)`` already exists,
            replace it if true; otherwise log a warning and do nothing.
        band_index: Forwarded unchanged to ``create_layer_raster_file``.
        data_variable: Forwarded unchanged to ``create_layer_raster_file``.

    Returns:
        None.
    """
    already_ingested = LayerRasterFile.objects.filter(layer=layer_obj, time=time).exists()

    # Nothing stored for this timestamp yet: plain create
    if not already_ingested:
        create_layer_raster_file(layer_obj, upload, time, band_index=band_index, data_variable=data_variable)
        return

    # Entry exists and the caller did not ask to replace it
    if not overwrite:
        logger.warning(f'LayerRasterFile for layer: {layer_obj.pk} and time: {time} already exists.')
        return

    # Replace the existing entry; delete and re-create share one transaction
    with transaction.atomic():
        stale_raster_file = LayerRasterFile.objects.get(layer=layer_obj, time=time)
        stale_raster_file.delete()

        create_layer_raster_file(layer_obj, upload, time, band_index=band_index, data_variable=data_variable)


def raw_raster_file_to_layer_raster_file(layer_obj, file_path, time=None, overwrite=False, clip_to_boundary=False):
with open(file_path, "rb") as file:
file_name = os.path.basename(file.name)

Expand All @@ -126,27 +164,38 @@ def raw_raster_file_to_layer_raster_file(layer_obj, time, file_path, overwrite=F
upload.save()

try:
# check if raster file with this time already exists
exists = LayerRasterFile.objects.filter(layer=layer_obj, time=time).exists()

# return if raster file already exists and overwrite is False
if exists and not overwrite:
logger.warning(f'LayerRasterFile for layer: {layer_obj.pk} and time: {time} already exists.')
return

if clip_to_boundary:
# clip raster upload to boundary
upload = clip_raster_upload_to_boundary(upload)

# delete raster file if exists and overwrite is True, and create new raster file
if exists and overwrite:
with transaction.atomic():
layer_raster_file = LayerRasterFile.objects.get(layer=layer_obj, time=time)
layer_raster_file.delete()
create_layer_raster_file(layer_obj, upload, time)
else:
# create new raster file
create_layer_raster_file(layer_obj, upload, time)
raster_driver = raster_metadata.get("driver")

if raster_driver == "netCDF":
data_variable = layer_obj.auto_ingest_nc_data_variable

if not data_variable:
raise IngestException(f'No NetCDF Auto ingestion data variable set for layer: {layer_obj}')

if data_variable not in raster_metadata.get("data_variables", []):
raise IngestException(
f'NetCDF Auto ingestion data variable: {data_variable} not found in NetCDF: {file_name}')

timestamps = raster_metadata.get("timestamps", None)

if not timestamps:
raise IngestException(f'No timestamps found in NetCDF: {file_name}')

for i, time_str in enumerate(timestamps):
d_time_unaware = datetime.fromisoformat(time_str)
d_time_aware = d_time_unaware.replace(tzinfo=pytz.UTC)

create_raster(layer_obj, upload, d_time_aware, overwrite=overwrite, band_index=i,
data_variable=data_variable)

elif raster_driver == "GTiff":
if time:
create_raster(layer_obj, upload, time, overwrite=overwrite)

finally:
# delete raster upload
upload.delete()
Expand All @@ -155,36 +204,41 @@ def raw_raster_file_to_layer_raster_file(layer_obj, time, file_path, overwrite=F
def ingest_raster_file(src_path, overwrite=False, clip_to_boundary=False):
# Check if source path exists
if not isfile(src_path):
logger.warning(f'[GEOMANAGER_INGEST] File path: {src_path} does not exist.')
return
raise IngestException(f'File path: {src_path} does not exist.')

# check if file is a .tif file
if not splitext(src_path)[1].lower() == '.tif':
logger.warning(f'[GEOMANAGER_INGEST] File path: {src_path} is not a tiff file.')
return
file_extension = splitext(src_path)[1].lower()

if file_extension not in ALLOWED_RASTER_FILE_EXTENSIONS:
raise IngestException(f'File path: {src_path} is not a tiff or netcdf file.')

directory = os.path.dirname(src_path)
file_name = os.path.basename(src_path)
file_name_without_extension = os.path.splitext(file_name)[0]

# check if file name ends with iso format date, return the parsed date if it does
iso_date_time = extract_iso_date(file_name_without_extension)
if not iso_date_time:
logger.warning(f'[GEOMANAGER_INGEST] File name: {file_name} does not end with iso format date.')
return

# check if the directory name is an uuid
layer_uuid = os.path.basename(os.path.normpath(directory))
if not is_valid_uuid(layer_uuid):
logger.warning(f'[GEOMANAGER_INGEST] Directory name: {directory} is not a valid uuid.')
return
raise IngestException(f'Directory name: {directory} is not a valid uuid.')

# check if layer exists
raster_file_layer = RasterFileLayer.objects.filter(pk=layer_uuid).first()
if not raster_file_layer:
logger.warning(f'[GEOMANAGER_INGEST] RasterFileLayer with UUID: {layer_uuid} does not exist.')
return
raise IngestException(f'RasterFileLayer with UUID: {layer_uuid} does not exist.')

# create layer raster file from raw tiff file
raw_raster_file_to_layer_raster_file(raster_file_layer, iso_date_time, src_path, overwrite=overwrite,
clip_to_boundary=clip_to_boundary)
if file_extension == '.tif':
# check if file name ends with iso format date, return the parsed date if it does
iso_date_time = extract_iso_date_from_filename(file_name_without_extension)
if not iso_date_time:
raise IngestException(f'File name: {file_name} does not end with iso format date.')

# create layer raster file from raw tiff file
raw_raster_file_to_layer_raster_file(raster_file_layer, src_path, time=iso_date_time, overwrite=overwrite,
clip_to_boundary=clip_to_boundary)

elif file_extension == '.nc':
# process netcdf file
raw_raster_file_to_layer_raster_file(raster_file_layer, src_path, time=None, overwrite=overwrite,
clip_to_boundary=clip_to_boundary)

else:
raise IngestException(f'File extension: {file_extension} not supported.')
3 changes: 2 additions & 1 deletion geomanager/utils/raster_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import pandas as pd
import rasterio as rio
import xarray as xr
import rioxarray as rxr
from django.core.files import File
from django.forms import FileField
from django_large_image import tilesource
Expand Down Expand Up @@ -204,7 +205,7 @@ def convert_upload_to_geotiff(upload, out_file_path, band_index=None, data_varia

# make sure the nodata value is not NaN
nodata_value = rds.encoding.get('nodata', rds.encoding.get('_FillValue'))
if np.isnan(nodata_value):
if not nodata_value or np.isnan(nodata_value):
rds = rds.rio.write_nodata(-9999, encoded=True)

netcdf_attrs = []
Expand Down
Loading

0 comments on commit 25f134f

Please sign in to comment.