diff --git a/dwd_radar/radklim_yw/radklim_yw.ipynb b/dwd_radar/radklim_yw/radklim_yw.ipynb new file mode 100644 index 0000000..b87f45b --- /dev/null +++ b/dwd_radar/radklim_yw/radklim_yw.ipynb @@ -0,0 +1,2262 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# RADKLIM YW download and upload to metacatalog, including creation of metadata\n", + "\n", + "This is the final solution, using `radolan_to_netcdf` for download and splitting the netCDF daily when uploading to metacatalog!\n", + "\n", + "All available RADKLIM data: **2001 - 2022**" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import tarfile\n", + "from glob import glob\n", + "import os\n", + "\n", + "import tqdm\n", + "import xarray as xr\n", + "import numpy as np\n", + "\n", + "import radolan_to_netcdf as rtn\n", + "#import cf\n", + "\n", + "from metacatalog import api, ext" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Download data from DWD CDC server\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "opendata.dwd.de/cli [ <=> ] 2.76K --.-KB/s in 0s \n", + "opendata.dwd.de/cli [ <=> ] 1.77K --.-KB/s in 0s \n", + "opendata.dwd.de/cli [ <=> ] 1.77K --.-KB/s in 0s \n", + "opendata.dwd.de/cli [ <=> ] 1.77K --.-KB/s in 0s \n", + "opendata.dwd.de/cli [ <=> ] 1.77K --.-KB/s in 0s \n", + "opendata.dwd.de/cli [ <=> ] 1.77K --.-KB/s in 0s \n", + "opendata.dwd.de/cli [ <=> ] 1.77K --.-KB/s in 0s \n", + "opendata.dwd.de/cli [ <=> ] 1.77K --.-KB/s in 0s \n", + "opendata.dwd.de/cli [ <=> ] 1.77K --.-KB/s in 0s \n", + "opendata.dwd.de/cli [ <=> ] 1.77K --.-KB/s in 0s \n", + "opendata.dwd.de/cli [ <=> ] 1.77K --.-KB/s in 0s \n", + "opendata.dwd.de/cli [ <=> ] 1.77K --.-KB/s in 0s \n", + 
"opendata.dwd.de/cli [ <=> ] 1.77K --.-KB/s in 0s \n", + "opendata.dwd.de/cli [ <=> ] 1.77K --.-KB/s in 0s \n", + "opendata.dwd.de/cli [ <=> ] 1.77K --.-KB/s in 0s \n", + "opendata.dwd.de/cli [ <=> ] 1.77K --.-KB/s in 0s \n", + "opendata.dwd.de/cli [ <=> ] 1.77K --.-KB/s in 0s \n", + "opendata.dwd.de/cli [ <=> ] 1.77K --.-KB/s in 0s \n", + "opendata.dwd.de/cli [ <=> ] 1.77K --.-KB/s in 0s \n", + "opendata.dwd.de/cli [ <=> ] 1.77K --.-KB/s in 0s \n", + "opendata.dwd.de/cli [ <=> ] 1.77K --.-KB/s in 0s \n", + "opendata.dwd.de/cli [ <=> ] 1.77K --.-KB/s in 0s \n", + "opendata.dwd.de/cli [ <=> ] 423 --.-KB/s in 0s \n", + "opendata.dwd.de/cli [ <=> ] 442 --.-KB/s in 0s \n", + "opendata.dwd.de/cli 100%[===================>] 393.68M 336MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 355.75M 301MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 551.21M 285MB/s in 1.9s \n", + "opendata.dwd.de/cli 100%[===================>] 451.46M 281MB/s in 1.6s \n", + "opendata.dwd.de/cli 100%[===================>] 245.99M 293MB/s in 0.8s \n", + "opendata.dwd.de/cli 100%[===================>] 415.62M 315MB/s in 1.3s \n", + "opendata.dwd.de/cli 100%[===================>] 328.12M 301MB/s in 1.1s \n", + "opendata.dwd.de/cli 100%[===================>] 350.26M 259MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 634.59M 326MB/s in 1.9s \n", + "opendata.dwd.de/cli 100%[===================>] 316.86M 307MB/s in 1.0s \n", + "opendata.dwd.de/cli 100%[===================>] 421.65M 267MB/s in 1.6s \n", + "opendata.dwd.de/cli 100%[===================>] 446.13M 309MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 299.34M 308MB/s in 1.0s \n", + "opendata.dwd.de/cli 100%[===================>] 539.71M 309MB/s in 1.7s \n", + "opendata.dwd.de/cli 100%[===================>] 347.77M 255MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 344.94M 300MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 409.81M 
322MB/s in 1.3s \n", + "opendata.dwd.de/cli 100%[===================>] 331.46M 363MB/s in 0.9s \n", + "opendata.dwd.de/cli 100%[===================>] 447.18M 319MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 420.88M 388MB/s in 1.1s \n", + "opendata.dwd.de/cli 100%[===================>] 305.54M 325MB/s in 0.9s \n", + "opendata.dwd.de/cli 100%[===================>] 510.96M 319MB/s in 1.6s \n", + "opendata.dwd.de/cli 100%[===================>] 535.52M 347MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 383.49M 297MB/s in 1.3s \n", + "opendata.dwd.de/cli 100%[===================>] 425.22M 268MB/s in 1.6s \n", + "opendata.dwd.de/cli 100%[===================>] 196.44M 299MB/s in 0.7s \n", + "opendata.dwd.de/cli 100%[===================>] 238.97M 317MB/s in 0.8s \n", + "opendata.dwd.de/cli 100%[===================>] 298.14M 302MB/s in 1.0s \n", + "opendata.dwd.de/cli 100%[===================>] 381.35M 354MB/s in 1.1s \n", + "opendata.dwd.de/cli 100%[===================>] 236.30M 293MB/s in 0.8s \n", + "opendata.dwd.de/cli 100%[===================>] 336.19M 294MB/s in 1.1s \n", + "opendata.dwd.de/cli 100%[===================>] 216.14M 332MB/s in 0.7s \n", + "opendata.dwd.de/cli 100%[===================>] 279.72M 323MB/s in 0.9s \n", + "opendata.dwd.de/cli 100%[===================>] 398.14M 306MB/s in 1.3s \n", + "opendata.dwd.de/cli 100%[===================>] 279.81M 312MB/s in 0.9s \n", + "opendata.dwd.de/cli 100%[===================>] 331.43M 292MB/s in 1.1s \n", + "opendata.dwd.de/cli 100%[===================>] 536.58M 316MB/s in 1.7s \n", + "opendata.dwd.de/cli 100%[===================>] 327.69M 230MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 300.93M 249MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 298.11M 284MB/s in 1.0s \n", + "opendata.dwd.de/cli 100%[===================>] 372.63M 270MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 394.20M 286MB/s in 1.4s \n", + 
"opendata.dwd.de/cli 100%[===================>] 443.21M 288MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 418.08M 278MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 335.40M 364MB/s in 0.9s \n", + "opendata.dwd.de/cli 100%[===================>] 352.57M 254MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 387.84M 268MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 298.12M 316MB/s in 0.9s \n", + "opendata.dwd.de/cli 100%[===================>] 404.01M 324MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 394.84M 316MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 332.31M 296MB/s in 1.1s \n", + "opendata.dwd.de/cli 100%[===================>] 378.50M 285MB/s in 1.3s \n", + "opendata.dwd.de/cli 100%[===================>] 460.46M 280MB/s in 1.6s \n", + "opendata.dwd.de/cli 100%[===================>] 283.95M 284MB/s in 1.0s \n", + "opendata.dwd.de/cli 100%[===================>] 461.02M 285MB/s in 1.6s \n", + "opendata.dwd.de/cli 100%[===================>] 394.03M 256MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 290.69M 346MB/s in 0.8s \n", + "opendata.dwd.de/cli 100%[===================>] 265.06M 328MB/s in 0.8s \n", + "opendata.dwd.de/cli 100%[===================>] 354.90M 308MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 458.51M 320MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 230.22M 320MB/s in 0.7s \n", + "opendata.dwd.de/cli 100%[===================>] 363.35M 301MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 494.15M 261MB/s in 1.9s \n", + "opendata.dwd.de/cli 100%[===================>] 470.73M 249MB/s in 1.9s \n", + "opendata.dwd.de/cli 100%[===================>] 486.98M 289MB/s in 1.7s \n", + "opendata.dwd.de/cli 100%[===================>] 258.38M 277MB/s in 0.9s \n", + "opendata.dwd.de/cli 100%[===================>] 256.06M 302MB/s in 0.8s \n", + "opendata.dwd.de/cli 
100%[===================>] 636.38M 328MB/s in 1.9s \n", + "opendata.dwd.de/cli 100%[===================>] 226.51M 324MB/s in 0.7s \n", + "opendata.dwd.de/cli 100%[===================>] 375.71M 314MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 421.66M 299MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 339.68M 250MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 549.51M 295MB/s in 1.9s \n", + "opendata.dwd.de/cli 100%[===================>] 441.82M 289MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 424.29M 313MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 129.90M 333MB/s in 0.4s \n", + "opendata.dwd.de/cli 100%[===================>] 541.81M 333MB/s in 1.6s \n", + "opendata.dwd.de/cli 100%[===================>] 458.26M 303MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 532.69M 314MB/s in 1.7s \n", + "opendata.dwd.de/cli 100%[===================>] 402.67M 303MB/s in 1.3s \n", + "opendata.dwd.de/cli 100%[===================>] 416.20M 288MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 229.25M 281MB/s in 0.8s \n", + "opendata.dwd.de/cli 100%[===================>] 461.42M 289MB/s in 1.6s \n", + "opendata.dwd.de/cli 100%[===================>] 366.69M 267MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 415.83M 287MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 267.98M 247MB/s in 1.1s \n", + "opendata.dwd.de/cli 100%[===================>] 538.57M 316MB/s in 1.7s \n", + "opendata.dwd.de/cli 100%[===================>] 467.71M 329MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 239.73M 263MB/s in 0.9s \n", + "opendata.dwd.de/cli 100%[===================>] 320.24M 298MB/s in 1.1s \n", + "opendata.dwd.de/cli 100%[===================>] 433.82M 293MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 421.44M 288MB/s in 1.5s \n", + "opendata.dwd.de/cli 
100%[===================>] 344.02M 308MB/s in 1.1s \n", + "opendata.dwd.de/cli 100%[===================>] 463.81M 253MB/s in 1.8s \n", + "opendata.dwd.de/cli 100%[===================>] 339.57M 274MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 322.43M 319MB/s in 1.0s \n", + "opendata.dwd.de/cli 100%[===================>] 271.30M 322MB/s in 0.8s \n", + "opendata.dwd.de/cli 100%[===================>] 416.64M 327MB/s in 1.3s \n", + "opendata.dwd.de/cli 100%[===================>] 484.79M 323MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 243.86M 294MB/s in 0.8s \n", + "opendata.dwd.de/cli 100%[===================>] 407.20M 298MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 400.94M 260MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 461.08M 309MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 275.76M 296MB/s in 0.9s \n", + "opendata.dwd.de/cli 100%[===================>] 258.99M 296MB/s in 0.9s \n", + "opendata.dwd.de/cli 100%[===================>] 426.79M 284MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 530.27M 309MB/s in 1.7s \n", + "opendata.dwd.de/cli 100%[===================>] 528.83M 320MB/s in 1.7s \n", + "opendata.dwd.de/cli 100%[===================>] 376.65M 324MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 394.39M 322MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 360.74M 303MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 247.35M 241MB/s in 1.0s \n", + "opendata.dwd.de/cli 100%[===================>] 497.96M 284MB/s in 1.8s \n", + "opendata.dwd.de/cli 100%[===================>] 293.08M 278MB/s in 1.1s \n", + "opendata.dwd.de/cli 100%[===================>] 352.81M 298MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 628.52M 323MB/s in 1.9s \n", + "opendata.dwd.de/cli 100%[===================>] 441.02M 321MB/s in 1.4s \n", + "opendata.dwd.de/cli 
100%[===================>] 315.75M 315MB/s in 1.0s \n", + "opendata.dwd.de/cli 100%[===================>] 513.57M 311MB/s in 1.7s \n", + "opendata.dwd.de/cli 100%[===================>] 512.77M 244MB/s in 2.1s \n", + "opendata.dwd.de/cli 100%[===================>] 389.31M 244MB/s in 1.6s \n", + "opendata.dwd.de/cli 100%[===================>] 260.95M 241MB/s in 1.1s \n", + "opendata.dwd.de/cli 100%[===================>] 218.38M 202MB/s in 1.1s \n", + "opendata.dwd.de/cli 100%[===================>] 226.02M 238MB/s in 0.9s \n", + "opendata.dwd.de/cli 100%[===================>] 296.87M 261MB/s in 1.1s \n", + "opendata.dwd.de/cli 100%[===================>] 417.13M 261MB/s in 1.6s \n", + "opendata.dwd.de/cli 100%[===================>] 518.74M 267MB/s in 1.9s \n", + "opendata.dwd.de/cli 100%[===================>] 426.05M 249MB/s in 1.7s \n", + "opendata.dwd.de/cli 100%[===================>] 310.06M 255MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 314.10M 256MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 119.45M 212MB/s in 0.6s \n", + "opendata.dwd.de/cli 100%[===================>] 611.44M 250MB/s in 2.4s \n", + "opendata.dwd.de/cli 100%[===================>] 502.71M 257MB/s in 2.0s \n", + "opendata.dwd.de/cli 100%[===================>] 235.27M 257MB/s in 0.9s \n", + "opendata.dwd.de/cli 100%[===================>] 185.51M 266MB/s in 0.7s \n", + "opendata.dwd.de/cli 100%[===================>] 405.85M 269MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 343.46M 261MB/s in 1.3s \n", + "opendata.dwd.de/cli 100%[===================>] 451.88M 228MB/s in 2.0s \n", + "opendata.dwd.de/cli 100%[===================>] 465.06M 256MB/s in 1.8s \n", + "opendata.dwd.de/cli 100%[===================>] 342.08M 232MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 317.13M 237MB/s in 1.3s \n", + "opendata.dwd.de/cli 100%[===================>] 401.70M 242MB/s in 1.7s \n", + "opendata.dwd.de/cli 
100%[===================>] 356.46M 259MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 607.10M 305MB/s in 2.0s \n", + "opendata.dwd.de/cli 100%[===================>] 447.45M 298MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 399.28M 260MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 372.43M 232MB/s in 1.6s \n", + "opendata.dwd.de/cli 100%[===================>] 329.50M 242MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 614.69M 244MB/s in 2.5s \n", + "opendata.dwd.de/cli 100%[===================>] 421.00M 294MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 256.93M 273MB/s in 0.9s \n", + "opendata.dwd.de/cli 100%[===================>] 312.74M 266MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 415.60M 268MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 403.37M 267MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 409.83M 261MB/s in 1.6s \n", + "opendata.dwd.de/cli 100%[===================>] 333.12M 238MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 382.84M 293MB/s in 1.3s \n", + "opendata.dwd.de/cli 100%[===================>] 334.14M 262MB/s in 1.3s \n", + "opendata.dwd.de/cli 100%[===================>] 242.11M 252MB/s in 1.0s \n", + "opendata.dwd.de/cli 100%[===================>] 369.34M 249MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 514.29M 263MB/s in 2.0s \n", + "opendata.dwd.de/cli 100%[===================>] 322.31M 240MB/s in 1.3s \n", + "opendata.dwd.de/cli 100%[===================>] 471.87M 260MB/s in 1.8s \n", + "opendata.dwd.de/cli 100%[===================>] 517.56M 240MB/s in 2.2s \n", + "opendata.dwd.de/cli 100%[===================>] 314.34M 273MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 416.47M 305MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 294.86M 250MB/s in 1.2s \n", + "opendata.dwd.de/cli 
100%[===================>] 495.83M 236MB/s in 2.1s \n", + "opendata.dwd.de/cli 100%[===================>] 541.37M 247MB/s in 2.2s \n", + "opendata.dwd.de/cli 100%[===================>] 239.70M 133MB/s in 1.8s \n", + "opendata.dwd.de/cli 100%[===================>] 395.18M 236MB/s in 1.7s \n", + "opendata.dwd.de/cli 100%[===================>] 344.53M 256MB/s in 1.3s \n", + "opendata.dwd.de/cli 100%[===================>] 423.16M 240MB/s in 1.8s \n", + "opendata.dwd.de/cli 100%[===================>] 379.25M 244MB/s in 1.6s \n", + "opendata.dwd.de/cli 100%[===================>] 384.35M 241MB/s in 1.6s \n", + "opendata.dwd.de/cli 100%[===================>] 393.30M 266MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 402.42M 263MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 336.15M 305MB/s in 1.1s \n", + "opendata.dwd.de/cli 100%[===================>] 524.42M 305MB/s in 1.7s \n", + "opendata.dwd.de/cli 100%[===================>] 313.40M 275MB/s in 1.1s \n", + "opendata.dwd.de/cli 100%[===================>] 529.16M 294MB/s in 1.8s \n", + "opendata.dwd.de/cli 100%[===================>] 481.39M 246MB/s in 2.0s \n", + "opendata.dwd.de/cli 100%[===================>] 356.71M 276MB/s in 1.3s \n", + "opendata.dwd.de/cli 100%[===================>] 456.65M 297MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 393.11M 265MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 542.79M 289MB/s in 1.9s \n", + "opendata.dwd.de/cli 100%[===================>] 373.15M 282MB/s in 1.3s \n", + "opendata.dwd.de/cli 100%[===================>] 326.16M 288MB/s in 1.1s \n", + "opendata.dwd.de/cli 100%[===================>] 247.21M 257MB/s in 1.0s \n", + "opendata.dwd.de/cli 100%[===================>] 415.30M 246MB/s in 1.7s \n", + "opendata.dwd.de/cli 100%[===================>] 406.53M 280MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 228.95M 268MB/s in 0.9s \n", + "opendata.dwd.de/cli 
100%[===================>] 354.13M 273MB/s in 1.3s \n", + "opendata.dwd.de/cli 100%[===================>] 343.36M 265MB/s in 1.3s \n", + "opendata.dwd.de/cli 100%[===================>] 381.48M 260MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 386.25M 269MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 387.11M 270MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 414.99M 224MB/s in 1.9s \n", + "opendata.dwd.de/cli 100%[===================>] 538.25M 256MB/s in 2.1s \n", + "opendata.dwd.de/cli 100%[===================>] 418.83M 248MB/s in 1.7s \n", + "opendata.dwd.de/cli 100%[===================>] 457.87M 248MB/s in 1.8s \n", + "opendata.dwd.de/cli 100%[===================>] 461.96M 297MB/s in 1.6s \n", + "opendata.dwd.de/cli 100%[===================>] 523.04M 279MB/s in 1.9s \n", + "opendata.dwd.de/cli 100%[===================>] 503.95M 274MB/s in 1.8s \n", + "opendata.dwd.de/cli 100%[===================>] 594.03M 263MB/s in 2.3s \n", + "opendata.dwd.de/cli 100%[===================>] 248.46M 260MB/s in 1.0s \n", + "opendata.dwd.de/cli 100%[===================>] 453.16M 254MB/s in 1.8s \n", + "opendata.dwd.de/cli 100%[===================>] 312.54M 253MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 315.64M 264MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 288.51M 243MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 275.42M 271MB/s in 1.0s \n", + "opendata.dwd.de/cli 100%[===================>] 308.28M 290MB/s in 1.1s \n", + "opendata.dwd.de/cli 100%[===================>] 275.55M 311MB/s in 0.9s \n", + "opendata.dwd.de/cli 100%[===================>] 254.95M 280MB/s in 0.9s \n", + "opendata.dwd.de/cli 100%[===================>] 274.32M 276MB/s in 1.0s \n", + "opendata.dwd.de/cli 100%[===================>] 552.14M 256MB/s in 2.2s \n", + "opendata.dwd.de/cli 100%[===================>] 504.25M 241MB/s in 2.1s \n", + "opendata.dwd.de/cli 
100%[===================>] 268.21M 277MB/s in 1.0s \n", + "opendata.dwd.de/cli 100%[===================>] 486.81M 302MB/s in 1.6s \n", + "opendata.dwd.de/cli 100%[===================>] 312.85M 294MB/s in 1.1s \n", + "opendata.dwd.de/cli 100%[===================>] 496.01M 301MB/s in 1.6s \n", + "opendata.dwd.de/cli 100%[===================>] 300.96M 287MB/s in 1.0s \n", + "opendata.dwd.de/cli 100%[===================>] 333.53M 281MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 379.34M 273MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 418.96M 236MB/s in 1.8s \n", + "opendata.dwd.de/cli 100%[===================>] 512.94M 277MB/s in 1.8s \n", + "opendata.dwd.de/cli 100%[===================>] 458.38M 305MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 449.04M 306MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 331.46M 310MB/s in 1.1s \n", + "opendata.dwd.de/cli 100%[===================>] 623.31M 280MB/s in 2.2s \n", + "opendata.dwd.de/cli 100%[===================>] 389.78M 268MB/s in 1.5s \n", + "opendata.dwd.de/cli 100%[===================>] 202.11M 216MB/s in 0.9s \n", + "opendata.dwd.de/cli 100%[===================>] 331.21M 262MB/s in 1.3s \n", + "opendata.dwd.de/cli 100%[===================>] 503.72M 306MB/s in 1.6s \n", + "opendata.dwd.de/cli 100%[===================>] 356.30M 300MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 446.08M 317MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 294.81M 299MB/s in 1.0s \n", + "opendata.dwd.de/cli 100%[===================>] 530.47M 272MB/s in 1.9s \n", + "opendata.dwd.de/cli 100%[===================>] 217.21M 258MB/s in 0.8s \n", + "opendata.dwd.de/cli 100%[===================>] 460.37M 339MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 555.62M 256MB/s in 2.2s \n", + "opendata.dwd.de/cli 100%[===================>] 362.12M 305MB/s in 1.2s \n", + "opendata.dwd.de/cli 
100%[===================>] 370.48M 284MB/s in 1.3s \n", + "opendata.dwd.de/cli 100%[===================>] 373.71M 308MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 624.90M 275MB/s in 2.3s \n", + "opendata.dwd.de/cli 100%[===================>] 377.46M 272MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 480.39M 239MB/s in 2.0s \n", + "opendata.dwd.de/cli 100%[===================>] 484.33M 283MB/s in 1.7s \n", + "opendata.dwd.de/cli 100%[===================>] 239.37M 297MB/s in 0.8s \n", + "opendata.dwd.de/cli 100%[===================>] 343.19M 272MB/s in 1.3s \n", + "opendata.dwd.de/cli 100%[===================>] 351.14M 290MB/s in 1.2s \n", + "opendata.dwd.de/cli 100%[===================>] 428.24M 302MB/s in 1.4s \n", + "opendata.dwd.de/cli 100%[===================>] 1.08M --.-KB/s in 0.02s \n", + "CPU times: user 11.5 s, sys: 6.35 s, total: 17.8 s\n", + "Wall time: 5min 55s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "!wget -q -P /data/qt7760/ --show-progress -r -np -A .tar -R \"index.html*\" https://opendata.dwd.de/climate_environment/CDC/grids_germany/5_minutes/radolan/reproc/2017_002/bin/" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# delete (empty) folder /supplement\n", + "!rm -r /data/qt7760/opendata.dwd.de/climate_environment/CDC/grids_germany/5_minutes/radolan/reproc/2017_002/bin/supplement" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Read data from nested tar file\n", + "\n", + "Data is provided as monthly tar files, which contains daily tar.gz files, which contain the 5-minute binary files. 
To avoid extracting everything first we use the nested loop-construct below and extract only the data we want on the fly.\n", + "\n", + "**split daily**:\n", + "\n", + "folder structure:\n", + "- radklim_yw/\n", + " - 20100101_radklim_yw.nc\n", + " - 20100102_radklim_yw.nc\n", + " - ...\n", + " - 20211230_radklim_yw.nc\n", + " - 20211231_radklim_yw.nc\n", + "\n", + "-> naming pattern: `%Y%m%d_radklim_yw.nc`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracting data for the year 2001\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/12 [00:00, ?it/s]" + ] + } + ], + "source": [ + "%%time\n", + "#from time import time\n", + "\n", + "# absolute output_path to the folder radklim, where netCDF files are saved\n", + "output_path = \"/data/qt7760/radklim_yw/\"\n", + "\n", + "# create folder radklim_yw\n", + "os.makedirs(output_path, exist_ok=True)\n", + "\n", + "for year in sorted(glob('/data/qt7760/opendata.dwd.de/climate_environment/CDC/grids_germany/5_minutes/radolan/reproc/2017_002/bin/*')):\n", + " print(f\"Extracting data for the year {year[-4:]}\")\n", + " for month in tqdm.tqdm(sorted(glob(year + '/*'))):\n", + " with tarfile.open(month, 'r') as tar:\n", + " # fn_list: filenames of daily files ('YW2017.002_20010131.tar.gz')\n", + " fn_list = sorted([f.name for f in tar.getmembers()])\n", + " #fn_list = sorted(tar.getnames())\n", + "\n", + " # loop over daily files\n", + " for fn in fn_list:\n", + " f = tar.extractfile(fn)\n", + " # create (empty) daily netCDF\n", + " fn_netcdf = f\"{output_path}/{fn[-15:-7]}_radklim_yw.nc\"\n", + " # t1 = time()\n", + " rtn.create_empty_netcdf(fn=fn_netcdf, product_name='YW')\n", + " # t2=time()\n", + " #print(f\"create_empty_netcdf: {time() - t1}\")\n", + " # daily files contain the 5-minute data (tar_inner)\n", + " with tarfile.open(fileobj=f) as tar_inner:\n", + " # t1 = 
def _write_daily_netcdf(day_fileobj, fn_netcdf: str) -> None:
    """
    Convert one daily archive of 5-minute binary files into one daily netCDF file.

    Parameters
    ----------
    day_fileobj : file-like
        Open, readable file object of a daily archive (e.g. the object returned
        by ``TarFile.extractfile`` for 'YW2017.002_20010131.tar.gz').
    fn_netcdf : str
        Path of the netCDF file to create and fill.
    """
    # create (empty) daily netCDF
    rtn.create_empty_netcdf(fn=fn_netcdf, product_name='YW')

    # daily files contain the 5-minute data (tar_inner)
    with tarfile.open(fileobj=day_fileobj) as tar_inner:
        # Only regular files can be extracted; extractfile() returns None for
        # anything else (directories, links), which would break the reader.
        bin_members = sorted(
            (member for member in tar_inner.getmembers() if member.isfile()),
            key=lambda member: member.name,
        )

        for bin_member in bin_members:
            # extract 5-minute data, append to previously created daily netCDF
            with tar_inner.extractfile(bin_member) as bin_fileobj:
                data, metadata = rtn.read_in_one_bin_file(bin_fileobj)
            rtn.append_to_netcdf(
                fn_netcdf,
                data_list=[data, ],
                metadata_list=[metadata, ],
            )


def tar2netcdf(input_path: str, output_path: str, if_exists: str, delete_last: bool = False) -> None:
    """
    Untar DWD binary downloads and store them as daily netCDF files.

    The input folder is expected to contain one sub-folder per year, each
    holding monthly tar files. Every monthly tar contains daily ``tar.gz``
    files (e.g. 'YW2017.002_20010131.tar.gz'), which in turn contain the
    5-minute binary files. Nothing is extracted to disk; the archives are read
    on the fly and one netCDF file per day is written, named
    ``%Y%m%d_radklim_yw.nc``.

    Parameters
    ----------
    input_path : str
        Path to the folder where the binary DWD downloads are stored (yearly folders).
        Usually something like
        "./opendata.dwd.de/climate_environment/CDC/grids_germany/5_minutes/radolan/reproc/2017_002/bin".
    output_path : str
        Folder in which the generated netCDF files are stored. It is created
        if it does not exist.
    if_exists : {'fail', 'replace', 'skip'}
        What to do if the netCDF file of a day already exists.

        - 'fail': raise a ValueError.
        - 'replace': generate the file again via ``rtn.create_empty_netcdf``
          (NOTE(review): presumably this overwrites the existing file - confirm
          against radolan_to_netcdf).
        - 'skip': leave the existing file untouched and continue with the next day.

        If you use 'skip' after an interrupted run, combine it with
        ``delete_last=True`` so that the file that was being written when the
        run was interrupted is generated again.
    delete_last : bool, default False
        Whether to delete the last daily netCDF file (highest date in the file
        name) in `output_path` before processing. Only files matching the
        ``*_radklim_yw.nc`` naming pattern are considered; other files in the
        folder are never touched. This only makes sense if the previous call of
        tar2netcdf() was interrupted and the last file may be incomplete.

    Raises
    ------
    ValueError
        If `if_exists` is not one of the supported options, or if a netCDF file
        already exists and ``if_exists='fail'``.
    """
    valid_if_exists = ('fail', 'replace', 'skip')
    if if_exists not in valid_if_exists:
        # Without this check a typo would silently behave like 'replace'.
        raise ValueError(f"if_exists must be one of {valid_if_exists}, got {if_exists!r}")

    netcdf_suffix = "_radklim_yw.nc"

    # create folder in output path
    os.makedirs(output_path, exist_ok=True)

    # get the absolute output_path to the folder where netCDF files are saved
    output_path = os.path.abspath(output_path)

    # delete last created netCDF file in output_path if delete_last == True
    if delete_last:
        # Restrict to our own daily files so that unrelated files or folders
        # in output_path (notes, .ipynb_checkpoints, ...) are never removed.
        existing_files = sorted(
            name for name in os.listdir(output_path) if name.endswith(netcdf_suffix)
        )
        if existing_files:
            os.remove(os.path.join(output_path, existing_files[-1]))

    # loop over binary files
    for year in sorted(glob(f"{input_path}/*")):
        print(f"Extracting data for the year {year[-4:]}")
        for month in tqdm.tqdm(sorted(glob(year + '/*'))):
            with tarfile.open(month, 'r') as tar:
                # daily archives ('YW2017.002_20010131.tar.gz'), regular files only
                day_members = sorted(
                    (member for member in tar.getmembers() if member.isfile()),
                    key=lambda member: member.name,
                )

                # loop over daily files
                for day_member in day_members:
                    # The 8 characters in front of '.tar.gz' are the date (%Y%m%d)
                    fn_netcdf = f"{output_path}/{day_member.name[-15:-7]}{netcdf_suffix}"

                    if os.path.exists(fn_netcdf):
                        if if_exists == 'fail':
                            raise ValueError(f"netCDF file {fn_netcdf} already exists")
                        if if_exists == 'skip':
                            continue

                    # Open the daily archive only when it is actually processed
                    # and make sure the handle is closed again afterwards.
                    with tar.extractfile(day_member) as day_fileobj:
                        _write_daily_netcdf(day_fileobj, fn_netcdf)
"text": [ + "Extracting data for the year 2007\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 12/12 [00:00<00:00, 99.88it/s] \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracting data for the year 2008\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 12/12 [00:00<00:00, 99.43it/s] \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracting data for the year 2009\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 12/12 [00:00<00:00, 129.09it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracting data for the year 2010\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 12/12 [00:00<00:00, 245.17it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracting data for the year 2011\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 12/12 [00:00<00:00, 226.96it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracting data for the year 2012\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 12/12 [00:00<00:00, 202.13it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracting data for the year 2013\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 25%|██▌ | 3/12 [4:03:14<13:57:29, 5583.33s/it]" + ] + } + ], + "source": [ + "tar2netcdf(input_path=\"/data/qt7760/opendata.dwd.de/climate_environment/CDC/grids_germany/5_minutes/radolan/reproc/2017_002/bin/\",\n", + " output_path=\"/data/qt7760/radklim_yw/\",\n", + " if_exists='skip',\n", + " delete_last=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Open data and check" + ] + }, + { + "cell_type": 
"code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
<xarray.Dataset>\n", + "Dimensions: (x: 900, y: 1100, time: 288)\n", + "Coordinates:\n", + " * x (x) float64 -443.5 -442.5 -441.5 ... 453.5 454.5 455.5\n", + " * y (y) float64 -4.759e+03 -4.758e+03 ... -3.661e+03 -3.66e+03\n", + " latitudes (y, x) float64 dask.array<chunksize=(1100, 900), meta=np.ndarray>\n", + " longitudes (y, x) float64 dask.array<chunksize=(1100, 900), meta=np.ndarray>\n", + " * time (time) datetime64[ns] 2000-12-31T23:59:59.999999996 ... ...\n", + "Data variables:\n", + " rainfall_amount (time, y, x) float32 dask.array<chunksize=(288, 1100, 900), meta=np.ndarray>\n", + " maxrange (time) int16 dask.array<chunksize=(288,), meta=np.ndarray>\n", + " radarlocations (time) object dask.array<chunksize=(288,), meta=np.ndarray>\n", + " secondary (time, y, x) float32 dask.array<chunksize=(288, 1100, 900), meta=np.ndarray>\n", + " nodatamask (time, y, x) float32 dask.array<chunksize=(288, 1100, 900), meta=np.ndarray>\n", + " cluttermask (time, y, x) float32 dask.array<chunksize=(288, 1100, 900), meta=np.ndarray>\n", + " radolan_grid float64 ...\n", + "Attributes:\n", + " title: RADOLAN YW rainfall data\n", + " producttype: YW\n", + " institution: Deutscher Wetterdienst (DWD)\n", + " history: Created at 2022-12-02 15:21:49.205788\n", + " Conventions: CF-1.6