diff --git a/NAMESPACE b/NAMESPACE index d1eb2ee..94267db 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,8 @@ export(lfa_count_returns_all_areas) export(lfa_count_returns_per_tree) export(lfa_create_boxplot) export(lfa_create_density_plots) +export(lfa_create_neighbor_mean_curves) +export(lfa_create_plot_per_area) export(lfa_create_stacked_distributions_plot) export(lfa_create_stacked_histogram) export(lfa_create_tile_location_objects) diff --git a/R/create_density_plots.R b/R/create_density_plots.R index 0053506..91be809 100644 --- a/R/create_density_plots.R +++ b/R/create_density_plots.R @@ -26,8 +26,8 @@ lfa_create_density_plots <- function(data, value_column, - category_column1, - category_column2, + category_column1 = "area", + category_column2 = "specie", title = NULL, xlims = NULL, ylims = NULL) { diff --git a/R/create_neighbour_mean_curves.R b/R/create_neighbour_mean_curves.R new file mode 100644 index 0000000..1582726 --- /dev/null +++ b/R/create_neighbour_mean_curves.R @@ -0,0 +1,56 @@ +#' Create neighbor mean curves for specified areas +#' +#' This function generates mean curves for a specified set of areas based on neighbor data. +#' The user can choose to compute mean curves for individual neighbors or averages across neighbors. +#' +#' @param neighbors A data frame containing information about neighbors, where each column represents +#' a specific neighbor, and each row corresponds to an area. +#' @param use_avg Logical. If TRUE, the function computes average curves across all neighbors. +#' If FALSE, it computes curves for individual neighbors. +#' @return A data frame with mean curves for each specified area. +#' Columns represent areas, and rows represent index values. 
+#'
+#' @examples
+#' \dontrun{
+#' # Assuming you have a data frame 'your_neighbors_data' with neighbor information
+#' mean_curves <- lfa_create_neighbor_mean_curves(your_neighbors_data, use_avg = TRUE)
+#' print(mean_curves)
+#' }
+#'
+#' @export
+lfa_create_neighbor_mean_curves <- function(neighbors, use_avg = FALSE) {
+  # Look up all researched areas; one output column is built per area,
+  # named after the value in the `area` column
+  all_areas <- lfa::lfa_get_all_areas()
+
+  # Names of the 100 columns read from `neighbors`: `avg_1`..`avg_100` or
+  # `Neighbor_1`..`Neighbor_100`
+  prefix <- if (use_avg) "avg_" else "Neighbor_"
+  curve_columns <- paste0(prefix, seq_len(100L))
+
+  # Scaffold with 100 rows so that every area column has a matching length
+  df <- data.frame(index = seq_len(100L))
+
+  # seq_len() instead of 1:nrow(): with zero areas the loop is skipped
+  # rather than run for the bogus indices 1 and 0
+  for (row in seq_len(nrow(all_areas))) {
+    area_name <- all_areas[row, "area"]
+
+    # which() drops NA comparisons, so rows without an area label are left
+    # out instead of being returned as all-NA rows
+    area_rows <- neighbors[which(neighbors$area == area_name), ]
+
+    # Mean per column, ignoring missing values; vapply() avoids growing a
+    # vector with c() on every iteration
+    df[[area_name]] <- vapply(
+      curve_columns,
+      function(column) mean(area_rows[[column]], na.rm = TRUE),
+      numeric(1),
+      USE.NAMES = FALSE
+    )
+  }
+
+  # The index column was only scaffolding; drop it before returning
+  df$index <- NULL
+  df
+}
diff --git a/R/create_plot_per_area.R b/R/create_plot_per_area.R
new file mode 100644
index 0000000..36388a6
--- /dev/null
+++ b/R/create_plot_per_area.R
@@ -0,0 +1,44 @@
+#' Create a line plot per area with one color per specie
+#'
+#' This function takes a data frame containing numeric columns and creates a line plot
+#' using ggplot2. Each line in the plot represents a different area, with one color per specie.
+#'
+#' @param data A data frame with numeric columns and a column named 'specie' for species information.
+#' @return A ggplot2 line plot.
+#'
+#' @details Every column of `data` is drawn as one area. Column names are matched
+#' against the `area` values of `lfa::lfa_get_all_areas()` to look up `specie`.
+#'
+#' @examples
+#' \dontrun{
+#' # Curves as produced by lfa_create_neighbor_mean_curves(): one column per area
+#' curves <- lfa_create_neighbor_mean_curves(your_neighbors_data)
+#' lfa_create_plot_per_area(curves)
+#' }
+#'
+#' @export
+lfa_create_plot_per_area <- function(data) {
+  # Area -> specie lookup used to colour the lines
+  areas_specie <- lfa::lfa_get_all_areas()
+
+  # The row number is the x position shared by all curves
+  data$index <- seq_len(nrow(data))
+
+  # Long format with one row per index/area pair (gather() is superseded)
+  data_long <- tidyr::pivot_longer(
+    data, cols = -tidyr::all_of("index"), names_to = "area", values_to = "value"
+  )
+  data_long <- dplyr::left_join(data_long, areas_specie, by = "area")
+
+  # One line per area, coloured by the specie joined above
+  ggplot2::ggplot(
+    data_long, ggplot2::aes(x = index, y = value, color = specie, group = area)
+  ) +
+    ggplot2::geom_line() +
+    ggplot2::labs(
+      title = "Average Distance to n-nearest Neighbors across all patches",
+      x = "n",
+      y = "Average Distance to n-nearest Neighbor (m)"
+    ) +
+    ggplot2::theme_minimal()
+}
diff --git a/man/lfa_create_density_plots.Rd b/man/lfa_create_density_plots.Rd
index 66e5b68..106e0ab 100644
--- a/man/lfa_create_density_plots.Rd
+++ b/man/lfa_create_density_plots.Rd
@@ -7,8 +7,8 @@
 lfa_create_density_plots(
   data,
   value_column,
-  category_column1,
-  category_column2,
+  category_column1 = "area",
+  category_column2 = "specie",
   title = NULL,
   xlims = NULL,
   ylims = NULL
diff --git a/man/lfa_create_neighbor_mean_curves.Rd b/man/lfa_create_neighbor_mean_curves.Rd
new file mode 100644
index 0000000..5ec607a
--- /dev/null
+++ b/man/lfa_create_neighbor_mean_curves.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/create_neighbour_mean_curves.R
+\name{lfa_create_neighbor_mean_curves}
+\alias{lfa_create_neighbor_mean_curves}
+\title{Create neighbor mean curves for specified areas}
+\usage{
+lfa_create_neighbor_mean_curves(neighbors, use_avg = FALSE) +} +\arguments{ +\item{neighbors}{A data frame containing information about neighbors, where each column represents +a specific neighbor, and each row corresponds to an area.} + +\item{use_avg}{Logical. If TRUE, the function computes average curves across all neighbors. +If FALSE, it computes curves for individual neighbors.} +} +\value{ +A data frame with mean curves for each specified area. +Columns represent areas, and rows represent index values. +} +\description{ +This function generates mean curves for a specified set of areas based on neighbor data. +The user can choose to compute mean curves for individual neighbors or averages across neighbors. +} +\examples{ +# Assuming you have a data frame 'your_neighbors_data' with neighbor information +mean_curves <- lfa_create_neighbor_mean_curves(your_neighbors_data, use_avg = TRUE) +print(mean_curves) + +} diff --git a/man/lfa_create_plot_per_area.Rd b/man/lfa_create_plot_per_area.Rd new file mode 100644 index 0000000..4968486 --- /dev/null +++ b/man/lfa_create_plot_per_area.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_plot_per_area.R +\name{lfa_create_plot_per_area} +\alias{lfa_create_plot_per_area} +\title{Create a line plot per area with one color per specie} +\usage{ +lfa_create_plot_per_area(data) +} +\arguments{ +\item{data}{A data frame with numeric columns and a column named 'specie' for species information.} +} +\value{ +A ggplot2 line plot. +} +\description{ +This function takes a data frame containing numeric columns and creates a line plot +using ggplot2. Each line in the plot represents a different area, with one color per specie. 
+} +\examples{ +data <- data.frame( + specie = rep(c("Species1", "Species2", "Species3"), each = 10), + column1 = rnorm(30), + column2 = rnorm(30), + column3 = rnorm(30) +) +lfa_create_plot_per_area(data) + +} diff --git a/results/_freeze/report/execute-results/html.json b/results/_freeze/report/execute-results/html.json index 33fb8c3..b780f76 100644 --- a/results/_freeze/report/execute-results/html.json +++ b/results/_freeze/report/execute-results/html.json @@ -1,7 +1,7 @@ { - "hash": "4b4e3b5d1b324a24fc04923311675379", + "hash": "9f8331cdcca8df29add1ef17b1269fd1", "result": { - "markdown": "---\ntitle: \"Forest Data Analysis Report\"\noutput:\n pdf_document:\n latex_engine: xelatex\ntoc: true\ntoc-depth: 2\ntoc-title: Contents\nnumber-sections: true\nnumber-depth: 3\ndate: today\nauthor:\n - name: Jakob Danel\n email: jakob.danel@uni-muenster.de\n url: https://github.com/jakobdanel\n affiliations:\n - name: Universität Münster\n city: Münster\n country: Germany\n - name: Federick Bruch\n email: f_bruc03@uni-muenster.de\n url: https://www.uni-muenster.de/Geoinformatics/institute/staff/index.php/351/Frederick_Bruch\n affiliations:\n - name: Universität Münster\n city: Münster\n country: Germany\nbibliography: references.bib\nexecute-dir: .. \nprefer-html: true\n---\n\n\n# Introduction\n\nThis report documents the analysis of forest data for different tree species.\n\n# Methods\n\n## Data acquisition\n\nOur primary objective is to identify patches where one tree species exhibits a high level of dominance, striving to capture monocultural stands within the diverse forests of Nordrhein-Westfalia (NRW). Recognizing the practical challenges of finding true monocultures, we aim to identify patches where one species is highly dominant, enabling meaningful comparisons across different species.\n\nThe study is framed within the NRW region due to the availability of an easily accessible dataset. 
Our focus includes four prominent tree species in NRW: oak, beech, spruce, and pine, representing the most prevalent species in the region. To ensure the validity of our findings, we derive three patches for each species, thereby confirming that observed variables are characteristic of a particular species rather than a specific patch. Each patch is carefully selected to encompass an area of approximately 50-100 hectares and contain between 5,000 and 10,000 trees. Striking a balance between relevance and manageability, these patches avoid excessive size to enhance the likelihood of capturing varied species mixes and ensure compatibility with local hardware.\n\nSpecific Goals:\n\n1. Retrieve patches with highly dominant tree species.\n2. Minimize or eliminate the presence of human-made structures within the selected patches.\n\nTo achieve our goals, we utilized the waldmonitor dataset [@welle2014] and the map provided by [@Blickensdoerfer2022], both indicating dominant tree species in NRW. We identified patches of feasible size where both sources predicted the presence of a specific species. Further validation involved examining sentinel images of these forest regions to assess the evenness of structures, leaf color distribution, and the absence of significant human-made structures such as roads or buildings. The subsequent preprocessing steps, detailed in the following subsection, involved refining our selected patches and deriving relevant variables, such as tree distribution and density, to ensure that the chosen areas align with the desired research domains.\n\n## Preprocessing\n::: {.cell}\n\n:::\n\n\nIn this research study, the management and processing of a large dataset are crucial considerations. The dataset's substantial size necessitates careful maintenance to ensure efficient handling. Furthermore, the data should be easily processable and editable to facilitate necessary corrections and precalculations within the context of our research objectives. 
To achieve our goals, we have implemented a framework that automatically derives data based on a shapefile, delineating areas of interest. The processed data and results of precalculations are stored in a straightforward manner to enhance accessibility. Additionally, we have designed functions that establish a user-friendly interface, enabling the execution of algorithms on subsets of the data, such as distinct species. These interfaces are not only directly callable by users but can also be integrated into other functions to automate processes. The overarching aim is to streamline the entire preprocessing workflow using a single script, leveraging only the shapefile as a basis. This subsection details the accomplishments of our R-package in realizing these goals, outlining the preprocessing steps undertaken and justifying their necessity in the context of our research.\n\nThe data are stored in a data subdirectory of the root directory in the format `species/location-name/tile-name`. To automate the matching of areas of interest with the catalog from the Land NRW[^1], we utilize the intersecting tool developed by Heisig[^2]. This tool allows for the automatic retrieval and placement of data downloaded from the Land NRW catalog. To enhance data accessibility, we have devised an object that incorporates species, location name, and tile name (the NRW internal identifier) for each area. This object facilitates the specification of the area to be processed. Additionally, we have defined an initialization function that downloads all tiles, returning a list of tile location objects for subsequent processing. A pivotal component of the package's preprocessing functionality is the map function, which iterates over a list of tile locations (effectively the entire dataset) and accepts a processing function as an argument. 
The subsequent paragraph outlines the specific preprocessing steps employed, all of which are implemented within the mapping function.\n\nTo facilitate memory-handling capabilities, each of the tiles, where one area can span multiple tiles, has been split into manageable chunks. We employed a 50x50m size for each tile, resulting in the division of original 1km x 1km files into 400 tiles. These tiles are stored in our directory structure, with each tile housed in a directory named after its tile name and assigned an id as the filename. Implementation-wise, the `lidR::catalog_retile` function was instrumental in achieving this segmentation. The resulting smaller chunks allow for efficient iteration during subsequent preprocessing steps.\n\nThe next phase involves reducing our data to the actual size by intersecting the tiles with the defined area of interest. Using the `lidR::merge_spatial` function, we intersect the area derived from the shapefile, removing all point cloud items outside this region. Due to our tile-wise approach, empty tiles may arise, and in such cases, those tiles are simply deleted.\n\nFollowing the size reduction of our dataset, the next step involves correcting the `z` values. The `z` values in the data are originally relative to the ellipsoid used for referencing, but we require them to be relative to the ground. To achieve this, we utilize the `lidR::tin` function, which extrapolates a convex hull between all ground points (classified by the data provider) and calculates the z value based on this structure.\n\nSubsequently, we aim to perform segmentation for each distinct tree, marking each item of the point cloud with a tree ID. We employ the algorithm described by @li2012, using parameters `li2012(dt1 = 2, dt2 = 3, R = 2, Zu = 10, hmin = 5, speed_up = 12)`. 
The meanings of these parameters are elucidated in Li et al.'s work [@li2012].\n\nFinally, the last preprocessing step involves individual tree detection, seeking a single `POINT` object for each tree. The `lidR::lmf` function, an implementation of the tree data using a local maximum approach, is utilized for this purpose [@popescu2004]. The results are stored in GeoPackage files within our data structure.\n\nSee @sec-appendix-preprocessing for the implementation of the preprocessing.\n\n[^1]: https://www.opengeodata.nrw.de/produkte/geobasis/hm/3dm_l_las/3dm_l_las/, last visited 7th Dec 2023\n[^2]: https://github.com/joheisig/GEDIcalibratoR, last visited 7th Dec 2023\n\n## Analysis of different distributions\n\nAnalysis of data distributions is a critical aspect of our research, with a focus on comparing two or more distributions. Our objective extends beyond evaluating the disparities between species; we also aim to assess differences within a species. To gain a comprehensive understanding of the data, we employ various visualization techniques, including histograms, density functions, and box plots.\n\nIn tandem with visualizations, descriptive statistics, such as means, standard errors, and quantiles, are leveraged to provide key insights into the central tendency and variability of the data.\n\nFor a more quantitative analysis of distribution dissimilarity, statistical tests are employed. The Kullback-Leibler (KL) difference serves as a measure to compare the similarity of a set of distributions. This involves converting distributions into their density functions, with the standard error serving as the bandwidth. The KL difference is calculated for each pair of distributions, as it is asymmetric. 
For two distributions $P$ and $Q$, the KL difference is defined as follows [@kullback1951kullback]:\n\n$$\nD_{KL}(P \\, \\| \\, Q) = \\sum_i P(i) \\log\\left(\\frac{P(i)}{Q(i)}\\right)\n$$\n\nTo obtain a symmetric score, the Jensen-Shannon Divergence (JSD) is utilized [@grosse2002analysis], expressed by the formula:\n\n$$\nJS(P \\, \\| \\, Q) = \\frac{1}{2} D_{KL}(P \\, \\| \\, M) + \\frac{1}{2} D_{KL}(Q \\, \\| \\, M)\n$$\nHere, $M = \\frac{1}{2} (P + Q)$. The JSD provides a balanced measure of dissimilarity between distributions [@Brownlee2019Calculate]. For comparing the different scores to each other, we will use averages.\n\nAdditionally, the Kolmogorov-Smirnov Test is implemented to assess whether two distributions significantly differ from each other. This statistical test offers a formal evaluation of the dissimilarity between empirical distribution functions.\n\n\n# Results\n::: {.cell}\n\n:::\n\n## Researched areas\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\nlibrary(ggplot2)\nsf::sf_use_s2(FALSE)\npatches <- sf::read_sf(\"research_areas.shp\") |> sf::st_centroid()\n\nde <- sf::read_sf(\"results/results/states_de/Bundesländer_2017_mit_Einwohnerzahl.shp\") # Source: https://hub.arcgis.com/datasets/esri-de-content::bundesl%C3%A4nder-2017-mit-einwohnerzahl/explore?location=51.099647%2C10.454033%2C7.43\nnrw <- de[5,] |> sf::st_geometry()\n\n\nggplot() + geom_sf(data = nrw) + \n geom_sf(data = patches, mapping = aes(col = species))\n```\n\n::: {.cell-output-display}\n![Locations of the different patches with the dominant species for that patch. The patch centroids are displayed on a basemap showing the borders of NRW.](report_files/figure-html/fig-patches-nrw-1.png){#fig-patches-nrw width=672}\n:::\n:::\nWe drew three patches for each species from different regions (see @tbl-summary-researched-areas). We downloaded the LiDAR data for those patches and ran all preprocessing steps as described. We then checked with certain derived parameters (e.g. 
tree heights, tree distributions or tree density) that all patches contain valid forest data. In that step we discovered that in one patch some forest clearance took place in the recent past. This patch was removed from the dataset and was replaced with a new one. \n\nIn our research, drawing patches evenly distributed across Nordrhein-Westfalia is inherently constrained by natural factors. Consequently, the patches for oak and pine predominantly originate from the Münsterland region, as illustrated in [@fig-patches-nrw]. For spruce, the patches were derived from Sauerland, reflecting the prevalence of spruce forests in this specific region within NRW, as corroborated by Welle et al. [@welle2014] and Blickensdörfer et al. [@Blickensdoerfer2022]. Beech patches, on the other hand, were generated from diverse locations within NRW. Across all patches, no human-made objects were identified, with the exception of small paths for pedestrians and forestry vehicles.\n\nThe distribution of area and detections is notable for each of the four species. Beech covers 69.79 hectares with a total of 5,954 detections, oak spans 63.23 hectares with 5,354 detections, pine extends across 72.86 hectares with 8,912 detections, and spruce encompasses 57.94 hectares with 8,619 detections. Both the amount of detections and the corresponding area exhibit a relatively uniform distribution across the diverse patches, as summarized in @tbl-summary-researched-areas. \n\nWith the selected dataset described, we intentionally chose three patches for each of the four species that exhibit a practical and usable size for our research objectives. 
These carefully chosen patches align with the conditions essential for our study, providing comprehensive and representative data for in-depth analysis and meaningful insights into the characteristics of each tree species within the specified areas.\n\n\n::: {#tbl-summary-researched-areas .cell tbl-cap='Summary of researched patches grouped by species, with their location, area and the amount of detected trees.'}\n\n```{.r .cell-code code-fold=\"true\"}\nshp <- sf::read_sf(\"research_areas.shp\")\ntable <- lfa::lfa_get_all_areas()\n\nsf::sf_use_s2(FALSE)\nfor (row in 1:nrow(table)) {\n area <-\n dplyr::filter(shp, shp$species == table[row, \"specie\"] &\n shp$name == table[row, \"area\"])\n area_size <- area |> sf::st_area()\n point <- area |> sf::st_centroid() |> sf::st_coordinates()\n table[row,\"point\"] <- paste0(\"(\",round(point[1], digits = 4),\", \",round(point[2],digits = 4),\")\")\n \n table[row, \"area_size\"] = round(area_size,digits = 2) #paste0(round(area_size,digits = 2), \" m²\")\n \n amount_det <- nrow(lfa::lfa_get_detection_area(table[row, \"specie\"], table[row, \"area\"]))\n if(is.null(amount_det)){\n cat(nrow(lfa::lfa_get_detection_area(table[row, \"specie\"], table[row, \"area\"])),table[row, \"specie\"],table[row, \"area\"])\n }\n table[row, \"amount_detections\"] = amount_det\n \n # table[row, \"specie\"] <- lfa::lfa_capitalize_first_char(table[row,\"specie\"])\n table[row, \"area\"] <- lfa::lfa_capitalize_first_char(table[row,\"area\"])\n }\ntable$area <- gsub(\"_\", \" \", table$area)\ntable$area <- gsub(\"ue\", \"ü\", table$area)\ntable = table[,!names(table) %in% c(\"specie\")]\n\nknitr::kable(table, \"html\", col.names = c(\"Patch Name\",\"Location\",\"Area size (m²)\",\"Amount tree detections\" ), caption = NULL, digits = 2, escape = TRUE) |>\n kableExtra::kable_styling(\n bootstrap_options = c(\"striped\", \"hold_position\", \"bordered\",\"responsive\"),\n stripe_index = c(1:3,7:9),\n full_width = FALSE\n ) |>\n 
kableExtra::pack_rows(\"Beech\", 1, 3) |>\n kableExtra::pack_rows(\"Oak\", 4, 6) |>\n kableExtra::pack_rows(\"Pine\", 7, 9) |>\n kableExtra::pack_rows(\"Spruce\", 10, 12) |>\n kableExtra::column_spec(1, bold = TRUE)\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n \n \n \n \n \n \n \n \n\n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Patch Name Location Area size (m²) Amount tree detections
Beech
Bielefeld brackwede (8.5244, 51.9902) 161410.57 1443
Billerbeck (7.3273, 51.9987) 185887.25 1732
Wülfenrath (7.0769, 51.2917) 350621.21 2779
Oak
Hamm (7.8618, 51.6639) 269397.22 2441
Münster (7.6187, 51.9174) 164116.61 1270
Rinkerode (7.6744, 51.8598) 198811.09 1643
Pine
Greffen (8.1697, 51.9913) 49418.81 513
Mesum (7.5403, 52.2573) 405072.85 5031
Telgte (7.7816, 52.0024) 274132.34 3368
Spruce
Brilon (8.5352, 51.4084) 211478.20 3342
Oberhundem (8.1861, 51.0909) 151895.53 2471
Osterwald (8.3721, 51.2151) 216026.43 2806
\n\n`````\n:::\n:::\n\n\n\n\n\n## Distribution of the tree heights\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\ndetections <- lfa::lfa_get_detections()\n```\n:::\n\nIn this study, we scrutinize the distribution of tree heights, focusing initially on the density distribution to unravel the nuances across various tree species. Notably, our examination reveals distinctive patterns, with Oak and Pine exhibiting significantly steeper peaks in their density curves compared to Beech and Spruce. While all species present unique density curves, a commonality emerges—each curve is characterized by a single peak, except for the intriguing exception observed in Telgte. Taking Beech as an illustrative example, our findings indicate a notable shift in the peak to a considerably higher extent. The variance in the density curves indicates that differentiating between species based on tree height values alone could be difficult.\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\nlfa::lfa_create_density_plots(detections, value_column = \"Z\", category_column1 = \"area\", category_column2 = \"specie\", title = \"Density of the height distributions\", xlims = c(0,50))\n```\n\n::: {.cell-output-display}\n![Density of the height distributions of the detected trees, split by the different researched areas and grouped by the dominant species in each area.](report_files/figure-html/fig-density-z-1.png){#fig-density-z width=672}\n:::\n:::\n\nTo take a deeper look into the distributions of those `Z`-values, we will now also look at the boxplots of the height distributions in the different areas.\nNoteworthy observations include the presence of outliers beyond the extended range of the Whisker Antennas ($1.5*\\text{IQR}$) in all datasets. Of particular interest is the Rinkerode dataset, which exhibits a higher prevalence of outliers in the upper domain. 
Anomalies in this dataset are attributed to potential inaccuracies, urging a critical examination of data integrity. A pairwise examination of Oak and Pine species indicates higher mean heights for Oak compared to Pine. This insight underscores the significance of species-specific attributes in shaping overall height distributions. Further exploration into the factors contributing to these mean differences enhances our understanding of the unique characteristics inherent to each species. Contrary to expectations, the spread within a particular species does not exhibit significant divergence from the spread observed between different species. This finding suggests that while species-specific traits play a crucial role in shaping height distributions, certain overarching factors may contribute to shared patterns across diverse tree populations.\n\n\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\nlfa::lfa_create_boxplot(detections, value_column = \"Z\", category_column1 = \"area\", category_column2 = \"specie\", title = \"Boxplots of the height distributions\")\n```\n\n::: {.cell-output-display}\n![Boxplots of the height distributions of the detected trees, split by the different researched areas and grouped by the dominant species in each area.](report_files/figure-html/fig-boxplot-z-1.png){#fig-boxplot-z width=672}\n:::\n:::\n\n\n\nOur examination of Kullback-Leibler Divergence (KLD) and Jensen-Shannon Divergence (JSD) metrics reveals low mean values (KLD: 5.252696, JSD: 2.246663) across different species, indicating overall similarity in tree species height distributions. However, within specific species, particularly Pine, higher divergence values (see @tbl-z-values-kld-pine and @tbl-z-values-jsd-pine) suggest significant intraspecific differences.\n\nNotably, the Spruce species consistently demonstrates low divergence values across all tested areas, implying a high level of explainability. 
This finding highlights tree height as a reliable indicator for detecting Spruce trees, indicating its potential for accurate species identification in diverse forest ecosystems.\n\n## Distribution of number of returns per detected tree.\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\ndata <- sf::st_read(\"data/tree_properties.gpkg\")\nneighbors <- lfa::lfa_get_neighbor_paths() |> lfa::lfa_combine_sf_obj(lfa::lfa_get_all_areas())\ndata = sf::st_join(data,neighbors, join = sf::st_within)\n```\n:::\nExamining the distribution of LiDAR returns per tree is the focus of our current investigation. Initial analysis involves the study of density graphs representing the distribution of LiDAR returns. The density curves for each species exhibit distinct peaks corresponding to their respective species, providing a clear differentiation in LiDAR return patterns. Notably, there is an exception observed in the Brilon patch (Spruce), where the curve deviates, possibly indicative of variations in forest age. A noteworthy trend is the divergent shape of density curves between coniferous and deciduous trees. Conifers exhibit steeper curves, indicating lower density for higher return values compared to deciduous trees. This disparity underscores the potential of LiDAR data to distinguish between tree types based on return density characteristics. In the case of Beech trees, the peaks' heights vary among different curves, suggesting nuanced variations within the species. 
Despite these differences, all species consistently peak in similar regions, emphasizing the overarching similarities in LiDAR return patterns across diverse tree species.\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\nlfa::lfa_create_density_plots(data, value_column = \"number_of_returns\", category_column1 = \"area\", category_column2 = \"specie\", title = \"Density of the distribution of LiDAR returns per individual tree\", xlims = c(0,10000))\n```\n\n::: {.cell-output-display}\n![Density of the number of LiDAR returns per detected tree, split by the different researched areas and grouped by the dominant species in each area.](report_files/figure-html/fig-density-number-returns-1.png){#fig-density-number-returns width=672}\n:::\n:::\n\nCurrently, our investigation focuses on boxplots representing each patch. We observe significant size variations among plots within the same species. Notably, numerous outliers are present above the box in each patch. For Pines, the boxes exhibit a notable similarity. However, the box for Brilon is entirely shifted from other boxes associated with patches featuring Spruce forest.\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\nlfa::lfa_create_boxplot(data, value_column = \"number_of_returns\", category_column1 = \"area\", category_column2 = \"specie\", title = \"Boxplots of the distribution of LiDAR returns per individual tree\")\n```\n\n::: {.cell-output-display}\n![Boxplots of the number of LiDAR returns per detected tree, split by the different researched areas and grouped by the dominant species in each area.](report_files/figure-html/fig-boxplot-number-returns-1.png){#fig-boxplot-number-returns width=672}\n:::\n:::\n\nOverall, our analysis reveals very low results for both Kullback-Leibler Divergence (KLD) and Jensen-Shannon Divergence (JSD) metrics across different species. 
Within species, there is high explainability observed for the different LiDAR return curves between patches.\n\nThis suggests that the number of returns alone may not be a robust predictor for identifying the dominant species in a forest. However, the curves indicate a clear potential for distinguishing between conifers (Pine and Spruce) and deciduous trees (Beech and Oak) based on the number of returns. This observation is further supported by the JSD scores, as detailed in @tbl-number-of-returns-jsd_specie.\n\n\n\n\n\n|specie |area | density (1/m²)|\n|:------|:-------------------|---------:|\n|beech |bielefeld_brackwede | 0.0089399|\n|beech |billerbeck | 0.0093175|\n|beech |wuelfenrath | 0.0079259|\n|oak |hamm | 0.0090610|\n|oak |muenster | 0.0077384|\n|oak |rinkerode | 0.0082641|\n|pine |greffen | 0.0103807|\n|pine |mesum | 0.0124200|\n|pine |telgte | 0.0122860|\n|spruce |brilon | 0.0158030|\n|spruce |oberhundem | 0.0162678|\n|spruce |osterwald | 0.0129892|\n\n\n\n# References\n\n::: {#refs}\n:::\n\n# Appendix\n## Script which can be used to do all preprocessing {#sec-appendix-preprocessing}\n\n::: {.cell}\n\n:::\n\n\nLoad the file with the research areas\n::: {.cell}\n\n```{.r .cell-code}\nsf <- sf::read_sf(here::here(\"research_areas.shp\"))\nprint(sf)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nSimple feature collection with 12 features and 3 fields\nGeometry type: POLYGON\nDimension: XY\nBounding box: xmin: 7.071625 ymin: 51.0895 xmax: 8.539877 ymax: 52.25983\nGeodetic CRS: WGS 84\n# A tibble: 12 × 4\n id species name geometry\n \n 1 1 oak rinkerode ((7.678922 51.85789, 7.675446 51.85752, 7.…\n 2 2 oak hamm ((7.858955 51.66699, 7.866444 51.66462, 7.…\n 3 3 oak muenster ((7.618908 51.9154, 7.617384 51.9172, 7.61…\n 4 4 pine greffen ((8.168691 51.98965, 8.167178 51.99075, 8.…\n 5 5 pine telgte ((7.779728 52.00662, 7.781616 52.00662, 7.…\n 6 6 pine mesum ((7.534424 52.25499, 7.53378 52.25983, 7.5…\n 7 7 beech bielefeld_brackwede ((8.524749 51.9921, 
8.528418 51.99079, 8.5…\n 8 8 beech wuelfenrath ((7.071625 51.29256, 7.072311 51.29334, 7.…\n 9 9 beech billerbeck ((7.324729 51.99783, 7.323548 51.99923, 7.…\n10 11 spruce brilon ((8.532195 51.41029, 8.535027 51.41064, 8.…\n11 12 spruce osterwald ((8.369328 51.21693, 8.371238 51.21718, 8.…\n12 10 spruce oberhundem ((8.18082 51.08999, 8.180868 51.09143, 8.1…\n```\n:::\n:::\n\n\nInitialize the project\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(lfa)\nsf::sf_use_s2(FALSE)\nlocations <- lfa_init(\"research_areas.shp\")\n```\n:::\n\nRun all of the preprocessing steps\n::: {.cell}\n\n```{.r .cell-code}\nlfa_map_tile_locations(locations,retile,check_flag = \"retile\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nNo further processing: flag retile is set!Function is already computed, no further computings here\n```\n:::\n\n::: {.cell-output .cell-output-stdout}\n```\nNULL\n```\n:::\n\n```{.r .cell-code}\nlfa_map_tile_locations(locations, lfa_intersect_areas, ctg = NULL, areas_sf = sf,check_flag = \"intersect\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nNo further processing: flag intersect is set!Function is already computed, no further computings here\n```\n:::\n\n::: {.cell-output .cell-output-stdout}\n```\nNULL\n```\n:::\n\n```{.r .cell-code}\nlfa_map_tile_locations(locations, lfa_ground_correction, ctg = NULL,check_flag = \"z_correction\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nNo further processing: flag z_correction is set!Function is already computed, no further computings here\n```\n:::\n\n::: {.cell-output .cell-output-stdout}\n```\nNULL\n```\n:::\n\n```{.r .cell-code}\nlfa_map_tile_locations(locations, lfa_segmentation, ctg = NULL,check_flag = \"segmentation\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nNo further processing: flag segmentation is set!Function is already computed, no further computings here\n```\n:::\n\n::: {.cell-output .cell-output-stdout}\n```\nNULL\n```\n:::\n\n```{.r 
.cell-code}\nlfa_map_tile_locations(locations, lfa_detection, catalog = NULL, write_to_file = TRUE,check_flag = \"detection\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nNo further processing: flag detection is set!Function is already computed, no further computings here\n```\n:::\n\n::: {.cell-output .cell-output-stdout}\n```\nNULL\n```\n:::\n:::\n\n\n## Quantitative Results\n### Distribution of Z-Values\n\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\ndata <- lfa::lfa_get_detections()\nvalue_column <- \"Z\"\n```\n:::\n\n\n\n#### Kullback-Leibler-Divergence\n\n\n\n::: {#tbl-z-values-kld_specie .cell tbl-cap='Kullback-Leibler-Divergence between the researched species Beech, Oak, Pine and Spruce for the atrribute z-values'}\n\n```{.r .cell-code code-fold=\"true\"}\nkld_results_specie <- lfa::lfa_run_test_asymmetric(data,value_column,\"specie\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_specie,\"Kullback-Leibler-Divergence between species\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between species
Beech Oak Pine Spruce
Beech 0.0 13.2 12.5 0.76
Oak 4.2 0.0 3.4 5.02
Pine 2.3 5.6 0.0 3.95
Spruce 2.4 14.7 16.1 0.00
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_specie, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 5.252696\n```\n:::\n:::\n\n\n\n\n::: {#tbl-z-values-kld-beech .cell tbl-cap='Kullback-Leibler-Divergence between the researched areas which have the dominante specie beech for the atrribute z-values'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"beech\",]\nkld_results_beech <- lfa::lfa_run_test_asymmetric(specie,value_column,\"area\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_beech,\"Kullback-Leibler-Divergence between areas with beech\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between areas with beech
Bielefeld_brackwede Billerbeck Wuelfenrath
Bielefeld_brackwede 0.00 0.4 3.1
Billerbeck 0.27 0.0 6.0
Wuelfenrath 1.13 2.4 0.0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_beech, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 1.473353\n```\n:::\n:::\n\n\n\n\n::: {#tbl-z-values-kld-oak .cell tbl-cap='Kullback-Leibler-Divergence between the researched areas which have the dominante specie oak for the atrribute z-values'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"oak\",]\nkld_results_oak <- lfa::lfa_run_test_asymmetric(specie,value_column,\"area\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_oak,\"Kullback-Leibler-Divergence between areas with oak\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between areas with oak
Hamm Muenster Rinkerode
Hamm 0.0 2.1 16
Muenster 0.4 0.0 17
Rinkerode 7.6 17.8 0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_oak, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 6.779863\n```\n:::\n:::\n\n\n\n\n::: {#tbl-z-values-kld-pine .cell tbl-cap='Kullback-Leibler-Divergence between the researched areas which have the dominante specie pine for the atrribute z-values'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"pine\",]\nkld_results_pine <- lfa::lfa_run_test_asymmetric(specie,value_column,\"area\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_pine,\"Kullback-Leibler-Divergence between areas with pine\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between areas with pine
Greffen Mesum Telgte
Greffen 0.00 0.74 16
Mesum 0.43 0.00 18
Telgte 3.87 6.82 0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_pine, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 5.129383\n```\n:::\n:::\n\n\n\n\n::: {#tbl-z-values-kld-spruce .cell tbl-cap='Kullback-Leibler-Divergence between the researched areas which have the dominante specie spruce for the atrribute z-values'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"spruce\",]\nkld_results_spruce <- lfa::lfa_run_test_asymmetric(specie,value_column,\"area\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_spruce,\"Kullback-Leibler-Divergence between areas with spruce\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between areas with spruce
Brilon Oberhundem Osterwald
Brilon 0.000 0.092 1.7
Oberhundem 0.081 0.000 2.1
Osterwald 1.521 2.178 0.0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_spruce, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.8509258\n```\n:::\n:::\n\n\n\n\n#### Jensen-Shannon Divergence\n\n\n\n::: {#tbl-z-values-jsd_specie .cell tbl-cap='Jensen-Shannon Divergence between the researched species Beech, Oak, Pine and Spruce for the atrribute z-values'}\n\n```{.r .cell-code code-fold=\"true\"}\njsd_results_specie <- lfa::lfa_run_test_symmetric(data,value_column,\"specie\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_specie,\"Jensen-Shannon Divergence between species\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between species
Beech Oak Pine Spruce
Beech 0 4.5 4.6 2.4
Oak NA 0.0 3.9 6.1
Pine NA NA 0.0 7.1
Spruce NA NA NA 0.0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_specie, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 2.246663\n```\n:::\n:::\n\n\n\n\n::: {#tbl-z-values-jsd-beech .cell tbl-cap='Jensen-Shannon Divergence between the researched areas which have the dominante specie beech for the atrribute z-values'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"beech\",]\njsd_results_beech <- lfa::lfa_run_test_symmetric(specie,value_column,\"area\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_beech,\"Jensen-Shannon Divergence between areas with beech\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between areas with beech
Bielefeld_brackwede Billerbeck Wuelfenrath
Bielefeld_brackwede 0 1.1 3.3
Billerbeck NA 0.0 4.9
Wuelfenrath NA NA 0.0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_beech, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 1.10555\n```\n:::\n:::\n\n\n\n\n::: {#tbl-z-values-jsd-oak .cell tbl-cap='Jensen-Shannon Divergence between the researched areas which have the dominante specie oak for the atrribute z-values'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"oak\",]\njsd_results_oak <- lfa::lfa_run_test_symmetric(specie,value_column,\"area\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_oak,\"Jensen-Shannon Divergence between areas with oak\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between areas with oak
Hamm Muenster Rinkerode
Hamm 0 1.6 6.5
Muenster NA 0.0 6.4
Rinkerode NA NA 0.0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_oak, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 1.692942\n```\n:::\n:::\n\n\n\n\n::: {#tbl-z-values-jsd-pine .cell tbl-cap='Jensen-Shannon Divergence between the researched areas which have the dominante specie pine for the atrribute z-values'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"pine\",]\njsd_results_pine <- lfa::lfa_run_test_symmetric(specie,value_column,\"area\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_pine,\"Jensen-Shannon Divergence between areas with pine\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between areas with pine
Greffen Mesum Telgte
Greffen 0 3.1 12
Mesum NA 0.0 10
Telgte NA NA 0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_pine, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 2.956354\n```\n:::\n:::\n\n\n\n\n::: {#tbl-z-values-jsd-spruce .cell tbl-cap='Jensen-Shannon Divergence between the researched areas which have the dominante specie spruce for the atrribute z-values'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"spruce\",]\njsd_results_spruce <- lfa::lfa_run_test_symmetric(specie,value_column,\"area\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_spruce,\"Jensen-Shannon Divergence between areas with spruce\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between areas with spruce
Brilon Oberhundem Osterwald
Brilon 0 0.31 4.0
Oberhundem NA 0.00 5.5
Osterwald NA NA 0.0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_spruce, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 1.100383\n```\n:::\n:::\n\n\n### Distribution of the number of returns\n\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\ndata <- sf::st_read(\"data/tree_properties.gpkg\")\nneighbors <- lfa::lfa_get_neighbor_paths() |> lfa::lfa_combine_sf_obj(lfa::lfa_get_all_areas())\ndata = sf::st_join(data,neighbors, join = sf::st_within)\nvalue_column <- \"number_of_returns\"\n```\n:::\n\n\n\n#### Kullback-Leibler-Divergence\n\n\n\n::: {#tbl-number-of-returns-kld_specie .cell tbl-cap='Kullback-Leibler-Divergence between the researched species Beech, Oak, Pine and Spruce for the atrribute number-of-returns'}\n\n```{.r .cell-code code-fold=\"true\"}\nkld_results_specie <- lfa::lfa_run_test_asymmetric(data,value_column,\"specie\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_specie,\"Kullback-Leibler-Divergence between species\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between species
Beech Oak Pine Spruce
Beech 0.000 0.083 0.57 0.049
Oak 0.051 0.000 0.84 0.059
Pine 0.432 0.833 0.00 0.526
Spruce 0.036 0.059 0.54 0.000
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_specie, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.2550987\n```\n:::\n:::\n\n\n\n\n::: {#tbl-number-of-returns-kld-beech .cell tbl-cap='Kullback-Leibler-Divergence between the researched areas which have the dominante specie beech for the atrribute number-of-returns'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"beech\",]\nkld_results_beech <- lfa::lfa_run_test_asymmetric(specie,value_column,\"area\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_beech,\"Kullback-Leibler-Divergence between areas with beech\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between areas with beech
Bielefeld_brackwede Billerbeck Wuelfenrath
Bielefeld_brackwede 0.00 0.15 0.082
Billerbeck 0.21 0.00 0.136
Wuelfenrath 0.13 0.19 0.000
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_beech, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.09985223\n```\n:::\n:::\n\n\n\n\n::: {#tbl-number-of-returns-kld-oak .cell tbl-cap='Kullback-Leibler-Divergence between the researched areas which have the dominante specie oak for the atrribute number-of-returns'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"oak\",]\nkld_results_oak <- lfa::lfa_run_test_asymmetric(specie,value_column,\"area\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_oak,\"Kullback-Leibler-Divergence between areas with oak\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between areas with oak
Hamm Muenster Rinkerode
Hamm 0.00 0.46 0.846
Muenster 0.41 0.00 0.077
Rinkerode 0.81 0.09 0.000
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_oak, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.2994815\n```\n:::\n:::\n\n\n\n\n::: {#tbl-number-of-returns-kld-pine .cell tbl-cap='Kullback-Leibler-Divergence between the researched areas which have the dominante specie pine for the atrribute number-of-returns'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"pine\",]\nkld_results_pine <- lfa::lfa_run_test_asymmetric(specie,value_column,\"area\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_pine,\"Kullback-Leibler-Divergence between areas with pine\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between areas with pine
Greffen Mesum Telgte
Greffen 0.00 0.1444 0.1773
Mesum 0.14 0.0000 0.0047
Telgte 0.16 0.0045 0.0000
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_pine, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.07005788\n```\n:::\n:::\n\n\n\n\n::: {#tbl-number-of-returns-kld-spruce .cell tbl-cap='Kullback-Leibler-Divergence between the researched areas which have the dominante specie spruce for the atrribute number-of-returns'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"spruce\",]\nkld_results_spruce <- lfa::lfa_run_test_asymmetric(specie,value_column,\"area\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_spruce,\"Kullback-Leibler-Divergence between areas with spruce\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between areas with spruce
Brilon Oberhundem Osterwald
Brilon 0.000 0.04 0.034
Oberhundem 0.041 0.00 0.079
Osterwald 0.045 0.10 0.000
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_spruce, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.03779495\n```\n:::\n:::\n\n\n\n\n#### Jensen-Shannon Divergence\n\n\n\n::: {#tbl-number-of-returns-jsd_specie .cell tbl-cap='Jensen-Shannon Divergence between the researched species Beech, Oak, Pine and Spruce for the atrribute number-of-returns'}\n\n```{.r .cell-code code-fold=\"true\"}\njsd_results_specie <- lfa::lfa_run_test_symmetric(data,value_column,\"specie\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_specie,\"Jensen-Shannon Divergence between species\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between species
Beech Oak Pine Spruce
Beech 0 3e-04 0.019 0.0014
Oak NA 0e+00 0.021 0.0016
Pine NA NA 0.000 0.0143
Spruce NA NA NA 0.0000
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_specie, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.004419638\n```\n:::\n:::\n\n\n\n\n::: {#tbl-number-of-returns-jsd-beech .cell tbl-cap='Jensen-Shannon Divergence between the researched areas which have the dominante specie beech for the atrribute number-of-returns'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"beech\",]\njsd_results_beech <- lfa::lfa_run_test_symmetric(specie,value_column,\"area\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_beech,\"Jensen-Shannon Divergence between areas with beech\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between areas with beech
Bielefeld_brackwede Billerbeck Wuelfenrath
Bielefeld_brackwede 0 0.0035 0.00099
Billerbeck NA 0.0000 0.00554
Wuelfenrath NA NA 0.00000
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_beech, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.001314268\n```\n:::\n:::\n\n\n\n\n::: {#tbl-number-of-returns-jsd-oak .cell tbl-cap='Jensen-Shannon Divergence between the researched areas which have the dominante specie oak for the atrribute number-of-returns'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"oak\",]\njsd_results_oak <- lfa::lfa_run_test_symmetric(specie,value_column,\"area\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_oak,\"Jensen-Shannon Divergence between areas with oak\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between areas with oak
Hamm Muenster Rinkerode
Hamm 0 0.0068 0.0128
Muenster NA 0.0000 0.0017
Rinkerode NA NA 0.0000
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_oak, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.002747351\n```\n:::\n:::\n\n\n\n\n::: {#tbl-number-of-returns-jsd-pine .cell tbl-cap='Jensen-Shannon Divergence between the researched areas which have the dominante specie pine for the atrribute number-of-returns'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"pine\",]\njsd_results_pine <- lfa::lfa_run_test_symmetric(specie,value_column,\"area\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_pine,\"Jensen-Shannon Divergence between areas with pine\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between areas with pine
Greffen Mesum Telgte
Greffen 0 0.0035 0.00458
Mesum NA 0.0000 0.00037
Telgte NA NA 0.00000
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_pine, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.001130537\n```\n:::\n:::\n\n\n\n\n::: {#tbl-number-of-returns-jsd-spruce .cell tbl-cap='Jensen-Shannon Divergence between the researched areas which have the dominante specie spruce for the atrribute number-of-returns'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"spruce\",]\njsd_results_spruce <- lfa::lfa_run_test_symmetric(specie,value_column,\"area\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_spruce,\"Jensen-Shannon Divergence between areas with spruce\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between areas with spruce
Brilon Oberhundem Osterwald
Brilon 0 0.0069 0.005
Oberhundem NA 0.0000 0.002
Osterwald NA NA 0.000
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_spruce, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.001939104\n```\n:::\n:::\n\n\n\n\n\n## Documentation\n### `lfa_capitalize_first_char`\n\nCapitalize First Character of a String\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`input_string` | A single-character string to be processed.\n\n\n#### Concept\n\nString Manipulation\n\n\n#### Description\n\nThis function takes a string as input and returns the same string with the\n first character capitalized. If the first character is already capitalized,\n the function does nothing. If the first character is not from the alphabet,\n an error is thrown.\n\n\n#### Details\n\nThis function performs the following steps:\n \n\n* Checks if the input is a single-character string. \n\n* Verifies if the first character is from the alphabet (A-Z or a-z). \n\n* If the first character is not already capitalized, it capitalizes it. \n\n* Returns the modified string.\n\n\n#### Keyword\n\nalphabet\n\n\n#### Note\n\nThis function is case-sensitive and assumes ASCII characters.\n\n\n#### References\n\nNone\n\n\n#### Seealso\n\nThis function is related to the basic string manipulation functions in base R.\n\n\n#### Value\n\nA modified string with the first character capitalized if it is\n not already. 
If the first character is already capitalized, the original\n string is returned.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Capitalize the first character of a string\nlfa_capitalize_first_char(\"hello\") # Returns \"Hello\"\nlfa_capitalize_first_char(\"World\") # Returns \"World\"\n\n# Error example (non-alphabetic first character)\nlfa_capitalize_first_char(\"123abc\") # Throws an error\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_capitalize_first_char(input_string)\n```\n:::\n\n\n\n### `lfa_check_flag`\n\nCheck if a flag is set, indicating the completion of a specific process.\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`flag_name` | A character string specifying the name of the flag file. It should be a descriptive and unique identifier for the process being checked.\n\n\n#### Description\n\nThis function checks for the existence of a hidden flag file at a specified location within the working directory. If the flag file is found, a message is printed, and the function returns `TRUE` to indicate that the associated processing step has already been completed. 
If the flag file is not found, the function returns `FALSE` , indicating that further processing can proceed.\n\n\n#### Value\n\nA logical value indicating whether the flag is set ( `TRUE` ) or not ( `FALSE` ).\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Check if the flag for a process named \"data_processing\" is set\nlfa_check_flag(\"data_processing\")\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_check_flag(flag_name)\n```\n:::\n\n\n\n### `lfa_combine_sf_obj`\n\nCombine Spatial Feature Objects from Multiple GeoPackage Files\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`paths` | A character vector containing file paths to GeoPackage files with neighbor information.\n`area_infos` | A data frame or list containing information about the corresponding detection areas, including \"area\" and \"specie\" columns.\n\n\n#### Description\n\nThis function reads spatial feature objects (sf) from multiple GeoPackage files and combines them into a single sf object.\n Each GeoPackage file is assumed to contain neighbor information for a specific detection area, and the resulting sf object\n includes additional columns indicating the corresponding area and species information.\n\n\n#### Value\n\nA combined sf object with additional columns for area and specie information.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Assuming paths and area_infos are defined\ncombined_sf <- lfa_combine_sf_obj(paths, area_infos)\n\n# Print the combined sf object\nprint(combined_sf)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_combine_sf_obj(paths, area_infos)\n```\n:::\n\n\n\n### `lfa_count_returns_all_areas`\n\nCount tree returns for all species and areas, returning a consolidated data frame.\n\n\n#### Description\n\nThis function iterates through all species and areas obtained from the function\n [`lfa_get_all_areas`](#lfagetallareas) . 
For each combination of species and area, it reads\n the corresponding area as a catalog, counts the returns per tree using\n [`lfa_count_returns_per_tree`](#lfacountreturnspertree) , and consolidates the results into a data frame.\n The resulting data frame includes columns for the species, area, and return counts per tree.\n\n\n#### Keyword\n\ncounting\n\n\n#### Seealso\n\n[`lfa_get_all_areas`](#lfagetallareas) , [`lfa_read_area_as_catalog`](#lfareadareaascatalog) ,\n [`lfa_count_returns_per_tree`](#lfacountreturnspertree)\n\n\n#### Value\n\nA data frame with columns for species, area, and return counts per tree.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Count tree returns for all species and areas\nreturns_counts <- lfa_count_returns_all_areas()\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_count_returns_all_areas()\n```\n:::\n\n\n\n### `lfa_count_returns_per_tree`\n\nCount returns per tree for a given lidR catalog.\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`ctg` | A lidR catalog object containing LAS files to be processed.\n\n\n#### Description\n\nThis function takes a lidR catalog as input and counts the returns per tree.\n It uses the lidR package to read LAS files from the catalog and performs the counting\n operation on each tree. 
The result is a data frame containing the counts of returns\n for each unique tree ID within the lidR catalog.\n\n\n#### Keyword\n\ncounting\n\n\n#### Seealso\n\n[`lidR::readLAS`](#lidr::readlas) , [`lidR::is.empty`](#lidr::is.empty) ,\n [`base::table`](#base::table) , [`dplyr::bind_rows`](#dplyr::bindrows)\n\n\n#### Value\n\nA data frame with columns for tree ID and the corresponding count of returns.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Count returns per tree for a lidR catalog\nctg <- lfa_read_area_as_catalog(\"SpeciesA\", \"Area1\")\nreturns_counts_per_tree <- lfa_count_returns_per_tree(ctg)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_count_returns_per_tree(ctg)\n```\n:::\n\n\n\n### `lfa_create_boxplot`\n\nCreate a box plot from a data frame\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`data` | A data frame containing the data.\n`value_column` | The name of the column containing the values for the box plot.\n`category_column1` | The name of the column containing the first categorical variable.\n`category_column2` | The name of the column containing the second categorical variable.\n`title` | An optional title for the plot. If not provided, a default title is generated based on the data frame name.\n\n\n#### Description\n\nThis function generates a box plot using ggplot2 based on the specified data frame and columns.\n\n\n#### Details\n\nThe function creates a box plot where the x-axis is based on the second categorical variable,\n the y-axis is based on the specified value column, and the box plots are colored based on the first\n categorical variable. 
The grouping of box plots is done based on the unique values in the second categorical variable.\n\n\n#### Value\n\nA ggplot object representing the box plot.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Assuming you have a data frame 'your_data' with columns 'value', 'category1', and 'category2'\nlfa_create_boxplot(your_data, \"value\", \"category1\", \"category2\")\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_create_boxplot(\n data,\n value_column,\n category_column1,\n category_column2,\n title = NULL\n)\n```\n:::\n\n\n\n### `lfa_create_density_plots`\n\nCreate density plots for groups in a data frame\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`data` | A data frame containing the data.\n`value_column` | The name of the column containing the values for the density plot.\n`category_column1` | The name of the column containing the categorical variable for grouping. Defaults to \"area\".\n`category_column2` | The name of the column containing the categorical variable for arranging plots. Defaults to \"specie\".\n`title` | An optional title for the plot. If not provided, a default title is generated based on the data frame name.\n`xlims` | Optional limits for the x-axis. Should be a numeric vector with two elements (lower and upper bounds).\n`ylims` | Optional limits for the y-axis. Should be a numeric vector with two elements (lower and upper bounds).\n\n\n#### Description\n\nThis function generates density plots using ggplot2 based on the specified data frame and columns.\n\n\n#### Details\n\nThe function creates density plots where the x-axis is based on the specified value column,\n and the density plots are colored based on the first categorical variable. The arrangement of plots\n is done based on the unique values in the second categorical variable. 
The plots are arranged in a 2x2 grid.\n\n\n#### Value\n\nA ggplot object representing the density plots arranged in a 2x2 grid.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Assuming you have a data frame 'your_data' with columns 'value', 'category1', and 'category2'\nlfa_create_density_plots(your_data, \"value\", \"category1\", \"category2\", title = \"Density Plots\", xlims = c(0, 10), ylims = c(0, 0.5))\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_create_density_plots(\n data,\n value_column,\n category_column1 = \"area\",\n category_column2 = \"specie\",\n title = NULL,\n xlims = NULL,\n ylims = NULL\n)\n```\n:::\n\n\n\n### `lfa_create_stacked_distributions_plot`\n\nCreate a stacked distribution plot for tree detections, visualizing the distribution\n of a specified variable on the x-axis, differentiated by another variable.\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`trees` | A data frame containing tree detection data.\n`x_value` | A character string specifying the column name used for finding the values on the x-axis of the histogram.\n`fill_value` | A character string specifying the column name by which the data are differentiated in the plot.\n`bin` | An integer specifying the number of bins for the histogram. Default is 100.\n`ylab` | A character string specifying the y-axis label. Default is \"Amount trees\".\n`xlim` | A numeric vector of length 2 specifying the x-axis limits. Default is c(0, 100).\n`ylim` | A numeric vector of length 2 specifying the y-axis limits. Default is c(0, 1000).\n`title` | The title of the plot.\n\n\n#### Description\n\nThis function generates a stacked distribution plot using the ggplot2 package,\n providing a visual representation of the distribution of a specified variable\n ( `x_value` ) on the x-axis, with differentiation based on another variable\n ( `fill_value` ). 
The data for the plot are derived from the provided `trees` \n data frame.\n\n\n#### Keyword\n\ndata\n\n\n#### Seealso\n\n[`ggplot2::geom_histogram`](#ggplot2::geomhistogram) , [`ggplot2::facet_wrap`](#ggplot2::facetwrap) ,\n [`ggplot2::ylab`](#ggplot2::ylab) , [`ggplot2::scale_fill_brewer`](#ggplot2::scalefillbrewer) ,\n [`ggplot2::coord_cartesian`](#ggplot2::coordcartesian)\n\n\n#### Value\n\nA ggplot object representing the stacked distribution plot.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Create a stacked distribution plot for variable \"Z,\" differentiated by \"area\"\ntrees <- lfa_get_detections()\nlfa_create_stacked_distributions_plot(trees, \"Z\", \"area\")\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_create_stacked_distributions_plot(\n trees,\n x_value,\n fill_value,\n bin = 100,\n ylab = \"Amount trees\",\n xlim = c(0, 100),\n ylim = c(0, 1000),\n title =\n \"Histograms of height distributions between species 'beech', 'oak', 'pine' and 'spruce' divided by the different areas of Interest\"\n)\n```\n:::\n\n\n\n### `lfa_create_stacked_histogram`\n\nCreate a stacked histogram for tree detections, summing up the values for each species.\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`trees` | A data frame containing tree detection data.\n`x_value` | A character string specifying the column name used for finding the values on the x-axis of the histogram.\n`fill_value` | A character string specifying the column name by which the data are differentiated in the plot.\n`bin` | An integer specifying the number of bins for the histogram. Default is 30.\n`ylab` | A character string specifying the y-axis label. Default is \"Frequency.\"\n`xlim` | A numeric vector of length 2 specifying the x-axis limits. Default is c(0, 100).\n`ylim` | A numeric vector of length 2 specifying the y-axis limits. 
Default is NULL.\n\n\n#### Description\n\nThis function generates a stacked histogram using the ggplot2 package,\n summing up the values for each species and visualizing the distribution of\n a specified variable ( `x_value` ) on the x-axis, differentiated by another\n variable ( `fill_value` ). The data for the plot are derived from the provided\n `trees` data frame.\n\n\n#### Keyword\n\ndata\n\n\n#### Seealso\n\n[`ggplot2::geom_histogram`](#ggplot2::geomhistogram) , [`ggplot2::ylab`](#ggplot2::ylab) ,\n [`ggplot2::scale_fill_brewer`](#ggplot2::scalefillbrewer) , [`ggplot2::coord_cartesian`](#ggplot2::coordcartesian)\n\n\n#### Value\n\nA ggplot object representing the stacked histogram.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Create a stacked histogram for variable \"Z,\" differentiated by \"area\"\ntrees <- lfa_get_detections()\nlfa_create_stacked_histogram(trees, \"Z\", \"area\")\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_create_stacked_histogram(\n trees,\n x_value,\n fill_value,\n bin = 30,\n ylab = \"Frequency\",\n xlim = c(0, 100),\n ylim = NULL\n)\n```\n:::\n\n\n\n### `lfa_create_tile_location_objects`\n\nCreate tile location objects\n\n\n#### Author\n\nJakob Danel\n\n\n#### Description\n\nThis function traverses a directory structure to find LAZ files and creates\n tile location objects for each file. The function looks into the `data` \n directory of the repository/working directory. It then creates `tile_location` \n objects based on the folder structure. 
The folder structure should not be\n touched by hand, but created by `lfa_init_data_structure()` which builds the\n structure based on a shape file.\n\n\n#### Seealso\n\n[`tile_location`](#tilelocation)\n\n\n#### Value\n\nA vector containing tile location objects.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_create_tile_location_objects()\n\nlfa_create_tile_location_objects()\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_create_tile_location_objects()\n```\n:::\n\n\n\n### `lfa_detection`\n\nPerform tree detection on a lidar catalog and optionally save the results to a file.\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`catalog` | A lidar catalog containing point cloud data. If set to NULL, the function attempts to read the catalog from the specified tile location.\n`tile_location` | An object specifying the location of the lidar tile. If catalog is NULL, the function attempts to read the catalog from this tile location.\n`write_to_file` | A logical value indicating whether to save the detected tree information to a file. Default is TRUE.\n\n\n#### Description\n\nThis function utilizes lidar data to detect trees within a specified catalog. The detected tree information can be optionally saved to a file in the GeoPackage format. 
The function uses parallel processing to enhance efficiency.\n\n\n#### Value\n\nAn sf-style data frame containing information about the detected trees.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Perform tree detection on a catalog and save the results to a file\nlfa_detection(catalog = my_catalog, tile_location = my_tile_location, write_to_file = TRUE)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_detection(catalog, tile_location, write_to_file = TRUE)\n```\n:::\n\n\n\n### `lfa_download_areas`\n\nDownload areas based on spatial features\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`sf_areas` | Spatial features representing areas to be downloaded. It must include the columns \"species\" and \"name\". See details for more information.\n\n\n#### Author\n\nJakob Danel\n\n\n#### Description\n\nThis function initiates the data structure and downloads areas based on spatial features.\n\n\n#### Details\n\nThe input data frame, `sf_areas` , must have the following columns:\n \n\n* \"species\": The species associated with the area. \n\n* \"name\": The name of the area. 
\n \n The function uses the `lfa_init_data_structure` function to set up the data structure\n and then iterates through the rows of `sf_areas` to download each specified area.\n\n\n#### Value\n\nNone\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_download_areas(sf_areas)\n\n\n# Example spatial features data frame\nsf_areas <- data.frame(\nspecies = c(\"SpeciesA\", \"SpeciesB\"),\nname = c(\"Area1\", \"Area2\"),\n# Must include also other attributes specialized to sf objects\n# such as geometry, for processing of the download\n)\n\nlfa_download_areas(sf_areas)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_download_areas(sf_areas)\n```\n:::\n\n\n\n### `lfa_download`\n\nDownload a LAS file from the state of NRW for a specific location\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`species` | The species of the tree which is observed at this location\n`name` | The name of the area that is observed\n`location` | An sf object, which holds the location information for the area where the tile should be downloaded from.\n\n\n#### Description\n\nIt will download the file and save it to `data/<species>/<name>` with the name of the tile\n\n\n#### Value\n\nThe LASCatalog object of the downloaded file\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_download(species, name, location)\n```\n:::\n\n\n\n### `lfa_find_n_nearest_trees`\n\nFind n Nearest Trees\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`trees` | A sf object containing tree coordinates.\n`n` | The number of nearest trees to find for each tree (default is 100).\n\n\n#### Description\n\nThis function calculates the distances to the n nearest trees for each tree in the input dataset.\n\n\n#### Value\n\nA data frame with additional columns representing the distances to the n nearest trees.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Load 
tree data using lfa_get_detections() (not provided)\ntree_data <- lfa_get_detections()\n\n# Filter tree data for a specific species and area\ntree_data = tree_data[tree_data$specie == \"pine\" & tree_data$area == \"greffen\", ]\n\n# Find the 100 nearest trees for each tree in the filtered dataset\ntree_data <- lfa_find_n_nearest_trees(tree_data)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_find_n_nearest_trees(trees, n = 100)\n```\n:::\n\n\n\n### `lfa_generate_result_table_tests`\n\nGenerate Result Table for Tests\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`table` | A data frame representing the result table.\n\n\n#### Description\n\nThis function generates a result table for tests using the knitr::kable function.\n\n\n#### Details\n\nThis function uses the knitr::kable function to create a formatted table, making it suitable for HTML output.\n The input table is expected to be a data frame with test results, and the resulting table will have capitalized\n row and column names with lines between columns and rows.\n\n\n#### Value\n\nA formatted table suitable for HTML output with lines between columns and rows.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Generate a result table for tests\nresult_table <- data.frame(\nTest1 = c(0.05, 0.10, 0.03),\nTest2 = c(0.02, 0.08, 0.01),\nTest3 = c(0.08, 0.12, 0.05)\n)\nformatted_table <- lfa_generate_result_table_tests(result_table)\nprint(formatted_table)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_generate_result_table_tests(table, caption = \"Table Caption\")\n```\n:::\n\n\n\n### `lfa_get_all_areas`\n\nRetrieve a data frame containing all species and corresponding areas.\n\n\n#### Description\n\nThis function scans the \"data\" directory within the current working directory to\n obtain a list of species. It then iterates through each species to retrieve the list\n of areas associated with that species. 
The resulting data frame contains two columns:\n \"specie\" representing the species and \"area\" representing the corresponding area.\n\n\n#### Keyword\n\ndata\n\n\n#### Seealso\n\n[`list.dirs`](#list.dirs)\n\n\n#### Value\n\nA data frame with columns \"specie\" and \"area\" containing information about\n all species and their associated areas.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Retrieve a data frame with information about all species and areas\nall_areas_df <- lfa_get_all_areas()\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_get_all_areas()\n```\n:::\n\n\n\n### `lfa_get_detection_area`\n\nGet Detection for an area\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`species` | A character string specifying the target species.\n`name` | A character string specifying the name of the tile.\n\n\n#### Description\n\nRetrieves the tree detection information for a specified species and tile.\n\n\n#### Details\n\nThis function reads tree detection data from geopackage files within the specified tile location for a given species. It then combines the data into a single SF data frame and returns it. 
The function assumes that the tree detection files follow a naming convention with the pattern \"_detection.gpkg\".\n\n\n#### Keyword\n\nspatial\n\n\n#### References\n\nThis function is part of the LiDAR Forest Analysis (LFA) package.\n\n\n#### Seealso\n\n[`get_tile_dir`](#gettiledir)\n\n\n#### Value\n\nA Simple Features (SF) data frame containing tree detection information for the specified species and tile.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Retrieve tree detection data for species \"example_species\" in tile \"example_tile\"\ntrees_data <- lfa_get_detection_tile_location(\"example_species\", \"example_tile\")\n\n# Example usage:\ntrees_data <- lfa_get_detection_tile_location(\"example_species\", \"example_tile\")\n\n# No trees found scenario:\nempty_data <- lfa_get_detection_tile_location(\"nonexistent_species\", \"nonexistent_tile\")\n# The result will be an empty data frame if no trees are found for the specified species and tile.\n\n# Error handling:\n# In case of invalid inputs, the function may throw errors. 
Ensure correct species and tile names are provided.\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_get_detection_area(species, name)\n```\n:::\n\n\n\n### `lfa_get_detections_species`\n\nRetrieve detections for a specific species.\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`species` | A character string specifying the target species.\n\n\n#### Description\n\nThis function retrieves detection data for a given species from multiple areas.\n\n\n#### Details\n\nThe function looks for detection data in the \"data\" directory for the specified species.\n It then iterates through each subdirectory (representing different areas) and consolidates the\n detection data into a single data frame.\n\n\n#### Value\n\nA data frame containing detection information for the specified species in different areas.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Example usage:\ndetections_data <- lfa_get_detections_species(\"example_species\")\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_get_detections_species(species)\n```\n:::\n\n\n\n### `lfa_get_detections`\n\nRetrieve aggregated detection data for multiple species.\n\n\n#### Concept\n\ndata retrieval functions\n\n\n#### Description\n\nThis function obtains aggregated detection data for multiple species by iterating\n through the list of species obtained from [`lfa_get_species`](#lfagetspecies) . 
For each\n species, it calls [`lfa_get_detections_species`](#lfagetdetectionsspecies) to retrieve the\n corresponding detection data and aggregates the results into a single data frame.\n The resulting data frame includes columns for the species, tree detection data,\n and the area in which the detections occurred.\n\n\n#### Keyword\n\naggregation\n\n\n#### Seealso\n\n[`lfa_get_species`](#lfagetspecies) , [`lfa_get_detections_species`](#lfagetdetectionsspecies) \n \n Other data retrieval functions:\n [`lfa_get_species`](#lfagetspecies)\n\n\n#### Value\n\nA data frame containing aggregated detection data for multiple species.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_get_detections()\n\n# Retrieve aggregated detection data for multiple species\ndetections_data <- lfa_get_detections()\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_get_detections()\n```\n:::\n\n\n\n### `lfa_get_flag_path`\n\nGet the path to a flag file indicating the completion of a specific process.\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`flag_name` | A character string specifying the name of the flag file. It should be a descriptive and unique identifier for the process being flagged.\n\n\n#### Description\n\nThis function constructs and returns the path to a hidden flag file, which serves as an indicator that a particular processing step has been completed. 
The flag file is created in a designated location within the working directory.\n\n\n#### Value\n\nA character string representing the absolute path to the hidden flag file.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Get the flag path for a process named \"data_processing\"\nlfa_get_flag_path(\"data_processing\")\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_get_flag_path(flag_name)\n```\n:::\n\n\n\n### `lfa_get_neighbor_paths`\n\nGet Paths to Neighbor GeoPackage Files\n\n\n#### Description\n\nThis function retrieves the file paths to GeoPackage files containing neighbor information for each detection area.\n The GeoPackage files are assumed to be named \"neighbours.gpkg\" and organized in a directory structure under the \"data\" folder.\n\n\n#### Value\n\nA character vector containing file paths to GeoPackage files for each detection area's neighbors.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Get paths to neighbor GeoPackage files for all areas\npaths <- lfa_get_neighbor_paths()\n\n# Print the obtained file paths\nprint(paths)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_get_neighbor_paths()\n```\n:::\n\n\n\n### `lfa_get_species`\n\nGet a list of species from the data directory.\n\n\n#### Concept\n\ndata retrieval functions\n\n\n#### Description\n\nThis function retrieves a list of species by scanning the \"data\" directory\n located in the current working directory.\n\n\n#### Keyword\n\ndata\n\n\n#### References\n\nThis function relies on the [`list.dirs`](#list.dirs) function for directory listing.\n\n\n#### Seealso\n\n[`list.dirs`](#list.dirs) \n \n Other data retrieval functions:\n [`lfa_get_detections`](#lfagetdetections)\n\n\n#### Value\n\nA character vector containing the names of species found in the \"data\" directory.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Retrieve the list of species\nspecies_list <- lfa_get_species()\n```\n:::\n\n\n#### Usage\n\n::: 
{.cell}\n\n```{.r .cell-code}\nlfa_get_species()\n```\n:::\n\n\n\n### `lfa_ground_correction`\n\nCorrect the point clouds for correct ground imagery\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`ctg` | An LASCatalog object. If not NULL, the actions are performed on this object; if NULL, the catalog is inferred from the tile_location.\n`tile_location` | A tile_location type object holding the information about the location of the catalog. This is also used to save the catalog after processing.\n\n\n#### Author\n\nJakob Danel\n\n\n#### Description\n\nThis function is needed to correct the Z value of the point cloud, relative to the real\n ground height. After applying this function to your catalog, the Z values can be seen as the\n real elevation above the ground. At the moment the function uses the `tin()` function from\n the `lidR` package. NOTE : The operation is in place and cannot be reverted; the old values\n of the point cloud will be deleted!\n\n\n#### Value\n\nA catalog with the corrected z values. The catalog is always stored at tile_location and\n holds only the transformed values.\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_ground_correction(ctg, tile_location)\n```\n:::\n\n\n\n### `lfa_init_data_structure`\n\nInitialize data structure for species and areas\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`sf_species` | A data frame with information about species and associated areas.\n\n\n#### Description\n\nThis function initializes the data structure for storing species and associated areas.\n\n\n#### Details\n\nThe input data frame, `sf_species` , should have at least the following columns:\n \n\n* \"species\": The names of the species for which the data structure needs to be initialized. \n\n* \"name\": The names of the associated areas. \n \n The function creates directories based on the species and area information provided in\n the `sf_species` data frame. 
It checks whether the directories already exist and creates\n them if they don't.\n\n\n#### Value\n\nNone\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Example species data frame\nsf_species <- data.frame(\nspecies = c(\"SpeciesA\", \"SpeciesB\"),\nname = c(\"Area1\", \"Area2\"),\n# Other necessary columns\n)\n\nlfa_init_data_structure(sf_species)\n\n# Example species data frame\nsf_species <- data.frame(\nspecies = c(\"SpeciesA\", \"SpeciesB\"),\nname = c(\"Area1\", \"Area2\"),\n# Other necessary columns\n)\n\nlfa_init_data_structure(sf_species)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_init_data_structure(sf_species)\n```\n:::\n\n\n\n### `lfa_init`\n\nInitialize LFA (LiDAR forest analysis) data processing\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`sf_file` | A character string specifying the path to the shapefile containing spatial features of research areas.\n\n\n#### Description\n\nThis function initializes the LFA data processing by reading a shapefile containing\n spatial features of research areas, downloading the specified areas, and creating\n tile location objects for each area.\n\n\n#### Details\n\nThis function reads a shapefile ( `sf_file` ) using the `sf` package, which should\n contain information about research areas. It then calls the `lfa_download_areas` \n function to download the specified areas and `lfa_create_tile_location_objects` \n to create tile location objects based on Lidar data files in those areas. The\n shapefile MUST follow the following requirements:\n \n\n* Each geometry must be a single object of type polygon \n\n* Each entry must have the following attributes: \n\n* species: A string describing the tree species of the area. 
\n\n* name: A string describing the location of the area.\n\n\n#### Value\n\nA vector containing tile location objects.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Initialize LFA processing with the default shapefile\nlfa_init()\n\n# Initialize LFA processing with a custom shapefile\nlfa_init(\"custom_areas.shp\")\n\n# Example usage with the default shapefile\nlfa_init()\n\n# Example usage with a custom shapefile\nlfa_init(\"custom_areas.shp\")\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_init(sf_file = \"research_areas.shp\")\n```\n:::\n\n\n\n### `lfa_intersect_areas`\n\nIntersect Lidar Catalog with Spatial Features\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`ctg` | A LAScatalog object representing the Lidar data to be processed.\n`tile_location` | A tile location object representing the specific area of interest.\n`areas_sf` | Spatial features defining areas.\n\n\n#### Description\n\nThis function intersects a Lidar catalog with a specific area defined by spatial features.\n\n\n#### Details\n\nThe function intersects the Lidar catalog specified by `ctg` with a specific area defined by\n the `tile_location` object and `areas_sf` . It removes points outside the specified area and\n returns a modified LAScatalog object.\n \n The specified area is identified based on the `species` and `name` attributes in the\n `tile_location` object. If a matching area is not found in `areas_sf` , the function\n stops with an error.\n \n The function then transforms the spatial reference of the identified area to match that of\n the Lidar catalog using `sf::st_transform` .\n \n The processing is applied to each chunk in the catalog using the `identify_area` function,\n which merges spatial information and filters out points that are not classified as inside\n the identified area. 
After processing, the function writes the modified LAS files back to\n the original file locations, removing points outside the specified area.\n \n If an error occurs during the processing of a chunk, a warning is issued, and the function\n continues processing the next chunks. If no points are found after filtering, a warning is\n issued, and NULL is returned.\n\n\n#### Seealso\n\nOther functions in the Lidar forest analysis (LFA) package.\n\n\n#### Value\n\nA modified LAScatalog object with points outside the specified area removed.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Example usage\nlfa_intersect_areas(ctg, tile_location, areas_sf)\n\n# Example usage\nlfa_intersect_areas(ctg, tile_location, areas_sf)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_intersect_areas(ctg, tile_location, areas_sf)\n```\n:::\n\n\n\n### `lfa_jsd_from_vec`\n\nCompute Jensen-Shannon Divergence from Vectors\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`x` | A numeric vector.\n`y` | A numeric vector.\n\n\n#### Description\n\nThis function calculates the Jensen-Shannon Divergence (JSD) between two vectors.\n\n\n#### Value\n\nJensen-Shannon Divergence between the density distributions of x and y.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\nx <- rnorm(100)\ny <- rnorm(100, mean = 2)\nlfa_jsd_from_vec(x, y)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_jsd_from_vec(x, y)\n```\n:::\n\n\n\n### `lfa_jsd`\n\nJensen-Shannon Divergence Calculation\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`p` | A numeric vector representing the probability distribution P.\n`q` | A numeric vector representing the probability distribution Q.\n`epsilon` | A small positive constant added to both P and Q to avoid logarithm of zero. 
Default is 1e-10.\n\n\n#### Description\n\nThis function calculates the Jensen-Shannon Divergence (JSD) between two probability distributions P and Q.\n\n\n#### Details\n\nThe JSD is computed using the Kullback-Leibler Divergence (KLD) as follows:\n `sum((p * log((p + epsilon) / (m + epsilon)) + q * log((q + epsilon) / (m + epsilon))) / 2)` \n where `m = (p + q) / 2` .\n\n\n#### Seealso\n\n[`kld`](#kld) , [`sum`](#sum) , [`log`](#log)\n\n\n#### Value\n\nA numeric value representing the Jensen-Shannon Divergence between P and Q.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Calculate JSD between two probability distributions\np_distribution <- c(0.2, 0.3, 0.5)\nq_distribution <- c(0.1, 0, 0.9)\njsd_result <- jsd(p_distribution, q_distribution)\nprint(jsd_result)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_jsd(p, q, epsilon = 1e-10)\n```\n:::\n\n\n\n### `lfa_kld_from_vec`\n\nCompute Kullback-Leibler Divergence from Vectors\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`x` | A numeric vector.\n`y` | A numeric vector.\n\n\n#### Description\n\nThis function calculates the Kullback-Leibler Divergence (KLD) between two vectors.\n\n\n#### Value\n\nKullback-Leibler Divergence between the density distributions of x and y.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\nx <- rnorm(100)\ny <- rnorm(100, mean = 2)\nlfa_kld_from_vec(x, y)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_kld_from_vec(x, y)\n```\n:::\n\n\n\n### `lfa_kld`\n\nKullback-Leibler Divergence Calculation\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`p` | A numeric vector representing the probability distribution P.\n`q` | A numeric vector representing the probability distribution Q.\n`epsilon` | A small positive constant added to both P and Q to avoid logarithm of zero. 
Default is 1e-10.\n\n\n#### Description\n\nThis function calculates the Kullback-Leibler Divergence (KLD) between two probability distributions P and Q.\n\n\n#### Details\n\nThe KLD is computed using the formula:\n `sum(p * log((p + epsilon) / (q + epsilon)))` \n This avoids issues when the denominator (Q) contains zero probabilities.\n\n\n#### Seealso\n\n[`sum`](#sum) , [`log`](#log)\n\n\n#### Value\n\nA numeric value representing the Kullback-Leibler Divergence between P and Q.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Calculate KLD between two probability distributions\np_distribution <- c(0.2, 0.3, 0.5)\nq_distribution <- c(0.1, 0, 0.9)\nkld_result <- kld(p_distribution, q_distribution)\nprint(kld_result)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_kld(p, q, epsilon = 1e-10)\n```\n:::\n\n\n\n### `lfa_ks_test`\n\nKolmogorov-Smirnov Test Wrapper Function\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`x` | A numeric vector representing the first sample.\n`y` | A numeric vector representing the second sample.\n`output_variable` | A character string specifying the output variable to extract from the ks.test result. Default is \"p.value\". Other possible values include \"statistic\" and \"alternative\".\n`...` | Additional arguments to be passed to the ks.test function.\n\n\n#### Description\n\nThis function serves as a wrapper for the Kolmogorov-Smirnov (KS) test between two samples.\n\n\n#### Details\n\nThe function uses the ks.test function to perform a two-sample KS test and returns the specified output variable.\n The default output variable is the p-value. 
Other possible output variables include \"statistic\" and \"alternative\".\n\n\n#### Seealso\n\n[`ks.test`](#ks.test)\n\n\n#### Value\n\nA numeric value representing the specified output variable from the KS test result.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Perform KS test and extract the p-value\nresult <- lfa_ks_test(sample1, sample2)\nprint(result)\n\n# Perform KS test and extract the test statistic\nresult_statistic <- lfa_ks_test(sample1, sample2, output_variable = \"statistic\")\nprint(result_statistic)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_ks_test(x, y, output_variable = \"p.value\", ...)\n```\n:::\n\n\n\n### `lfa_load_ctg_if_not_present`\n\nLoading the catalog if it is not present\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`ctg` | Catalog object. Can be NULL\n`tile_location` | The location to look for the catalog tiles, if they are not present\n\n\n#### Description\n\nThis function checks if the catalog is `NULL` . 
If it is it will load the\n catalog from the `tile_location`\n\n\n#### Value\n\nThe provided ctg object if not null, else the catalog for the tiles\n of the tile_location.\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_load_ctg_if_not_present(ctg, tile_location)\n```\n:::\n\n\n\n### `lfa_map_tile_locations`\n\nMap Function Over Tile Locations\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`tile_locations` | A list of tile location objects.\n`map_function` | The mapping function to be applied to each tile location.\n`...` | Additional arguments to be passed to the mapping function.\n\n\n#### Description\n\nThis function applies a specified mapping function to each tile location in a list.\n\n\n#### Details\n\nThis function iterates over each tile location in the provided list ( `tile_locations` )\n and applies the specified mapping function ( `map_function` ) to each tile location.\n The mapping function should accept a tile location object as its first argument, and\n additional arguments can be passed using the ellipsis ( `...` ) syntax.\n \n This function is useful for performing operations on multiple tile locations concurrently,\n such as loading Lidar data, processing areas, or other tasks that involve tile locations.\n\n\n#### Seealso\n\nThe mapping function provided should be compatible with the structure and requirements\n of the tile locations and the specific task being performed.\n\n\n#### Value\n\nNone\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Example usage\nlfa_map_tile_locations(tile_locations, my_mapping_function, param1 = \"value\")\n\n# Example usage\nlfa_map_tile_locations(tile_locations, my_mapping_function, param1 = \"value\")\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_map_tile_locations(tile_locations, map_function, check_flag = NULL, ...)\n```\n:::\n\n\n\n### `lfa_merge_and_save`\n\nMerge and Save Text Files in a Directory\n\n\n#### Arguments\n\nArgument 
|Description\n------------- |----------------\n`input_directory` | The path to the input directory containing text files.\n`output_name` | The name for the output file where the merged content will be saved.\n\n\n#### Description\n\nThis function takes an input directory and an output name as arguments.\n It merges the textual content of all files in the specified directory into\n a single string, with each file's content separated by a newline character.\n The merged content is then saved into a file named after the output name\n in the same directory. After the merging is complete, all input files are\n deleted.\n\n\n#### Details\n\nThis function reads the content of each text file in the specified input directory\n and concatenates them into a single string. Each file's content is separated by a newline\n character. The merged content is then saved into a file named after the output name\n in the same directory. Finally, all input files are deleted from the directory.\n\n\n#### Seealso\n\n[`readLines`](#readlines) , [`writeLines`](#writelines) , [`file.remove`](#file.remove)\n\n\n#### Value\n\nThis function does not explicitly return any value. 
It prints a message\n indicating the successful completion of the merging and saving process.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Merge text files in the \"data_files\" directory and save the result in \"merged_output\"\nlfa_merge_and_save(\"data_files\", \"merged_output\")\n\n# Merge text files in the \"data_files\" directory and save the result in \"merged_output\"\nlfa_merge_and_save(\"data_files\", \"merged_output\")\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_merge_and_save(input_directory, output_name)\n```\n:::\n\n\n\n### `lfa_random_forest`\n\nRandom Forest Classifier with Leave-One-Out Cross-Validation\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`tree_data` | A data frame containing the tree data, including the response variable (\"specie\") and predictor variables.\n`excluded_input_columns` | A character vector specifying columns to be excluded from predictor variables.\n`response_variable` | The response variable to be predicted (default is \"specie\").\n`seed` | An integer to set the seed for reproducibility (default is 123).\n`...` | Additional parameters to be passed to the randomForest function.\n\n\n#### Description\n\nThis function performs a random forest classification using leave-one-out cross-validation for each area in the input tree data.\n It returns a list containing various results, including predicted species, confusion matrix, accuracy, and the formula used for modeling.\n\n\n#### Value\n\nA list containing the following elements:\n \n\n* `predicted_species_absolute` : A data frame with observed and predicted species for each area. \n\n* `predicted_species_relative` : A data frame with the relative predictions per species and area, normalized by the total predictions in each area. \n\n* `confusion_matrix` : A confusion matrix showing the counts of predicted vs. observed species. 
\n\n* `accuracy` : The accuracy of the model, calculated as the sum of diagonal elements in the confusion matrix divided by the total count. \n\n* `formula` : The formula used for modeling.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Assuming tree_data is defined\nresults <- lfa_random_forest(tree_data, excluded_input_columns = c(\"column1\", \"column2\"))\n\n# Print the list of results\nprint(results)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_random_forest(\n tree_data,\n excluded_input_columns,\n response_variable = \"specie\",\n ntree = 100,\n seed = 123,\n ...\n)\n```\n:::\n\n\n\n### `lfa_rd_to_qmd`\n\nConvert Rd File to Markdown\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`rdfile` | The path to the Rd file or a parsed Rd object.\n`outfile` | The path to the output Markdown file (including the file extension).\n`append` | Logical, indicating whether to append to an existing file (default is FALSE).\n\n\n#### Description\n\nIMPORTANT NOTE: \n This function is nearly identical to the `Rd2md::Rd2markdown` function from the `Rd2md` \n package. We needed to implement our own version of it because of various reasons:\n \n\n* The algorithm uses hardcoded header sizes (h1 and h2 in original) which is not feasible for our use-case of the markdown. \n\n* We needed to add some Quarto Markdown specifics, e.g. to make sure that the examples will not be runned. \n\n* We want to exclude certain tags from our implementation.\n\n\n#### Details\n\nFor that reason we copied the method and made changes as needed and also added this custom documentation.\n \n This function converts an Rd (R documentation) file to Markdown format (.md) and\n saves the converted file at the specified location. The function allows appending\n to an existing file or creating a new one. 
The resulting Markdown file includes\n sections for the function's name, title, and additional content such as examples,\n usage, arguments, and other sections present in the Rd file.\n \n The function performs the following steps:\n \n\n* Parses the Rd file using the Rd2md package. \n\n* Creates a Markdown file with sections for the function's name, title, and additional content. \n\n* Appends the content to an existing file if `append` is set to TRUE. \n\n* Saves the resulting Markdown file at the specified location.\n\n\n#### Seealso\n\n[`Rd2md::parseRd`](#rd2md::parserd)\n\n\n#### Value\n\nThis function does not explicitly return any value. It saves the converted Markdown file\n at the specified location as described in the details section.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Convert Rd file to Markdown and save it\nlfa_rd_to_md(\"path/to/your/file.Rd\", \"path/to/your/output/file.md\")\n\n# Convert Rd file to Markdown and append to an existing file\nlfa_rd_to_md(\"path/to/your/file.Rd\", \"path/to/existing/output/file.md\", append = TRUE)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_rd_to_qmd(rdfile, outfile, append = FALSE)\n```\n:::\n\n\n\n### `lfa_rd_to_results`\n\nConvert Rd Files to Markdown and Merge Results\n\n\n#### Description\n\nThis function converts all Rd (R documentation) files in the \"man\" directory\n to Markdown format (.qmd) and saves the converted files in the \"results/appendix/package-docs\" directory.\n It then merges the converted Markdown files into a single string and saves\n the merged content into a file named \"docs.qmd\" in the \"results/appendix/package-docs\" directory.\n\n\n#### Details\n\nThe function performs the following steps:\n \n\n* Removes any existing \"docs.qmd\" file in the \"results/appendix/package-docs\" directory. \n\n* Finds all Rd files in the \"man\" directory. \n\n* Converts each Rd file to Markdown format (.qmd) using the `lfa_rd_to_qmd` function. 
\n\n* Saves the converted Markdown files in the \"results/appendix/package-docs\" directory. \n\n* Merges the content of all converted Markdown files into a single string. \n\n* Saves the merged content into a file named \"docs.qmd\" in the \"results/appendix/package-docs\" directory.\n\n\n#### Seealso\n\n[`lfa_rd_to_qmd`](#lfardtoqmd) , [`lfa_merge_and_save`](#lfamergeandsave)\n\n\n#### Value\n\nThis function does not explicitly return any value. It performs the conversion,\n merging, and saving operations as described in the details section.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Convert Rd files to Markdown and merge the results\nlfa_rd_to_results()\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_rd_to_results()\n```\n:::\n\n\n\n### `lfa_read_area_as_catalog`\n\nRead LiDAR data from a specified species and location as a catalog.\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`specie` | A character string specifying the species of interest.\n`location_name` | A character string specifying the name of the location.\n\n\n#### Description\n\nThis function constructs the file path based on the specified `specie` and `location_name` ,\n lists the directories at that path, and reads the LiDAR data into a `lidR::LAScatalog` .\n\n\n#### Value\n\nA `lidR::LAScatalog` object containing the LiDAR data from the specified location and species.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_read_area_as_catalog(\"beech\", \"location1\")\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_read_area_as_catalog(specie, location_name)\n```\n:::\n\n\n\n### `lfa_run_test_asymmetric`\n\nAsymmetric Pairwise Test for Categories\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`data` | A data frame containing the relevant columns.\n`data_column` | A character string specifying the column containing the numerical data.\n`category_column` | A character string 
specifying the column containing the categorical variable.\n`test_function` | A function used to perform the pairwise test between two sets of data. It should accept two vectors of numeric data and additional parameters specified by `...` . The function should return a numeric value representing the test result.\n`...` | Additional parameters to be passed to the `test_function` .\n\n\n#### Description\n\nThis function performs an asymmetric pairwise test for categories using a user-defined `test_function` .\n\n\n#### Details\n\nThe function calculates the test results for each unique combination of categories using the specified\n `test_function` . The resulting table is asymmetric, containing the test results for comparisons\n from the rows to the columns.\n\n\n#### Seealso\n\n[`outer`](#outer) , [`Vectorize`](#vectorize)\n\n\n#### Value\n\nA data frame representing the results of the asymmetric pairwise tests between categories.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Define a custom test function\ncustom_test_function <- function(x, y) {\n# Your test logic here\n# Return a numeric result\nreturn(mean(x) - mean(y))\n}\n\n# Perform an asymmetric pairwise test\nresult <- lfa_run_test_asymmetric(your_data, \"numeric_column\", \"category_column\", custom_test_function)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_run_test_asymmetric(data, data_column, category_column, test_function, ...)\n```\n:::\n\n\n\n### `lfa_run_test_symmetric`\n\nSymmetric Pairwise Test for Categories\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`data` | A data frame containing the relevant columns.\n`data_column` | A character string specifying the column containing the numerical data.\n`category_column` | A character string specifying the column containing the categorical variable.\n`test_function` | A function used to perform the pairwise test between two sets of data. 
It should accept two vectors of numeric data and additional parameters specified by `...` . The function should return a numeric value representing the test result.\n`...` | Additional parameters to be passed to the `test_function` .\n\n\n#### Description\n\nThis function performs a symmetric pairwise test for categories using a user-defined `test_function` .\n\n\n#### Details\n\nThe function calculates the test results for each unique combination of categories using the specified\n `test_function` . The resulting table is symmetric, containing the test results for comparisons\n from the rows to the columns. The upper triangle of the matrix is filled with `NA` to avoid duplicate results.\n\n\n#### Seealso\n\n[`outer`](#outer) , [`Vectorize`](#vectorize)\n\n\n#### Value\n\nA data frame representing the results of the symmetric pairwise tests between categories.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Define a custom test function\ncustom_test_function <- function(x, y) {\n# Your test logic here\n# Return a numeric result\nreturn(mean(x) - mean(y))\n}\n\n# Perform a symmetric pairwise test\nresult <- lfa_run_test_symmetric(your_data, \"numeric_column\", \"category_column\", custom_test_function)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_run_test_symmetric(data, data_column, category_column, test_function, ...)\n```\n:::\n\n\n\n### `lfa_save_all_neighbours`\n\nSave Neighbors for All Areas\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`n` | The number of nearest trees to find for each tree (default is 100).\n\n\n#### Description\n\nThis function iterates through all detection areas, finds the n nearest trees for each tree,\n and saves the result to a GeoPackage file for each area.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Save neighbors for all areas with default value (n=100)\nlfa_save_all_neighbours()\n\n# Save neighbors for all areas with a specific value of n (e.g., 
n=50)\nlfa_save_all_neighbours(n = 50)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_save_all_neighbours(n = 100)\n```\n:::\n\n\n\n### `lfa_segmentation`\n\nSegment the elements of a point cloud by trees\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`ctg` | An LASCatalog object. If not null, it will perform the actions on this object, if NULL inferring the catalog from the tile_location\n`tile_location` | A tile_location type object holding the information about the location of the catalog. This is used to save the catalog after processing too.\n\n\n#### Author\n\nJakob Danel\n\n\n#### Description\n\nThis function will try to divide the whole point cloud into unique trees.\n To do so, it assigns a `treeID` to each point in every chunk of the\n catalog. The algorithm uses the `li2012` implementation with the\n following parameters: `li2012(dt1 = 2, dt2 = 3, R = 2, Zu = 10, hmin = 5, speed_up = 12)` \n NOTE : The operation is in place and can not be reverted, the old values\n of the point cloud will be deleted!\n\n\n#### Value\n\nA catalog where each chunk has additional `treeID` values indicating the tree each point belongs to.\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_segmentation(ctg, tile_location)\n```\n:::\n\n\n\n### `lfa_set_flag`\n\nSet a flag to indicate the completion of a specific process.\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`flag_name` | A character string specifying the name of the flag file. It should be a descriptive and unique identifier for the process being flagged.\n\n\n#### Description\n\nThis function creates a hidden flag file at a specified location within the working directory to indicate that a particular processing step has been completed. 
If the flag file already exists, a warning is issued.\n\n\n#### Value\n\nThis function does not have a formal return value.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Set the flag for a process named \"data_processing\"\nlfa_set_flag(\"data_processing\")\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_set_flag(flag_name)\n```\n:::\n\n\n\n", + "markdown": "---\ntitle: \"Forest Data Analysis Report\"\noutput:\n pdf_document:\n latex_engine: xelatex\ntoc: true\ntoc-depth: 2\ntoc-title: Contents\nnumber-sections: true\nnumber-depth: 3\ndate: today\nauthor:\n - name: Jakob Danel\n email: jakob.danel@uni-muenster.de\n url: https://github.com/jakobdanel\n affiliations:\n - name: Universität Münster\n city: Münster\n country: Germany\n - name: Federick Bruch\n email: f_bruc03@uni-muenster.de\n url: https://www.uni-muenster.de/Geoinformatics/institute/staff/index.php/351/Frederick_Bruch\n affiliations:\n - name: Universität Münster\n city: Münster\n country: Germany\nbibliography: references.bib\nexecute-dir: .. \nprefer-html: true\n---\n\n\n# Introduction\n\nThis report documents the analysis of forest data for different tree species.\n\n# Methods\n\n## Data acquisition\n\nOur primary objective is to identify patches where one tree species exhibits a high level of dominance, striving to capture monocultural stands within the diverse forests of Nordrhein-Westfalia (NRW). Recognizing the practical challenges of finding true monocultures, we aim to identify patches where one species is highly dominant, enabling meaningful comparisons across different species.\n\nThe study is framed within the NRW region due to the availability of an easily accessible dataset. Our focus includes four prominent tree species in NRW: oak, beech, spruce, and pine, representing the most prevalent species in the region. 
To ensure the validity of our findings, we derive three patches for each species, thereby confirming that observed variables are characteristic of a particular species rather than a specific patch. Each patch is carefully selected to encompass an area of approximately 50-100 hectares and contain between 5,000 and 10,000 trees. Striking a balance between relevance and manageability, these patches avoid excessive size to enhance the likelihood of capturing varied species mixes and ensure compatibility with local hardware.\n\nSpecific Goals:\n\n1. Retrieve patches with highly dominant tree species.\n2. Minimize or eliminate the presence of human-made structures within the selected patches.\n\nTo achieve our goals, we utilized the waldmonitor dataset [@welle2014] and the map provided by [@Blickensdoerfer2022], both indicating dominant tree species in NRW. We identified patches of feasible size where both sources predicted the presence of a specific species. Further validation involved examining sentinel images of these forest regions to assess the evenness of structures, leaf color distribution, and the absence of significant human-made structures such as roads or buildings. The subsequent preprocessing steps, detailed in the following subsection, involved refining our selected patches and deriving relevant variables, such as tree distribution and density, to ensure that the chosen areas align with the desired research domains.\n\n## Preprocessing\n::: {.cell}\n\n:::\n\n\nIn this research study, the management and processing of a large dataset are crucial considerations. The dataset's substantial size necessitates careful maintenance to ensure efficient handling. Furthermore, the data should be easily processable and editable to facilitate necessary corrections and precalculations within the context of our research objectives. To achieve our goals, we have implemented a framework that automatically derives data based on a shapefile, delineating areas of interest. 
The processed data and results of precalculations are stored in a straightforward manner to enhance accessibility. Additionally, we have designed functions that establish a user-friendly interface, enabling the execution of algorithms on subsets of the data, such as distinct species. These interfaces are not only directly callable by users but can also be integrated into other functions to automate processes. The overarching aim is to streamline the entire preprocessing workflow using a single script, leveraging only the shapefile as a basis. This subsection details the accomplishments of our R-package in realizing these goals, outlining the preprocessing steps undertaken and justifying their necessity in the context of our research.\n\nThe data are stored in a data subdirectory of the root directory in the format `species/location-name/tile-name`. To automate the matching of areas of interest with the catalog from the Land NRW[^1], we utilize the intersecting tool developed by Heisig[^2]. This tool allows for the automatic retrieval and placement of data downloaded from the Land NRW catalog. To enhance data accessibility, we have devised an object that incorporates species, location name, and tile name (the NRW internal identifier) for each area. This object facilitates the specification of the area to be processed. Additionally, we have defined an initialization function that downloads all tiles, returning a list of tile location objects for subsequent processing. A pivotal component of the package's preprocessing functionality is the map function, which iterates over a list of tile locations (effectively the entire dataset) and accepts a processing function as an argument. The subsequent paragraph outlines the specific preprocessing steps employed, all of which are implemented within the mapping function.\n\nTo facilitate memory-handling capabilities, each of the tiles, where one area can span multiple tiles, has been split into manageable chunks. 
We employed a 50x50m size for each tile, resulting in the division of original 1km x 1km files into 400 tiles. These tiles are stored in our directory structure, with each tile housed in a directory named after its tile name and assigned an id as the filename. Implementation-wise, the `lidR::catalog_retile` function was instrumental in achieving this segmentation. The resulting smaller chunks allow for efficient iteration during subsequent preprocessing steps.\n\nThe next phase involves reducing our data to the actual size by intersecting the tiles with the defined area of interest. Using the `lidR::merge_spatial` function, we intersect the area derived from the shapefile, removing all point cloud items outside this region. Due to our tile-wise approach, empty tiles may arise, and in such cases, those tiles are simply deleted.\n\nFollowing the size reduction to our dataset, the next step involves correcting the `z` values. The `z` values in the data are originally relative to the ellipsoid used for referencing, but we require them to be relative to the ground. To achieve this, we utilize the `lidR::tin` function, which extrapolates a convex hull between all ground points (classified by the data provider) and calculates the z value based on this structure.\n\nSubsequently, we aim to perform segmentation for each distinct tree, marking each item of the point cloud with a tree ID. We employ the algorithm described by @li2012, using parameters `li2012(dt1 = 2, dt2 = 3, R = 2, Zu = 10, hmin = 5, speed_up = 12)`. The meanings of these parameters are elucidated in Li et al.'s work [@li2012].\n\nFinally, the last preprocessing step involves individual tree detection, seeking a single `POINT` object for each tree. The `lidR::lmf` function, an implementation of tree detection using a local maximum filter, is utilized for this purpose [@popescu2004]. 
The results are stored in GeoPackage files within our data structure.\n\nSee @sec-appendix-preprocessing for the implementation of the preprocessing.\n\n[^1]: https://www.opengeodata.nrw.de/produkte/geobasis/hm/3dm_l_las/3dm_l_las/, last visited 7th Dec 2023\n[^2]: https://github.com/joheisig/GEDIcalibratoR, last visited 7th Dec 2023\n\n## Analysis of different distributions\n\nAnalysis of data distributions is a critical aspect of our research, with a focus on comparing two or more distributions. Our objective extends beyond evaluating the disparities between species; we also aim to assess differences within a species. To gain a comprehensive understanding of the data, we employ various visualization techniques, including histograms, density functions, and box plots.\n\nIn tandem with visualizations, descriptive statistics, such as means, standard errors, and quantiles, are leveraged to provide key insights into the central tendency and variability of the data.\n\nFor a more quantitative analysis of distribution dissimilarity, statistical tests are employed. The Kullback-Leibler (KL) difference serves as a measure to compare the similarity of a set of distributions. This involves converting distributions into their density functions, with the standard error serving as the bandwidth. The KL difference is calculated for each pair of distributions, as it is asymmetric. For the two distributions the KL difference is defined as following [@kullback1951kullback]:\n\n$$\nD_{KL}(P \\, \\| \\, Q) = \\sum_i P(i) \\log\\left(\\frac{P(i)}{Q(i)}\\right)\n$$\n\nTo obtain a symmetric score, the Jensen-Shannon Divergence (JSD) is utilized [@grosse2002analysis], expressed by the formula:\n\n$$\nJS(P || Q) = \\frac{1}{2} * KL(P || M) + \\frac{1}{2} * KL(Q || M)\n$$\nHere, $M = \\frac{1}{2} * (P + Q)$. The JSD provides a balanced measure of dissimilarity between distributions [@Brownlee2019Calculate]. 
For comparing the different scores to each other, we will use averages.\n\nAdditionally, the Kolmogorov-Smirnov Test is implemented to assess whether two distributions significantly differ from each other. This statistical test offers a formal evaluation of the dissimilarity between empirical distribution functions.\n\n\n# Results\n::: {.cell}\n\n:::\n\n## Researched areas\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\nlibrary(ggplot2)\nsf::sf_use_s2(FALSE)\npatches <- sf::read_sf(\"research_areas.shp\") |> sf::st_centroid()\n\nde <- sf::read_sf(\"results/results/states_de/Bundesländer_2017_mit_Einwohnerzahl.shp\") # Source: https://hub.arcgis.com/datasets/esri-de-content::bundesl%C3%A4nder-2017-mit-einwohnerzahl/explore?location=51.099647%2C10.454033%2C7.43\nnrw <- de[5,] |> sf::st_geometry()\n\n\nggplot() + geom_sf(data = nrw) + \n geom_sf(data = patches, mapping = aes(col = species))\n```\n\n::: {.cell-output-display}\n![Locations of the different patches with the dominant species for that patch. The patch centroids are displayed on a basemap showing the borders of NRW.](report_files/figure-html/fig-patches-nrw-1.png){#fig-patches-nrw width=672}\n:::\n:::\nWe drew three patches for each species from different regions (see @tbl-summary-researched-areas). We downloaded the LiDAR data for those patches and ran all preprocessing steps as described. We then checked with certain derived parameters (e.g. tree heights, tree distributions or tree density) that all patches contain valid forest data. In that step we discovered that in one patch some forest clearance had taken place in the recent past. This patch was removed from the dataset and was replaced with a new one. \n\nIn our research, drawing patches evenly distributed across Nordrhein-Westfalia is inherently constrained by natural factors. Consequently, the patches for oak and pine predominantly originate from the Münsterland region, as illustrated in [@fig-patches-nrw]. 
For spruce, the patches were derived from Sauerland, reflecting the prevalence of spruce forests in this specific region within NRW, as corroborated by Welle et al. [@welle2014] and Blickensdörfer et al. [@Blickensdoerfer2022]. Beech patches, on the other hand, were generated from diverse locations within NRW. Across all patches, no human-made objects were identified, with the exception of small paths for pedestrians and forestry vehicles.\n\nThe distribution of area and detections is notable for each of the four species. Beech covers 69.79 hectares with a total of 5,954 detections, oak spans 63.23 hectares with 5,354 detections, pine extends across 72.86 hectares with 8,912 detections, and spruce encompasses 57.94 hectares with 8,619 detections. Both the amount of detections and the corresponding area exhibit a relatively uniform distribution across the diverse patches, as summarized in @tbl-summary-researched-areas. \n\nWith the selected dataset described, we intentionally chose three patches for each of the four species that exhibit a practical and usable size for our research objectives. 
These carefully chosen patches align with the conditions essential for our study, providing comprehensive and representative data for in-depth analysis and meaningful insights into the characteristics of each tree species within the specified areas.\n\n\n::: {#tbl-summary-researched-areas .cell tbl-cap='Summary of researched patches grouped by species, with their location, area and the amount of detected trees.'}\n\n```{.r .cell-code code-fold=\"true\"}\nshp <- sf::read_sf(\"research_areas.shp\")\ntable <- lfa::lfa_get_all_areas()\n\nsf::sf_use_s2(FALSE)\nfor (row in 1:nrow(table)) {\n area <-\n dplyr::filter(shp, shp$species == table[row, \"specie\"] &\n shp$name == table[row, \"area\"])\n area_size <- area |> sf::st_area()\n point <- area |> sf::st_centroid() |> sf::st_coordinates()\n table[row,\"point\"] <- paste0(\"(\",round(point[1], digits = 4),\", \",round(point[2],digits = 4),\")\")\n \n table[row, \"area_size\"] = round(area_size,digits = 2) #paste0(round(area_size,digits = 2), \" m²\")\n \n amount_det <- nrow(lfa::lfa_get_detection_area(table[row, \"specie\"], table[row, \"area\"]))\n if(is.null(amount_det)){\n cat(nrow(lfa::lfa_get_detection_area(table[row, \"specie\"], table[row, \"area\"])),table[row, \"specie\"],table[row, \"area\"])\n }\n table[row, \"amount_detections\"] = amount_det\n \n # table[row, \"specie\"] <- lfa::lfa_capitalize_first_char(table[row,\"specie\"])\n table[row, \"area\"] <- lfa::lfa_capitalize_first_char(table[row,\"area\"])\n }\ntable$area <- gsub(\"_\", \" \", table$area)\ntable$area <- gsub(\"ue\", \"ü\", table$area)\ntable = table[,!names(table) %in% c(\"specie\")]\n\nknitr::kable(table, \"html\", col.names = c(\"Patch Name\",\"Location\",\"Area size (m²)\",\"Amount tree detections\" ), caption = NULL, digits = 2, escape = TRUE) |>\n kableExtra::kable_styling(\n bootstrap_options = c(\"striped\", \"hold_position\", \"bordered\",\"responsive\"),\n stripe_index = c(1:3,7:9),\n full_width = FALSE\n ) |>\n 
kableExtra::pack_rows(\"Beech\", 1, 3) |>\n kableExtra::pack_rows(\"Oak\", 4, 6) |>\n kableExtra::pack_rows(\"Pine\", 7, 9) |>\n kableExtra::pack_rows(\"Spruce\", 10, 12) |>\n kableExtra::column_spec(1, bold = TRUE)\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n \n \n \n \n \n \n \n \n\n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Patch Name Location Area size (m²) Amount tree detections
Beech
Bielefeld brackwede (8.5244, 51.9902) 161410.57 1443
Billerbeck (7.3273, 51.9987) 185887.25 1732
Wülfenrath (7.0769, 51.2917) 350621.21 2779
Oak
Hamm (7.8618, 51.6639) 269397.22 2441
Münster (7.6187, 51.9174) 164116.61 1270
Rinkerode (7.6744, 51.8598) 198811.09 1643
Pine
Greffen (8.1697, 51.9913) 49418.81 513
Mesum (7.5403, 52.2573) 405072.85 5031
Telgte (7.7816, 52.0024) 274132.34 3368
Spruce
Brilon (8.5352, 51.4084) 211478.20 3342
Oberhundem (8.1861, 51.0909) 151895.53 2471
Osterwald (8.3721, 51.2151) 216026.43 2806
\n\n`````\n:::\n:::\n\n\n\n\n\n## Distribution of the tree heights\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\ndetections <- lfa::lfa_get_detections()\n```\n:::\n\nIn this study, we scrutinize the distribution of tree heights, focusing initially on the density distribution to unravel the nuances across various tree species. Notably, our examination reveals distinctive patterns, with Oak and Pine exhibiting significantly steeper peaks in their density curves compared to Beech and Spruce. While all species present unique density curves, a commonality emerges—each curve is characterized by a single peak, except for the intriguing exception observed in Telgte. Taking Beech as an illustrative example, our findings indicate a notable shift in the peak to a considerably higher extent. The variance in the density curves indicates that a differentiation between species based on tree height values alone could be difficult.\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\nlfa::lfa_create_density_plots(detections, value_column = \"Z\", category_column1 = \"area\", category_column2 = \"specie\", title = \"Density of the height distributions\", xlims = c(0,50))\n```\n\n::: {.cell-output-display}\n![Density of the height distributions of the detected trees, split by the different researched areas and grouped by the dominant species in each area.](report_files/figure-html/fig-density-z-1.png){#fig-density-z width=672}\n:::\n:::\n\nTo have a deeper look into the distributions of those `Z`-values we will now also have a look into the boxplots of the height distributions in the different areas.\nNoteworthy observations include the presence of outliers beyond the extended range of the Whisker Antennas ($1.5*\\text{IQR}$) in all datasets. Of particular interest is the Rinkerode dataset, which exhibits a higher prevalence of outliers in the upper domain. 
Anomalies in this dataset are attributed to potential inaccuracies, urging a critical examination of data integrity. A pairwise examination of Oak and Pine species indicates higher mean heights for Oak compared to Pine. This insight underscores the significance of species-specific attributes in shaping overall height distributions. Further exploration into the factors contributing to these mean differences enhances our understanding of the unique characteristics inherent to each species. Contrary to expectations, the spread within a particular species does not exhibit significant divergence from the spread observed between different species. This finding suggests that while species-specific traits play a crucial role in shaping height distributions, certain overarching factors may contribute to shared patterns across diverse tree populations.\n\n\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\nlfa::lfa_create_boxplot(detections, value_column = \"Z\", category_column1 = \"area\", category_column2 = \"specie\", title = \"Boxplots of the height distributions\")\n```\n\n::: {.cell-output-display}\n![Boxplots of the height distributions of the detected trees, split by the different researched areas and grouped by the dominant species in each area.](report_files/figure-html/fig-boxplot-z-1.png){#fig-boxplot-z width=672}\n:::\n:::\n\n\n\nOur examination of Kullback-Leibler Divergence (KLD) and Jensen-Shannon Divergence (JSD) metrics reveals low mean values (KLD: 5.252696, JSD: 2.246663) across different species, indicating overall similarity in tree species height distributions. However, within specific species, particularly Pine, higher divergence values (see @tbl-z-values-kld-pine and @tbl-z-values-jsd-pine) suggest significant intraspecific differences.\n\nNotably, the Spruce species consistently demonstrates low divergence values across all tested areas, implying a high level of explainability. 
This finding highlights tree height as a reliable indicator for detecting Spruce trees, indicating its potential for accurate species identification in diverse forest ecosystems.\n\n## n-nearest Neighbours\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\nneighbors <- lfa::lfa_combine_sf_obj(lfa::lfa_get_neighbor_paths(),lfa::lfa_get_all_areas())\n```\n:::\n\n### Overview\nTo initiate our analysis, we first establish a framework for selecting neighbors by examining the distance development with different n, as illustrated in @fig-n-nearest-overview. The curves share a similar design, but the actual values vary. Notably, as n increases, the distance between all patches also increases, indicating a broader spatial context.\n\nConsidering this trend, we extend our investigation beyond the nearest neighbor to include the 100th nearest neighbor. The $\\Delta$distance shows a consistent decrease with each increment in n, reinforcing our decision to limit exploration beyond n of a hundred. Additionally, the constraint is driven by practical considerations, as our sample size occasionally lacks the capacity to explore larger n values, resulting in inaccurate values due to the absence of the true nearest neighbor within the sample area.\n\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\nlfa::lfa_create_neighbor_mean_curves(neighbors) |> lfa::lfa_create_plot_per_area()\n```\n\n::: {.cell-output-display}\n![Average Distance to n-nearest neighbor from each patch. For simplicity colored by the dominant specie of each tree.](report_files/figure-html/fig-n-nearest-overview-1.png){#fig-n-nearest-overview width=672}\n:::\n:::\n\n\n### The Nearest Neighbour\nOur initial focus centers on examining the distance to the nearest neighbor for each tree. 
Notably, the curve representing Spruce exhibits distinct characteristics compared to the three other curves—displaying a steeper profile with less variance, as depicted in @fig-density-1-nearest.\n\nFurther analysis of all patches reveals similar distributions, as evident in the boxplot shown in @fig-boxplot-1-nearest, where mean and variance demonstrate consistency across patches. However, these graphical statistics present challenges in effectively distinguishing between different tree species based on the distance to the nearest neighbor.\n\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\nlfa::lfa_create_density_plots(neighbors,value_column = \"Neighbor_1\",category_column1 = \"area\",category_column2 = \"specie\", title = \"Density plots for the nearest neighbor among species and areas\", xlims = c(0,15))\n```\n\n::: {.cell-output-display}\n![Density plot of the distance to the nearest neighbor distribution across all patches grouped by the dominant species.](report_files/figure-html/fig-density-1-nearest-1.png){#fig-density-1-nearest width=672}\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\nlfa::lfa_create_boxplot(neighbors,value_column = \"Neighbor_1\",category_column1 = \"area\",category_column2 = \"specie\", title = \"Box plots for the nearest neighbor among species and areas\")\n```\n\n::: {.cell-output-display}\n![Box plot of the distance to the nearest neighbor distribution across all patches grouped by the dominant species.](report_files/figure-html/fig-boxplot-1-nearest-1.png){#fig-boxplot-1-nearest width=672}\n:::\n:::\n\n\n### The 100th nearest Neighbor\nMoving on to the analysis of the 100th nearest neighbor, intriguing patterns emerge. 
Peaks in the curves display varying heights and positions, with a notable example being the complete shift between Oak and Spruce, as illustrated in @fig-density-100-nearest.\n\nHowever, it is essential to acknowledge the high variance observed between curves within a species, such as Pine or Beech. While this variance could serve as a potential indicator, it comes with the caveat that the sample size must be substantial for reliable conclusions.\n\nExamining boxplots reveals numerous outliers above the boxes, hinting at potential edge effects on the sides of patches. This observation raises concerns about the adequacy of trees in these areas for a more in-depth analysis, posing challenges in deriving accurate insights.\n\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\nlfa::lfa_create_density_plots(neighbors,value_column = \"Neighbor_100\",category_column1 = \"area\",category_column2 = \"specie\", title = \"Density plots for the nearest neighbor along species and areas\", xlims = c(35,100))\n```\n\n::: {.cell-output-display}\n![Density plot of the distance to the 100th nearest neighbor distribution across all patches grouped by the dominant species.](report_files/figure-html/fig-density-100-nearest-1.png){#fig-density-100-nearest width=672}\n:::\n:::\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\nlfa::lfa_create_boxplot(neighbors,value_column = \"Neighbor_100\",category_column1 = \"area\",category_column2 = \"specie\", title = \"Box plots for the nearest neighbor along species and areas\")\n```\n\n::: {.cell-output-display}\n![Box plot of the distance to the 100th nearest neighbor distribution across all patches grouped by the dominant species.](report_files/figure-html/fig-boxplot-100-nearest-1.png){#fig-boxplot-100-nearest width=672}\n:::\n:::\n\n#### Average distance to 100 nearest neighbors\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\nnames <- paste0(\"Neighbor_\",1:100)\nneighbors$avg = 
rowMeans(dplyr::select(as.data.frame(neighbors),names))\n```\n:::\n\nTurning our attention to the averages of the first 100 neighbors, our analysis indicates strikingly similar results. There is considerable variance observed between different species, as well as within individual species, as depicted in @fig-density-avg-nearest.\n\nDespite the uniformity in average results, the issue of outliers persists, as evident in the boxplot representation shown in @fig-boxplot-avg-nearest. These outliers pose challenges and may be indicative of specific environmental conditions affecting tree distributions. Further exploration is required to better understand and mitigate the impact of outliers on our analysis.\n\n\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\nlfa::lfa_create_density_plots(neighbors,value_column = \"avg\",category_column1 = \"area\",category_column2 = \"specie\", title = \"Density plots for the avergae of 100 nearest neighbors across species and areas\", xlims = c(25,60))\n```\n\n::: {.cell-output-display}\n![Density plot of the average distance to the nearest neighbor (n=100) distribution across all patches grouped by the dominant species.](report_files/figure-html/fig-density-avg-nearest-1.png){#fig-density-avg-nearest width=672}\n:::\n:::\n\n\n\nThe neighbor analysis proves potentially useful for distinguishing between tree species, yet the observed variances within each species suggest that relying solely on distance to neighbors may not suffice.\n\nA critical consideration is the sample size problem, wherein more distinguishable patterns emerge with higher neighbor levels, but this necessitates a sufficiently large sample size. Unfortunately, deriving a clear relationship between sample size and the number of tree neighbors remains elusive in our current findings. 
This gap in understanding could be a pertinent subject for further research, delving into the intricate interplay between sample size and the effectiveness of neighbor analysis in species differentiation.\n\n\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\nlfa::lfa_create_boxplot(neighbors,value_column = \"avg\",category_column1 = \"area\",category_column2 = \"specie\", title = \"Box plots for the average to the nearest neighbor across all species and areas\")\n```\n\n::: {.cell-output-display}\n![Density plot of the average distance to the nearest neighbor (n = 100) distribution across all patches grouped by the dominant species.](report_files/figure-html/fig-boxplot-avg-nearest-1.png){#fig-boxplot-avg-nearest width=672}\n:::\n:::\n\n\n\n\n|specie |area | density (1/m²)|\n|:------|:-------------------|---------:|\n|beech |bielefeld_brackwede | 0.0089399|\n|beech |billerbeck | 0.0093175|\n|beech |wuelfenrath | 0.0079259|\n|oak |hamm | 0.0090610|\n|oak |muenster | 0.0077384|\n|oak |rinkerode | 0.0082641|\n|pine |greffen | 0.0103807|\n|pine |mesum | 0.0124200|\n|pine |telgte | 0.0122860|\n|spruce |brilon | 0.0158030|\n|spruce |oberhundem | 0.0162678|\n|spruce |osterwald | 0.0129892|\n\n\n\n# References\n\n::: {#refs}\n:::\n\n# Appendix\n## Script which can be used to do all preprocessing {#sec-appendix-preprocessing}\n\n::: {.cell}\n\n:::\n\n\nLoad the file with the research areas\n::: {.cell}\n\n```{.r .cell-code}\nsf <- sf::read_sf(here::here(\"research_areas.shp\"))\nprint(sf)\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nSimple feature collection with 12 features and 3 fields\nGeometry type: POLYGON\nDimension: XY\nBounding box: xmin: 7.071625 ymin: 51.0895 xmax: 8.539877 ymax: 52.25983\nGeodetic CRS: WGS 84\n# A tibble: 12 × 4\n id species name geometry\n \n 1 1 oak rinkerode ((7.678922 51.85789, 7.675446 51.85752, 7.…\n 2 2 oak hamm ((7.858955 51.66699, 7.866444 51.66462, 7.…\n 3 3 oak muenster ((7.618908 51.9154, 7.617384 51.9172, 7.61…\n 4 4 
pine greffen ((8.168691 51.98965, 8.167178 51.99075, 8.…\n 5 5 pine telgte ((7.779728 52.00662, 7.781616 52.00662, 7.…\n 6 6 pine mesum ((7.534424 52.25499, 7.53378 52.25983, 7.5…\n 7 7 beech bielefeld_brackwede ((8.524749 51.9921, 8.528418 51.99079, 8.5…\n 8 8 beech wuelfenrath ((7.071625 51.29256, 7.072311 51.29334, 7.…\n 9 9 beech billerbeck ((7.324729 51.99783, 7.323548 51.99923, 7.…\n10 11 spruce brilon ((8.532195 51.41029, 8.535027 51.41064, 8.…\n11 12 spruce osterwald ((8.369328 51.21693, 8.371238 51.21718, 8.…\n12 10 spruce oberhundem ((8.18082 51.08999, 8.180868 51.09143, 8.1…\n```\n:::\n:::\n\n\nInit the project\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(lfa)\nsf::sf_use_s2(FALSE)\nlocations <- lfa_init(\"research_areas.shp\")\n```\n:::\n\nDo all of the preprocessing steps\n::: {.cell}\n\n```{.r .cell-code}\nlfa_map_tile_locations(locations,retile,check_flag = \"retile\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nNo further processing: flag retile is set!Function is already computed, no further computings here\n```\n:::\n\n::: {.cell-output .cell-output-stdout}\n```\nNULL\n```\n:::\n\n```{.r .cell-code}\nlfa_map_tile_locations(locations, lfa_intersect_areas, ctg = NULL, areas_sf = sf,check_flag = \"intersect\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nNo further processing: flag intersect is set!Function is already computed, no further computings here\n```\n:::\n\n::: {.cell-output .cell-output-stdout}\n```\nNULL\n```\n:::\n\n```{.r .cell-code}\nlfa_map_tile_locations(locations, lfa_ground_correction, ctg = NULL,check_flag = \"z_correction\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nNo further processing: flag z_correction is set!Function is already computed, no further computings here\n```\n:::\n\n::: {.cell-output .cell-output-stdout}\n```\nNULL\n```\n:::\n\n```{.r .cell-code}\nlfa_map_tile_locations(locations, lfa_segmentation, ctg = NULL,check_flag = \"segmentation\")\n```\n\n::: {.cell-output 
.cell-output-stdout}\n```\nNo further processing: flag segmentation is set!Function is already computed, no further computings here\n```\n:::\n\n::: {.cell-output .cell-output-stdout}\n```\nNULL\n```\n:::\n\n```{.r .cell-code}\nlfa_map_tile_locations(locations, lfa_detection, catalog = NULL, write_to_file = TRUE,check_flag = \"detection\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nNo further processing: flag detection is set!Function is already computed, no further computings here\n```\n:::\n\n::: {.cell-output .cell-output-stdout}\n```\nNULL\n```\n:::\n:::\n\n\n## Quantitative Results\n### Distribution of Z-Values\n\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\ndata <- lfa::lfa_get_detections()\nvalue_column <- \"Z\"\n```\n:::\n\n\n\n#### Kullback-Leibler-Divergence\n\n\n\n::: {#tbl-z-values-kld_specie .cell tbl-cap='Kullback-Leibler-Divergence between the researched species Beech, Oak, Pine and Spruce for the atrribute z-values'}\n\n```{.r .cell-code code-fold=\"true\"}\nkld_results_specie <- lfa::lfa_run_test_asymmetric(data,value_column,\"specie\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_specie,\"Kullback-Leibler-Divergence between species\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between species
Beech Oak Pine Spruce
Beech 0.0 13.2 12.5 0.76
Oak 4.2 0.0 3.4 5.02
Pine 2.3 5.6 0.0 3.95
Spruce 2.4 14.7 16.1 0.00
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_specie, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 5.252696\n```\n:::\n:::\n\n\n\n\n::: {#tbl-z-values-kld-beech .cell tbl-cap='Kullback-Leibler-Divergence between the researched areas which have the dominante specie beech for the atrribute z-values'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"beech\",]\nkld_results_beech <- lfa::lfa_run_test_asymmetric(specie,value_column,\"area\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_beech,\"Kullback-Leibler-Divergence between areas with beech\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between areas with beech
Bielefeld_brackwede Billerbeck Wuelfenrath
Bielefeld_brackwede 0.00 0.4 3.1
Billerbeck 0.27 0.0 6.0
Wuelfenrath 1.13 2.4 0.0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_beech, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 1.473353\n```\n:::\n:::\n\n\n\n\n::: {#tbl-z-values-kld-oak .cell tbl-cap='Kullback-Leibler-Divergence between the researched areas which have the dominante specie oak for the atrribute z-values'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"oak\",]\nkld_results_oak <- lfa::lfa_run_test_asymmetric(specie,value_column,\"area\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_oak,\"Kullback-Leibler-Divergence between areas with oak\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between areas with oak
Hamm Muenster Rinkerode
Hamm 0.0 2.1 16
Muenster 0.4 0.0 17
Rinkerode 7.6 17.8 0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_oak, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 6.779863\n```\n:::\n:::\n\n\n\n\n::: {#tbl-z-values-kld-pine .cell tbl-cap='Kullback-Leibler-Divergence between the researched areas which have the dominante specie pine for the atrribute z-values'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"pine\",]\nkld_results_pine <- lfa::lfa_run_test_asymmetric(specie,value_column,\"area\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_pine,\"Kullback-Leibler-Divergence between areas with pine\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between areas with pine
Greffen Mesum Telgte
Greffen 0.00 0.74 16
Mesum 0.43 0.00 18
Telgte 3.87 6.82 0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_pine, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 5.129383\n```\n:::\n:::\n\n\n\n\n::: {#tbl-z-values-kld-spruce .cell tbl-cap='Kullback-Leibler-Divergence between the researched areas which have the dominante specie spruce for the atrribute z-values'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"spruce\",]\nkld_results_spruce <- lfa::lfa_run_test_asymmetric(specie,value_column,\"area\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_spruce,\"Kullback-Leibler-Divergence between areas with spruce\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between areas with spruce
Brilon Oberhundem Osterwald
Brilon 0.000 0.092 1.7
Oberhundem 0.081 0.000 2.1
Osterwald 1.521 2.178 0.0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_spruce, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.8509258\n```\n:::\n:::\n\n\n\n\n#### Jensen-Shannon Divergence\n\n\n\n::: {#tbl-z-values-jsd_specie .cell tbl-cap='Jensen-Shannon Divergence between the researched species Beech, Oak, Pine and Spruce for the atrribute z-values'}\n\n```{.r .cell-code code-fold=\"true\"}\njsd_results_specie <- lfa::lfa_run_test_symmetric(data,value_column,\"specie\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_specie,\"Jensen-Shannon Divergence between species\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between species
Beech Oak Pine Spruce
Beech 0 4.5 4.6 2.4
Oak NA 0.0 3.9 6.1
Pine NA NA 0.0 7.1
Spruce NA NA NA 0.0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_specie, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 2.246663\n```\n:::\n:::\n\n\n\n\n::: {#tbl-z-values-jsd-beech .cell tbl-cap='Jensen-Shannon Divergence between the researched areas which have the dominante specie beech for the atrribute z-values'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"beech\",]\njsd_results_beech <- lfa::lfa_run_test_symmetric(specie,value_column,\"area\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_beech,\"Jensen-Shannon Divergence between areas with beech\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between areas with beech
Bielefeld_brackwede Billerbeck Wuelfenrath
Bielefeld_brackwede 0 1.1 3.3
Billerbeck NA 0.0 4.9
Wuelfenrath NA NA 0.0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_beech, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 1.10555\n```\n:::\n:::\n\n\n\n\n::: {#tbl-z-values-jsd-oak .cell tbl-cap='Jensen-Shannon Divergence between the researched areas which have the dominante specie oak for the atrribute z-values'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"oak\",]\njsd_results_oak <- lfa::lfa_run_test_symmetric(specie,value_column,\"area\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_oak,\"Jensen-Shannon Divergence between areas with oak\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between areas with oak
Hamm Muenster Rinkerode
Hamm 0 1.6 6.5
Muenster NA 0.0 6.4
Rinkerode NA NA 0.0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_oak, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 1.692942\n```\n:::\n:::\n\n\n\n\n::: {#tbl-z-values-jsd-pine .cell tbl-cap='Jensen-Shannon Divergence between the researched areas which have the dominante specie pine for the atrribute z-values'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"pine\",]\njsd_results_pine <- lfa::lfa_run_test_symmetric(specie,value_column,\"area\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_pine,\"Jensen-Shannon Divergence between areas with pine\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between areas with pine
Greffen Mesum Telgte
Greffen 0 3.1 12
Mesum NA 0.0 10
Telgte NA NA 0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_pine, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 2.956354\n```\n:::\n:::\n\n\n\n\n::: {#tbl-z-values-jsd-spruce .cell tbl-cap='Jensen-Shannon Divergence between the researched areas which have the dominante specie spruce for the atrribute z-values'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"spruce\",]\njsd_results_spruce <- lfa::lfa_run_test_symmetric(specie,value_column,\"area\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_spruce,\"Jensen-Shannon Divergence between areas with spruce\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between areas with spruce
Brilon Oberhundem Osterwald
Brilon 0 0.31 4.0
Oberhundem NA 0.00 5.5
Osterwald NA NA 0.0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_spruce, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 1.100383\n```\n:::\n:::\n\n\n\n\n### Nearest Neighbours\n#### Distribution of nearest neighbor distances\n\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\ndata <- lfa::lfa_combine_sf_obj(lfa::lfa_get_neighbor_paths(),lfa::lfa_get_all_areas())\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/beech/bielefeld_brackwede/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 1443 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 466999.8 ymin: 5759839 xmax: 467617.1 ymax: 5760261\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/beech/billerbeck/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 1732 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 384890.8 ymin: 5761918 xmax: 385590.9 ymax: 5762478\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/beech/wuelfenrath/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 2779 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 365546.3 ymin: 5683711 xmax: 366356.1 ymax: 5684321\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/oak/hamm/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 2441 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 420953.3 ymin: 5723884 xmax: 421596 ymax: 5724609\nProjected CRS: ETRS89 / 
UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/oak/muenster/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 1270 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 404615.6 ymin: 5752535 xmax: 405396.8 ymax: 5752971\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/oak/rinkerode/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 1643 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 408428.2 ymin: 5746021 xmax: 409014.8 ymax: 5746511\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/pine/greffen/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 513 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 442816.1 ymin: 5760217 xmax: 443148.9 ymax: 5760567\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/pine/mesum/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 5031 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 399930.6 ymin: 5790412 xmax: 400969.7 ymax: 5790950\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/pine/telgte/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 3368 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 416135.1 ymin: 5761663 xmax: 416697.1 ymax: 5762477\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n 
`/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/spruce/brilon/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 3342 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 467305.7 ymin: 5695055 xmax: 467996.9 ymax: 5695593\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/spruce/oberhundem/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 2471 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 442631.7 ymin: 5660096 xmax: 443309.5 ymax: 5660502\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/spruce/osterwald/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 2806 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 455822 ymin: 5673761 xmax: 456483.2 ymax: 5674162\nProjected CRS: ETRS89 / UTM zone 32N\n```\n:::\n\n```{.r .cell-code code-fold=\"true\"}\nvalue_column <- \"Neighbor_1\"\n```\n:::\n\n\n\n##### Kullback-Leibler-Divergence\n\n\n\n::: {#tbl-nearest-neighbor-1-kld_specie .cell tbl-cap='Kullback-Leibler-Divergence between the researched species Beech, Oak, Pine and Spruce for the atrribute nearest-neighbor-1'}\n\n```{.r .cell-code code-fold=\"true\"}\nkld_results_specie <- lfa::lfa_run_test_asymmetric(data,value_column,\"specie\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_specie,\"Kullback-Leibler-Divergence between species\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between species
Beech Oak Pine Spruce
Beech 0.000 0.029 0.40 3.3
Oak 0.031 0.000 0.25 3.9
Pine 0.213 0.128 0.00 4.9
Spruce 2.735 3.199 4.52 0.0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_specie, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 1.477983\n```\n:::\n:::\n\n\n\n\n::: {#tbl-nearest-neighbor-1-kld-beech .cell tbl-cap='Kullback-Leibler-Divergence between the researched areas which have the dominante specie beech for the atrribute nearest-neighbor-1'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"beech\",]\nkld_results_beech <- lfa::lfa_run_test_asymmetric(specie,value_column,\"area\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_beech,\"Kullback-Leibler-Divergence between areas with beech\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between areas with beech
Bielefeld_brackwede Billerbeck Wuelfenrath
Bielefeld_brackwede 0.000 0.35 0.051
Billerbeck 0.380 0.00 0.138
Wuelfenrath 0.059 0.15 0.000
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_beech, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.1249588\n```\n:::\n:::\n\n\n\n\n::: {#tbl-nearest-neighbor-1-kld-oak .cell tbl-cap='Kullback-Leibler-Divergence between the researched areas which have the dominante specie oak for the atrribute nearest-neighbor-1'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"oak\",]\nkld_results_oak <- lfa::lfa_run_test_asymmetric(specie,value_column,\"area\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_oak,\"Kullback-Leibler-Divergence between areas with oak\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between areas with oak
Hamm Muenster Rinkerode
Hamm 0.000 0.079 0.078
Muenster 0.092 0.000 0.019
Rinkerode 0.086 0.020 0.000
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_oak, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.04167636\n```\n:::\n:::\n\n\n\n\n::: {#tbl-nearest-neighbor-1-kld-pine .cell tbl-cap='Kullback-Leibler-Divergence between the researched areas which have the dominante specie pine for the atrribute nearest-neighbor-1'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"pine\",]\nkld_results_pine <- lfa::lfa_run_test_asymmetric(specie,value_column,\"area\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_pine,\"Kullback-Leibler-Divergence between areas with pine\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between areas with pine
Greffen Mesum Telgte
Greffen 0.00 0.495 0.258
Mesum 0.48 0.000 0.098
Telgte 0.22 0.076 0.000
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_pine, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.1812239\n```\n:::\n:::\n\n\n\n\n::: {#tbl-nearest-neighbor-1-kld-spruce .cell tbl-cap='Kullback-Leibler-Divergence between the researched areas which have the dominante specie spruce for the atrribute nearest-neighbor-1'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"spruce\",]\nkld_results_spruce <- lfa::lfa_run_test_asymmetric(specie,value_column,\"area\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_spruce,\"Kullback-Leibler-Divergence between areas with spruce\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between areas with spruce
Brilon Oberhundem Osterwald
Brilon 0.00 0.67 5.1
Oberhundem 0.41 0.00 7.2
Osterwald 6.09 6.23 0.0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_spruce, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 2.863587\n```\n:::\n:::\n\n\n\n\n##### Jensen-Shannon Divergence\n\n\n\n::: {#tbl-nearest-neighbor-1-jsd_specie .cell tbl-cap='Jensen-Shannon Divergence between the researched species Beech, Oak, Pine and Spruce for the atrribute nearest-neighbor-1'}\n\n```{.r .cell-code code-fold=\"true\"}\njsd_results_specie <- lfa::lfa_run_test_symmetric(data,value_column,\"specie\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_specie,\"Jensen-Shannon Divergence between species\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between species
Beech Oak Pine Spruce
Beech 0 0.22 2.1 9.3
Oak NA 0.00 1.3 10.6
Pine NA NA 0.0 14.7
Spruce NA NA NA 0.0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_specie, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 2.470051\n```\n:::\n:::\n\n\n\n\n::: {#tbl-nearest-neighbor-1-jsd-beech .cell tbl-cap='Jensen-Shannon Divergence between the researched areas which have the dominant species beech for the attribute nearest-neighbor-1'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"beech\",]\njsd_results_beech <- lfa::lfa_run_test_symmetric(specie,value_column,\"area\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_beech,\"Jensen-Shannon Divergence between areas with beech\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between areas with beech
Bielefeld_brackwede Billerbeck Wuelfenrath
Bielefeld_brackwede 0 2.2 0.39
Billerbeck NA 0.0 0.85
Wuelfenrath NA NA 0.00
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_beech, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.5042359\n```\n:::\n:::\n\n\n\n\n::: {#tbl-nearest-neighbor-1-jsd-oak .cell tbl-cap='Jensen-Shannon Divergence between the researched areas which have the dominant species oak for the attribute nearest-neighbor-1'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"oak\",]\njsd_results_oak <- lfa::lfa_run_test_symmetric(specie,value_column,\"area\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_oak,\"Jensen-Shannon Divergence between areas with oak\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between areas with oak
Hamm Muenster Rinkerode
Hamm 0 0.57 0.61
Muenster NA 0.00 0.17
Rinkerode NA NA 0.00
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_oak, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.1803836\n```\n:::\n:::\n\n\n\n\n::: {#tbl-nearest-neighbor-1-jsd-pine .cell tbl-cap='Jensen-Shannon Divergence between the researched areas which have the dominant species pine for the attribute nearest-neighbor-1'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"pine\",]\njsd_results_pine <- lfa::lfa_run_test_symmetric(specie,value_column,\"area\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_pine,\"Jensen-Shannon Divergence between areas with pine\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between areas with pine
Greffen Mesum Telgte
Greffen 0 3.6 1.89
Mesum NA 0.0 0.68
Telgte NA NA 0.00
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_pine, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.891592\n```\n:::\n:::\n\n\n\n\n::: {#tbl-nearest-neighbor-1-jsd-spruce .cell tbl-cap='Jensen-Shannon Divergence between the researched areas which have the dominant species spruce for the attribute nearest-neighbor-1'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"spruce\",]\njsd_results_spruce <- lfa::lfa_run_test_symmetric(specie,value_column,\"area\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_spruce,\"Jensen-Shannon Divergence between areas with spruce\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between areas with spruce
Brilon Oberhundem Osterwald
Brilon 0 4.1 16
Oberhundem NA 0.0 18
Osterwald NA NA 0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_spruce, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 4.471632\n```\n:::\n:::\n\n\n#### Distribution of distances to 100th nearest neighbor\n\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\ndata <- lfa::lfa_combine_sf_obj(lfa::lfa_get_neighbor_paths(),lfa::lfa_get_all_areas())\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/beech/bielefeld_brackwede/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 1443 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 466999.8 ymin: 5759839 xmax: 467617.1 ymax: 5760261\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/beech/billerbeck/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 1732 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 384890.8 ymin: 5761918 xmax: 385590.9 ymax: 5762478\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/beech/wuelfenrath/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 2779 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 365546.3 ymin: 5683711 xmax: 366356.1 ymax: 5684321\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/oak/hamm/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 2441 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 420953.3 ymin: 5723884 xmax: 421596 ymax: 5724609\nProjected CRS: ETRS89 / UTM zone 
32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/oak/muenster/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 1270 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 404615.6 ymin: 5752535 xmax: 405396.8 ymax: 5752971\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/oak/rinkerode/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 1643 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 408428.2 ymin: 5746021 xmax: 409014.8 ymax: 5746511\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/pine/greffen/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 513 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 442816.1 ymin: 5760217 xmax: 443148.9 ymax: 5760567\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/pine/mesum/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 5031 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 399930.6 ymin: 5790412 xmax: 400969.7 ymax: 5790950\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/pine/telgte/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 3368 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 416135.1 ymin: 5761663 xmax: 416697.1 ymax: 5762477\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n 
`/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/spruce/brilon/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 3342 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 467305.7 ymin: 5695055 xmax: 467996.9 ymax: 5695593\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/spruce/oberhundem/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 2471 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 442631.7 ymin: 5660096 xmax: 443309.5 ymax: 5660502\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/spruce/osterwald/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 2806 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 455822 ymin: 5673761 xmax: 456483.2 ymax: 5674162\nProjected CRS: ETRS89 / UTM zone 32N\n```\n:::\n\n```{.r .cell-code code-fold=\"true\"}\nvalue_column <- \"Neighbor_100\"\n```\n:::\n\n\n\n##### Kullback-Leibler-Divergence\n\n\n\n::: {#tbl-nearest-neighbor-100-kld_specie .cell tbl-cap='Kullback-Leibler-Divergence between the researched species Beech, Oak, Pine and Spruce for the attribute nearest-neighbor-100'}\n\n```{.r .cell-code code-fold=\"true\"}\nkld_results_specie <- lfa::lfa_run_test_asymmetric(data,value_column,\"specie\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_specie,\"Kullback-Leibler-Divergence between species\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between species
Beech Oak Pine Spruce
Beech 0.000 0.194 0.082 0.89
Oak 0.183 0.000 0.063 0.67
Pine 0.084 0.069 0.000 0.86
Spruce 1.083 0.809 1.200 0.00
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_specie, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.3862841\n```\n:::\n:::\n\n\n\n\n::: {#tbl-nearest-neighbor-100-kld-beech .cell tbl-cap='Kullback-Leibler-Divergence between the researched areas which have the dominant species beech for the attribute nearest-neighbor-100'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"beech\",]\nkld_results_beech <- lfa::lfa_run_test_asymmetric(specie,value_column,\"area\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_beech,\"Kullback-Leibler-Divergence between areas with beech\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between areas with beech
Bielefeld_brackwede Billerbeck Wuelfenrath
Bielefeld_brackwede 0.00 0.12 0.12
Billerbeck 0.14 0.00 0.40
Wuelfenrath 0.12 0.31 0.00
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_beech, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.1338066\n```\n:::\n:::\n\n\n\n\n::: {#tbl-nearest-neighbor-100-kld-oak .cell tbl-cap='Kullback-Leibler-Divergence between the researched areas which have the dominant species oak for the attribute nearest-neighbor-100'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"oak\",]\nkld_results_oak <- lfa::lfa_run_test_asymmetric(specie,value_column,\"area\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_oak,\"Kullback-Leibler-Divergence between areas with oak\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between areas with oak
Hamm Muenster Rinkerode
Hamm 0.00 0.19 0.11
Muenster 0.20 0.00 0.06
Rinkerode 0.11 0.07 0.00
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_oak, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.08182597\n```\n:::\n:::\n\n\n\n\n::: {#tbl-nearest-neighbor-100-kld-pine .cell tbl-cap='Kullback-Leibler-Divergence between the researched areas which have the dominant species pine for the attribute nearest-neighbor-100'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"pine\",]\nkld_results_pine <- lfa::lfa_run_test_asymmetric(specie,value_column,\"area\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_pine,\"Kullback-Leibler-Divergence between areas with pine\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between areas with pine
Greffen Mesum Telgte
Greffen 0.00 0.25 0.51
Mesum 0.20 0.00 0.25
Telgte 0.54 0.26 0.00
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_pine, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.22229\n```\n:::\n:::\n\n\n\n\n::: {#tbl-nearest-neighbor-100-kld-spruce .cell tbl-cap='Kullback-Leibler-Divergence between the researched areas which have the dominant species spruce for the attribute nearest-neighbor-100'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"spruce\",]\nkld_results_spruce <- lfa::lfa_run_test_asymmetric(specie,value_column,\"area\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_spruce,\"Kullback-Leibler-Divergence between areas with spruce\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between areas with spruce
Brilon Oberhundem Osterwald
Brilon 0.000 0.05 0.23
Oberhundem 0.046 0.00 0.37
Osterwald 0.276 0.46 0.00
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_spruce, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.1591879\n```\n:::\n:::\n\n\n\n\n##### Jensen-Shannon Divergence\n\n\n\n::: {#tbl-nearest-neighbor-100-jsd_specie .cell tbl-cap='Jensen-Shannon Divergence between the researched species Beech, Oak, Pine and Spruce for the attribute nearest-neighbor-100'}\n\n```{.r .cell-code code-fold=\"true\"}\njsd_results_specie <- lfa::lfa_run_test_symmetric(data,value_column,\"specie\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_specie,\"Jensen-Shannon Divergence between species\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between species
Beech Oak Pine Spruce
Beech 0 0.38 0.14 1.27
Oak NA 0.00 0.30 0.78
Pine NA NA 0.00 1.39
Spruce NA NA NA 0.00
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_specie, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.2997233\n```\n:::\n:::\n\n\n\n\n::: {#tbl-nearest-neighbor-100-jsd-beech .cell tbl-cap='Jensen-Shannon Divergence between the researched areas which have the dominant species beech for the attribute nearest-neighbor-100'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"beech\",]\njsd_results_beech <- lfa::lfa_run_test_symmetric(specie,value_column,\"area\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_beech,\"Jensen-Shannon Divergence between areas with beech\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between areas with beech
Bielefeld_brackwede Billerbeck Wuelfenrath
Bielefeld_brackwede 0 0.22 0.21
Billerbeck NA 0.00 0.57
Wuelfenrath NA NA 0.00
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_beech, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.124106\n```\n:::\n:::\n\n\n\n\n::: {#tbl-nearest-neighbor-100-jsd-oak .cell tbl-cap='Jensen-Shannon Divergence between the researched areas which have the dominant species oak for the attribute nearest-neighbor-100'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"oak\",]\njsd_results_oak <- lfa::lfa_run_test_symmetric(specie,value_column,\"area\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_oak,\"Jensen-Shannon Divergence between areas with oak\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between areas with oak
Hamm Muenster Rinkerode
Hamm 0 0.34 0.17
Muenster NA 0.00 0.23
Rinkerode NA NA 0.00
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_oak, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.1007612\n```\n:::\n:::\n\n\n\n\n::: {#tbl-nearest-neighbor-100-jsd-pine .cell tbl-cap='Jensen-Shannon Divergence between the researched areas which have the dominant species pine for the attribute nearest-neighbor-100'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"pine\",]\njsd_results_pine <- lfa::lfa_run_test_symmetric(specie,value_column,\"area\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_pine,\"Jensen-Shannon Divergence between areas with pine\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between areas with pine
Greffen Mesum Telgte
Greffen 0 0.45 0.86
Mesum NA 0.00 0.50
Telgte NA NA 0.00
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_pine, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.2265055\n```\n:::\n:::\n\n\n\n\n::: {#tbl-nearest-neighbor-100-jsd-spruce .cell tbl-cap='Jensen-Shannon Divergence between the researched areas which have the dominant species spruce for the attribute nearest-neighbor-100'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"spruce\",]\njsd_results_spruce <- lfa::lfa_run_test_symmetric(specie,value_column,\"area\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_spruce,\"Jensen-Shannon Divergence between areas with spruce\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between areas with spruce
Brilon Oberhundem Osterwald
Brilon 0 0.1 0.57
Oberhundem NA 0.0 0.73
Osterwald NA NA 0.00
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_spruce, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.1613747\n```\n:::\n:::\n\n\n#### Distribution of average nearest neighbor distances\n\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\"}\ndata <- lfa::lfa_combine_sf_obj(lfa::lfa_get_neighbor_paths(),lfa::lfa_get_all_areas())\n```\n\n::: {.cell-output .cell-output-stdout}\n```\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/beech/bielefeld_brackwede/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 1443 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 466999.8 ymin: 5759839 xmax: 467617.1 ymax: 5760261\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/beech/billerbeck/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 1732 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 384890.8 ymin: 5761918 xmax: 385590.9 ymax: 5762478\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/beech/wuelfenrath/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 2779 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 365546.3 ymin: 5683711 xmax: 366356.1 ymax: 5684321\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/oak/hamm/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 2441 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 420953.3 ymin: 5723884 xmax: 421596 ymax: 5724609\nProjected CRS: ETRS89 / UTM zone 
32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/oak/muenster/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 1270 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 404615.6 ymin: 5752535 xmax: 405396.8 ymax: 5752971\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/oak/rinkerode/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 1643 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 408428.2 ymin: 5746021 xmax: 409014.8 ymax: 5746511\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/pine/greffen/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 513 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 442816.1 ymin: 5760217 xmax: 443148.9 ymax: 5760567\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/pine/mesum/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 5031 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 399930.6 ymin: 5790412 xmax: 400969.7 ymax: 5790950\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/pine/telgte/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 3368 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 416135.1 ymin: 5761663 xmax: 416697.1 ymax: 5762477\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n 
`/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/spruce/brilon/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 3342 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 467305.7 ymin: 5695055 xmax: 467996.9 ymax: 5695593\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/spruce/oberhundem/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 2471 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 442631.7 ymin: 5660096 xmax: 443309.5 ymax: 5660502\nProjected CRS: ETRS89 / UTM zone 32N\nReading layer `neighbours' from data source \n `/home/jakob/gi-master/project-courses/lidar-forest-analysis/src/data/spruce/osterwald/neighbours.gpkg' \n using driver `GPKG'\nSimple feature collection with 2806 features and 102 fields\nGeometry type: POINT\nDimension: XY\nBounding box: xmin: 455822 ymin: 5673761 xmax: 456483.2 ymax: 5674162\nProjected CRS: ETRS89 / UTM zone 32N\n```\n:::\n\n```{.r .cell-code code-fold=\"true\"}\nnames <- paste0(\"Neighbor_\",1:100)\ndata$avg = rowMeans(dplyr::select(as.data.frame(data),names))\nvalue_column <- \"avg\"\n```\n:::\n\n\n\n##### Kullback-Leibler-Divergence\n\n\n\n::: {#tbl-nearest-neighbor-avg-kld_specie .cell tbl-cap='Kullback-Leibler-Divergence between the researched species Beech, Oak, Pine and Spruce for the attribute nearest-neighbor-avg'}\n\n```{.r .cell-code code-fold=\"true\"}\nkld_results_specie <- lfa::lfa_run_test_asymmetric(data,value_column,\"specie\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_specie,\"Kullback-Leibler-Divergence between species\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between species
Beech Oak Pine Spruce
Beech 0.000 0.31 0.065 1.28
Oak 0.302 0.00 0.178 0.83
Pine 0.067 0.17 0.000 1.23
Spruce 1.660 0.92 1.869 0.00
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_specie, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.5552882\n```\n:::\n:::\n\n\n\n\n::: {#tbl-nearest-neighbor-avg-kld-beech .cell tbl-cap='Kullback-Leibler-Divergence between the researched areas which have the dominant species beech for the attribute nearest-neighbor-avg'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"beech\",]\nkld_results_beech <- lfa::lfa_run_test_asymmetric(specie,value_column,\"area\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_beech,\"Kullback-Leibler-Divergence between areas with beech\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between areas with beech
Bielefeld_brackwede Billerbeck Wuelfenrath
Bielefeld_brackwede 0.000 0.052 0.50
Billerbeck 0.052 0.000 0.91
Wuelfenrath 0.348 0.612 0.00
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_beech, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.27574\n```\n:::\n:::\n\n\n\n\n::: {#tbl-nearest-neighbor-avg-kld-oak .cell tbl-cap='Kullback-Leibler-Divergence between the researched areas which have the dominant species oak for the attribute nearest-neighbor-avg'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"oak\",]\nkld_results_oak <- lfa::lfa_run_test_asymmetric(specie,value_column,\"area\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_oak,\"Kullback-Leibler-Divergence between areas with oak\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between areas with oak
Hamm Muenster Rinkerode
Hamm 0.00 0.166 0.217
Muenster 0.16 0.000 0.031
Rinkerode 0.21 0.037 0.000
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_oak, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.09154318\n```\n:::\n:::\n\n\n\n\n::: {#tbl-nearest-neighbor-avg-kld-pine .cell tbl-cap='Kullback-Leibler-Divergence between the researched areas which have the dominant species pine for the attribute nearest-neighbor-avg'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"pine\",]\nkld_results_pine <- lfa::lfa_run_test_asymmetric(specie,value_column,\"area\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_pine,\"Kullback-Leibler-Divergence between areas with pine\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between areas with pine
Greffen Mesum Telgte
Greffen 0.00 0.17 0.29
Mesum 0.14 0.00 0.30
Telgte 0.26 0.32 0.00
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_pine, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.1637513\n```\n:::\n:::\n\n\n\n\n::: {#tbl-nearest-neighbor-avg-kld-spruce .cell tbl-cap='Kullback-Leibler-Divergence between the researched areas which have the dominant species spruce for the attribute nearest-neighbor-avg'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"spruce\",]\nkld_results_spruce <- lfa::lfa_run_test_asymmetric(specie,value_column,\"area\",lfa::lfa_kld_from_vec)\nlfa::lfa_generate_result_table_tests(kld_results_spruce,\"Kullback-Leibler-Divergence between areas with spruce\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Kullback-Leibler-Divergence between areas with spruce
Brilon Oberhundem Osterwald
Brilon 0.000 0.11 0.29
Oberhundem 0.097 0.00 0.59
Osterwald 0.341 0.75 0.00
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(kld_results_spruce, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.2404004\n```\n:::\n:::\n\n\n\n\n##### Jensen-Shannon Divergence\n\n\n\n::: {#tbl-nearest-neighbor-avg-jsd_specie .cell tbl-cap='Jensen-Shannon Divergence between the researched species Beech, Oak, Pine and Spruce for the attribute nearest-neighbor-avg'}\n\n```{.r .cell-code code-fold=\"true\"}\njsd_results_specie <- lfa::lfa_run_test_symmetric(data,value_column,\"specie\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_specie,\"Jensen-Shannon Divergence between species\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between species
Beech Oak Pine Spruce
Beech 0 0.73 0.19 2.6
Oak NA 0.00 0.64 1.4
Pine NA NA 0.00 3.0
Spruce NA NA NA 0.0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_specie, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.5999417\n```\n:::\n:::\n\n\n\n\n::: {#tbl-nearest-neighbor-avg-jsd-beech .cell tbl-cap='Jensen-Shannon Divergence between the researched areas which have the dominant species beech for the attribute nearest-neighbor-avg'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"beech\",]\njsd_results_beech <- lfa::lfa_run_test_symmetric(specie,value_column,\"area\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_beech,\"Jensen-Shannon Divergence between areas with beech\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between areas with beech
Bielefeld_brackwede Billerbeck Wuelfenrath
Bielefeld_brackwede 0 0.14 1.0
Billerbeck NA 0.00 1.7
Wuelfenrath NA NA 0.0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_beech, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.3215991\n```\n:::\n:::\n\n\n\n\n::: {#tbl-nearest-neighbor-avg-jsd-oak .cell tbl-cap='Jensen-Shannon Divergence between the researched areas which have the dominant species oak for the attribute nearest-neighbor-avg'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"oak\",]\njsd_results_oak <- lfa::lfa_run_test_symmetric(specie,value_column,\"area\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_oak,\"Jensen-Shannon Divergence between areas with oak\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between areas with oak
Hamm Muenster Rinkerode
Hamm 0 0.41 0.53
Muenster NA 0.00 0.26
Rinkerode NA NA 0.00
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_oak, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.1558436\n```\n:::\n:::\n\n\n\n\n::: {#tbl-nearest-neighbor-avg-jsd-pine .cell tbl-cap='Jensen-Shannon Divergence between the researched areas which have the dominant species pine for the attribute nearest-neighbor-avg'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"pine\",]\njsd_results_pine <- lfa::lfa_run_test_symmetric(specie,value_column,\"area\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_pine,\"Jensen-Shannon Divergence between areas with pine\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between areas with pine
Greffen Mesum Telgte
Greffen 0 0.44 0.76
Mesum NA 0.00 0.89
Telgte NA NA 0.00
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_pine, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.2560143\n```\n:::\n:::\n\n\n\n\n::: {#tbl-nearest-neighbor-avg-jsd-spruce .cell tbl-cap='Jensen-Shannon Divergence between the researched areas which have the dominante specie spruce for the atrribute nearest-neighbor-avg'}\n\n```{.r .cell-code code-fold=\"true\"}\nspecie <- data[data$specie==\"spruce\",]\njsd_results_spruce <- lfa::lfa_run_test_symmetric(specie,value_column,\"area\",lfa::lfa_jsd_from_vec)\nlfa::lfa_generate_result_table_tests(jsd_results_spruce,\"Jensen-Shannon Divergence between areas with spruce\")\n```\n\n::: {.cell-output-display}\n`````{=html}\n\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
Jensen-Shannon Divergence between areas with spruce
Brilon Oberhundem Osterwald
Brilon 0 0.32 1.1
Oberhundem NA 0.00 1.8
Osterwald NA NA 0.0
\n\n`````\n:::\n:::\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncolMeans(jsd_results_spruce, na.rm = TRUE) |> mean()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 0.3713411\n```\n:::\n:::\n\n\n\n\n## Documentation\n### `lfa_capitalize_first_char`\n\nCapitalize First Character of a String\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`input_string` | A single-character string to be processed.\n\n\n#### Concept\n\nString Manipulation\n\n\n#### Description\n\nThis function takes a string as input and returns the same string with the\n first character capitalized. If the first character is already capitalized,\n the function does nothing. If the first character is not from the alphabet,\n an error is thrown.\n\n\n#### Details\n\nThis function performs the following steps:\n \n\n* Checks if the input is a single-character string. \n\n* Verifies if the first character is from the alphabet (A-Z or a-z). \n\n* If the first character is not already capitalized, it capitalizes it. \n\n* Returns the modified string.\n\n\n#### Keyword\n\nalphabet\n\n\n#### Note\n\nThis function is case-sensitive and assumes ASCII characters.\n\n\n#### References\n\nNone\n\n\n#### Seealso\n\nThis function is related to the basic string manipulation functions in base R.\n\n\n#### Value\n\nA modified string with the first character capitalized if it is\n not already. 
If the first character is already capitalized, the original\n string is returned.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Capitalize the first character of a string\ncapitalize_first_char(\"hello\") # Returns \"Hello\"\ncapitalize_first_char(\"World\") # Returns \"World\"\n\n# Error example (non-alphabetic first character)\ncapitalize_first_char(\"123abc\") # Throws an error\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_capitalize_first_char(input_string)\n```\n:::\n\n\n\n### `lfa_check_flag`\n\nCheck if a flag is set, indicating the completion of a specific process.\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`flag_name` | A character string specifying the name of the flag file. It should be a descriptive and unique identifier for the process being checked.\n\n\n#### Description\n\nThis function checks for the existence of a hidden flag file at a specified location within the working directory. If the flag file is found, a message is printed, and the function returns `TRUE` to indicate that the associated processing step has already been completed. 
If the flag file is not found, the function returns `FALSE` , indicating that further processing can proceed.\n\n\n#### Value\n\nA logical value indicating whether the flag is set ( `TRUE` ) or not ( `FALSE` ).\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Check if the flag for a process named \"data_processing\" is set\nlfa_check_flag(\"data_processing\")\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_check_flag(flag_name)\n```\n:::\n\n\n\n### `lfa_combine_sf_obj`\n\nCombine Spatial Feature Objects from Multiple GeoPackage Files\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`paths` | A character vector containing file paths to GeoPackage files with neighbor information.\n`area_infos` | A data frame or list containing information about the corresponding detection areas, including \"area\" and \"specie\" columns.\n\n\n#### Description\n\nThis function reads spatial feature objects (sf) from multiple GeoPackage files and combines them into a single sf object.\n Each GeoPackage file is assumed to contain neighbor information for a specific detection area, and the resulting sf object\n includes additional columns indicating the corresponding area and species information.\n\n\n#### Value\n\nA combined sf object with additional columns for area and specie information.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Assuming paths and area_infos are defined\ncombined_sf <- lfa_combine_sf_obj(paths, area_infos)\n\n# Print the combined sf object\nprint(combined_sf)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_combine_sf_obj(paths, area_infos)\n```\n:::\n\n\n\n### `lfa_count_returns_all_areas`\n\nCount tree returns for all species and areas, returning a consolidated data frame.\n\n\n#### Description\n\nThis function iterates through all species and areas obtained from the function\n [`lfa_get_all_areas`](#lfagetallareas) . 
For each combination of species and area, it reads\n the corresponding area as a catalog, counts the returns per tree using\n [`lfa_count_returns_per_tree`](#lfacountreturnspertree) , and consolidates the results into a data frame.\n The resulting data frame includes columns for the species, area, and return counts per tree.\n\n\n#### Keyword\n\ncounting\n\n\n#### Seealso\n\n[`lfa_get_all_areas`](#lfagetallareas) , [`lfa_read_area_as_catalog`](#lfareadareaascatalog) ,\n [`lfa_count_returns_per_tree`](#lfacountreturnspertree)\n\n\n#### Value\n\nA data frame with columns for species, area, and return counts per tree.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Count tree returns for all species and areas\nreturns_counts <- lfa_count_returns_all_areas()\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_count_returns_all_areas()\n```\n:::\n\n\n\n### `lfa_count_returns_per_tree`\n\nCount returns per tree for a given lidR catalog.\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`ctg` | A lidR catalog object containing LAS files to be processed.\n\n\n#### Description\n\nThis function takes a lidR catalog as input and counts the returns per tree.\n It uses the lidR package to read LAS files from the catalog and performs the counting\n operation on each tree. 
The result is a data frame containing the counts of returns\n for each unique tree ID within the lidR catalog.\n\n\n#### Keyword\n\ncounting\n\n\n#### Seealso\n\n[`lidR::readLAS`](#lidr::readlas) , [`lidR::is.empty`](#lidr::is.empty) ,\n [`base::table`](#base::table) , [`dplyr::bind_rows`](#dplyr::bindrows)\n\n\n#### Value\n\nA data frame with columns for tree ID and the corresponding count of returns.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Count returns per tree for a lidR catalog\nctg <- lfa_read_area_as_catalog(\"SpeciesA\", \"Area1\")\nreturns_counts_per_tree <- lfa_count_returns_per_tree(ctg)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_count_returns_per_tree(ctg)\n```\n:::\n\n\n\n### `lfa_create_boxplot`\n\nCreate a box plot from a data frame\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`data` | A data frame containing the data.\n`value_column` | The name of the column containing the values for the box plot.\n`category_column1` | The name of the column containing the first categorical variable.\n`category_column2` | The name of the column containing the second categorical variable.\n`title` | An optional title for the plot. If not provided, a default title is generated based on the data frame name.\n\n\n#### Description\n\nThis function generates a box plot using ggplot2 based on the specified data frame and columns.\n\n\n#### Details\n\nThe function creates a box plot where the x-axis is based on the second categorical variable,\n the y-axis is based on the specified value column, and the box plots are colored based on the first\n categorical variable. 
The grouping of box plots is done based on the unique values in the second categorical variable.\n\n\n#### Value\n\nA ggplot object representing the box plot.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Assuming you have a data frame 'your_data' with columns 'value', 'category1', and 'category2'\ncreate_boxplot(your_data, \"value\", \"category1\", \"category2\")\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_create_boxplot(\n data,\n value_column,\n category_column1,\n category_column2,\n title = NULL\n)\n```\n:::\n\n\n\n### `lfa_create_density_plots`\n\nCreate density plots for groups in a data frame\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`data` | A data frame containing the data.\n`value_column` | The name of the column containing the values for the density plot.\n`category_column1` | The name of the column containing the categorical variable for grouping.\n`category_column2` | The name of the column containing the categorical variable for arranging plots.\n`title` | An optional title for the plot. If not provided, a default title is generated based on the data frame name.\n`xlims` | Optional limits for the x-axis. Should be a numeric vector with two elements (lower and upper bounds).\n`ylims` | Optional limits for the y-axis. Should be a numeric vector with two elements (lower and upper bounds).\n\n\n#### Description\n\nThis function generates density plots using ggplot2 based on the specified data frame and columns.\n\n\n#### Details\n\nThe function creates density plots where the x-axis is based on the specified value column,\n and the density plots are colored based on the first categorical variable. The arrangement of plots\n is done based on the unique values in the second categorical variable. 
The plots are arranged in a 2x2 grid.\n\n\n#### Value\n\nA ggplot object representing the density plots arranged in a 2x2 grid.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Assuming you have a data frame 'your_data' with columns 'value', 'category1', and 'category2'\ncreate_density_plots(your_data, \"value\", \"category1\", \"category2\", title = \"Density Plots\", xlims = c(0, 10), ylims = c(0, 0.5))\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_create_density_plots(\n data,\n value_column,\n category_column1 = \"area\",\n category_column2 = \"specie\",\n title = NULL,\n xlims = NULL,\n ylims = NULL\n)\n```\n:::\n\n\n\n### `lfa_create_stacked_distributions_plot`\n\nCreate a stacked distribution plot for tree detections, visualizing the distribution\n of a specified variable on the x-axis, differentiated by another variable.\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`trees` | A data frame containing tree detection data.\n`x_value` | A character string specifying the column name used for finding the values on the x-axis of the histogram.\n`fill_value` | A character string specifying the column name by which the data are differentiated in the plot.\n`bin` | An integer specifying the number of bins for the histogram. Default is 100.\n`ylab` | A character string specifying the y-axis label. Default is \"Amount trees.\"\n`xlim` | A numeric vector of length 2 specifying the x-axis limits. Default is c(0, 100).\n`ylim` | A numeric vector of length 2 specifying the y-axis limits. Default is c(0, 1000).\n`title` | The title of the plot.\n\n\n#### Description\n\nThis function generates a stacked distribution plot using the ggplot2 package,\n providing a visual representation of the distribution of a specified variable\n ( `x_value` ) on the x-axis, with differentiation based on another variable\n ( `fill_value` ). 
The data for the plot are derived from the provided `trees` \n data frame.\n\n\n#### Keyword\n\ndata\n\n\n#### Seealso\n\n[`ggplot2::geom_histogram`](#ggplot2::geomhistogram) , [`ggplot2::facet_wrap`](#ggplot2::facetwrap) ,\n [`ggplot2::ylab`](#ggplot2::ylab) , [`ggplot2::scale_fill_brewer`](#ggplot2::scalefillbrewer) ,\n [`ggplot2::coord_cartesian`](#ggplot2::coordcartesian)\n\n\n#### Value\n\nA ggplot object representing the stacked distribution plot.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Create a stacked distribution plot for variable \"Z,\" differentiated by \"area\"\ntrees <- lfa_get_detections()\nlfa_create_stacked_distributions_plot(trees, \"Z\", \"area\")\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_create_stacked_distributions_plot(\n trees,\n x_value,\n fill_value,\n bin = 100,\n ylab = \"Amount trees\",\n xlim = c(0, 100),\n ylim = c(0, 1000),\n title =\n \"Histograms of height distributions between species 'beech', 'oak', 'pine' and 'spruce' divided by the different areas of Interest\"\n)\n```\n:::\n\n\n\n### `lfa_create_stacked_histogram`\n\nCreate a stacked histogram for tree detections, summing up the values for each species.\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`trees` | A data frame containing tree detection data.\n`x_value` | A character string specifying the column name used for finding the values on the x-axis of the histogram.\n`fill_value` | A character string specifying the column name by which the data are differentiated in the plot.\n`bin` | An integer specifying the number of bins for the histogram. Default is 30.\n`ylab` | A character string specifying the y-axis label. Default is \"Frequency.\"\n`xlim` | A numeric vector of length 2 specifying the x-axis limits. Default is c(0, 100).\n`ylim` | A numeric vector of length 2 specifying the y-axis limits. 
Default is NULL.\n\n\n#### Description\n\nThis function generates a stacked histogram using the ggplot2 package,\n summing up the values for each species and visualizing the distribution of\n a specified variable ( `x_value` ) on the x-axis, differentiated by another\n variable ( `fill_value` ). The data for the plot are derived from the provided\n `trees` data frame.\n\n\n#### Keyword\n\ndata\n\n\n#### Seealso\n\n[`ggplot2::geom_histogram`](#ggplot2::geomhistogram) , [`ggplot2::ylab`](#ggplot2::ylab) ,\n [`ggplot2::scale_fill_brewer`](#ggplot2::scalefillbrewer) , [`ggplot2::coord_cartesian`](#ggplot2::coordcartesian)\n\n\n#### Value\n\nA ggplot object representing the stacked histogram.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Create a stacked histogram for variable \"Z,\" differentiated by \"area\"\ntrees <- lfa_get_detections()\nlfa_create_stacked_histogram(trees, \"Z\", \"area\")\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_create_stacked_histogram(\n trees,\n x_value,\n fill_value,\n bin = 30,\n ylab = \"Frequency\",\n xlim = c(0, 100),\n ylim = NULL\n)\n```\n:::\n\n\n\n### `lfa_create_tile_location_objects`\n\nCreate tile location objects\n\n\n#### Author\n\nJakob Danel\n\n\n#### Description\n\nThis function traverses a directory structure to find LAZ files and creates\n tile location objects for each file. The function looks into the `data` \n directory of the repository/working directory. It then creates `tile_location` \n objects based on the folder structure. 
The folder structure should not be\n touched by hand, but created by `lfa_init_data_structure()` which builds the\n structure based on a shape file.\n\n\n#### Seealso\n\n[`tile_location`](#tilelocation)\n\n\n#### Value\n\nA vector containing tile location objects.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_create_tile_location_objects()\n\nlfa_create_tile_location_objects()\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_create_tile_location_objects()\n```\n:::\n\n\n\n### `lfa_detection`\n\nPerform tree detection on a lidar catalog and optionally save the results to a file.\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`catalog` | A lidar catalog containing point cloud data. If set to NULL, the function attempts to read the catalog from the specified tile location.\n`tile_location` | An object specifying the location of the lidar tile. If catalog is NULL, the function attempts to read the catalog from this tile location.\n`write_to_file` | A logical value indicating whether to save the detected tree information to a file. Default is TRUE.\n\n\n#### Description\n\nThis function utilizes lidar data to detect trees within a specified catalog. The detected tree information can be optionally saved to a file in the GeoPackage format. 
The function uses parallel processing to enhance efficiency.\n\n\n#### Value\n\nAn sf-style data frame containing information about the detected trees.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Perform tree detection on a catalog and save the results to a file\nlfa_detection(catalog = my_catalog, tile_location = my_tile_location, write_to_file = TRUE)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_detection(catalog, tile_location, write_to_file = TRUE)\n```\n:::\n\n\n\n### `lfa_download_areas`\n\nDownload areas based on spatial features\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`sf_areas` | Spatial features representing areas to be downloaded. It must include columns such as \"species\" and \"name\". See details for more information.\n\n\n#### Author\n\nJakob Danel\n\n\n#### Description\n\nThis function initiates the data structure and downloads areas based on spatial features.\n\n\n#### Details\n\nThe input data frame, `sf_areas` , must have the following columns:\n \n\n* \"species\": The species associated with the area. \n\n* \"name\": The name of the area. 
\n \n The function uses the `lfa_init_data_structure` function to set up the data structure\n and then iterates through the rows of `sf_areas` to download each specified area.\n\n\n#### Value\n\nNone\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_download_areas(sf_areas)\n\n\n# Example spatial features data frame\nsf_areas <- data.frame(\nspecies = c(\"SpeciesA\", \"SpeciesB\"),\nname = c(\"Area1\", \"Area2\"),\n# Must include also other attributes specialized to sf objects\n# such as geometry, for processing of the download\n)\n\nlfa_download_areas(sf_areas)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_download_areas(sf_areas)\n```\n:::\n\n\n\n### `lfa_download`\n\nDownload an las file from the state NRW from a specific location\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`species` | The species of the tree which is observed at this location\n`name` | The name of the area that is observed\n`location` | An sf object, which holds the location information for the area where the tile should be downloaded from.\n\n\n#### Description\n\nIt will download the file and save it to `data/<species>/<name>/` with the name of the tile\n\n\n#### Value\n\nThe LASCatalog object of the downloaded file\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_download(species, name, location)\n```\n:::\n\n\n\n### `lfa_find_n_nearest_trees`\n\nFind n Nearest Trees\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`trees` | An sf object containing tree coordinates.\n`n` | The number of nearest trees to find for each tree (default is 100).\n\n\n#### Description\n\nThis function calculates the distances to the n nearest trees for each tree in the input dataset.\n\n\n#### Value\n\nA data frame with additional columns representing the distances to the n nearest trees.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Load 
tree data using lfa_get_detections() (not provided)\ntree_data <- lfa_get_detections()\n\n# Filter tree data for a specific species and area\ntree_data = tree_data[tree_data$specie == \"pine\" & tree_data$area == \"greffen\", ]\n\n# Find the 100 nearest trees for each tree in the filtered dataset\ntree_data <- lfa_find_n_nearest_trees(tree_data)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_find_n_nearest_trees(trees, n = 100)\n```\n:::\n\n\n\n### `lfa_generate_result_table_tests`\n\nGenerate Result Table for Tests\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`table` | A data frame representing the result table.\n\n\n#### Description\n\nThis function generates a result table for tests using the knitr::kable function.\n\n\n#### Details\n\nThis function uses the knitr::kable function to create a formatted table, making it suitable for HTML output.\n The input table is expected to be a data frame with test results, and the resulting table will have capitalized\n row and column names with lines between columns and rows.\n\n\n#### Value\n\nA formatted table suitable for HTML output with lines between columns and rows.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Generate a result table for tests\nresult_table <- data.frame(\nTest1 = c(0.05, 0.10, 0.03),\nTest2 = c(0.02, 0.08, 0.01),\nTest3 = c(0.08, 0.12, 0.05)\n)\nformatted_table <- lfa_generate_result_table_tests(result_table)\nprint(formatted_table)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_generate_result_table_tests(table, caption = \"Table Caption\")\n```\n:::\n\n\n\n### `lfa_get_all_areas`\n\nRetrieve a data frame containing all species and corresponding areas.\n\n\n#### Description\n\nThis function scans the \"data\" directory within the current working directory to\n obtain a list of species. It then iterates through each species to retrieve the list\n of areas associated with that species. 
The resulting data frame contains two columns:\n \"specie\" representing the species and \"area\" representing the corresponding area.\n\n\n#### Keyword\n\ndata\n\n\n#### Seealso\n\n[`list.dirs`](#list.dirs)\n\n\n#### Value\n\nA data frame with columns \"specie\" and \"area\" containing information about\n all species and their associated areas.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Retrieve a data frame with information about all species and areas\nall_areas_df <- lfa_get_all_areas()\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_get_all_areas()\n```\n:::\n\n\n\n### `lfa_get_detection_area`\n\nGet Detection for an area\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`species` | A character string specifying the target species.\n`name` | A character string specifying the name of the tile.\n\n\n#### Description\n\nRetrieves the tree detection information for a specified species and tile.\n\n\n#### Details\n\nThis function reads tree detection data from geopackage files within the specified tile location for a given species. It then combines the data into a single SF data frame and returns it. 
The function assumes that the tree detection files follow a naming convention with the pattern \"_detection.gpkg\".\n\n\n#### Keyword\n\nspatial\n\n\n#### References\n\nThis function is part of the LiDAR Forest Analysis (LFA) package.\n\n\n#### Seealso\n\n[`get_tile_dir`](#gettiledir)\n\n\n#### Value\n\nA Simple Features (SF) data frame containing tree detection information for the specified species and tile.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Retrieve tree detection data for species \"example_species\" in tile \"example_tile\"\ntrees_data <- lfa_get_detection_tile_location(\"example_species\", \"example_tile\")\n\n# Example usage:\ntrees_data <- lfa_get_detection_tile_location(\"example_species\", \"example_tile\")\n\n# No trees found scenario:\nempty_data <- lfa_get_detection_tile_location(\"nonexistent_species\", \"nonexistent_tile\")\n# The result will be an empty data frame if no trees are found for the specified species and tile.\n\n# Error handling:\n# In case of invalid inputs, the function may throw errors. 
Ensure correct species and tile names are provided.\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_get_detection_area(species, name)\n```\n:::\n\n\n\n### `lfa_get_detections_species`\n\nRetrieve detections for a specific species.\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`species` | A character string specifying the target species.\n\n\n#### Description\n\nThis function retrieves detection data for a given species from multiple areas.\n\n\n#### Details\n\nThe function looks for detection data in the \"data\" directory for the specified species.\n It then iterates through each subdirectory (representing different areas) and consolidates the\n detection data into a single data frame.\n\n\n#### Value\n\nA data frame containing detection information for the specified species in different areas.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Example usage:\ndetections_data <- lfa_get_detections_species(\"example_species\")\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_get_detections_species(species)\n```\n:::\n\n\n\n### `lfa_get_detections`\n\nRetrieve aggregated detection data for multiple species.\n\n\n#### Concept\n\ndata retrieval functions\n\n\n#### Description\n\nThis function obtains aggregated detection data for multiple species by iterating\n through the list of species obtained from [`lfa_get_species`](#lfagetspecies) . 
For each\n species, it calls [`lfa_get_detections_species`](#lfagetdetectionsspecies) to retrieve the\n corresponding detection data and aggregates the results into a single data frame.\n The resulting data frame includes columns for the species, tree detection data,\n and the area in which the detections occurred.\n\n\n#### Keyword\n\naggregation\n\n\n#### Seealso\n\n[`lfa_get_species`](#lfagetspecies) , [`lfa_get_detections_species`](#lfagetdetectionsspecies) \n \n Other data retrieval functions:\n [`lfa_get_species`](#lfagetspecies)\n\n\n#### Value\n\nA data frame containing aggregated detection data for multiple species.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_get_detections()\n\n# Retrieve aggregated detection data for multiple species\ndetections_data <- lfa_get_detections()\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_get_detections()\n```\n:::\n\n\n\n### `lfa_get_flag_path`\n\nGet the path to a flag file indicating the completion of a specific process.\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`flag_name` | A character string specifying the name of the flag file. It should be a descriptive and unique identifier for the process being flagged.\n\n\n#### Description\n\nThis function constructs and returns the path to a hidden flag file, which serves as an indicator that a particular processing step has been completed. 
The flag file is created in a designated location within the working directory.\n\n\n#### Value\n\nA character string representing the absolute path to the hidden flag file.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Get the flag path for a process named \"data_processing\"\nlfa_get_flag_path(\"data_processing\")\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_get_flag_path(flag_name)\n```\n:::\n\n\n\n### `lfa_get_neighbor_paths`\n\nGet Paths to Neighbor GeoPackage Files\n\n\n#### Description\n\nThis function retrieves the file paths to GeoPackage files containing neighbor information for each detection area.\n The GeoPackage files are assumed to be named \"neighbours.gpkg\" and organized in a directory structure under the \"data\" folder.\n\n\n#### Value\n\nA character vector containing file paths to GeoPackage files for each detection area's neighbors.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Get paths to neighbor GeoPackage files for all areas\npaths <- lfa_get_neighbor_paths()\n\n# Print the obtained file paths\nprint(paths)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_get_neighbor_paths()\n```\n:::\n\n\n\n### `lfa_get_species`\n\nGet a list of species from the data directory.\n\n\n#### Concept\n\ndata retrieval functions\n\n\n#### Description\n\nThis function retrieves a list of species by scanning the \"data\" directory\n located in the current working directory.\n\n\n#### Keyword\n\ndata\n\n\n#### References\n\nThis function relies on the [`list.dirs`](#list.dirs) function for directory listing.\n\n\n#### Seealso\n\n[`list.dirs`](#list.dirs) \n \n Other data retrieval functions:\n [`lfa_get_detections`](#lfagetdetections)\n\n\n#### Value\n\nA character vector containing the names of species found in the \"data\" directory.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Retrieve the list of species\nspecies_list <- lfa_get_species()\n```\n:::\n\n\n#### Usage\n\n::: 
{.cell}\n\n```{.r .cell-code}\nlfa_get_species()\n```\n:::\n\n\n\n### `lfa_ground_correction`\n\nCorrect the point clouds for correct ground imagery\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`ctg` | An LASCatalog object. If not NULL, the actions are performed on this object; if NULL, the catalog is inferred from the tile_location\n`tile_location` | A tile_location type object holding the information about the location of the catalog. This is used to save the catalog after processing too.\n\n\n#### Author\n\nJakob Danel\n\n\n#### Description\n\nThis function is needed to correct the Z value of the point cloud, relative to the real\n ground height. After applying this function to your catalog, the Z values can be seen as the\n real elevation above the ground. At the moment the function uses the `tin()` function from\n the `lidR` package. NOTE : The operation is in place and cannot be reverted; the old values\n of the point cloud will be deleted!\n\n\n#### Value\n\nA catalog with the corrected z values. The catalog is always stored at tile_location and\n holds only the transformed values.\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_ground_correction(ctg, tile_location)\n```\n:::\n\n\n\n### `lfa_init_data_structure`\n\nInitialize data structure for species and areas\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`sf_species` | A data frame with information about species and associated areas.\n\n\n#### Description\n\nThis function initializes the data structure for storing species and associated areas.\n\n\n#### Details\n\nThe input data frame, `sf_species` , should have at least the following columns:\n \n\n* \"species\": The names of the species for which the data structure needs to be initialized. \n\n* \"name\": The names of the associated areas. \n \n The function creates directories based on the species and area information provided in\n the `sf_species` data frame. 
It checks whether the directories already exist and creates\n them if they don't.\n\n\n#### Value\n\nNone\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Example species data frame\nsf_species <- data.frame(\nspecies = c(\"SpeciesA\", \"SpeciesB\"),\nname = c(\"Area1\", \"Area2\"),\n# Other necessary columns\n)\n\nlfa_init_data_structure(sf_species)\n\n# Example species data frame\nsf_species <- data.frame(\nspecies = c(\"SpeciesA\", \"SpeciesB\"),\nname = c(\"Area1\", \"Area2\"),\n# Other necessary columns\n)\n\nlfa_init_data_structure(sf_species)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_init_data_structure(sf_species)\n```\n:::\n\n\n\n### `lfa_init`\n\nInitialize LFA (LiDAR forest analysis) data processing\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`sf_file` | A character string specifying the path to the shapefile containing spatial features of research areas.\n\n\n#### Description\n\nThis function initializes the LFA data processing by reading a shapefile containing\n spatial features of research areas, downloading the specified areas, and creating\n tile location objects for each area.\n\n\n#### Details\n\nThis function reads a shapefile ( `sf_file` ) using the `sf` package, which should\n contain information about research areas. It then calls the `lfa_download_areas` \n function to download the specified areas and `lfa_create_tile_location_objects` \n to create tile location objects based on Lidar data files in those areas. The\n shapefile MUST follow the following requirements:\n \n\n* Each geometry must be a single object of type polygon \n\n* Each entry must have the following attributes: \n\n* species: A string describing the tree species of the area. 
\n\n* name: A string describing the location of the area.\n\n\n#### Value\n\nA vector containing tile location objects.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Initialize LFA processing with the default shapefile\nlfa_init()\n\n# Initialize LFA processing with a custom shapefile\nlfa_init(\"custom_areas.shp\")\n\n# Example usage with the default shapefile\nlfa_init()\n\n# Example usage with a custom shapefile\nlfa_init(\"custom_areas.shp\")\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_init(sf_file = \"research_areas.shp\")\n```\n:::\n\n\n\n### `lfa_intersect_areas`\n\nIntersect Lidar Catalog with Spatial Features\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`ctg` | A LAScatalog object representing the Lidar data to be processed.\n`tile_location` | A tile location object representing the specific area of interest.\n`areas_sf` | Spatial features defining areas.\n\n\n#### Description\n\nThis function intersects a Lidar catalog with a specific area defined by spatial features.\n\n\n#### Details\n\nThe function intersects the Lidar catalog specified by `ctg` with a specific area defined by\n the `tile_location` object and `areas_sf` . It removes points outside the specified area and\n returns a modified LAScatalog object.\n \n The specified area is identified based on the `species` and `name` attributes in the\n `tile_location` object. If a matching area is not found in `areas_sf` , the function\n stops with an error.\n \n The function then transforms the spatial reference of the identified area to match that of\n the Lidar catalog using `sf::st_transform` .\n \n The processing is applied to each chunk in the catalog using the `identify_area` function,\n which merges spatial information and filters out points that are not classified as inside\n the identified area. 
After processing, the function writes the modified LAS files back to\n the original file locations, removing points outside the specified area.\n \n If an error occurs during the processing of a chunk, a warning is issued, and the function\n continues processing the next chunks. If no points are found after filtering, a warning is\n issued, and NULL is returned.\n\n\n#### Seealso\n\nOther functions in the Lidar forest analysis (LFA) package.\n\n\n#### Value\n\nA modified LAScatalog object with points outside the specified area removed.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Example usage\nlfa_intersect_areas(ctg, tile_location, areas_sf)\n\n# Example usage\nlfa_intersect_areas(ctg, tile_location, areas_sf)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_intersect_areas(ctg, tile_location, areas_sf)\n```\n:::\n\n\n\n### `lfa_jsd_from_vec`\n\nCompute Jensen-Shannon Divergence from Vectors\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`x` | A numeric vector.\n`y` | A numeric vector.\n\n\n#### Description\n\nThis function calculates the Jensen-Shannon Divergence (JSD) between two vectors.\n\n\n#### Value\n\nJensen-Shannon Divergence between the density distributions of x and y.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\nx <- rnorm(100)\ny <- rnorm(100, mean = 2)\nlfa_jsd_from_vec(x, y)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_jsd_from_vec(x, y)\n```\n:::\n\n\n\n### `lfa_jsd`\n\nJensen-Shannon Divergence Calculation\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`p` | A numeric vector representing the probability distribution P.\n`q` | A numeric vector representing the probability distribution Q.\n`epsilon` | A small positive constant added to both P and Q to avoid logarithm of zero. 
Default is 1e-10.\n\n\n#### Description\n\nThis function calculates the Jensen-Shannon Divergence (JSD) between two probability distributions P and Q.\n\n\n#### Details\n\nThe JSD is computed using the Kullback-Leibler Divergence (KLD) as follows:\n `sum((p * log((p + epsilon) / (m + epsilon)) + q * log((q + epsilon) / (m + epsilon))) / 2)` \n where `m = (p + q) / 2` .\n\n\n#### Seealso\n\n[`kld`](#kld) , [`sum`](#sum) , [`log`](#log)\n\n\n#### Value\n\nA numeric value representing the Jensen-Shannon Divergence between P and Q.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Calculate JSD between two probability distributions\np_distribution <- c(0.2, 0.3, 0.5)\nq_distribution <- c(0.1, 0, 0.9)\njsd_result <- lfa_jsd(p_distribution, q_distribution)\nprint(jsd_result)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_jsd(p, q, epsilon = 1e-10)\n```\n:::\n\n\n\n### `lfa_kld_from_vec`\n\nCompute Kullback-Leibler Divergence from Vectors\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`x` | A numeric vector.\n`y` | A numeric vector.\n\n\n#### Description\n\nThis function calculates the Kullback-Leibler Divergence (KLD) between two vectors.\n\n\n#### Value\n\nKullback-Leibler Divergence between the density distributions of x and y.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\nx <- rnorm(100)\ny <- rnorm(100, mean = 2)\nlfa_kld_from_vec(x, y)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_kld_from_vec(x, y)\n```\n:::\n\n\n\n### `lfa_kld`\n\nKullback-Leibler Divergence Calculation\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`p` | A numeric vector representing the probability distribution P.\n`q` | A numeric vector representing the probability distribution Q.\n`epsilon` | A small positive constant added to both P and Q to avoid logarithm of zero. 
Default is 1e-10.\n\n\n#### Description\n\nThis function calculates the Kullback-Leibler Divergence (KLD) between two probability distributions P and Q.\n\n\n#### Details\n\nThe KLD is computed using the formula:\n `sum(p * log((p + epsilon) / (q + epsilon)))` \n This avoids issues when the denominator (Q) contains zero probabilities.\n\n\n#### Seealso\n\n[`sum`](#sum) , [`log`](#log)\n\n\n#### Value\n\nA numeric value representing the Kullback-Leibler Divergence between P and Q.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Calculate KLD between two probability distributions\np_distribution <- c(0.2, 0.3, 0.5)\nq_distribution <- c(0.1, 0, 0.9)\nkld_result <- lfa_kld(p_distribution, q_distribution)\nprint(kld_result)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_kld(p, q, epsilon = 1e-10)\n```\n:::\n\n\n\n### `lfa_ks_test`\n\nKolmogorov-Smirnov Test Wrapper Function\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`x` | A numeric vector representing the first sample.\n`y` | A numeric vector representing the second sample.\n`output_variable` | A character string specifying the output variable to extract from the ks.test result. Default is \"p.value\". Other possible values include \"statistic\" and \"alternative\".\n`...` | Additional arguments to be passed to the ks.test function.\n\n\n#### Description\n\nThis function serves as a wrapper for the Kolmogorov-Smirnov (KS) test between two samples.\n\n\n#### Details\n\nThe function uses the ks.test function to perform a two-sample KS test and returns the specified output variable.\n The default output variable is the p-value. 
Other possible output variables include \"statistic\" and \"alternative\".\n\n\n#### Seealso\n\n[`ks.test`](#ks.test)\n\n\n#### Value\n\nA numeric value representing the specified output variable from the KS test result.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Perform KS test and extract the p-value\nresult <- lfa_ks_test(sample1, sample2)\nprint(result)\n\n# Perform KS test and extract the test statistic\nresult_statistic <- lfa_ks_test(sample1, sample2, output_variable = \"statistic\")\nprint(result_statistic)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_ks_test(x, y, output_variable = \"p.value\", ...)\n```\n:::\n\n\n\n### `lfa_load_ctg_if_not_present`\n\nLoading the catalog if it is not present\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`ctg` | Catalog object. Can be NULL\n`tile_location` | The location to look for the catalog tiles, if they are not present\n\n\n#### Description\n\nThis function checks if the catalog is `NULL` . 
If it is it will load the\n catalog from the `tile_location`\n\n\n#### Value\n\nThe provided ctg object if not null, else the catalog for the tiles\n of the tile_location.\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_load_ctg_if_not_present(ctg, tile_location)\n```\n:::\n\n\n\n### `lfa_map_tile_locations`\n\nMap Function Over Tile Locations\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`tile_locations` | A list of tile location objects.\n`map_function` | The mapping function to be applied to each tile location.\n`...` | Additional arguments to be passed to the mapping function.\n\n\n#### Description\n\nThis function applies a specified mapping function to each tile location in a list.\n\n\n#### Details\n\nThis function iterates over each tile location in the provided list ( `tile_locations` )\n and applies the specified mapping function ( `map_function` ) to each tile location.\n The mapping function should accept a tile location object as its first argument, and\n additional arguments can be passed using the ellipsis ( `...` ) syntax.\n \n This function is useful for performing operations on multiple tile locations concurrently,\n such as loading Lidar data, processing areas, or other tasks that involve tile locations.\n\n\n#### Seealso\n\nThe mapping function provided should be compatible with the structure and requirements\n of the tile locations and the specific task being performed.\n\n\n#### Value\n\nNone\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Example usage\nlfa_map_tile_locations(tile_locations, my_mapping_function, param1 = \"value\")\n\n# Example usage\nlfa_map_tile_locations(tile_locations, my_mapping_function, param1 = \"value\")\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_map_tile_locations(tile_locations, map_function, check_flag = NULL, ...)\n```\n:::\n\n\n\n### `lfa_merge_and_save`\n\nMerge and Save Text Files in a Directory\n\n\n#### Arguments\n\nArgument 
|Description\n------------- |----------------\n`input_directory` | The path to the input directory containing text files.\n`output_name` | The name for the output file where the merged content will be saved.\n\n\n#### Description\n\nThis function takes an input directory and an output name as arguments.\n It merges the textual content of all files in the specified directory into\n a single string, with each file's content separated by a newline character.\n The merged content is then saved into a file named after the output name\n in the same directory. After the merging is complete, all input files are\n deleted.\n\n\n#### Details\n\nThis function reads the content of each text file in the specified input directory\n and concatenates them into a single string. Each file's content is separated by a newline\n character. The merged content is then saved into a file named after the output name\n in the same directory. Finally, all input files are deleted from the directory.\n\n\n#### Seealso\n\n[`readLines`](#readlines) , [`writeLines`](#writelines) , [`file.remove`](#file.remove)\n\n\n#### Value\n\nThis function does not explicitly return any value. 
It prints a message\n indicating the successful completion of the merging and saving process.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Merge text files in the \"data_files\" directory and save the result in \"merged_output\"\nlfa_merge_and_save(\"data_files\", \"merged_output\")\n\n# Merge text files in the \"data_files\" directory and save the result in \"merged_output\"\nlfa_merge_and_save(\"data_files\", \"merged_output\")\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_merge_and_save(input_directory, output_name)\n```\n:::\n\n\n\n### `lfa_random_forest`\n\nRandom Forest Classifier with Leave-One-Out Cross-Validation\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`tree_data` | A data frame containing the tree data, including the response variable (\"specie\") and predictor variables.\n`excluded_input_columns` | A character vector specifying columns to be excluded from predictor variables.\n`response_variable` | The response variable to be predicted (default is \"specie\").\n`seed` | An integer to set the seed for reproducibility (default is 123).\n`...` | Additional parameters to be passed to the randomForest function.\n\n\n#### Description\n\nThis function performs a random forest classification using leave-one-out cross-validation for each area in the input tree data.\n It returns a list containing various results, including predicted species, confusion matrix, accuracy, and the formula used for modeling.\n\n\n#### Value\n\nA list containing the following elements:\n \n\n* `predicted_species_absolute` : A data frame with observed and predicted species for each area. \n\n* `predicted_species_relative` : A data frame with the relative predictions per species and areas, normalized by the total predictions in each area. \n\n* `confusion_matrix` : A confusion matrix showing the counts of predicted vs. observed species. 
\n\n* `accuracy` : The accuracy of the model, calculated as the sum of diagonal elements in the confusion matrix divided by the total count. \n\n* `formula` : The formula used for modeling.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Assuming tree_data is defined\nresults <- lfa_random_forest(tree_data, excluded_input_columns = c(\"column1\", \"column2\"))\n\n# Print the list of results\nprint(results)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_random_forest(\n tree_data,\n excluded_input_columns,\n response_variable = \"specie\",\n ntree = 100,\n seed = 123,\n ...\n)\n```\n:::\n\n\n\n### `lfa_rd_to_qmd`\n\nConvert Rd File to Markdown\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`rdfile` | The path to the Rd file or a parsed Rd object.\n`outfile` | The path to the output Markdown file (including the file extension).\n`append` | Logical, indicating whether to append to an existing file (default is FALSE).\n\n\n#### Description\n\nIMPORTANT NOTE: \n This function is nearly identical to the `Rd2md::Rd2markdown` function from the `Rd2md` \n package. We needed to implement our own version of it for various reasons:\n \n\n* The algorithm uses hardcoded header sizes (h1 and h2 in original) which is not feasible for our use-case of the markdown. \n\n* We needed to add some Quarto Markdown specifics, e.g. to make sure that the examples will not be run. \n\n* We want to exclude certain tags from our implementation.\n\n\n#### Details\n\nFor that reason we copied the method and made changes as needed and also added this custom documentation.\n \n This function converts an Rd (R documentation) file to Markdown format (.md) and\n saves the converted file at the specified location. The function allows appending\n to an existing file or creating a new one. 
The resulting Markdown file includes\n sections for the function's name, title, and additional content such as examples,\n usage, arguments, and other sections present in the Rd file.\n \n The function performs the following steps:\n \n\n* Parses the Rd file using the Rd2md package. \n\n* Creates a Markdown file with sections for the function's name, title, and additional content. \n\n* Appends the content to an existing file if `append` is set to TRUE. \n\n* Saves the resulting Markdown file at the specified location.\n\n\n#### Seealso\n\n[`Rd2md::parseRd`](#rd2md::parserd)\n\n\n#### Value\n\nThis function does not explicitly return any value. It saves the converted Markdown file\n at the specified location as described in the details section.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Convert Rd file to Markdown and save it\nlfa_rd_to_qmd(\"path/to/your/file.Rd\", \"path/to/your/output/file.md\")\n\n# Convert Rd file to Markdown and append to an existing file\nlfa_rd_to_qmd(\"path/to/your/file.Rd\", \"path/to/existing/output/file.md\", append = TRUE)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_rd_to_qmd(rdfile, outfile, append = FALSE)\n```\n:::\n\n\n\n### `lfa_rd_to_results`\n\nConvert Rd Files to Markdown and Merge Results\n\n\n#### Description\n\nThis function converts all Rd (R documentation) files in the \"man\" directory\n to Markdown format (.qmd) and saves the converted files in the \"results/appendix/package-docs\" directory.\n It then merges the converted Markdown files into a single string and saves\n the merged content into a file named \"docs.qmd\" in the \"results/appendix/package-docs\" directory.\n\n\n#### Details\n\nThe function performs the following steps:\n \n\n* Removes any existing \"docs.qmd\" file in the \"results/appendix/package-docs\" directory. \n\n* Finds all Rd files in the \"man\" directory. \n\n* Converts each Rd file to Markdown format (.qmd) using the `lfa_rd_to_qmd` function. 
\n\n* Saves the converted Markdown files in the \"results/appendix/package-docs\" directory. \n\n* Merges the content of all converted Markdown files into a single string. \n\n* Saves the merged content into a file named \"docs.qmd\" in the \"results/appendix/package-docs\" directory.\n\n\n#### Seealso\n\n[`lfa_rd_to_qmd`](#lfardtoqmd) , [`lfa_merge_and_save`](#lfamergeandsave)\n\n\n#### Value\n\nThis function does not explicitly return any value. It performs the conversion,\n merging, and saving operations as described in the details section.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Convert Rd files to Markdown and merge the results\nlfa_rd_to_results()\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_rd_to_results()\n```\n:::\n\n\n\n### `lfa_read_area_as_catalog`\n\nRead LiDAR data from a specified species and location as a catalog.\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`specie` | A character string specifying the species of interest.\n`location_name` | A character string specifying the name of the location.\n\n\n#### Description\n\nThis function constructs the file path based on the specified `specie` and `location_name` ,\n lists the directories at that path, and reads the LiDAR data into a `lidR::LAScatalog` .\n\n\n#### Value\n\nA `lidR::LAScatalog` object containing the LiDAR data from the specified location and species.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_read_area_as_catalog(\"beech\", \"location1\")\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_read_area_as_catalog(specie, location_name)\n```\n:::\n\n\n\n### `lfa_run_test_asymmetric`\n\nAsymmetric Pairwise Test for Categories\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`data` | A data frame containing the relevant columns.\n`data_column` | A character string specifying the column containing the numerical data.\n`category_column` | A character string 
specifying the column containing the categorical variable.\n`test_function` | A function used to perform the pairwise test between two sets of data. It should accept two vectors of numeric data and additional parameters specified by `...` . The function should return a numeric value representing the test result.\n`...` | Additional parameters to be passed to the `test_function` .\n\n\n#### Description\n\nThis function performs an asymmetric pairwise test for categories using a user-defined `test_function` .\n\n\n#### Details\n\nThe function calculates the test results for each unique combination of categories using the specified\n `test_function` . The resulting table is asymmetric, containing the test results for comparisons\n from the rows to the columns.\n\n\n#### Seealso\n\n[`outer`](#outer) , [`Vectorize`](#vectorize)\n\n\n#### Value\n\nA data frame representing the results of the asymmetric pairwise tests between categories.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Define a custom test function\ncustom_test_function <- function(x, y) {\n# Your test logic here\n# Return a numeric result\nreturn(mean(x) - mean(y))\n}\n\n# Perform an asymmetric pairwise test\nresult <- lfa_run_test_asymmetric(your_data, \"numeric_column\", \"category_column\", custom_test_function)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_run_test_asymmetric(data, data_column, category_column, test_function, ...)\n```\n:::\n\n\n\n### `lfa_run_test_symmetric`\n\nSymmetric Pairwise Test for Categories\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`data` | A data frame containing the relevant columns.\n`data_column` | A character string specifying the column containing the numerical data.\n`category_column` | A character string specifying the column containing the categorical variable.\n`test_function` | A function used to perform the pairwise test between two sets of data. 
It should accept two vectors of numeric data and additional parameters specified by `...` . The function should return a numeric value representing the test result.\n`...` | Additional parameters to be passed to the `test_function` .\n\n\n#### Description\n\nThis function performs a symmetric pairwise test for categories using a user-defined `test_function` .\n\n\n#### Details\n\nThe function calculates the test results for each unique combination of categories using the specified\n `test_function` . The resulting table is symmetric, containing the test results for comparisons\n from the rows to the columns. The upper triangle of the matrix is filled with `NA` to avoid duplicate results.\n\n\n#### Seealso\n\n[`outer`](#outer) , [`Vectorize`](#vectorize)\n\n\n#### Value\n\nA data frame representing the results of the symmetric pairwise tests between categories.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Define a custom test function\ncustom_test_function <- function(x, y) {\n# Your test logic here\n# Return a numeric result\nreturn(mean(x) - mean(y))\n}\n\n# Perform a symmetric pairwise test\nresult <- lfa_run_test_symmetric(your_data, \"numeric_column\", \"category_column\", custom_test_function)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_run_test_symmetric(data, data_column, category_column, test_function, ...)\n```\n:::\n\n\n\n### `lfa_save_all_neighbours`\n\nSave Neighbors for All Areas\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`n` | The number of nearest trees to find for each tree (default is 100).\n\n\n#### Description\n\nThis function iterates through all detection areas, finds the n nearest trees for each tree,\n and saves the result to a GeoPackage file for each area.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Save neighbors for all areas with default value (n=100)\nlfa_save_all_neighbours()\n\n# Save neighbors for all areas with a specific value of n (e.g., 
n=50)\nlfa_save_all_neighbours(n = 50)\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_save_all_neighbours(n = 100)\n```\n:::\n\n\n\n### `lfa_segmentation`\n\nSegment the elements of a point cloud by trees\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`ctg` | An LASCatalog object. If not null, it will perform the actions on this object, if NULL inferring the catalog from the tile_location\n`tile_location` | A tile_location type object holding the information about the location of the catalog. This is used to save the catalog after processing too.\n\n\n#### Author\n\nJakob Danel\n\n\n#### Description\n\nThis function will try to divide the whole point cloud into unique trees.\n To do so, it assigns, for each chunk of the catalog, a `treeID` to each\n point. The algorithm uses the `li2012` implementation with the\n following parameters: `li2012(dt1 = 2, dt2 = 3, R = 2, Zu = 10, hmin = 5, speed_up = 12)` \n NOTE : The operation is in place and cannot be reverted, the old values\n of the point cloud will be deleted!\n\n\n#### Value\n\nA catalog where each chunk has additional `treeID` values indicating the tree each point belongs to.\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_segmentation(ctg, tile_location)\n```\n:::\n\n\n\n### `lfa_set_flag`\n\nSet a flag to indicate the completion of a specific process.\n\n\n#### Arguments\n\nArgument |Description\n------------- |----------------\n`flag_name` | A character string specifying the name of the flag file. It should be a descriptive and unique identifier for the process being flagged.\n\n\n#### Description\n\nThis function creates a hidden flag file at a specified location within the working directory to indicate that a particular processing step has been completed. 
If the flag file already exists, a warning is issued.\n\n\n#### Value\n\nThis function does not have a formal return value.\n\n\n#### Examples\n\n::: {.cell}\n\n```{.r .cell-code}\n# Set the flag for a process named \"data_processing\"\nlfa_set_flag(\"data_processing\")\n```\n:::\n\n\n#### Usage\n\n::: {.cell}\n\n```{.r .cell-code}\nlfa_set_flag(flag_name)\n```\n:::\n\n\n\n", "supporting": [ "report_files/figure-html" ], diff --git a/results/_freeze/report/figure-html/fig-boxplot-1-nearest-1.png b/results/_freeze/report/figure-html/fig-boxplot-1-nearest-1.png new file mode 100644 index 0000000..2d5e1a0 Binary files /dev/null and b/results/_freeze/report/figure-html/fig-boxplot-1-nearest-1.png differ diff --git a/results/_freeze/report/figure-html/fig-boxplot-100-nearest-1.png b/results/_freeze/report/figure-html/fig-boxplot-100-nearest-1.png new file mode 100644 index 0000000..cb73738 Binary files /dev/null and b/results/_freeze/report/figure-html/fig-boxplot-100-nearest-1.png differ diff --git a/results/_freeze/report/figure-html/fig-boxplot-avg-nearest-1.png b/results/_freeze/report/figure-html/fig-boxplot-avg-nearest-1.png new file mode 100644 index 0000000..9cae412 Binary files /dev/null and b/results/_freeze/report/figure-html/fig-boxplot-avg-nearest-1.png differ diff --git a/results/_freeze/report/figure-html/fig-density-1-nearest-1.png b/results/_freeze/report/figure-html/fig-density-1-nearest-1.png new file mode 100644 index 0000000..aebbd8d Binary files /dev/null and b/results/_freeze/report/figure-html/fig-density-1-nearest-1.png differ diff --git a/results/_freeze/report/figure-html/fig-density-100-nearest-1.png b/results/_freeze/report/figure-html/fig-density-100-nearest-1.png new file mode 100644 index 0000000..01cd81c Binary files /dev/null and b/results/_freeze/report/figure-html/fig-density-100-nearest-1.png differ diff --git a/results/_freeze/report/figure-html/fig-density-avg-nearest-1.png 
b/results/_freeze/report/figure-html/fig-density-avg-nearest-1.png new file mode 100644 index 0000000..28aa903 Binary files /dev/null and b/results/_freeze/report/figure-html/fig-density-avg-nearest-1.png differ diff --git a/results/_freeze/report/figure-html/fig-n-nearest-overview-1.png b/results/_freeze/report/figure-html/fig-n-nearest-overview-1.png new file mode 100644 index 0000000..b2e21f9 Binary files /dev/null and b/results/_freeze/report/figure-html/fig-n-nearest-overview-1.png differ diff --git a/results/_freeze/report/figure-html/unnamed-chunk-9-1.png b/results/_freeze/report/figure-html/unnamed-chunk-9-1.png index cdc2f1a..b2e21f9 100644 Binary files a/results/_freeze/report/figure-html/unnamed-chunk-9-1.png and b/results/_freeze/report/figure-html/unnamed-chunk-9-1.png differ diff --git a/results/appendix/build_quantitativ_results.py b/results/appendix/build_quantitativ_results.py index ae6aaa8..8c772c1 100644 --- a/results/appendix/build_quantitativ_results.py +++ b/results/appendix/build_quantitativ_results.py @@ -127,6 +127,14 @@ def main(): content = build_quantitativ_results("z-values", "Distribution of Z-Values", '"Z"', header_size=3) write_file("z_values.qmd", content) + preprocessing_nearest ="data <- lfa::lfa_combine_sf_obj(lfa::lfa_get_neighbor_paths(),lfa::lfa_get_all_areas())" + write_file("nearest_1.qmd", build_quantitativ_results("nearest-neighbor-1", "Distribution of nearest neighbor distances", '"Neighbor_1"', header_size=4, preprocessing=preprocessing_nearest)) + write_file("nearest_100.qmd", build_quantitativ_results("nearest-neighbor-100", "Distribution of distances to 100th nearest neighbor", '"Neighbor_100"', header_size=4, preprocessing=preprocessing_nearest)) + + preprocessing_nearest_avg= preprocessing_nearest+ "\n" +"""names <- paste0("Neighbor_",1:100) +data$avg = rowMeans(dplyr::select(as.data.frame(data),names))""" + write_file("nearest_avg.qmd", build_quantitativ_results("nearest-neighbor-avg", "Distribution of average 
nearest neighbor distances", '"avg"', header_size=4, preprocessing=preprocessing_nearest_avg)) + preprocessing_number_of_returns = """data <- sf::st_read("data/tree_properties.gpkg") neighbors <- lfa::lfa_get_neighbor_paths() |> lfa::lfa_combine_sf_obj(lfa::lfa_get_all_areas()) diff --git a/results/appendix/nearest_1.qmd b/results/appendix/nearest_1.qmd new file mode 100644 index 0000000..da52f3b --- /dev/null +++ b/results/appendix/nearest_1.qmd @@ -0,0 +1,215 @@ +#### Distribution of nearest neighbor distances + + +```{r} +#| warning: false +#| code-fold: true +data <- lfa::lfa_combine_sf_obj(lfa::lfa_get_neighbor_paths(),lfa::lfa_get_all_areas()) +value_column <- "Neighbor_1" +``` + + + +##### Kullback-Leibler-Divergence + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-1-kld_specie +#| tbl-cap: "Kullback-Leibler-Divergence between the researched species Beech, Oak, Pine and Spruce for the atrribute nearest-neighbor-1" +kld_results_specie <- lfa::lfa_run_test_asymmetric(data,value_column,"specie",lfa::lfa_kld_from_vec) +lfa::lfa_generate_result_table_tests(kld_results_specie,"Kullback-Leibler-Divergence between species") +``` + + + +```{r} +#| warning: false +colMeans(kld_results_specie, na.rm = TRUE) |> mean() +``` + + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-1-kld-beech +#| tbl-cap: "Kullback-Leibler-Divergence between the researched areas which have the dominante specie beech for the atrribute nearest-neighbor-1" +specie <- data[data$specie=="beech",] +kld_results_beech <- lfa::lfa_run_test_asymmetric(specie,value_column,"area",lfa::lfa_kld_from_vec) +lfa::lfa_generate_result_table_tests(kld_results_beech,"Kullback-Leibler-Divergence between areas with beech") +``` + + + +```{r} +#| warning: false +colMeans(kld_results_beech, na.rm = TRUE) |> mean() +``` + + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-1-kld-oak +#| tbl-cap: 
"Kullback-Leibler-Divergence between the researched areas which have the dominante specie oak for the atrribute nearest-neighbor-1" +specie <- data[data$specie=="oak",] +kld_results_oak <- lfa::lfa_run_test_asymmetric(specie,value_column,"area",lfa::lfa_kld_from_vec) +lfa::lfa_generate_result_table_tests(kld_results_oak,"Kullback-Leibler-Divergence between areas with oak") +``` + + + +```{r} +#| warning: false +colMeans(kld_results_oak, na.rm = TRUE) |> mean() +``` + + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-1-kld-pine +#| tbl-cap: "Kullback-Leibler-Divergence between the researched areas which have the dominante specie pine for the atrribute nearest-neighbor-1" +specie <- data[data$specie=="pine",] +kld_results_pine <- lfa::lfa_run_test_asymmetric(specie,value_column,"area",lfa::lfa_kld_from_vec) +lfa::lfa_generate_result_table_tests(kld_results_pine,"Kullback-Leibler-Divergence between areas with pine") +``` + + + +```{r} +#| warning: false +colMeans(kld_results_pine, na.rm = TRUE) |> mean() +``` + + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-1-kld-spruce +#| tbl-cap: "Kullback-Leibler-Divergence between the researched areas which have the dominante specie spruce for the atrribute nearest-neighbor-1" +specie <- data[data$specie=="spruce",] +kld_results_spruce <- lfa::lfa_run_test_asymmetric(specie,value_column,"area",lfa::lfa_kld_from_vec) +lfa::lfa_generate_result_table_tests(kld_results_spruce,"Kullback-Leibler-Divergence between areas with spruce") +``` + + + +```{r} +#| warning: false +colMeans(kld_results_spruce, na.rm = TRUE) |> mean() +``` + + + + +##### Jensen-Shannon Divergence + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-1-jsd_specie +#| tbl-cap: "Jensen-Shannon Divergence between the researched species Beech, Oak, Pine and Spruce for the atrribute nearest-neighbor-1" +jsd_results_specie <- 
lfa::lfa_run_test_symmetric(data,value_column,"specie",lfa::lfa_jsd_from_vec) +lfa::lfa_generate_result_table_tests(jsd_results_specie,"Jensen-Shannon Divergence between species") +``` + + + +```{r} +#| warning: false +colMeans(jsd_results_specie, na.rm = TRUE) |> mean() +``` + + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-1-jsd-beech +#| tbl-cap: "Jensen-Shannon Divergence between the researched areas which have the dominante specie beech for the atrribute nearest-neighbor-1" +specie <- data[data$specie=="beech",] +jsd_results_beech <- lfa::lfa_run_test_symmetric(specie,value_column,"area",lfa::lfa_jsd_from_vec) +lfa::lfa_generate_result_table_tests(jsd_results_beech,"Jensen-Shannon Divergence between areas with beech") +``` + + + +```{r} +#| warning: false +colMeans(jsd_results_beech, na.rm = TRUE) |> mean() +``` + + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-1-jsd-oak +#| tbl-cap: "Jensen-Shannon Divergence between the researched areas which have the dominante specie oak for the atrribute nearest-neighbor-1" +specie <- data[data$specie=="oak",] +jsd_results_oak <- lfa::lfa_run_test_symmetric(specie,value_column,"area",lfa::lfa_jsd_from_vec) +lfa::lfa_generate_result_table_tests(jsd_results_oak,"Jensen-Shannon Divergence between areas with oak") +``` + + + +```{r} +#| warning: false +colMeans(jsd_results_oak, na.rm = TRUE) |> mean() +``` + + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-1-jsd-pine +#| tbl-cap: "Jensen-Shannon Divergence between the researched areas which have the dominante specie pine for the atrribute nearest-neighbor-1" +specie <- data[data$specie=="pine",] +jsd_results_pine <- lfa::lfa_run_test_symmetric(specie,value_column,"area",lfa::lfa_jsd_from_vec) +lfa::lfa_generate_result_table_tests(jsd_results_pine,"Jensen-Shannon Divergence between areas with pine") +``` + + + +```{r} +#| warning: false +colMeans(jsd_results_pine, na.rm 
= TRUE) |> mean() +``` + + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-1-jsd-spruce +#| tbl-cap: "Jensen-Shannon Divergence between the researched areas which have the dominante specie spruce for the atrribute nearest-neighbor-1" +specie <- data[data$specie=="spruce",] +jsd_results_spruce <- lfa::lfa_run_test_symmetric(specie,value_column,"area",lfa::lfa_jsd_from_vec) +lfa::lfa_generate_result_table_tests(jsd_results_spruce,"Jensen-Shannon Divergence between areas with spruce") +``` + + + +```{r} +#| warning: false +colMeans(jsd_results_spruce, na.rm = TRUE) |> mean() +``` + diff --git a/results/appendix/nearest_100.qmd b/results/appendix/nearest_100.qmd new file mode 100644 index 0000000..e493a99 --- /dev/null +++ b/results/appendix/nearest_100.qmd @@ -0,0 +1,215 @@ +#### Distribution of distances to 100th nearest neighbor + + +```{r} +#| warning: false +#| code-fold: true +data <- lfa::lfa_combine_sf_obj(lfa::lfa_get_neighbor_paths(),lfa::lfa_get_all_areas()) +value_column <- "Neighbor_100" +``` + + + +##### Kullback-Leibler-Divergence + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-100-kld_specie +#| tbl-cap: "Kullback-Leibler-Divergence between the researched species Beech, Oak, Pine and Spruce for the atrribute nearest-neighbor-100" +kld_results_specie <- lfa::lfa_run_test_asymmetric(data,value_column,"specie",lfa::lfa_kld_from_vec) +lfa::lfa_generate_result_table_tests(kld_results_specie,"Kullback-Leibler-Divergence between species") +``` + + + +```{r} +#| warning: false +colMeans(kld_results_specie, na.rm = TRUE) |> mean() +``` + + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-100-kld-beech +#| tbl-cap: "Kullback-Leibler-Divergence between the researched areas which have the dominante specie beech for the atrribute nearest-neighbor-100" +specie <- data[data$specie=="beech",] +kld_results_beech <- 
lfa::lfa_run_test_asymmetric(specie,value_column,"area",lfa::lfa_kld_from_vec) +lfa::lfa_generate_result_table_tests(kld_results_beech,"Kullback-Leibler-Divergence between areas with beech") +``` + + + +```{r} +#| warning: false +colMeans(kld_results_beech, na.rm = TRUE) |> mean() +``` + + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-100-kld-oak +#| tbl-cap: "Kullback-Leibler-Divergence between the researched areas which have the dominante specie oak for the atrribute nearest-neighbor-100" +specie <- data[data$specie=="oak",] +kld_results_oak <- lfa::lfa_run_test_asymmetric(specie,value_column,"area",lfa::lfa_kld_from_vec) +lfa::lfa_generate_result_table_tests(kld_results_oak,"Kullback-Leibler-Divergence between areas with oak") +``` + + + +```{r} +#| warning: false +colMeans(kld_results_oak, na.rm = TRUE) |> mean() +``` + + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-100-kld-pine +#| tbl-cap: "Kullback-Leibler-Divergence between the researched areas which have the dominante specie pine for the atrribute nearest-neighbor-100" +specie <- data[data$specie=="pine",] +kld_results_pine <- lfa::lfa_run_test_asymmetric(specie,value_column,"area",lfa::lfa_kld_from_vec) +lfa::lfa_generate_result_table_tests(kld_results_pine,"Kullback-Leibler-Divergence between areas with pine") +``` + + + +```{r} +#| warning: false +colMeans(kld_results_pine, na.rm = TRUE) |> mean() +``` + + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-100-kld-spruce +#| tbl-cap: "Kullback-Leibler-Divergence between the researched areas which have the dominante specie spruce for the atrribute nearest-neighbor-100" +specie <- data[data$specie=="spruce",] +kld_results_spruce <- lfa::lfa_run_test_asymmetric(specie,value_column,"area",lfa::lfa_kld_from_vec) +lfa::lfa_generate_result_table_tests(kld_results_spruce,"Kullback-Leibler-Divergence between areas with spruce") +``` + + + +```{r} +#| 
warning: false +colMeans(kld_results_spruce, na.rm = TRUE) |> mean() +``` + + + + +##### Jensen-Shannon Divergence + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-100-jsd_specie +#| tbl-cap: "Jensen-Shannon Divergence between the researched species Beech, Oak, Pine and Spruce for the atrribute nearest-neighbor-100" +jsd_results_specie <- lfa::lfa_run_test_symmetric(data,value_column,"specie",lfa::lfa_jsd_from_vec) +lfa::lfa_generate_result_table_tests(jsd_results_specie,"Jensen-Shannon Divergence between species") +``` + + + +```{r} +#| warning: false +colMeans(jsd_results_specie, na.rm = TRUE) |> mean() +``` + + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-100-jsd-beech +#| tbl-cap: "Jensen-Shannon Divergence between the researched areas which have the dominante specie beech for the atrribute nearest-neighbor-100" +specie <- data[data$specie=="beech",] +jsd_results_beech <- lfa::lfa_run_test_symmetric(specie,value_column,"area",lfa::lfa_jsd_from_vec) +lfa::lfa_generate_result_table_tests(jsd_results_beech,"Jensen-Shannon Divergence between areas with beech") +``` + + + +```{r} +#| warning: false +colMeans(jsd_results_beech, na.rm = TRUE) |> mean() +``` + + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-100-jsd-oak +#| tbl-cap: "Jensen-Shannon Divergence between the researched areas which have the dominante specie oak for the atrribute nearest-neighbor-100" +specie <- data[data$specie=="oak",] +jsd_results_oak <- lfa::lfa_run_test_symmetric(specie,value_column,"area",lfa::lfa_jsd_from_vec) +lfa::lfa_generate_result_table_tests(jsd_results_oak,"Jensen-Shannon Divergence between areas with oak") +``` + + + +```{r} +#| warning: false +colMeans(jsd_results_oak, na.rm = TRUE) |> mean() +``` + + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-100-jsd-pine +#| tbl-cap: "Jensen-Shannon Divergence between the researched areas 
which have the dominante specie pine for the atrribute nearest-neighbor-100" +specie <- data[data$specie=="pine",] +jsd_results_pine <- lfa::lfa_run_test_symmetric(specie,value_column,"area",lfa::lfa_jsd_from_vec) +lfa::lfa_generate_result_table_tests(jsd_results_pine,"Jensen-Shannon Divergence between areas with pine") +``` + + + +```{r} +#| warning: false +colMeans(jsd_results_pine, na.rm = TRUE) |> mean() +``` + + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-100-jsd-spruce +#| tbl-cap: "Jensen-Shannon Divergence between the researched areas which have the dominante specie spruce for the atrribute nearest-neighbor-100" +specie <- data[data$specie=="spruce",] +jsd_results_spruce <- lfa::lfa_run_test_symmetric(specie,value_column,"area",lfa::lfa_jsd_from_vec) +lfa::lfa_generate_result_table_tests(jsd_results_spruce,"Jensen-Shannon Divergence between areas with spruce") +``` + + + +```{r} +#| warning: false +colMeans(jsd_results_spruce, na.rm = TRUE) |> mean() +``` + diff --git a/results/appendix/nearest_avg.qmd b/results/appendix/nearest_avg.qmd new file mode 100644 index 0000000..cd9a716 --- /dev/null +++ b/results/appendix/nearest_avg.qmd @@ -0,0 +1,217 @@ +#### Distribution of average nearest neighbor distances + + +```{r} +#| warning: false +#| code-fold: true +data <- lfa::lfa_combine_sf_obj(lfa::lfa_get_neighbor_paths(),lfa::lfa_get_all_areas()) +names <- paste0("Neighbor_",1:100) +data$avg = rowMeans(dplyr::select(as.data.frame(data),names)) +value_column <- "avg" +``` + + + +##### Kullback-Leibler-Divergence + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-avg-kld_specie +#| tbl-cap: "Kullback-Leibler-Divergence between the researched species Beech, Oak, Pine and Spruce for the atrribute nearest-neighbor-avg" +kld_results_specie <- lfa::lfa_run_test_asymmetric(data,value_column,"specie",lfa::lfa_kld_from_vec) 
+lfa::lfa_generate_result_table_tests(kld_results_specie,"Kullback-Leibler-Divergence between species") +``` + + + +```{r} +#| warning: false +colMeans(kld_results_specie, na.rm = TRUE) |> mean() +``` + + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-avg-kld-beech +#| tbl-cap: "Kullback-Leibler-Divergence between the researched areas which have the dominante specie beech for the atrribute nearest-neighbor-avg" +specie <- data[data$specie=="beech",] +kld_results_beech <- lfa::lfa_run_test_asymmetric(specie,value_column,"area",lfa::lfa_kld_from_vec) +lfa::lfa_generate_result_table_tests(kld_results_beech,"Kullback-Leibler-Divergence between areas with beech") +``` + + + +```{r} +#| warning: false +colMeans(kld_results_beech, na.rm = TRUE) |> mean() +``` + + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-avg-kld-oak +#| tbl-cap: "Kullback-Leibler-Divergence between the researched areas which have the dominante specie oak for the atrribute nearest-neighbor-avg" +specie <- data[data$specie=="oak",] +kld_results_oak <- lfa::lfa_run_test_asymmetric(specie,value_column,"area",lfa::lfa_kld_from_vec) +lfa::lfa_generate_result_table_tests(kld_results_oak,"Kullback-Leibler-Divergence between areas with oak") +``` + + + +```{r} +#| warning: false +colMeans(kld_results_oak, na.rm = TRUE) |> mean() +``` + + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-avg-kld-pine +#| tbl-cap: "Kullback-Leibler-Divergence between the researched areas which have the dominante specie pine for the atrribute nearest-neighbor-avg" +specie <- data[data$specie=="pine",] +kld_results_pine <- lfa::lfa_run_test_asymmetric(specie,value_column,"area",lfa::lfa_kld_from_vec) +lfa::lfa_generate_result_table_tests(kld_results_pine,"Kullback-Leibler-Divergence between areas with pine") +``` + + + +```{r} +#| warning: false +colMeans(kld_results_pine, na.rm = TRUE) |> mean() +``` + + + + +```{r} +#| 
warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-avg-kld-spruce +#| tbl-cap: "Kullback-Leibler-Divergence between the researched areas which have the dominante specie spruce for the atrribute nearest-neighbor-avg" +specie <- data[data$specie=="spruce",] +kld_results_spruce <- lfa::lfa_run_test_asymmetric(specie,value_column,"area",lfa::lfa_kld_from_vec) +lfa::lfa_generate_result_table_tests(kld_results_spruce,"Kullback-Leibler-Divergence between areas with spruce") +``` + + + +```{r} +#| warning: false +colMeans(kld_results_spruce, na.rm = TRUE) |> mean() +``` + + + + +##### Jensen-Shannon Divergence + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-avg-jsd_specie +#| tbl-cap: "Jensen-Shannon Divergence between the researched species Beech, Oak, Pine and Spruce for the atrribute nearest-neighbor-avg" +jsd_results_specie <- lfa::lfa_run_test_symmetric(data,value_column,"specie",lfa::lfa_jsd_from_vec) +lfa::lfa_generate_result_table_tests(jsd_results_specie,"Jensen-Shannon Divergence between species") +``` + + + +```{r} +#| warning: false +colMeans(jsd_results_specie, na.rm = TRUE) |> mean() +``` + + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-avg-jsd-beech +#| tbl-cap: "Jensen-Shannon Divergence between the researched areas which have the dominante specie beech for the atrribute nearest-neighbor-avg" +specie <- data[data$specie=="beech",] +jsd_results_beech <- lfa::lfa_run_test_symmetric(specie,value_column,"area",lfa::lfa_jsd_from_vec) +lfa::lfa_generate_result_table_tests(jsd_results_beech,"Jensen-Shannon Divergence between areas with beech") +``` + + + +```{r} +#| warning: false +colMeans(jsd_results_beech, na.rm = TRUE) |> mean() +``` + + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-avg-jsd-oak +#| tbl-cap: "Jensen-Shannon Divergence between the researched areas which have the dominante specie oak for the atrribute 
nearest-neighbor-avg" +specie <- data[data$specie=="oak",] +jsd_results_oak <- lfa::lfa_run_test_symmetric(specie,value_column,"area",lfa::lfa_jsd_from_vec) +lfa::lfa_generate_result_table_tests(jsd_results_oak,"Jensen-Shannon Divergence between areas with oak") +``` + + + +```{r} +#| warning: false +colMeans(jsd_results_oak, na.rm = TRUE) |> mean() +``` + + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-avg-jsd-pine +#| tbl-cap: "Jensen-Shannon Divergence between the researched areas which have the dominante specie pine for the atrribute nearest-neighbor-avg" +specie <- data[data$specie=="pine",] +jsd_results_pine <- lfa::lfa_run_test_symmetric(specie,value_column,"area",lfa::lfa_jsd_from_vec) +lfa::lfa_generate_result_table_tests(jsd_results_pine,"Jensen-Shannon Divergence between areas with pine") +``` + + + +```{r} +#| warning: false +colMeans(jsd_results_pine, na.rm = TRUE) |> mean() +``` + + + + +```{r} +#| warning: false +#| code-fold: true +#| label: tbl-nearest-neighbor-avg-jsd-spruce +#| tbl-cap: "Jensen-Shannon Divergence between the researched areas which have the dominante specie spruce for the atrribute nearest-neighbor-avg" +specie <- data[data$specie=="spruce",] +jsd_results_spruce <- lfa::lfa_run_test_symmetric(specie,value_column,"area",lfa::lfa_jsd_from_vec) +lfa::lfa_generate_result_table_tests(jsd_results_spruce,"Jensen-Shannon Divergence between areas with spruce") +``` + + + +```{r} +#| warning: false +colMeans(jsd_results_spruce, na.rm = TRUE) |> mean() +``` + diff --git a/results/appendix/package-docs/docs.qmd b/results/appendix/package-docs/docs.qmd index 6ab840a..1f7babe 100644 --- a/results/appendix/package-docs/docs.qmd +++ b/results/appendix/package-docs/docs.qmd @@ -383,8 +383,8 @@ create_density_plots(your_data, "value", "category1", "category2", title = "Dens lfa_create_density_plots( data, value_column, - category_column1, - category_column2, + category_column1 = "area", + category_column2 = 
"specie", title = NULL, xlims = NULL, ylims = NULL diff --git a/results/report.qmd b/results/report.qmd index f4056db..d267d43 100644 --- a/results/report.qmd +++ b/results/report.qmd @@ -71,6 +71,11 @@ This report documents the analysis of forest data for different tree species. {{< include appendix/preprocessing.qmd >}} ## Quantitative Results {{< include appendix/z_values.qmd >}} + +### Nearest Neighbours +{{< include appendix/nearest_1.qmd >}} +{{< include appendix/nearest_100.qmd >}} +{{< include appendix/nearest_avg.qmd >}} {{< include appendix/number_of_returns.qmd >}} diff --git a/results/results/nearest-neighbors.qmd b/results/results/nearest-neighbors.qmd new file mode 100644 index 0000000..e6ee11c --- /dev/null +++ b/results/results/nearest-neighbors.qmd @@ -0,0 +1,113 @@ +## n-nearest Neighbours + +```{r} +#| code-fold: true +#| results: hide +neighbors <- lfa::lfa_combine_sf_obj(lfa::lfa_get_neighbor_paths(),lfa::lfa_get_all_areas()) +``` + +### Overview +To begin our analysis, we first establish a framework for selecting neighbors by examining how the distance develops with increasing n, as illustrated in @fig-n-nearest-overview. The curves share a similar shape, but the actual values vary. Notably, as n increases, the distance increases in all patches, indicating a broader spatial context. + +Considering this trend, we extend our investigation beyond the nearest neighbor to include the 100th nearest neighbor. The $\Delta$distance shows a consistent decrease with each increment in n, reinforcing our decision not to explore beyond n = 100. Additionally, the constraint is driven by practical considerations, as our sample size occasionally lacks the capacity to explore larger n values, resulting in inaccurate values due to the absence of the true nearest neighbor within the sample area. + + +```{r} +#| code-fold: true +#| fig-cap: Average distance to the n-th nearest neighbor from each patch.
For simplicity, colored by the dominant species of each tree. +#| label: fig-n-nearest-overview +lfa::lfa_create_neighbor_mean_curves(neighbors) |> lfa::lfa_create_plot_per_area() +``` + + +### The Nearest Neighbour +Our initial focus centers on examining the distance to the nearest neighbor for each tree. Notably, the curve representing Spruce exhibits distinct characteristics compared to the three other curves—displaying a steeper profile with less variance, as depicted in @fig-density-1-nearest. + +Further analysis of all patches reveals similar distributions, as evident in the boxplot shown in @fig-boxplot-1-nearest, where mean and variance demonstrate consistency across patches. However, these graphical statistics present challenges in effectively distinguishing between different tree species based on the distance to the nearest neighbor. + + +```{r} +#| warning: false +#| label: fig-density-1-nearest +#| code-fold: true +#| fig-cap: Density plot of the distribution of the distance to the nearest neighbor across all patches grouped by the dominant species. +lfa::lfa_create_density_plots(neighbors,value_column = "Neighbor_1",category_column1 = "area",category_column2 = "specie", title = "Density plots for the nearest neighbor among species and areas", xlims = c(0,15)) +``` + + + +```{r} +#| warning: false +#| label: fig-boxplot-1-nearest +#| code-fold: true +#| fig-cap: Box plot of the distribution of the distance to the nearest neighbor across all patches grouped by the dominant species. +lfa::lfa_create_boxplot(neighbors,value_column = "Neighbor_1",category_column1 = "area",category_column2 = "specie", title = "Box plots for the nearest neighbor among species and areas") +``` + + +### The 100th nearest Neighbor +Moving on to the analysis of the 100th nearest neighbor, intriguing patterns emerge.
Peaks in the curves display varying heights and positions, with a notable example being the complete shift between Oak and Spruce, as illustrated in @fig-density-100-nearest. + +However, it is essential to acknowledge the high variance observed between curves within a species, such as Pine or Beech. While this variance could serve as a potential indicator, it comes with the caveat that the sample size must be substantial for reliable conclusions. + +Examining the boxplots in @fig-boxplot-100-nearest reveals numerous outliers above the boxes, hinting at potential edge effects on the sides of patches. This observation raises concerns about the adequacy of trees in these areas for a more in-depth analysis, posing challenges in deriving accurate insights. + + +```{r} +#| warning: false +#| label: fig-density-100-nearest +#| code-fold: true +#| fig-cap: Density plot of the distribution of the distance to the 100th nearest neighbor across all patches grouped by the dominant species. +lfa::lfa_create_density_plots(neighbors,value_column = "Neighbor_100",category_column1 = "area",category_column2 = "specie", title = "Density plots for the nearest neighbor along species and areas", xlims = c(35,100)) +``` + + + + +```{r} +#| warning: false +#| label: fig-boxplot-100-nearest +#| code-fold: true +#| fig-cap: Box plot of the distribution of the distance to the 100th nearest neighbor across all patches grouped by the dominant species. +lfa::lfa_create_boxplot(neighbors,value_column = "Neighbor_100",category_column1 = "area",category_column2 = "specie", title = "Box plots for the nearest neighbor along species and areas") +``` + +#### Average distance to 100 nearest neighbors + +```{r} +#| warning: false +#| code-fold: true +#| results: hide +names <- paste0("Neighbor_",1:100) +neighbors$avg = rowMeans(dplyr::select(as.data.frame(neighbors),names)) +``` + +Turning our attention to the averages of the first 100 neighbors, our analysis indicates strikingly similar results.
There is considerable variance observed between different species, as well as within individual species, as depicted in @fig-density-avg-nearest. + +Despite the uniformity in average results, the issue of outliers persists, as evident in the boxplot representation shown in @fig-boxplot-avg-nearest. These outliers pose challenges and may be indicative of specific environmental conditions affecting tree distributions. Further exploration is required to better understand and mitigate the impact of outliers on our analysis. + + + +```{r} +#| warning: false +#| label: fig-density-avg-nearest +#| code-fold: true +#| fig-cap: Density plot of the distribution of the average distance to the 100 nearest neighbors across all patches grouped by the dominant species. +lfa::lfa_create_density_plots(neighbors,value_column = "avg",category_column1 = "area",category_column2 = "specie", title = "Density plots for the avergae of 100 nearest neighbors across species and areas", xlims = c(25,60)) +``` + + + +The neighbor analysis proves potentially useful for distinguishing between tree species, yet the observed variances within each species suggest that relying solely on distance to neighbors may not suffice. + +A critical consideration is the sample size problem, wherein more distinguishable patterns emerge with higher neighbor levels, but this necessitates a sufficiently large sample size. Unfortunately, deriving a clear relationship between sample size and the number of tree neighbors remains elusive in our current findings. This gap in understanding could be a pertinent subject for further research, delving into the intricate interplay between sample size and the effectiveness of neighbor analysis in species differentiation. + + + +```{r} +#| warning: false +#| label: fig-boxplot-avg-nearest +#| code-fold: true +#| fig-cap: Box plot of the distribution of the average distance to the 100 nearest neighbors across all patches grouped by the dominant species.
+lfa::lfa_create_boxplot(neighbors,value_column = "avg",category_column1 = "area",category_column2 = "specie", title = "Box plots for the average to the nearest neighbor across all species and areas") +```