diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 00000000..1933d3a0 --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,31 @@ +name: R-CMD-check + +on: + push: + branches: + - main + +jobs: + R-CMD-check: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + - name: Set up R + uses: r-lib/actions/setup-r@v1 + with: + r-version: '4.1' + + - name: Install dependencies + run: R -e 'install.packages(c("devtools", "roxygen2"))' + + - name: R CMD check + run: R CMD check . + + - name: Install package + run: R -e 'devtools::install()' + + - name: Run tests + run: R -e 'devtools::test()' \ No newline at end of file diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 00000000..a5722068 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,63 @@ +name: build + +on: + push: + branches: + - main + - dev + +jobs: + build: + runs-on: ${{ matrix.config.os }} + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - { os: ubuntu-latest, r: '3.6', cran: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"} + + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + GITHUB_PAT: ${{ secrets.GH_PAT}} + HOMEBREW_NO_INSTALL_CLEANUP: 1 + + steps: + - uses: actions/checkout@v1 + - uses: r-lib/actions/setup-r@v2-branch + with: + r-version: ${{ matrix.config.r }} + - uses: r-lib/actions/setup-pandoc@v2-branch + - uses: r-lib/actions/setup-tinytex@v2-branch + + - name: Cache R packages + uses: actions/cache@v1 + with: + path: ${{ env.R_LIBS_USER }} + key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- + + - name: Ubuntu config + if: startsWith(matrix.config.os, 'ubuntu') + run: | + sudo add-apt-repository ppa:ubuntugis/ppa + sudo apt-get update + sudo apt-get 
install -y libcurl4-openssl-dev libssl-dev libxml2-dev libudunits2-dev libgdal-dev libgeos-dev libproj-dev libavfilter-dev libmagick++-dev + + - name: MacOS Config + if: startsWith(matrix.config.os, 'macOS') + run: | + brew install pkg-config + brew install gdal + brew install geos + brew install imagemagick@6 + + - name: Install dependencies + run: | + install.packages(c("remotes","rcmdcheck"), repos = "https://cloud.r-project.org") + remotes::install_deps(dependencies = TRUE) + shell: Rscript {0} + + - name: Check + run: rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error") + shell: Rscript {0} diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml new file mode 100644 index 00000000..03f577ba --- /dev/null +++ b/.github/workflows/docs.yaml @@ -0,0 +1,49 @@ +name: docs + +on: + push: + branches: + - main + + +jobs: + docs: + runs-on: ubuntu-latest + + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + GITHUB_PAT: ${{ secrets.GH_PAT}} + + steps: + + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - name: Ubuntu config + if: runner.os == 'Linux' + run: | + sudo add-apt-repository ppa:ubuntugis/ppa + sudo apt-get update + sudo apt-get install -y libcurl4-openssl-dev libssl-dev libxml2-dev libudunits2-dev libgdal-dev libgeos-dev libproj-dev libmagick++-dev libavfilter-dev + + - name: Install dependencies + run: | + install.packages(c("remotes","rcmdcheck"), repos = "https://cloud.r-project.org") + remotes::install_deps(dependencies = TRUE) + shell: Rscript {0} + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, local::. 
+ needs: website + + - name: Deploy package + run: | + git config --local user.name "$GITHUB_ACTOR" + git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" + Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index 66f858ea..a9381edb 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -22,7 +22,7 @@ jobs: runs-on: macos-latest strategy: matrix: - r-version: ['3.6.3', '4.1.1'] + r-version: ['4.0.1', '4.1.1'] steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/test_coverage.yml b/.github/workflows/test_coverage.yml new file mode 100644 index 00000000..d25c3023 --- /dev/null +++ b/.github/workflows/test_coverage.yml @@ -0,0 +1,48 @@ +name: test_coverage + +on: + push: + branches: + - main + - dev + +jobs: + test: + runs-on: ubuntu-latest + + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + GITHUB_PAT: ${{ secrets.GH_PAT}} + + steps: + - uses: actions/checkout@v1 + - uses: r-lib/actions/setup-r@v2-branch + with: + r-version: '3.6' + - uses: r-lib/actions/setup-pandoc@v2-branch + - uses: r-lib/actions/setup-tinytex@v2-branch + + - name: Cache R packages + uses: actions/cache@v1 + with: + path: ${{ env.R_LIBS_USER }} + key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- + + - name: Ubuntu config + run: | + sudo add-apt-repository ppa:ubuntugis/ppa + sudo apt-get update + sudo apt-get install -y libcurl4-openssl-dev libssl-dev libxml2-dev libudunits2-dev libgdal-dev libgeos-dev libproj-dev libmagick++-dev libavfilter-dev + + - name: Install dependencies + run: | + install.packages(c("remotes","rcmdcheck"), repos = "https://cloud.r-project.org") + remotes::install_deps(dependencies = TRUE) + shell: Rscript {0} + + - name: Test coverage + run: | + remotes::install_cran("covr") + covr::codecov(token = "${{secrets.CODECOV_TOKEN}}") + 
shell: Rscript {0} diff --git a/README.md b/README.md index 58842c06..24a10e90 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,12 @@ + +[![build](https://github.com/JGCRI/gcamfaostat/actions/workflows/build.yml/badge.svg?branch=main)](https://github.com/JGCRI/gcamfaostat/actions/workflows/build.yml) +[![test_coverage](https://github.com/JGCRI/gcamfaostat/actions/workflows/test_coverage.yml/badge.svg?branch=main)](https://github.com/JGCRI/gcamfaostat/actions/workflows/test_coverage.yml) +[![docs](https://github.com/JGCRI/gcamfaostat/actions/workflows/docs.yaml/badge.svg?branch=main)](https://github.com/JGCRI/gcamfaostat/actions/workflows/docs.yaml) + -# `gcamfaostat`: An R package to prepare, process, and synthesize FAOSTAT data for global agroeconomic and multisector dynamic modeling +### `gcamfaostat`: An R package to prepare, process, and synthesize FAOSTAT data for global agroeconomic and multisector dynamic modeling -## Summary +### Summary The `gcamfaostat` R package is developed to prepare, process, and synthesize [FAOSTAT](https://www.fao.org/faostat/en/#data) agroeconomic dataset for global economic and multisector dynamic modeling, in a traceable, transparent, and reproducible manner. Here, we demonstrate the use of the `gcamfaostat` framework for generating and updating agroeconomic data needed for the Global Change Analysis Model ([GCAM](https://jgcri.github.io/gcam-doc/toc.html)). However, our initiative seeks to enhance the quality and accessibility of data for the global agroeconomic modeling community, with the aim of fostering more robust and harmonized outcomes in a collaborative, efficient, open-source manner. The processed data and visualizations in `gcamfaostat` can be valuable to a broader range of users interested in understanding global agriculture. @@ -12,61 +17,55 @@ This tool bridges a crucial gap in the literature by offering several key featur 3. 
**Community Collaboration and Efficiency**: The package provides an open-source platform for researchers to continually enhance the processing methods. This collaborative approach, which establishes a standardized and streamlined process for data preparation and processing, carries benefits that extend to all modeling groups. By reducing the efforts required for data processing and fostering harmonized base calibration data, it contributes to a reduction in modeling uncertainty and enhances the overall research efficiency. 4. **User Accessibility**: Where applicable, the processed data can be mapped and aggregated to user-specified regions and sectors for agroeconomic modeling. However, beyond the modeling community, `gcamfaostat` can be valuable to a broader range of users interested in understanding global agriculture trends and dynamics, as it provides accessible and processed data and visualization functions. -## User Guide -The package is documented in the [online manual](https://jgcri.github.io/gcamfaostat/index.html) - -To contribute, see [contribution guidance](https://jgcri.github.io/gcamfaostat/CONTRIBUTE.html) -## Contributing - -Please read our [Contributing Guidelines](CONTRIBUTING.md) for information on how to contribute to this project. - - - - - +*** +### User Guide +The package is documented in the [online manual](https://jgcri.github.io/gcamfaostat/index.html) +*** -![Figure 2. Structure of gcamfaostat](man/figures/Fig_data_processing_flow.jpg) -**Figure 2. Structure of gcamfaostat** - - +### Quick Start +#### 1. Download and install: +* Using `devtools` to download and install (The size < 1 GB): +* `devtools::install_github("jgcri/gcamfaostat")` -## Download and install: +#### 2. Load and run the gcamdata package -```r -install.packages("devtools") -devtools::install_github("jgcri/gcamfaostat") -``` -# Loading and run the gcamdata package +* Open the `gcamfaostat.Rproj` file in the `gcamfaostat` folder using RStudio. 
+* Load the `gcamdata` package: +* `devtools::load_all()` -Open the `gcamfaostat.Rproj` file in the `gcamfaostat` folder. RStudio should open the project. +#### 3. Modify configurations +* To export csv output files, in `constants.R`, + * set `OUTPUT_Export_CSV` to `TRUE` + * specify the directory path (`DIR_OUTPUT_CSV`) for output files; the default is `output/gcamfaostat_csv_output`. -To load the `gcamdata` package, enter: +#### 4. Run the driver +* `driver_drake()` -`devtools::load_all()` +#### 5. Use data and package functions +* Data saved in `DIR_OUTPUT_CSV` can be used in downstream models. +* Once `driver_drake()` has been run, all the intermediate data are saved and can be explored (see examples in [Use Cases](https://jgcri.github.io/gcamfaostat/articles/vignette_use_cases.html) and [Visualization](https://jgcri.github.io/gcamfaostat/articles/vignette_visualization.html)). -## Run the driver -There are two ways to run the driver: -1. `driver_drake()` +*** -`driver_drake()` runs the driver and stores the outputs in a hidden cache. When you run `driver_drake()` again it will skip steps that are up-to-date. This is useful if you will be adjusting the data inputs and code and running the data system multiple times. For this reason, we almost always recommend using `driver_drake()`. More details can be found in the [vignette](https://jgcri.github.io/gcamdata/articles/driverdrake_vignette.html). +### Package structure -2. `driver()` -See [the documentation](https://jgcri.github.io/gcamdata/reference/driver.html) for more options when running the driver, such as what outputs to generate or when to stop. +* `gcamfaostat` processes [input data](https://jgcri.github.io/gcamfaostat/articles/vignette_preparing_data.html#metadata) to output data in a format that is needed for downstream processing and modeling, e.g., [data used in gcamdata-aglu-FAO](https://github.com/JGCRI/gcam-core/tree/master/input/gcamdata/inst/extdata/aglu/FAO) (see the schematic below). 
+* Input data is stored in the [Prebuilt Data](https://github.com/JGCRI/gcamfaostat/blob/main/data/PREBUILT_DATA.rda) of the package. The raw data is archived on Zenodo (see URL in the [`FF_download_RemoteArchive`](https://github.com/JGCRI/gcamfaostat/blob/main/R/xfaostat_helper_funcs.R#L144) function) to ensure the processing is 100% replicable. Users can also download the latest data using [`FF_download_FAOSTAT`](https://github.com/JGCRI/gcamfaostat/blob/main/R/xfaostat_helper_funcs.R#L90). +* All intermediate processing and data flows are transparent and traceable. See [Processing Flow](https://jgcri.github.io/gcamfaostat/articles/vignette_processing_flow.html) for data-tracing examples. -## Output files -Users can specify the output directory (`DIR_OUTPUT_CSV`) that stores the output csv files in `constants.R`. The default directory is `outputs/CSV`. The the file will be exported when `OUTPUT_Export_CSV == TRUE` (an option in `constants.R`). -Users can also make use of the functions to trace the processing by step, when`driver_drake()` is employed. +![](man/figures/Fig_data_processing_flow.jpg){width=90%} +Schematic: module (data processing chunk) structure of gcamfaostat -# Contributing - -Please read the [Contribution Guidelines](CONTRIBUTING.md) for details on how to contribute to this project. + +### Contributing +Please read our [Contributing Guidelines](CONTRIBUTING.md) for information on how to contribute to this project. -# Related publications +### Related publications - Bond-Lamberty, Ben, Kalyn Dorheim, Ryna Cui, Russell Horowitz, Abigail Snyder, Katherine Calvin, Leyang Feng et al. "gcamdata: An R package for preparation, synthesis, and tracking of input data for the GCAM integrated human-earth systems model." Journal of Open Research Software 7, no. 1 (2019). DOI: 10.5334/jors.232 - Calvin, Katherine V., Abigail Snyder, Xin Zhao, and Marshall Wise. 
"Modeling land use and land cover change: using a hindcast to estimate economic parameters in gcamland v2. 0." Geoscientific Model Development 15, no. 2 (2022): 429-447. https://doi.org/10.5194/gmd-15-429-2022 - Chepeliev, Maksym. "Incorporating nutritional accounts to the GTAP Data Base." Journal of Global Economic Analysis 7, no. 1 (2022): 1-43. https://doi.org/10.21642/JGEA.070101AF diff --git a/_pkgdown.yml b/_pkgdown.yml index 009bc334..a098ab65 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -18,7 +18,7 @@ navbar: - icon: fas fa-book text: "Vignettes" menu: - - text: "Quick Start" + - text: "Getting Started" href: articles/vignette_getting_started.html - text: "Preparing Data" href: articles/vignette_preparing_data.html diff --git a/gcamfaostat.Rproj b/gcamfaostat.Rproj index 40212432..270314b8 100644 --- a/gcamfaostat.Rproj +++ b/gcamfaostat.Rproj @@ -18,5 +18,4 @@ StripTrailingWhitespace: Yes BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source -PackageCheckArgs: --no-codoc PackageRoxygenize: rd,collate,namespace diff --git a/vignettes/vignette_getting_started.Rmd b/vignettes/vignette_getting_started.Rmd index 187d9434..dbe5eaaf 100644 --- a/vignettes/vignette_getting_started.Rmd +++ b/vignettes/vignette_getting_started.Rmd @@ -120,4 +120,22 @@ See [the documentation](https://jgcri.github.io/gcamdata/reference/driver.html) +## Output files +Users can specify the output directory (`DIR_OUTPUT_CSV`) that stores the output csv files in `constants.R`. The default directory is `outputs/CSV`. The files will be exported when `OUTPUT_Export_CSV == TRUE` (an option in `constants.R`). +Users can also make use of the functions to trace the processing by step, when `driver_drake()` is employed. + + + +There are two ways to run the driver: +1. `driver_drake()` + +`driver_drake()` runs the driver and stores the outputs in a hidden cache. When you run `driver_drake()` again it will skip steps that are up-to-date. 
This is useful if you will be adjusting the data inputs and code and running the data system multiple times. For this reason, we almost always recommend using `driver_drake()`. More details can be found in the [vignette](https://jgcri.github.io/gcamdata/articles/driverdrake_vignette.html). + +2. `driver()` + +See [the documentation](https://jgcri.github.io/gcamdata/reference/driver.html) for more options when running the driver, such as what outputs to generate or when to stop. + + + + # References diff --git a/vignettes/vignette_news.Rmd b/vignettes/vignette_news.Rmd index bc4f7f58..d70d4507 100644 --- a/vignettes/vignette_news.Rmd +++ b/vignettes/vignette_news.Rmd @@ -17,6 +17,14 @@ knitr::opts_chunk$set( # October updates +2023-10-25 +ToDo before JOSS submission +* Check package license (PNNL) +* Check Zenodo license for FAOSTAT data [*Done*: `CC BY-NC-SA`] +* Adding contribution.MD and Community side-bar [*Done*] + +* Test & Check; Adding GitHub tests/logos + 2023-10-24 * Visualization Page is live now * `gcamfaostat` moved to the JGCRI GitHub account