From 0af006d24b39e29e18df013837bdd664f93b414d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 6 Jan 2025 17:47:30 +0000 Subject: [PATCH 01/17] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.6.9 → v0.8.6](https://github.com/astral-sh/ruff-pre-commit/compare/v0.6.9...v0.8.6) - [github.com/sirosen/check-jsonschema: 0.29.4 → 0.30.0](https://github.com/sirosen/check-jsonschema/compare/0.29.4...0.30.0) --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 04c896e..9bcf138 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -44,7 +44,7 @@ repos: # Linting and formatting for Python code (see pyproject.toml for config) - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.6.9 + rev: v0.8.6 hooks: # Run the linter. - id: ruff @@ -54,6 +54,6 @@ repos: # Ensure GitHub workflows match the expected schema. - repo: https://github.com/sirosen/check-jsonschema - rev: 0.29.4 + rev: 0.30.0 hooks: - id: check-github-workflows From 612c046b5241b0f81796f77d98e793c41db1211d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 7 Jan 2025 16:34:47 -0700 Subject: [PATCH 02/17] Update: Contributor & review file update (#241) Co-authored-by: Leah --- _data/contributors.yml | 203 ++++++++++++++++++++++++++++++--- _data/packages.yml | 252 +++++++++++++++++++++++++---------------- 2 files changed, 342 insertions(+), 113 deletions(-) diff --git a/_data/contributors.yml b/_data/contributors.yml index ea05c98..f003c25 100644 --- a/_data/contributors.yml +++ b/_data/contributors.yml @@ -277,6 +277,7 @@ contributor_type: - leadership - package-guide + - web-contrib packages_eic: packages_editor: packages_submitted: @@ -802,6 +803,7 @@ - web-contrib packages_eic: - pooltool + - quadratik packages_editor: - cardsort - harmonize-wq @@ -1255,7 +1257,7 @@ mastodon: orcidid: partners: - website: https://www.dov.vlaanderen.be + website: board: false contributor_type: - package-maintainer @@ -4280,6 +4282,7 @@ packages_eic: - automata packages_editor: + - quadratik - rdata packages_submitted: packages_reviewed: @@ -6062,6 +6065,7 @@ board: false contributor_type: - community + - package-guide packages_eic: packages_editor: packages_submitted: @@ -7975,9 +7979,9 @@ sort: bio: Full stack scientific programmer - from raw data to decisions organization: Penn State University - date_added: '2024-12-01' + date_added: '2024-12-13' deia_advisory: false - editorial_board: + editorial_board: false emeritus_editor: false advisory: false emeritus_advisory: false @@ -8004,9 +8008,9 @@ sort: bio: organization: - date_added: '2024-12-01' + date_added: '2024-12-13' deia_advisory: false - editorial_board: + editorial_board: false emeritus_editor: false advisory: false emeritus_advisory: false @@ -8033,9 +8037,9 @@ sort: bio: organization: - date_added: '2024-12-01' + date_added: '2024-12-13' deia_advisory: false - editorial_board: + editorial_board: false emeritus_editor: false advisory: false emeritus_advisory: false @@ -8063,9 +8067,9 @@ sort: bio: organization: - date_added: '2024-12-01' + date_added: '2024-12-13' deia_advisory: false - editorial_board: + editorial_board: false emeritus_editor: false advisory: false emeritus_advisory: false @@ -8092,9 +8096,9 @@ sort: bio: organization: - date_added: '2024-12-01' + date_added: '2024-12-13' deia_advisory: false - editorial_board: + editorial_board: false emeritus_editor: false advisory: false emeritus_advisory: false @@ -8121,9 +8125,9 @@ sort: bio: organization: - date_added: '2024-12-01' + date_added: '2024-12-13' deia_advisory: false - editorial_board: + editorial_board: false emeritus_editor: false advisory: false emeritus_advisory: false @@ -8143,3 +8147,176 @@ packages_reviewed: location: email: +- name: Alyssa Columbus + github_username: acolum + github_image_id: 21285656 + title: + sort: + bio: + organization: + date_added: '2024-12-13' + deia_advisory: false + editorial_board: false + emeritus_editor: false + advisory: false + emeritus_advisory: false + twitter: + mastodon: + orcidid: + partners: + website: https://alyssacolumbus.com + board: false + contributor_type: + - peer-review + - reviewer + packages_eic: + packages_editor: + packages_submitted: + packages_reviewed: + - quadratik + location: + email: +- name: Avik Basu + github_username: ab93 + github_image_id: 3485425 + title: + sort: + bio: Data Scientist | Machine Learning Engineer + organization: Intuit + date_added: '2024-12-13' + deia_advisory: false + editorial_board: false + emeritus_editor: false + advisory: false + emeritus_advisory: false + twitter: + mastodon: + orcidid: + partners: + website: + board: false + contributor_type: + - peer-review + - reviewer + packages_eic: + packages_editor: + packages_submitted: + packages_reviewed: + - quadratik + location: Sunnyvale + email: +- name: Raktim Mukhopadhyay + github_username: rmj3197 + github_image_id: 54344732 + title: + sort: + bio: + organization: SUNY at Buffalo + date_added: '2024-12-13' + deia_advisory: false + editorial_board: false + emeritus_editor: false + advisory: false + emeritus_advisory: false + twitter: + mastodon: + orcidid: + partners: + website: + board: false + contributor_type: + - maintainer + - peer-review + - submitting-author + packages_eic: + packages_editor: + packages_submitted: + - quadratik + packages_reviewed: + location: Buffalo, New York, USA + email: +- name: Giovanni Saraceno + github_username: giovsaraceno + github_image_id: 92368611 + title: + sort: + bio: My principal area of research are robust statistics, directional statistics, + kernel-based methods, neural networks, neuroscience applications. + organization: University of Padova + date_added: '2024-12-13' + deia_advisory: false + editorial_board: false + emeritus_editor: false + advisory: false + emeritus_advisory: false + twitter: + mastodon: + orcidid: + partners: + website: + board: false + contributor_type: + - maintainer + - peer-review + packages_eic: + packages_editor: + packages_submitted: + - quadratik + packages_reviewed: + location: + email: +- name: Santiago Soler + github_username: santisoler + github_image_id: 11541317 + title: + sort: + bio: PhD in Geophysics and Physicist. Postdoc researcher at UBC. Develop @fatiando + and @simpeg. + organization: '@fatiando @simpeg @ubcgif @compgeolab @GeoLatinas' + date_added: '2025-01-01' + deia_advisory: false + editorial_board: + emeritus_editor: false + advisory: false + emeritus_advisory: false + twitter: + mastodon: + orcidid: + partners: + website: https://www.santisoler.com + board: false + contributor_type: + - package-guide + packages_eic: + packages_editor: + packages_submitted: + packages_reviewed: + location: British Columbia, Canada + email: santisoler@fastmail.com +- name: Seth Michael Larson + github_username: sethmlarson + github_image_id: 18519037 + title: + sort: + bio: PSF Security Developer-in-Residence 🐍 PSF Fellow ✨ Minnesoootan, he/him + organization: '@psf' + date_added: '2025-01-01' + deia_advisory: false + editorial_board: + emeritus_editor: false + advisory: false + emeritus_advisory: false + twitter: + mastodon: + orcidid: + partners: + website: https://sethmlarson.dev + board: false + contributor_type: + - web-contrib + packages_eic: + packages_editor: + packages_submitted: + packages_reviewed: + location: Minneapolis, MN + email: sethmichaellarson@gmail.com diff --git a/_data/packages.yml b/_data/packages.yml index cb8ed8d..8d8ad13 100644 --- a/_data/packages.yml +++ b/_data/packages.yml @@ -25,8 +25,8 @@ version_accepted: 0.5.5 date_accepted: '2024-11-22' created_at: 2024-08-01 11:54:53+00:00 - updated_at: 2024-11-26 18:46:20+00:00 - closed_at: + updated_at: 2024-12-03 02:41:20+00:00 + closed_at: 2024-12-03 02:41:20+00:00 issue_link: https://github.com/pyOpenSci/software-submission/issues/209 joss: partners: @@ -34,16 +34,16 @@ name: thztools description: Tools for terahertz time-domain spectroscopy (THz-TDS) created_at: '2022-11-22' - stargazers_count: 10 - watchers_count: 10 + stargazers_count: 12 + watchers_count: 12 open_issues_count: 2 - forks_count: 2 + forks_count: 3 documentation: https://dodge-research-group.github.io/thztools/ contrib_count: 5 - last_commit: '2024-11-23' + last_commit: '2024-12-03' labels: - 6/pyOS-approved - - 7/under-joss-review + - 9/joss-approved - package_name: Stingray package_description: A spectral-timing software package for astrophysical X-ray (and other) data @@ -90,17 +90,68 @@ description: Anything can happen in the next half hour (including spectral timing made easy)! created_at: '2015-09-30' - stargazers_count: 176 - watchers_count: 176 + stargazers_count: 179 + watchers_count: 179 open_issues_count: 44 forks_count: 144 documentation: https://stingray.science/stingray contrib_count: 30 - last_commit: '2024-11-22' + last_commit: '2024-12-04' labels: - 6/pyOS-approved - 9/joss-approved - astropy +- package_name: QuadratiK + package_description: QuadratiK includes test for multivariate normality, test + for uniformity on the sphere, non-parametric two- and k-sample tests, random + generation of points from the Poisson kernel-based density and clustering + algorithm for spherical data. + submitting_author: + name: Raktim Mukhopadhyay + github_username: rmj3197 + all_current_maintainers: + - name: Raktim Mukhopadhyay + github_username: rmj3197 giovsaraceno + repository_link: https://github.com/rmj3197/QuadratiK + version_submitted: 1.1.0 + categories: + - data-processing-munging + editor: + name: Isabel Zimmerman + github_username: isabelizimm + eic: + name: Alex Batisse + github_username: Batalex + reviewers: + - name: Alyssa Columbus + github_username: acolum + - name: Avik Basu + github_username: ab93 + archive: TBD + version_accepted: TBD + date_accepted: missing + created_at: 2024-05-13 21:23:44+00:00 + updated_at: 2024-12-23 10:31:49+00:00 + closed_at: + issue_link: https://github.com/pyOpenSci/software-submission/issues/180 + joss: + partners: + gh_meta: + name: QuadratiK + description: QuadratiK includes test for multivariate normality, test for + uniformity on the sphere, non-parametric two- and k-sample tests, random + generation of points from the Poisson kernel-based density and clustering + algorithm for spherical data. + created_at: '2024-02-04' + stargazers_count: 2 + watchers_count: 2 + open_issues_count: 0 + forks_count: 0 + documentation: https://quadratik.readthedocs.io/ + contrib_count: 1 + last_commit: '2024-12-23' + labels: + - 6/pyOS-approved - package_name: pooltool package_description: Pooltool is a general purpose billiards simulator crafted specifically for science and engineering. @@ -139,13 +190,13 @@ name: pooltool description: A sandbox billiards game that emphasizes realistic physics created_at: '2020-04-09' - stargazers_count: 234 - watchers_count: 234 - open_issues_count: 7 - forks_count: 37 + stargazers_count: 241 + watchers_count: 241 + open_issues_count: 8 + forks_count: 39 documentation: https://pooltool.readthedocs.io contrib_count: 7 - last_commit: '2024-11-25' + last_commit: '2024-12-31' labels: - 6/pyOS-approved - 9/joss-approved @@ -190,11 +241,11 @@ created_at: '2017-10-19' stargazers_count: 15 watchers_count: 15 - open_issues_count: 14 - forks_count: 4 + open_issues_count: 15 + forks_count: 5 documentation: https://martini.readthedocs.io/ contrib_count: 1 - last_commit: '2024-11-30' + last_commit: '2024-12-08' labels: - 6/pyOS-approved - 9/joss-approved @@ -243,7 +294,7 @@ stargazers_count: 11 watchers_count: 11 open_issues_count: 3 - forks_count: 6 + forks_count: 8 documentation: https://cosmoglobe.github.io/zodipy/ contrib_count: 8 last_commit: '2024-11-19' @@ -287,13 +338,13 @@ description: Standardize, clean, and wrangle Water Quality Portal data into more analytic-ready formats created_at: '2022-06-27' - stargazers_count: 15 - watchers_count: 15 + stargazers_count: 16 + watchers_count: 16 open_issues_count: 25 - forks_count: 5 + forks_count: 6 documentation: https://usepa.github.io/harmonize-wq/ contrib_count: 5 - last_commit: '2024-11-05' + last_commit: '2024-12-05' labels: - 6/pyOS-approved - 9/joss-approved @@ -335,8 +386,8 @@ created_at: '2020-01-24' stargazers_count: 21 watchers_count: 21 - open_issues_count: 1 - forks_count: 10 + open_issues_count: 2 + forks_count: 11 documentation: https://xnemogcm.readthedocs.io/ contrib_count: 6 last_commit: '2024-09-11' @@ -386,8 +437,8 @@ description: A Python library for simulating finite automata, pushdown automata, and Turing machines created_at: '2016-02-16' - stargazers_count: 354 - watchers_count: 354 + stargazers_count: 365 + watchers_count: 365 open_issues_count: 11 forks_count: 64 documentation: https://caleb531.github.io/automata/ @@ -436,7 +487,7 @@ forks_count: 1 documentation: https://pypi.org/project/sleplet contrib_count: 5 - last_commit: '2024-11-21' + last_commit: '2024-12-03' labels: - 6/pyOS-approved - 9/joss-approved @@ -501,13 +552,13 @@ name: sunpy description: SunPy - Python for Solar Physics created_at: '2011-08-06' - stargazers_count: 923 - watchers_count: 923 - open_issues_count: 296 - forks_count: 591 + stargazers_count: 932 + watchers_count: 932 + open_issues_count: 306 + forks_count: 602 documentation: https://www.sunpy.org contrib_count: 30 - last_commit: '2024-11-28' + last_commit: '2024-12-30' labels: - 6/pyOS-approved - package_name: ncompare @@ -546,13 +597,13 @@ name: ncompare description: Compare the structure of two netCDF files at the command line created_at: '2023-08-17' - stargazers_count: 27 - watchers_count: 27 - open_issues_count: 7 + stargazers_count: 26 + watchers_count: 26 + open_issues_count: 5 forks_count: 9 documentation: https://ncompare.readthedocs.io contrib_count: 6 - last_commit: '2024-11-14' + last_commit: '2024-12-20' labels: - 6/pyOS-approved - 9/joss-approved @@ -590,10 +641,10 @@ name: rdata description: Reader of R datasets in .rda format, in Python created_at: '2018-07-02' - stargazers_count: 46 - watchers_count: 46 + stargazers_count: 50 + watchers_count: 50 open_issues_count: 7 - forks_count: 2 + forks_count: 3 documentation: https://rdata.readthedocs.io contrib_count: 2 last_commit: '2024-10-11' @@ -634,8 +685,8 @@ name: EOmaps description: A library to create interactive maps of geographical datasets created_at: '2021-09-27' - stargazers_count: 339 - watchers_count: 339 + stargazers_count: 341 + watchers_count: 341 open_issues_count: 18 forks_count: 25 documentation: https://eomaps.readthedocs.io/ @@ -684,13 +735,13 @@ description: Quickly search, compare, and analyze genomic and metagenomic data sets. created_at: '2016-04-09' - stargazers_count: 478 - watchers_count: 478 - open_issues_count: 732 - forks_count: 79 + stargazers_count: 482 + watchers_count: 482 + open_issues_count: 750 + forks_count: 80 documentation: https://sourmash.readthedocs.io/en/latest/ contrib_count: 30 - last_commit: '2024-11-26' + last_commit: '2024-12-24' labels: - 6/pyOS-approved - 9/joss-approved @@ -729,8 +780,8 @@ name: sciform description: A package for formatting numbers into scientific formatted strings. created_at: '2023-05-26' - stargazers_count: 14 - watchers_count: 14 + stargazers_count: 15 + watchers_count: 15 open_issues_count: 8 forks_count: 2 documentation: https://sciform.readthedocs.io/en/stable/ @@ -774,10 +825,10 @@ name: astartes description: Better Data Splits for Machine Learning created_at: '2022-04-22' - stargazers_count: 66 - watchers_count: 66 + stargazers_count: 68 + watchers_count: 68 open_issues_count: 6 - forks_count: 3 + forks_count: 4 documentation: https://jacksonburns.github.io/astartes/ contrib_count: 3 last_commit: '2024-09-25' @@ -835,13 +886,13 @@ description: CompleX Group Interactions (XGI) is a Python package for higher-order networks. created_at: '2021-09-08' - stargazers_count: 187 - watchers_count: 187 - open_issues_count: 52 - forks_count: 32 + stargazers_count: 190 + watchers_count: 190 + open_issues_count: 50 + forks_count: 33 documentation: https://xgi.readthedocs.io - contrib_count: 15 - last_commit: '2024-11-26' + contrib_count: 16 + last_commit: '2024-12-20' labels: - 6/pyOS-approved - 9/joss-approved @@ -887,8 +938,8 @@ name: biocypher description: A unifying framework for biomedical research knowledge graphs created_at: '2021-09-10' - stargazers_count: 225 - watchers_count: 225 + stargazers_count: 228 + watchers_count: 228 open_issues_count: 172 forks_count: 31 documentation: https://biocypher.org @@ -1075,8 +1126,8 @@ name: bibat description: A batteries-included template for Bayesian data analysis projects created_at: '2021-03-04' - stargazers_count: 18 - watchers_count: 18 + stargazers_count: 19 + watchers_count: 19 open_issues_count: 13 forks_count: 2 documentation: https://bibat.readthedocs.io/ @@ -1125,9 +1176,9 @@ description: 'Python library for GraphBLAS: high-performance sparse linear algebra for scalable graph analytics' created_at: '2019-11-11' - stargazers_count: 126 - watchers_count: 126 - open_issues_count: 60 + stargazers_count: 129 + watchers_count: 129 + open_issues_count: 61 forks_count: 15 documentation: https://python-graphblas.readthedocs.io/en/stable/ contrib_count: 11 @@ -1175,13 +1226,13 @@ description: 'Library of derived climate variables, ie climate indicators, based on xarray. ' created_at: '2018-07-27' - stargazers_count: 333 - watchers_count: 333 - open_issues_count: 64 + stargazers_count: 335 + watchers_count: 335 + open_issues_count: 68 forks_count: 59 documentation: https://xclim.readthedocs.io/en/stable/ contrib_count: 30 - last_commit: '2024-11-22' + last_commit: '2024-12-16' labels: - 6/pyOS-approved - 9/joss-approved @@ -1223,8 +1274,8 @@ name: crowsetta description: A tool to work with any format for annotating animal sounds created_at: '2018-12-01' - stargazers_count: 52 - watchers_count: 52 + stargazers_count: 51 + watchers_count: 51 open_issues_count: 40 forks_count: 3 documentation: https://crowsetta.readthedocs.io/en/latest/ @@ -1319,8 +1370,8 @@ description: 'Jointly: A Python package for synchronizing sensors with accelerometer data' created_at: '2020-10-14' - stargazers_count: 10 - watchers_count: 10 + stargazers_count: 11 + watchers_count: 11 open_issues_count: 0 forks_count: 1 documentation: https://jointly.readthedocs.io/ @@ -1361,11 +1412,11 @@ github_username: jbusecke - name: Szymon Moliński github_username: simonmolinsky - archive: '[Zenodo Archive](https://zenodo.org/record/6702566)' + archive: https://zenodo.org/record/6702566 version_accepted: V 0.7.0 date_accepted: 2022-9-1 created_at: 2021-07-23 00:37:07+00:00 - updated_at: 2023-09-14 17:47:44+00:00 + updated_at: 2024-12-15 05:27:30+00:00 closed_at: 2023-06-12 22:47:16+00:00 issue_link: https://github.com/pyOpenSci/software-submission/issues/43 joss: @@ -1374,13 +1425,13 @@ name: pygmt description: A Python interface for the Generic Mapping Tools. created_at: '2017-03-17' - stargazers_count: 769 - watchers_count: 769 - open_issues_count: 193 - forks_count: 222 + stargazers_count: 775 + watchers_count: 775 + open_issues_count: 202 + forks_count: 224 documentation: https://www.pygmt.org contrib_count: 30 - last_commit: '2024-11-30' + last_commit: '2024-12-31' labels: - 6/pyOS-approved - package_name: Devicely @@ -1559,8 +1610,8 @@ name: pystiche description: Framework for Neural Style Transfer (NST) built upon PyTorch created_at: '2019-09-16' - stargazers_count: 270 - watchers_count: 270 + stargazers_count: 271 + watchers_count: 271 open_issues_count: 15 forks_count: 28 documentation: @@ -1651,10 +1702,10 @@ name: pyrolite description: A set of tools for getting the most from your geochemical data. created_at: '2018-06-13' - stargazers_count: 135 - watchers_count: 135 + stargazers_count: 139 + watchers_count: 139 open_issues_count: 10 - forks_count: 37 + forks_count: 38 documentation: https://pyrolite.readthedocs.io contrib_count: 14 last_commit: '2024-10-29' @@ -1696,13 +1747,13 @@ name: movingpandas description: Movement trajectory classes and functions built on top of GeoPandas created_at: '2018-12-16' - stargazers_count: 1243 - watchers_count: 1243 - open_issues_count: 33 - forks_count: 194 + stargazers_count: 1255 + watchers_count: 1255 + open_issues_count: 27 + forks_count: 196 documentation: https://movingpandas.org contrib_count: 30 - last_commit: '2024-11-23' + last_commit: '2024-12-30' labels: - 6/pyOS-approved - package_name: pandera @@ -1742,13 +1793,13 @@ description: A light-weight, flexible, and expressive statistical data testing library created_at: '2018-11-01' - stargazers_count: 3424 - watchers_count: 3424 - open_issues_count: 405 - forks_count: 311 + stargazers_count: 3499 + watchers_count: 3499 + open_issues_count: 399 + forks_count: 316 documentation: https://www.union.ai/pandera contrib_count: 30 - last_commit: '2024-11-13' + last_commit: '2024-12-29' labels: - 6/pyOS-approved - package_name: Nbless @@ -1778,7 +1829,7 @@ version_accepted: TBD date_accepted: '2019-06-17' created_at: 2019-05-30 18:27:38+00:00 - updated_at: 2023-09-10 20:43:12+00:00 + updated_at: 2024-12-17 18:02:52+00:00 closed_at: 2019-08-06 15:45:11+00:00 issue_link: https://github.com/pyOpenSci/software-submission/issues/7 joss: @@ -1788,8 +1839,8 @@ description: Construct, deconstruct, convert, execute, and prepare slides from Jupyter notebooks created_at: '2018-09-05' - stargazers_count: 35 - watchers_count: 35 + stargazers_count: 34 + watchers_count: 34 open_issues_count: 20 forks_count: 6 documentation: https://py4ds.github.io/nbless/ @@ -1797,6 +1848,7 @@ last_commit: '2019-07-25' labels: - 6/pyOS-approved + - archived - package_name: earthpy package_description: A package built to support working with spatial data using open source python @@ -1841,8 +1893,8 @@ description: A package built to support working with spatial data using open source python created_at: '2018-02-20' - stargazers_count: 511 - watchers_count: 511 + stargazers_count: 512 + watchers_count: 512 open_issues_count: 42 forks_count: 160 documentation: https://earthpy.readthedocs.io From 26bf8674cbb3ffe237b36dfcd30bd690bc76d79e Mon Sep 17 00:00:00 2001 From: Bane Sullivan Date: Sat, 14 Dec 2024 22:01:34 -0800 Subject: [PATCH 03/17] feat: parse and clean archive badges and markdown links to URL --- pyproject.toml | 1 + src/pyosmeta/models/base.py | 20 +++++++++++++++++++- src/pyosmeta/utils_clean.py | 29 +++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 138cded..9ee53d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ classifiers = [ ] dependencies = [ "pydantic>=2.0", + "python-doi", "python-dotenv", "requests", "ruamel-yaml>=0.17.21", diff --git a/src/pyosmeta/models/base.py b/src/pyosmeta/models/base.py index 63a4a18..2f743af 100644 --- a/src/pyosmeta/models/base.py +++ b/src/pyosmeta/models/base.py @@ -19,7 +19,7 @@ ) from pyosmeta.models.github import Labels -from pyosmeta.utils_clean import clean_date, clean_markdown +from pyosmeta.utils_clean import clean_archive, clean_date, clean_markdown class Partnerships(str, Enum): @@ -403,3 +403,21 @@ def extract_label(cls, labels: list[str | Labels]) -> list[str]: label.name if isinstance(label, Labels) else label for label in labels ] + + @field_validator( + "archive", + mode="before", + ) + @classmethod + def clean_archive(cls, archive: str) -> str: + """Clean the archive value to ensure it's a valid archive URL.""" + return clean_archive(archive) + + @field_validator( + "joss", + mode="before", + ) + @classmethod + def clean_joss(cls, joss: str) -> str: + """Clean the joss value to ensure it's a valid URL.""" + return clean_archive(joss) diff --git a/src/pyosmeta/utils_clean.py b/src/pyosmeta/utils_clean.py index c87ea92..ed0572a 100644 --- a/src/pyosmeta/utils_clean.py +++ b/src/pyosmeta/utils_clean.py @@ -7,6 +7,8 @@ from datetime import datetime from typing import Any +import doi + def get_clean_user(username: str) -> str: """Cleans a GitHub username provided in a review issue by removing any @@ -125,3 +127,30 @@ def clean_date_accepted_key(review_dict: dict[str, Any]) -> dict[str, str]: review_dict["date_accepted"] = value break return review_dict + + +def clean_archive(archive): + """Clean an archive link to ensure it is a valid URL.""" + + def is_doi(archive): + try: + return doi.validate_doi(archive) + except ValueError: + return False + + if archive.startswith("[") and archive.endswith(")"): + # Extract the outermost link + link = archive[archive.rfind("](") + 2 : -1] + if not link.startswith("http"): + return clean_archive(link) + return link + elif archive.startswith("http"): + return archive + elif link := is_doi(archive): + return link + elif archive.lower() == "n/a": + return None + elif archive.lower() == "tbd": + return None + else: + raise ValueError(f"Invalid archive URL: {archive}") From 9173f3eadf1cf8628c0b539282d9b504f6fbdf2b Mon Sep 17 00:00:00 2001 From: Bane Sullivan Date: Wed, 18 Dec 2024 23:51:14 -0800 Subject: [PATCH 04/17] Improve docs, reuse URL checker, improve test data --- src/pyosmeta/models/base.py | 29 +++------ src/pyosmeta/utils_clean.py | 68 +++++++++++++++++++--- tests/data/reviews/bolded_keys.txt | 4 +- tests/data/reviews/partnership_astropy.txt | 4 +- tests/data/reviews/reviewer_keyed.txt | 2 +- tests/data/reviews/reviewer_list.txt | 2 +- 6 files changed, 73 insertions(+), 36 deletions(-) diff --git a/src/pyosmeta/models/base.py b/src/pyosmeta/models/base.py index 2f743af..1742bd2 100644 --- a/src/pyosmeta/models/base.py +++ b/src/pyosmeta/models/base.py @@ -8,7 +8,6 @@ from enum import Enum from typing import Any, Optional, Set, Union -import requests from pydantic import ( AliasChoices, BaseModel, @@ -19,7 +18,12 @@ ) from pyosmeta.models.github import Labels -from pyosmeta.utils_clean import clean_archive, clean_date, clean_markdown +from pyosmeta.utils_clean import ( + check_url, + clean_archive, + clean_date, + clean_markdown, +) class Partnerships(str, Enum): @@ -59,29 +63,12 @@ def format_url(cls, url: str) -> str: elif not url.startswith("http"): print("Oops, missing http") url = "https://" + url - if cls._check_url(url=url): + if check_url(url=url): return url else: + print(f"Oops, url `{url}` is not valid, removing it") return None - @staticmethod - def _check_url(url: str) -> bool: - """Test url. Return true if there's a valid response, False if not - - Parameters - ---------- - url : str - String for a url to a website to test. - - """ - - try: - response = requests.get(url, timeout=6) - return response.status_code == 200 - except Exception: - print("Oops, url", url, "is not valid, removing it") - return False - class PersonModel(BaseModel, UrlValidatorMixin): model_config = ConfigDict( diff --git a/src/pyosmeta/utils_clean.py b/src/pyosmeta/utils_clean.py index ed0572a..69c9264 100644 --- a/src/pyosmeta/utils_clean.py +++ b/src/pyosmeta/utils_clean.py @@ -8,6 +8,7 @@ from typing import Any import doi +import requests def get_clean_user(username: str) -> str: @@ -129,22 +130,71 @@ def clean_date_accepted_key(review_dict: dict[str, Any]) -> dict[str, str]: return review_dict +def check_url(url: str) -> bool: + """Test url. Return true if there's a valid response, False if not + + Parameters + ---------- + url : str + String for a url to a website to test. + + """ + + try: + response = requests.get(url, timeout=6) + return response.status_code == 200 + except Exception: + return False + + +def is_doi(archive) -> str | None: + """Check if the DOI is valid and return the DOI link. + + Parameters + ---------- + archive : str + The DOI string to validate, e.g., `10.1234/zenodo.12345678` + + Returns + ------- + str | None + The DOI link in the form `https://doi.org/10.1234/zenodo.12345678` or `None` + if the DOI is invalid. + """ + try: + return doi.validate_doi(archive) + except ValueError: + pass + + def clean_archive(archive): - """Clean an archive link to ensure it is a valid URL.""" + """Clean an archive link to ensure it is a valid URL. - def is_doi(archive): - try: - return doi.validate_doi(archive) - except ValueError: - return False + This utility will attempt to parse the DOI link from the various formats + that are commonly present in review metadata. This utility will handle: + + * Markdown links in the format `[label](URL)`, e.g., `[my archive](https://doi.org/10.1234/zenodo.12345678)` + * Raw text in the format `DOI` e.g., `10.1234/zenodo.12345678` + * URLs in the format `http(s)://...` e.g., `https://doi.org/10.1234/zenodo.12345678` + * The special cases `n/a` and `tbd` which will be returned as `None` in anticipation of future data + + If the archive link is a URL, it will be returned as is with a check that + it resolves but is not required to be a valid DOI. If the archive link is + a DOI, it will be validated and returned as a URL in the form + `https://doi.org/10.1234/zenodo.12345678` using the `python-doi` package. + """ if archive.startswith("[") and archive.endswith(")"): # Extract the outermost link link = archive[archive.rfind("](") + 2 : -1] - if not link.startswith("http"): - return clean_archive(link) - return link + # recursively clean the archive link + return clean_archive(link) elif archive.startswith("http"): + if archive.startswith("http://"): + archive = archive.replace("http://", "https://") + # Validate that the URL resolves + if not check_url(archive): + raise ValueError(f"Invalid archive URL: {archive}") return archive elif link := is_doi(archive): return link diff --git a/tests/data/reviews/bolded_keys.txt b/tests/data/reviews/bolded_keys.txt index 7275920..a3ddd3a 100644 --- a/tests/data/reviews/bolded_keys.txt +++ b/tests/data/reviews/bolded_keys.txt @@ -9,8 +9,8 @@ **Reviewer 1:** @fakereviewer1 **Reviewer 2:** @fakereviewer2 **Reviews Expected By:** fake date -**Archive:** [![DOI](https://example.com/fakearchive)](https://example.com/fakearchive) -**Version accepted:** 2.0.0 ([repo](https://example.com/fakeauthor1/fake_package/releases/tag/v2.0.0), [pypi](https://pypi.org/project/fake_project/2.0.0), [archive](https://example.com/fakearchive)) +**Archive:** [![DOI](https://example.com/fakearchive)](https://zenodo.org/records/8415866) +**Version accepted:** 2.0.0 ([repo](https://example.com/fakeauthor1/fake_package/releases/tag/v2.0.0), [pypi](https://pypi.org/project/fake_project/2.0.0), [archive](https://zenodo.org/records/8415866)) **Date accepted (month/day/year):** 06/29/2024 --- diff --git a/tests/data/reviews/partnership_astropy.txt b/tests/data/reviews/partnership_astropy.txt index 3c9a475..7c86395 100644 --- a/tests/data/reviews/partnership_astropy.txt +++ b/tests/data/reviews/partnership_astropy.txt @@ -7,8 +7,8 @@ Version submitted: v.0.8.5 Editor: @editoruser Reviewer 1: @reviewer1 Reviewer 2: @reviewer2 -Archive: [![DOI](https://zenodo.org/badge/DOI/fakedoi/doi.svg)](https://doi.org/fakedoi/doi.svg) -JOSS DOI: [![DOI](https://joss.theoj.org/papers/fakedoi.svg)](https://joss.theoj.org/papers/fakedoi) +Archive: [![DOI](https://zenodo.org/badge/DOI/fakedoi/doi.svg)](https://zenodo.org/records/8415866) +JOSS DOI: [![DOI](https://joss.theoj.org/papers/fakedoi.svg)](https://doi.org/10.21105/joss.01450) Version accepted: v.0.9.2 Date accepted (month/day/year): 04/21/2024 diff --git a/tests/data/reviews/reviewer_keyed.txt b/tests/data/reviews/reviewer_keyed.txt index 3a3560a..b5437f8 100644 --- a/tests/data/reviews/reviewer_keyed.txt +++ b/tests/data/reviews/reviewer_keyed.txt @@ -9,7 +9,7 @@ Editor: @fakeeditor Reviewer 1: @fakereviewer1 Reviewer 2: @fakereviewer2 Reviews Expected By: fake date -Archive: [![DOI](https://example.com/fakearchive)](https://example.com/fakearchive) +Archive: [![DOI](https://example.com/fakearchive)](https://zenodo.org/records/8415866) Version accepted: 2.0.0 ([repo](https://example.com/fakeauthor1/fake_package/releases/tag/v2.0.0), [pypi](https://pypi.org/project/fake_project/2.0.0), [archive](https://example.com/fakearchive)) Date accepted (month/day/year): 06/29/2024 diff --git a/tests/data/reviews/reviewer_list.txt b/tests/data/reviews/reviewer_list.txt index 3e9f9e4..3014b14 100644 --- a/tests/data/reviews/reviewer_list.txt +++ b/tests/data/reviews/reviewer_list.txt @@ -8,7 +8,7 @@ EiC: @fakeeic Editor: @fakeeditor Reviewers: @fakereviewer1 , @fakereviewer2, @fakereviewer3 Reviews Expected By: fake date -Archive: [![DOI](https://example.com/fakearchive)](https://example.com/fakearchive) +Archive: [![DOI](https://example.com/fakearchive)](https://zenodo.org/records/8415866) Version accepted: 2.0.0 ([repo](https://example.com/fakeauthor1/fake_package/releases/tag/v2.0.0), [pypi](https://pypi.org/project/fake_project/2.0.0), [archive](https://example.com/fakearchive)) Date accepted (month/day/year): 06/29/2024 From 439f404256efdd86f3ddd0435d3be7b855bce94f Mon Sep 17 00:00:00 2001 From: Bane Sullivan Date: Thu, 19 Dec 2024 00:00:40 -0800 Subject: [PATCH 05/17] coverage --- src/pyosmeta/models/base.py | 2 +- src/pyosmeta/utils_clean.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pyosmeta/models/base.py b/src/pyosmeta/models/base.py index 1742bd2..4933611 100644 --- a/src/pyosmeta/models/base.py +++ b/src/pyosmeta/models/base.py @@ -65,7 +65,7 @@ def format_url(cls, url: str) -> str: url = "https://" + url if check_url(url=url): return url - else: + else: # pragma: no cover print(f"Oops, url `{url}` is not valid, removing it") return None diff --git a/src/pyosmeta/utils_clean.py b/src/pyosmeta/utils_clean.py index 69c9264..5d6371c 100644 --- a/src/pyosmeta/utils_clean.py +++ b/src/pyosmeta/utils_clean.py @@ -143,7 +143,7 @@ def check_url(url: str) -> bool: try: response = requests.get(url, timeout=6) return response.status_code == 200 - except Exception: + except Exception: # pragma: no cover return False From 084ef278f83b2c6a2a0c04810c4fab7b92eeadf3 Mon Sep 17 00:00:00 2001 From: Bane Sullivan Date: Thu, 19 Dec 2024 00:00:48 -0800 Subject: [PATCH 06/17] Test with JOSS DOI --- tests/data/reviews/reviewer_list.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/data/reviews/reviewer_list.txt b/tests/data/reviews/reviewer_list.txt index 3014b14..17df318 100644 --- a/tests/data/reviews/reviewer_list.txt +++ b/tests/data/reviews/reviewer_list.txt @@ -9,6 +9,7 @@ Editor: @fakeeditor Reviewers: @fakereviewer1 , @fakereviewer2, @fakereviewer3 Reviews Expected By: fake date Archive: [![DOI](https://example.com/fakearchive)](https://zenodo.org/records/8415866) +JOSS DOI: [![DOI](https://example.com/fakearchive)](https://doi.org/10.21105/joss.01450) Version accepted: 2.0.0 ([repo](https://example.com/fakeauthor1/fake_package/releases/tag/v2.0.0), [pypi](https://pypi.org/project/fake_project/2.0.0), [archive](https://example.com/fakearchive)) Date accepted (month/day/year): 06/29/2024 From 53b7968f92b45e4bdfacb5948bce42db3343c624 Mon Sep 17 00:00:00 2001 From: Bane Sullivan Date: Thu, 19 Dec 2024 00:02:13 -0800 Subject: [PATCH 07/17] cleanup --- src/pyosmeta/utils_clean.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/pyosmeta/utils_clean.py b/src/pyosmeta/utils_clean.py index 5d6371c..da18331 100644 --- a/src/pyosmeta/utils_clean.py +++ b/src/pyosmeta/utils_clean.py @@ -189,6 +189,9 @@ def clean_archive(archive): link = archive[archive.rfind("](") + 2 : -1] # recursively clean the archive link return clean_archive(link) + elif link := is_doi(archive): + # is_doi returns the DOI link if it is valid + return link elif archive.startswith("http"): if archive.startswith("http://"): archive = archive.replace("http://", "https://") @@ -196,8 +199,6 @@ def clean_archive(archive): if not check_url(archive): raise ValueError(f"Invalid archive URL: {archive}") return archive - elif link := is_doi(archive): - return link elif archive.lower() == "n/a": return None elif archive.lower() == "tbd": From c5d0709312b4621587f2334563bd8872b2c37bec Mon Sep 17 00:00:00 2001 From: Bane Sullivan Date: Thu, 19 Dec 2024 00:03:57 -0800 Subject: [PATCH 08/17] Add changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cd26941..1df2c1c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ Notes: it looks like i may have mistakenly bumped to 1.3.7 in august. rather tha * Fix: Eix field not processing correctly (@lwasser, #234) * Fix: Updated documentation throughout with a focus on how a user's name is accessed and updated (@lwasser) * Fix: ReviewUser object name can be optional. There are times when we don't have the actual person's name only the GH username (@lwasser) +* Fix: Parse archive and JOSS links to handle markdown links and validate DOI links are valid ## [v1.3.7] - 2024-08-27 From 5922ad970f9d3f06dee4d9c6417388aa04310fd6 Mon Sep 17 00:00:00 2001 From: Bane Sullivan Date: Thu, 19 Dec 2024 00:04:21 -0800 Subject: [PATCH 09/17] attribution in changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1df2c1c..8dc20f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,7 @@ Notes: it looks like i may have mistakenly bumped to 1.3.7 in august. rather tha * Fix: Eix field not processing correctly (@lwasser, #234) * Fix: Updated documentation throughout with a focus on how a user's name is accessed and updated (@lwasser) * Fix: ReviewUser object name can be optional. There are times when we don't have the actual person's name only the GH username (@lwasser) -* Fix: Parse archive and JOSS links to handle markdown links and validate DOI links are valid +* Fix: Parse archive and JOSS links to handle markdown links and validate DOI links are valid (@banesullivan) ## [v1.3.7] - 2024-08-27 From e8dc83462c4050b47531076dff3aebe0f368a40b Mon Sep 17 00:00:00 2001 From: Bane Sullivan Date: Thu, 19 Dec 2024 00:24:52 -0800 Subject: [PATCH 10/17] Better testing and normalize the JOSS field --- src/pyosmeta/parse_issues.py | 3 +++ tests/data/reviews/archives_doi.txt | 28 +++++++++++++++++++++++++ tests/data/reviews/unknown_archives.txt | 28 +++++++++++++++++++++++++ tests/integration/test_parse_issues.py | 17 +++++++++++++++ 4 files changed, 76 insertions(+) create mode 100644 tests/data/reviews/archives_doi.txt create mode 100644 tests/data/reviews/unknown_archives.txt diff --git a/src/pyosmeta/parse_issues.py b/src/pyosmeta/parse_issues.py index 5e1696e..411fe6b 100644 --- a/src/pyosmeta/parse_issues.py +++ b/src/pyosmeta/parse_issues.py @@ -218,6 +218,9 @@ def _postprocess_meta(self, meta: dict, body: List[str]) -> dict: meta["partners"] = self.get_categories( body, "## Community Partnerships", 3, keyed=True ) + if "joss_doi" in meta: + # Normalize the JOSS archive field. Some issues use `JOSS DOI` others `JOSS` + meta["joss"] = meta.pop("joss_doi") return meta diff --git a/tests/data/reviews/archives_doi.txt b/tests/data/reviews/archives_doi.txt new file mode 100644 index 0000000..3356a90 --- /dev/null +++ b/tests/data/reviews/archives_doi.txt @@ -0,0 +1,28 @@ +Submitting Author: Fakename (@fakeauthor) +All current maintainers: (@fakeauthor1, @fakeauthor2) +Package Name: fake_package +One-Line Description of Package: A fake python package +Repository Link: https://example.com/fakeauthor1/fake_package +Version submitted: v1.0.0 +EiC: @fakeeic +Editor: @fakeeditor +Reviewer 1: @fakereviewer1 +Reviewer 2: @fakereviewer2 +Reviews Expected By: fake date +Archive: 10.5281/zenodo.8415866 +JOSS DOI: 10.21105/joss.01450 +Version accepted: 2.0.0 ([repo](https://example.com/fakeauthor1/fake_package/releases/tag/v2.0.0), [pypi](https://pypi.org/project/fake_project/2.0.0), [archive](https://example.com/fakearchive)) +Date accepted (month/day/year): 06/29/2024 + +--- + +## Scope + +- [x] I agree to abide by [pyOpenSci's Code of Conduct][PyOpenSciCodeOfConduct] during the review process and in maintaining my package after should it be accepted. +- [x] I have read and will commit to package maintenance after the review as per the [pyOpenSci Policies Guidelines][Commitment]. +(etc) + +## Community Partnerships + +- [ ] etc +- [ ] aaaaaa diff --git a/tests/data/reviews/unknown_archives.txt b/tests/data/reviews/unknown_archives.txt new file mode 100644 index 0000000..739a558 --- /dev/null +++ b/tests/data/reviews/unknown_archives.txt @@ -0,0 +1,28 @@ +Submitting Author: Fakename (@fakeauthor) +All current maintainers: (@fakeauthor1, @fakeauthor2) +Package Name: fake_package +One-Line Description of Package: A fake python package +Repository Link: https://example.com/fakeauthor1/fake_package +Version submitted: v1.0.0 +EiC: @fakeeic +Editor: @fakeeditor +Reviewer 1: @fakereviewer1 +Reviewer 2: @fakereviewer2 +Reviews Expected By: fake date +Archive: TBD +JOSS DOI: N/A +Version accepted: 2.0.0 ([repo](https://example.com/fakeauthor1/fake_package/releases/tag/v2.0.0), [pypi](https://pypi.org/project/fake_project/2.0.0), [archive](https://example.com/fakearchive)) +Date accepted (month/day/year): 06/29/2024 + +--- + +## Scope + +- [x] I agree to abide by [pyOpenSci's Code of Conduct][PyOpenSciCodeOfConduct] during the review process and in maintaining my package after should it be accepted. +- [x] I have read and will commit to package maintenance after the review as per the [pyOpenSci Policies Guidelines][Commitment]. +(etc) + +## Community Partnerships + +- [ ] etc +- [ ] aaaaaa diff --git a/tests/integration/test_parse_issues.py b/tests/integration/test_parse_issues.py index 237ebfe..1373c34 100644 --- a/tests/integration/test_parse_issues.py +++ b/tests/integration/test_parse_issues.py @@ -51,6 +51,23 @@ def test_parse_bolded_keys(process_issues, data_file): assert review.package_name == "fake_package" +def test_parse_doi_archives(process_issues, data_file): + """ + Test handling of DOI archives in various formats. + + This is a smoke test to ensure graceful handling of these cases. + """ + review = data_file("reviews/archives_doi.txt", True) + review = process_issues.parse_issue(review) + assert review.archive == "https://zenodo.org/record/8415866" + assert review.joss == "http://joss.theoj.org/papers/10.21105/joss.01450" + + review = data_file("reviews/unknown_archives.txt", True) + review = process_issues.parse_issue(review) + assert review.archive is None + assert review.joss is None + + def test_parse_labels(issue_list, process_issues): """ `Label` models should be coerced to a string when parsing an issue From 149fc4394b3919dd159b7f847d464b905e2efaa5 Mon Sep 17 00:00:00 2001 From: Bane Sullivan Date: Thu, 19 Dec 2024 00:28:12 -0800 Subject: [PATCH 11/17] Correct changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8dc20f6..80f986d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ ## [Unreleased] +* Fix: Parse archive and JOSS links to handle markdown links and validate DOI links are valid (@banesullivan) + [v1.4] - 2024-11-22 Notes: it looks like i may have mistakenly bumped to 1.3.7 in august. rather than try to fix on pypi we will just go with it to ensure our release cycles are smooth given no one else uses this package except pyopensci. @@ -17,7 +19,6 @@ Notes: it looks like i may have mistakenly bumped to 1.3.7 in august. rather tha * Fix: Eix field not processing correctly (@lwasser, #234) * Fix: Updated documentation throughout with a focus on how a user's name is accessed and updated (@lwasser) * Fix: ReviewUser object name can be optional. There are times when we don't have the actual person's name only the GH username (@lwasser) -* Fix: Parse archive and JOSS links to handle markdown links and validate DOI links are valid (@banesullivan) ## [v1.3.7] - 2024-08-27 From fc14b702f247c996cc4befe83fe739786478be0e Mon Sep 17 00:00:00 2001 From: Bane Sullivan Date: Sun, 12 Jan 2025 21:40:44 -0800 Subject: [PATCH 12/17] Apply suggestions from code review Co-authored-by: Leah Wasser --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 80f986d..4e7c1db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,9 +4,9 @@ ## [Unreleased] -* Fix: Parse archive and JOSS links to handle markdown links and validate DOI links are valid (@banesullivan) +* Fix: Parse archive and JOSS links to handle markdown links and validate DOI links are valid. Added python-doi as a dependency (@banesullivan) -[v1.4] - 2024-11-22 +## [v1.4] - 2024-11-22 Notes: it looks like i may have mistakenly bumped to 1.3.7 in august. rather than try to fix on pypi we will just go with it to ensure our release cycles are smooth given no one else uses this package except pyopensci. From 5655d619a4cdf91ad4afc478b4e7e85da10e1b49 Mon Sep 17 00:00:00 2001 From: Bane Sullivan Date: Sun, 12 Jan 2025 21:59:26 -0800 Subject: [PATCH 13/17] Handle missing values --- src/pyosmeta/utils_clean.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/pyosmeta/utils_clean.py b/src/pyosmeta/utils_clean.py index da18331..a0b789e 100644 --- a/src/pyosmeta/utils_clean.py +++ b/src/pyosmeta/utils_clean.py @@ -168,7 +168,7 @@ def is_doi(archive) -> str | None: def clean_archive(archive): - """Clean an archive link to ensure it is a valid URL. + """Clean an archive link to ensure it is a valid DOI URL. This utility will attempt to parse the DOI link from the various formats that are commonly present in review metadata. This utility will handle: @@ -184,6 +184,10 @@ def clean_archive(archive): `https://doi.org/10.1234/zenodo.12345678` using the `python-doi` package. """ + archive = archive.strip() # Remove leading/trailing whitespace + if not archive: + # If field is empty, return None + return None if archive.startswith("[") and archive.endswith(")"): # Extract the outermost link link = archive[archive.rfind("](") + 2 : -1] @@ -197,7 +201,7 @@ def clean_archive(archive): archive = archive.replace("http://", "https://") # Validate that the URL resolves if not check_url(archive): - raise ValueError(f"Invalid archive URL: {archive}") + raise ValueError(f"Invalid archive URL (not resolving): {archive}") return archive elif archive.lower() == "n/a": return None From 756eb3d3a9d67366ae9ee956a2c9e792ce8fcba2 Mon Sep 17 00:00:00 2001 From: Bane Sullivan Date: Sun, 12 Jan 2025 22:06:03 -0800 Subject: [PATCH 14/17] Add print statement to indicate progress --- src/pyosmeta/parse_issues.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pyosmeta/parse_issues.py b/src/pyosmeta/parse_issues.py index 411fe6b..220bce6 100644 --- a/src/pyosmeta/parse_issues.py +++ b/src/pyosmeta/parse_issues.py @@ -306,6 +306,7 @@ def parse_issues( reviews = {} errors = {} for issue in issues: + print(f"Processing review {issue.title}") try: review = self.parse_issue(issue) reviews[review.package_name] = review From a0a7f16b2219d1e7ee80700e0dfcf4060ad3fdea Mon Sep 17 00:00:00 2001 From: Bane Sullivan Date: Sun, 12 Jan 2025 22:07:48 -0800 Subject: [PATCH 15/17] Update changelog with note on python-doi --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e7c1db..9e308af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ ## [Unreleased] -* Fix: Parse archive and JOSS links to handle markdown links and validate DOI links are valid. Added python-doi as a dependency (@banesullivan) +* Fix: Parse archive and JOSS links to handle markdown links and validate DOI links are valid. Added python-doi as a dependency to ensure archive/DOI URLs fully resolve (@banesullivan) ## [v1.4] - 2024-11-22 From 2b9c1cf5e6fbd58d1fb3437452396d7ff472b915 Mon Sep 17 00:00:00 2001 From: Bane Sullivan Date: Sun, 12 Jan 2025 22:22:15 -0800 Subject: [PATCH 16/17] Improve testing --- tests/data/reviews/archives_invalid.txt | 28 +++++++++++++++++++ tests/data/reviews/archives_missing.txt | 28 +++++++++++++++++++ ...nown_archives.txt => archives_unknown.txt} | 0 tests/integration/test_parse_issues.py | 12 +++++++- 4 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 tests/data/reviews/archives_invalid.txt create mode 100644 tests/data/reviews/archives_missing.txt rename tests/data/reviews/{unknown_archives.txt => archives_unknown.txt} (100%) diff --git a/tests/data/reviews/archives_invalid.txt b/tests/data/reviews/archives_invalid.txt new file mode 100644 index 0000000..7617b34 --- /dev/null +++ b/tests/data/reviews/archives_invalid.txt @@ -0,0 +1,28 @@ +Submitting Author: Fakename (@fakeauthor) +All current maintainers: (@fakeauthor1, @fakeauthor2) +Package Name: fake_package +One-Line Description of Package: A fake python package +Repository Link: https://example.com/fakeauthor1/fake_package +Version submitted: v1.0.0 +EiC: @fakeeic +Editor: @fakeeditor +Reviewer 1: @fakereviewer1 +Reviewer 2: @fakereviewer2 +Reviews Expected By: fake date +Archive: 10.1234/zenodo.12345678 +JOSS DOI: 10.21105/joss.00000 +Version accepted: 2.0.0 ([repo](https://example.com/fakeauthor1/fake_package/releases/tag/v2.0.0), [pypi](https://pypi.org/project/fake_project/2.0.0), [archive](https://example.com/fakearchive)) +Date accepted (month/day/year): 06/29/2024 + +--- + +## Scope + +- [x] I agree to abide by [pyOpenSci's Code of Conduct][PyOpenSciCodeOfConduct] during the review process and in maintaining my package after should it be accepted. +- [x] I have read and will commit to package maintenance after the review as per the [pyOpenSci Policies Guidelines][Commitment]. +(etc) + +## Community Partnerships + +- [ ] etc +- [ ] aaaaaa diff --git a/tests/data/reviews/archives_missing.txt b/tests/data/reviews/archives_missing.txt new file mode 100644 index 0000000..4f30063 --- /dev/null +++ b/tests/data/reviews/archives_missing.txt @@ -0,0 +1,28 @@ +Submitting Author: Fakename (@fakeauthor) +All current maintainers: (@fakeauthor1, @fakeauthor2) +Package Name: fake_package +One-Line Description of Package: A fake python package +Repository Link: https://example.com/fakeauthor1/fake_package +Version submitted: v1.0.0 +EiC: @fakeeic +Editor: @fakeeditor +Reviewer 1: @fakereviewer1 +Reviewer 2: @fakereviewer2 +Reviews Expected By: fake date +Archive: +JOSS DOI: +Version accepted: 2.0.0 ([repo](https://example.com/fakeauthor1/fake_package/releases/tag/v2.0.0), [pypi](https://pypi.org/project/fake_project/2.0.0), [archive](https://example.com/fakearchive)) +Date accepted (month/day/year): 06/29/2024 + +--- + +## Scope + +- [x] I agree to abide by [pyOpenSci's Code of Conduct][PyOpenSciCodeOfConduct] during the review process and in maintaining my package after should it be accepted. +- [x] I have read and will commit to package maintenance after the review as per the [pyOpenSci Policies Guidelines][Commitment]. +(etc) + +## Community Partnerships + +- [ ] etc +- [ ] aaaaaa diff --git a/tests/data/reviews/unknown_archives.txt b/tests/data/reviews/archives_unknown.txt similarity index 100% rename from tests/data/reviews/unknown_archives.txt rename to tests/data/reviews/archives_unknown.txt diff --git a/tests/integration/test_parse_issues.py b/tests/integration/test_parse_issues.py index 1373c34..41d4301 100644 --- a/tests/integration/test_parse_issues.py +++ b/tests/integration/test_parse_issues.py @@ -62,11 +62,21 @@ def test_parse_doi_archives(process_issues, data_file): assert review.archive == "https://zenodo.org/record/8415866" assert review.joss == "http://joss.theoj.org/papers/10.21105/joss.01450" - review = data_file("reviews/unknown_archives.txt", True) + review = data_file("reviews/archives_unknown.txt", True) review = process_issues.parse_issue(review) assert review.archive is None assert review.joss is None + review = data_file("reviews/archives_missing.txt", True) + review = process_issues.parse_issue(review) + assert review.archive is None + assert review.joss is None + + review = data_file("reviews/archives_invalid.txt", True) + + with pytest.raises(ValueError): + review = process_issues.parse_issue(review) + def test_parse_labels(issue_list, process_issues): """ From 383e10ff7ea305246948ea8459ed3dc08b3d167e Mon Sep 17 00:00:00 2001 From: Bane Sullivan Date: Sun, 12 Jan 2025 22:24:12 -0800 Subject: [PATCH 17/17] Use `match=` in validation testing --- tests/integration/test_parse_issues.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_parse_issues.py b/tests/integration/test_parse_issues.py index 41d4301..8fa2586 100644 --- a/tests/integration/test_parse_issues.py +++ b/tests/integration/test_parse_issues.py @@ -74,7 +74,7 @@ def test_parse_doi_archives(process_issues, data_file): review = data_file("reviews/archives_invalid.txt", True) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="Invalid archive"): review = process_issues.parse_issue(review)