docs: documentation

prefix-dev · Nov 12, 2024 · 7b1249c · 7b1249c
1 parent 377962b
commit 7b1249c
Show file tree

Hide file tree

Showing 3 changed files with 59 additions and 19 deletions.
diff --git a/README.md b/README.md
@@ -9,7 +9,6 @@
 [![Build Status][build-badge]][build]
 [![Project Chat][chat-badge]][chat-url]
 
-
 [license-badge]: https://img.shields.io/badge/license-BSD--3--Clause-blue?style=flat-square
 [build-badge]: https://img.shields.io/github/actions/workflow/status/prefix-dev/parselmouth/updater.yml?style=flat-square&branch=main
 [build]: https://github.com/prefix-dev/parselmouth/actions
@@ -22,28 +21,53 @@
 
 ## Overview
 
-`parselmouth` is a utility designed to facilitate the mapping of Conda package names to their corresponding PyPI names. This tool automates the process of generating and updating mappings on an hourly basis, ensuring that users have access to the most accurate and up-to-date information.
+`parselmouth` is a utility designed to facilitate the mapping of Conda package names to their corresponding PyPI names and the inverse. This tool automates the process of generating and updating mappings on an hourly basis, ensuring that users have access to the most accurate and up-to-date information.
+
+## Conda to PyPI
 
 Example of mapping for `numpy-1.26.4-py311h64a7726_0.conda` with sha256 `3f4365e11b28e244c95ba8579942b0802761ba7bb31c026f50d1a9ea9c728149`
 
 ```json
 {
-   "pypi_normalized_names":[
-      "numpy"
-   ],
-   "versions":{
-      "numpy":"1.26.4"
-   },
-   "conda_name":"numpy",
-   "package_name":"numpy-1.26.4-py311h64a7726_0.conda",
-   "direct_url":[
-      "https://github.com/numpy/numpy/releases/download/v1.26.4/numpy-1.26.4.tar.gz"
-   ]
+  "pypi_normalized_names": ["numpy"],
+  "versions": {
+    "numpy": "1.26.4"
+  },
+  "conda_name": "numpy",
+  "package_name": "numpy-1.26.4-py311h64a7726_0.conda",
+  "direct_url": [
+    "https://github.com/numpy/numpy/releases/download/v1.26.4/numpy-1.26.4.tar.gz"
+  ]
 }
 ```
 
-More simplified version of our mapping is stored here: `files/mapping_as_grayskull.json`
+A more simplified version of our mapping is stored here: `files/mapping_as_grayskull.json`
+
+## PyPI to conda
+
+Example of mapping `requests` to the corresponding conda versions. This shows the known conda names per PyPI version; if a version is missing, it is not available on that conda channel:
+
+```
+{"2.10.0": ["requests"], "2.11.0": ["requests"], "2.11.1": ["requests"], "2.12.0": ["requests"], "2.12.1": ["requests"], "2.12.4": ["requests"], "2.12.5": ["requests"], "2.13.0": ["requests"], "2.17.3": ["requests"], "2.18.1": ["requests"], "2.18.2": ["requests"], "2.18.3": ["requests"], "2.18.4": ["requests"], "2.19.0": ["requests"], "2.19.1": ["requests"], "2.20.0": ["requests"], "2.20.1": ["requests"], "2.21.0": ["requests"], "2.22.0": ["requests"], "2.23.0": ["requests"], "2.9.2": ["requests"], "2.27.1": ["requests", "arm_pyart"], "2.24.0": ["requests", "google-cloud-bigquery-storage-core"], "2.26.0": ["requests"], "2.25.1": ["requests"], "2.25.0": ["requests"], "2.27.0": ["requests"], "2.28.0": ["requests"], "2.28.1": ["requests"], "2.31.0": ["requests", "jupyter-sphinx"], "2.28.2": ["requests"], "2.29.0": ["requests"], "2.32.1": ["requests"], "2.32.2": ["requests"], "2.32.3": ["requests"]}
+```
+
+## Online availability
+
+There are currently two mappings online, one of which is a work in progress (#2). Both are available behind the following URL:
+`https://conda-mapping.prefix.dev/`:
+
+1. The **Conda - PyPI** name mapping that maps a conda package version and name to its known PyPI counterpart.
+
+   This is available at `https://conda-mapping.prefix.dev/conda-forge/hash-v0/{sha256}` where the
+   `{sha256}` is the sha256 of the conda package, taken from a package record in the channel's `repodata.json` file.
+
+   So, for example, to find the PyPI name of `numpy-1.26.4-py310h4bfa8fc_0.conda` you can use the following URI:
+   `https://conda-mapping.prefix.dev/hash-v0/914476e2d3273fdf9c0419a7bdcb7b31a5ec25949e4afbc847297ff3a50c62c8`
 
+2. **(WIP)** The **PyPI - Conda** name mapping that maps a PyPI package to its known Conda counterpart. This only works for packages that are available on the conda channels that it references. This is available at `https://conda-mapping.prefix.dev/pypi-to-conda-v0/{channel}/{pypi-normalized-name}.json` where the channel is the name of the channel and the `{pypi-normalized-name}` is the normalized name of the package on PyPI.
+   E.g., for `requests` we can use `https://conda-mapping.prefix.dev/pypi-to-conda-v0/conda-forge/requests.json`, which will give you the corresponding JSON.
+   There is
 
+## Thanks!
 
 Developed with ❤️ at [prefix.dev](https://prefix.dev).
diff --git a/src/parselmouth/internals/types.py b/src/parselmouth/internals/types.py
@@ -0,0 +1,11 @@
+from typing import Annotated
+
+# Type aliases for common PyPI types
+type PyPIName = Annotated[str, "Name of the package on PyPi"]
+type PyPIVersion = Annotated[str, "Version of the package on PyPi"]
+type CondaVersion = Annotated[str, "Version of the package on Conda"]
+type CondaPackageName = Annotated[str, "Name of the package on Conda"]
+type CondaFileName = Annotated[
+    str, "File name of the package on Conda, e.g boltons-21.0.0-py310h06a4308_0.conda"
+]
+type PyPISourceUrl = Annotated[str, "Url of the package when its not on a PyPI index"]
diff --git a/src/parselmouth/internals/updater_producer.py b/src/parselmouth/internals/updater_producer.py
@@ -24,47 +24,52 @@ def main(
     channel: SupportedChannels,
     subdir: str | None = None,
 ):
+    # Get all available architectures
     subdirs = get_all_archs_available(channel)
 
-    # filter out the subdir we want to update
+    # filter out the subdir which we want to update
     if subdir and subdir in subdirs:
         subdirs = [subdir]
     elif subdir and subdir not in subdirs:
         raise ValueError(f"Subdir {subdir} not found in channel {channel}")
 
+    # List of all packages
     all_packages: list[tuple[str, str]] = []
 
     if check_if_exists:
+        # Get the complete channel indexes
         existing_mapping_data = s3_client.get_channel_index(channel=channel)
         if not existing_mapping_data:
+            # a new channel may not have any mapping data. so we need to create an empty one
             existing_mapping_data = IndexMapping(root={})
     else:
-        # a new channel may not have any mapping data. so we need to create an empty one
         existing_mapping_data = IndexMapping(root={})
 
     letters = set()
 
     for subdir in subdirs:
         repodatas = {}
+        # Get the repodata for this specific subdirectory
         repodata = get_subdir_repodata(subdir, channel)
-
         repodatas.update(repodata["packages"])
         repodatas.update(repodata["packages.conda"])
 
+        # Record package name and Sha256
         for package_name in repodatas:
             package = repodatas[package_name]
             sha256 = package["sha256"]
 
+            # Add the sha256 if it does not already exist
             if sha256 not in existing_mapping_data.root:
                 all_packages.append(package_name)
                 letters.add(f"{subdir}@{package_name[0]}")
 
+    # Write the index file to disk
     index_location = Path(output_dir) / channel / "index.json"
     os.makedirs(index_location.parent, exist_ok=True)
-
     with open(index_location, mode="w") as mapping_file:
         json.dump(existing_mapping_data.model_dump(), mapping_file)
 
+    # Print the processed packages
     json_letters = json.dumps(list(letters))
-
     print(json_letters)