Skip to content

Commit

Permalink
feat: adds the ability to set the index strategy (#1986)
Browse files Browse the repository at this point in the history
This PR makes it possible to set the uv `index-strategy` from pixi.

closes: #1377

---------

Co-authored-by: Bas Zalmstra <[email protected]>
Co-authored-by: Bas Zalmstra <[email protected]>
Co-authored-by: Hofer-Julian <[email protected]>
  • Loading branch information
4 people authored Sep 10, 2024
1 parent e21fa6d commit 78850aa
Show file tree
Hide file tree
Showing 43 changed files with 969 additions and 44 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

86 changes: 84 additions & 2 deletions crates/pixi_manifest/src/pypi/pypi_options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,36 @@ use indexmap::IndexSet;
use rattler_lock::{FindLinksUrlOrPath, PypiIndexes};
use serde::{Deserialize, Serialize};
use serde_with::serde_as;
use std::{hash::Hash, iter};
use std::{fmt::Display, hash::Hash, iter};
use thiserror::Error;
use url::Url;

// Descriptions adapted from: https://docs.astral.sh/uv/reference/settings/#index-strategy
/// The strategy to use when resolving against multiple index URLs.
///
/// By default, uv will stop at the first index on which a given package is
/// available, and limit resolutions to those present on that first index
/// (`first-index`). This prevents "dependency confusion" attacks, whereby an
/// attacker can upload a malicious package under the same name to a secondary
/// index.
///
/// Variants are (de)serialized by serde in kebab-case, e.g. `unsafe-best-match`.
#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize, Eq)]
#[serde(rename_all = "kebab-case")]
pub enum IndexStrategy {
#[default]
/// Only use results from the first index that returns a match for a given
/// package name. This is the default variant.
FirstIndex,
/// Search for every package name across all indexes, exhausting the versions
/// from the first index before moving on to the next.
UnsafeFirstMatch,
/// Search for every package name across all indexes, preferring the "best"
/// version found. If a package version is in multiple indexes, only look at
/// the entry for the first index.
UnsafeBestMatch,
}

impl Display for IndexStrategy {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let s = match self {
IndexStrategy::FirstIndex => "first-index",
IndexStrategy::UnsafeFirstMatch => "unsafe-first-match",
IndexStrategy::UnsafeBestMatch => "unsafe-best-match",
};
write!(f, "{}", s)
}
}

/// Specific options for a PyPI registries
#[serde_as]
#[derive(Debug, Clone, PartialEq, Serialize, Eq, Deserialize, Default)]
Expand All @@ -21,6 +47,8 @@ pub struct PypiOptions {
pub find_links: Option<Vec<FindLinksUrlOrPath>>,
/// Disable isolated builds
pub no_build_isolation: Option<Vec<String>>,
/// The strategy to use when resolving against multiple index URLs.
pub index_strategy: Option<IndexStrategy>,
}

/// Clones and deduplicates two iterators of values
Expand All @@ -42,12 +70,14 @@ impl PypiOptions {
extra_indexes: Option<Vec<Url>>,
flat_indexes: Option<Vec<FindLinksUrlOrPath>>,
no_build_isolation: Option<Vec<String>>,
index_strategy: Option<IndexStrategy>,
) -> Self {
Self {
index_url: index,
extra_index_urls: extra_indexes,
find_links: flat_indexes,
no_build_isolation,
index_strategy,
}
}

Expand Down Expand Up @@ -93,6 +123,20 @@ impl PypiOptions {
self.index_url.clone()
};

// Allow only one index strategy
let index_strategy = if let Some(other_index_strategy) = other.index_strategy.clone() {
if let Some(own_index_strategy) = &self.index_strategy {
return Err(PypiOptionsMergeError::MultipleIndexStrategies {
first: own_index_strategy.to_string(),
second: other_index_strategy.to_string(),
});
} else {
Some(other_index_strategy)
}
} else {
self.index_strategy.clone()
};

// Chain together and deduplicate the extra indexes
let extra_indexes = self
.extra_index_urls
Expand Down Expand Up @@ -135,6 +179,7 @@ impl PypiOptions {
extra_index_urls: extra_indexes,
find_links: flat_indexes,
no_build_isolation,
index_strategy,
})
}
}
Expand Down Expand Up @@ -165,10 +210,16 @@ pub enum PypiOptionsMergeError {
"multiple primary pypi indexes are not supported, found both {first} and {second} across multiple pypi options"
)]
MultiplePrimaryIndexes { first: String, second: String },
#[error(
"multiple index strategies are not supported, found both {first} and {second} across multiple pypi options"
)]
MultipleIndexStrategies { first: String, second: String },
}

#[cfg(test)]
mod tests {
use crate::pypi::pypi_options::IndexStrategy;

use super::PypiOptions;
use rattler_lock::FindLinksUrlOrPath;
use url::Url;
Expand Down Expand Up @@ -196,7 +247,8 @@ mod tests {
FindLinksUrlOrPath::Path("/path/to/flat/index".into()),
FindLinksUrlOrPath::Url(Url::parse("https://flat.index").unwrap())
]),
no_build_isolation: Some(vec!["pkg1".to_string(), "pkg2".to_string()])
no_build_isolation: Some(vec!["pkg1".to_string(), "pkg2".to_string()]),
index_strategy: None,
},
);
}
Expand All @@ -212,6 +264,7 @@ mod tests {
FindLinksUrlOrPath::Url(Url::parse("https://flat.index").unwrap()),
]),
no_build_isolation: Some(vec!["foo".to_string(), "bar".to_string()]),
index_strategy: None,
};

// Create the second set of options
Expand All @@ -223,6 +276,7 @@ mod tests {
FindLinksUrlOrPath::Url(Url::parse("https://flat.index2").unwrap()),
]),
no_build_isolation: Some(vec!["foo".to_string()]),
index_strategy: None,
};

// Merge the two options
Expand All @@ -239,6 +293,7 @@ mod tests {
extra_index_urls: None,
find_links: None,
no_build_isolation: None,
index_strategy: None,
};

// Create the second set of options
Expand All @@ -247,11 +302,38 @@ mod tests {
extra_index_urls: None,
find_links: None,
no_build_isolation: None,
index_strategy: None,
};

// Merge the two options
// This should error because there are two primary indexes
let merged_opts = opts.union(&opts2);
insta::assert_snapshot!(merged_opts.err().unwrap());
}

#[test]
fn test_error_on_multiple_index_strategies() {
    // Two option sets that differ only in their (conflicting) index strategy;
    // all remaining fields keep their `Default` value of `None`.
    let first_index = PypiOptions {
        index_strategy: Some(IndexStrategy::FirstIndex),
        ..Default::default()
    };
    let best_match = PypiOptions {
        index_strategy: Some(IndexStrategy::UnsafeBestMatch),
        ..Default::default()
    };

    // Both sides define a strategy, so the union must fail; snapshot the error message.
    let merged_opts = first_index.union(&best_match);
    insta::assert_snapshot!(merged_opts.err().unwrap());
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/pixi_manifest/src/pypi/pypi_options.rs
expression: merged_opts.err().unwrap()
---
multiple index strategies are not supported, found both first-index and unsafe-best-match across multiple pypi options
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ find-links:
no-build-isolation:
- foo
- bar
index-strategy: ~
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ find-links:
- path: "../foo"
- url: "https://example.com/bar"
no-build-isolation: ~
index-strategy: ~
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ extra-index-urls:
- "https://pypi.org/simple2"
find-links: ~
no-build-isolation: ~
index-strategy: ~
1 change: 1 addition & 0 deletions crates/pixi_uv_conversions/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ pypi-types = { workspace = true }
rattler_lock = { workspace = true }
thiserror = { workspace = true }
url = { workspace = true }
uv-configuration = { workspace = true }
uv-git = { workspace = true }
uv-normalize = { workspace = true }
uv-python = { workspace = true }
Expand Down
20 changes: 19 additions & 1 deletion crates/pixi_uv_conversions/src/conversions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ use distribution_types::{FlatIndexLocation, IndexLocations, IndexUrl};
use pep508_rs::{
InvalidNameError, PackageName, UnnamedRequirementUrl, VerbatimUrl, VerbatimUrlError,
};
use pixi_manifest::pypi::{pypi_options::PypiOptions, GitRev};
use pixi_manifest::pypi::{
pypi_options::{IndexStrategy, PypiOptions},
GitRev,
};
use rattler_lock::FindLinksUrlOrPath;
use uv_git::GitReference;
use uv_python::PythonEnvironment;
Expand Down Expand Up @@ -153,3 +156,18 @@ pub fn names_to_build_isolation<'a>(
) -> uv_types::BuildIsolation<'a> {
packages_to_build_isolation(names, env)
}

/// Convert a pixi [`IndexStrategy`] into the matching [`uv_configuration::IndexStrategy`].
///
/// Each pixi variant maps onto the uv variant of the same name. When
/// `index_strategy` is `None`, uv's own default strategy is returned.
pub fn to_index_strategy(
    index_strategy: Option<&IndexStrategy>,
) -> uv_configuration::IndexStrategy {
    // A single exhaustive match: adding a pixi variant forces this mapping to be updated.
    match index_strategy {
        Some(IndexStrategy::FirstIndex) => uv_configuration::IndexStrategy::FirstIndex,
        Some(IndexStrategy::UnsafeFirstMatch) => uv_configuration::IndexStrategy::UnsafeFirstMatch,
        Some(IndexStrategy::UnsafeBestMatch) => uv_configuration::IndexStrategy::UnsafeBestMatch,
        None => uv_configuration::IndexStrategy::default(),
    }
}
61 changes: 46 additions & 15 deletions docs/reference/project_configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -293,21 +293,29 @@ The `pypi-options` table is used to define options that are specific to PyPI reg
These options can be specified either at the root level, which will add it to the default options feature, or on feature level, which will create a union of these options when the features are included in the environment.

The options that can be defined are:
- `index-url`: replaces the main index url.
- `extra-index-urls`: adds an extra index url.
- `find-links`: similar to `--find-links` option in `pip`.
- `no-build-isolation`: disables build isolation, can only be set per package.

- `index-url`: replaces the main index url.
- `extra-index-urls`: adds an extra index url.
- `find-links`: similar to `--find-links` option in `pip`.
- `no-build-isolation`: disables build isolation, can only be set per package.
- `index-strategy`: allows for specifying the index strategy to use.

These options are explained in the sections below. Most of these options are taken directly or with slight modifications from the [uv settings](https://docs.astral.sh/uv/reference/settings/). If you need an option that is missing, feel free to create an issue [requesting](https://github.com/prefix-dev/pixi/issues) it.


### Alternative registries

Currently the main reason to use this table is to define alternative registries.
We support:
!!! info "Strict Index Priority"
Unlike pip, because we make use of uv, we have a strict index priority. This means that the first index is used where a package can be found.
The order is determined by the order in the toml file, where the `extra-index-urls` are preferred over the `index-url`. Read more about this in the [uv docs](https://docs.astral.sh/uv/pip/compatibility/#packages-that-exist-on-multiple-indexes).

Often you might want to use an alternative or extra index for your project. This can be done by adding the `pypi-options` table to your `pixi.toml` file. The following options are available:

- `index-url`: replaces the main index url.
Only one `index-url` can be defined per environment.
- `extra-index-urls`: adds an extra index url.
- `index-url`: replaces the main index url. If this is not set the default index used is `https://pypi.org/simple`.
**Only one** `index-url` can be defined per environment.
- `extra-index-urls`: adds an extra index url. The urls are used in the order they are defined and are preferred over the `index-url`. These are merged across features into an environment.
- `find-links`: which can either be a path `{path = './links'}` or a url `{url = 'https://example.com/links'}`.
This is similar to the `--find-links` option in `pip`.
This is similar to the `--find-links` option in `pip`. These are merged across features into an environment.

An example:

Expand All @@ -318,12 +326,11 @@ extra-index-urls = ["https://example.com/simple"]
find-links = [{path = './links'}]
```

There are some examples in the pixi repository that make use of this feature.
To read about existing authentication methods, please check the [PyPI Authentication](../advanced/authentication.md#pypi-authentication) section.
There are some [examples](https://github.com/prefix-dev/pixi/tree/main/examples/pypi-custom-registry) in the pixi repository that make use of this feature.

!!! tip "Authentication Methods"
To read about existing authentication methods for private registries, please check the [PyPI Authentication](../advanced/authentication.md#pypi-authentication) section.

!!! info "Strict Index Priority"
Unlike pip, because we make use of uv, we have a strict index priority. This means that the first index is used where a package can be found.
The order is determined by the order in the toml file. Where the `extra-index-urls` are preferred over the `index-url`. Read more about this on the [UV Readme](https://github.com/astral-sh/uv/blob/main/PIP_COMPATIBILITY.md#packages-that-exist-on-multiple-indexes)

### No Build Isolation
Even though build isolation is a good default.
Expand All @@ -332,13 +339,37 @@ This is convenient if you want to use `torch` or something similar for your buil


```toml
[dependencies]
pytorch = "2.4.0"

[pypi-options]
no-build-isolation = ["detectron2"]

[pypi-dependencies]
detectron2 = { git = "https://github.com/facebookresearch/detectron2.git", rev = "5b72c27ae39f99db75d43f18fd1312e1ea934e60"}
```

!!! tip "Conda dependencies define the build environment"
To use `no-build-isolation` effectively, use conda dependencies to define the build environment. These are installed before the PyPI dependencies are resolved, so they are available during the build process. In the example above, adding `torch` as a PyPI dependency would be ineffective, as it would not yet be installed during the PyPI resolution phase.

### Index Strategy

The strategy to use when resolving against multiple index URLs. Description modified from the [uv](https://docs.astral.sh/uv/reference/settings/#index-strategy) documentation:

By default, `uv`, and thus `pixi`, will stop at the first index on which a given package is available, and limit resolutions to those present on that first index (`first-index`). This prevents *dependency confusion* attacks, whereby an attacker can upload a malicious package under the same name to a secondary index.

!!! warning "One index strategy per environment"
Only one `index-strategy` can be defined per environment or solve-group, otherwise, an error will be shown.

#### Possible values:

- **"first-index"**: Only use results from the first index that returns a match for a given package name
- **"unsafe-first-match"**: Search for every package name across all indexes, exhausting the versions from the first index before moving on to the next. Meaning if the package `a` is available on index `x` and `y`, it will prefer the version from `x` unless you've requested a package version that is **only** available on `y`.
- **"unsafe-best-match"**: Search for every package name across all indexes, preferring the *best* version found. If a package version is in multiple indexes, only look at the entry for the first index. So given indexes `x` and `y` that both contain package `a`, it will take the *best* version from either `x` or `y`, but should **that version** be available on both indexes, it will prefer `x`.

!!! info "PyPI only"
The `index-strategy` only changes PyPI package resolution and not conda package resolution.

## The `dependencies` table(s)

This section defines what dependencies you would like to use for your project.
Expand Down
Loading

0 comments on commit 78850aa

Please sign in to comment.