
Commit

Merge branch 'main' into feat/pyarrow-to-datetime-infer
FBruzzesi authored Oct 17, 2024
2 parents 3a26b96 + e980483 commit bc5f854
Showing 129 changed files with 885 additions and 430 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/bump-version.yml
@@ -33,7 +33,7 @@ jobs:
python utils/bump_version.py ${{ github.event.inputs.release_type }}
- name: Create pull request
uses: actions/github-script@v6
uses: actions/github-script@v7
if: github.actor == 'MarcoGorelli' || github.actor == 'FBruzzesi'
with:
script: |
6 changes: 1 addition & 5 deletions .github/workflows/extremes.yml
@@ -59,8 +59,6 @@ jobs:
run: uv pip freeze
- name: Run pytest
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow
- name: Run doctests
run: pytest narwhals --doctest-modules

not_so_old_versions:
strategy:
@@ -88,13 +86,11 @@ jobs:
run: uv pip freeze
- name: Run pytest
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow
- name: Run doctests
run: pytest narwhals --doctest-modules

nightlies:
strategy:
matrix:
python-version: ["3.11"]
python-version: ["3.12"]
os: [ubuntu-latest]
if: github.event.pull_request.head.repo.full_name == github.repository
runs-on: ${{ matrix.os }}
6 changes: 1 addition & 5 deletions .github/workflows/pytest.yml
@@ -30,9 +30,6 @@ jobs:
run: uv pip freeze
- name: Run pytest
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=85
- name: Run doctests
if: startsWith(matrix.os, 'windows') != true
run: pytest narwhals --doctest-modules

pytest-windows:
strategy:
@@ -60,8 +57,6 @@ jobs:
run: uv pip freeze
- name: Run pytest
run: pytest tests --cov=narwhals --cov=tests --runslow --cov-fail-under=95
- name: Run doctests
run: pytest narwhals --doctest-modules

pytest-coverage:
strategy:
@@ -95,4 +90,5 @@ jobs:
- name: Run pytest
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=100 --runslow
- name: Run doctests
if: matrix.python-version == '3.12'
run: pytest narwhals --doctest-modules
13 changes: 8 additions & 5 deletions CONTRIBUTING.md
@@ -51,17 +51,20 @@ Here's how you can set up your local development environment to contribute.

#### Option 1: Use UV (recommended)

1. Make sure you have Python3.8+ installed (for example, Python 3.11), create a virtual environment,
1. Make sure you have Python3.12 installed, create a virtual environment,
and activate it. If you're new to this, here's one way that we recommend:
1. Install uv: https://github.com/astral-sh/uv?tab=readme-ov-file#getting-started
2. Install some version of Python greater than Python3.8. For example, to install
Python3.11:
or make sure it is up-to-date with:
```
uv python install 3.11
uv self update
```
2. Install Python3.12:
```
uv python install 3.12
```
3. Create a virtual environment:
```
uv venv -p 3.11 --seed
uv venv -p 3.12 --seed
```
4. Activate it. On Linux, this is `. .venv/bin/activate`, on Windows `.\.venv\Scripts\activate`.
2. Install Narwhals: `uv pip install -e .`
3 changes: 2 additions & 1 deletion docs/api-reference/dtypes.md
@@ -6,7 +6,6 @@
members:
- Array
- List
- Struct
- Int64
- Int32
- Int16
@@ -15,12 +14,14 @@
- UInt32
- UInt16
- UInt8
- Field
- Float64
- Float32
- Boolean
- Categorical
- Enum
- String
- Struct
- Date
- Datetime
- Duration
1 change: 1 addition & 0 deletions docs/api-reference/narwhals.md
@@ -14,6 +14,7 @@ Here are the top-level functions available in Narwhals.
- concat_str
- from_dict
- from_native
- from_arrow
- get_level
- get_native_namespace
- is_ordered_categorical
4 changes: 4 additions & 0 deletions narwhals/__init__.py
@@ -10,6 +10,7 @@
from narwhals.dtypes import Datetime
from narwhals.dtypes import Duration
from narwhals.dtypes import Enum
from narwhals.dtypes import Field
from narwhals.dtypes import Float32
from narwhals.dtypes import Float64
from narwhals.dtypes import Int8
@@ -44,6 +45,7 @@
from narwhals.expr import sum_horizontal
from narwhals.expr import when
from narwhals.functions import concat
from narwhals.functions import from_arrow
from narwhals.functions import from_dict
from narwhals.functions import get_level
from narwhals.functions import new_series
@@ -68,6 +70,7 @@
"selectors",
"concat",
"from_dict",
"from_arrow",
"get_level",
"new_series",
"to_native",
@@ -118,6 +121,7 @@
"String",
"Datetime",
"Duration",
"Field",
"Struct",
"Array",
"List",
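
For illustration only (not part of the diff): with `Field` now exported at the top level, a `Struct` dtype can be built from named fields directly, using the same constructors the backend conversions below rely on.

```python
import narwhals as nw

# A Struct dtype that carries its fields explicitly, built from
# Field(name, dtype) pairs as in the backend conversions in this commit.
dtype = nw.Struct([nw.Field("id", nw.Int64()), nw.Field("name", nw.String())])
print(dtype)  # exact repr may vary between versions
```
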
11 changes: 10 additions & 1 deletion narwhals/_arrow/utils.py
@@ -56,7 +56,16 @@ def native_to_narwhals_dtype(dtype: Any, dtypes: DTypes) -> DType:
if pa.types.is_dictionary(dtype):
return dtypes.Categorical()
if pa.types.is_struct(dtype):
return dtypes.Struct()
return dtypes.Struct(
[
dtypes.Field(
dtype.field(i).name,
native_to_narwhals_dtype(dtype.field(i).type, dtypes),
)
for i in range(dtype.num_fields)
]
)

if pa.types.is_list(dtype) or pa.types.is_large_list(dtype):
return dtypes.List(native_to_narwhals_dtype(dtype.value_type, dtypes))
if pa.types.is_fixed_size_list(dtype):
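
As a rough public-API sketch of what this enables (not part of the diff; the exact repr may differ): a PyArrow struct column should now surface its field names and types instead of a bare `Struct()`.

```python
import pyarrow as pa
import narwhals as nw

# A pyarrow table whose "user" column has type struct<id: int64, name: string>.
tbl = pa.table({"user": [{"id": 1, "name": "a"}, {"id": 2, "name": "b"}]})
df = nw.from_native(tbl, eager_only=True)

# With this change, the struct's fields are preserved in the narwhals dtype.
print(df.schema["user"])
```
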
11 changes: 10 additions & 1 deletion narwhals/_duckdb/dataframe.py
@@ -52,7 +52,16 @@ def map_duckdb_dtype_to_narwhals_dtype(duckdb_dtype: Any, dtypes: DTypes) -> DTy
if duckdb_dtype == "INTERVAL":
return dtypes.Duration()
if duckdb_dtype.startswith("STRUCT"):
return dtypes.Struct()
matchstruc_ = re.findall(r"(\w+)\s+(\w+)", duckdb_dtype)
return dtypes.Struct(
[
dtypes.Field(
matchstruc_[i][0],
map_duckdb_dtype_to_narwhals_dtype(matchstruc_[i][1], dtypes),
)
for i in range(len(matchstruc_))
]
)
if match_ := re.match(r"(.*)\[\]$", duckdb_dtype):
return dtypes.List(map_duckdb_dtype_to_narwhals_dtype(match_.group(1), dtypes))
if match_ := re.match(r"(\w+)\[(\d+)\]", duckdb_dtype):
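
The DuckDB branch pulls `name TYPE` pairs out of the `STRUCT(...)` type string with a regex; a small standalone check of that pattern (the type string below is a hypothetical example):

```python
import re

duckdb_dtype = "STRUCT(a BIGINT, b VARCHAR)"  # hypothetical DuckDB type string
pairs = re.findall(r"(\w+)\s+(\w+)", duckdb_dtype)
print(pairs)  # [('a', 'BIGINT'), ('b', 'VARCHAR')]
```
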
10 changes: 9 additions & 1 deletion narwhals/_ibis/dataframe.py
@@ -51,7 +51,15 @@ def map_ibis_dtype_to_narwhals_dtype(ibis_dtype: Any, dtypes: DTypes) -> DType:
map_ibis_dtype_to_narwhals_dtype(ibis_dtype.value_type, dtypes)
)
if ibis_dtype.is_struct():
return dtypes.Struct()
return dtypes.Struct(
[
dtypes.Field(
ibis_dtype_name,
map_ibis_dtype_to_narwhals_dtype(ibis_dtype_field, dtypes),
)
for ibis_dtype_name, ibis_dtype_field in ibis_dtype.items()
]
)
return dtypes.Unknown() # pragma: no cover


4 changes: 1 addition & 3 deletions narwhals/_pandas_like/series.py
@@ -619,9 +619,7 @@ def quantile(

def zip_with(self: Self, mask: Any, other: Any) -> PandasLikeSeries:
ser = self._native_series
mask = validate_column_comparand(
ser.index, mask, treat_length_one_as_scalar=False
)
mask = validate_column_comparand(ser.index, mask)
other = validate_column_comparand(ser.index, other)
res = ser.where(mask, other)
return self._from_native_series(res)
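
For context (not part of the diff): `zip_with` ultimately defers to pandas' `Series.where`, which keeps values from the series where the mask is true and takes them from `other` elsewhere.

```python
import pandas as pd

ser = pd.Series([1, 2, 3])
mask = pd.Series([True, False, True])
other = pd.Series([10, 20, 30])

# Series.where keeps `ser` where mask is True and falls back to `other`.
print(ser.where(mask, other).tolist())  # [1, 20, 3]
```
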
14 changes: 7 additions & 7 deletions narwhals/_pandas_like/utils.py
@@ -32,9 +32,7 @@
}


def validate_column_comparand(
index: Any, other: Any, *, treat_length_one_as_scalar: bool = True
) -> Any:
def validate_column_comparand(index: Any, other: Any) -> Any:
"""Validate RHS of binary operation.
If the comparison isn't supported, return `NotImplemented` so that the
@@ -55,9 +53,10 @@ def validate_column_comparand(
if isinstance(other, PandasLikeDataFrame):
return NotImplemented
if isinstance(other, PandasLikeSeries):
if other.len() == 1 and treat_length_one_as_scalar:
if other.len() == 1:
# broadcast
return other.item()
s = other._native_series
return s.__class__(s.iloc[0], index=index, dtype=s.dtype)
if other._native_series.index is not index:
return set_axis(
other._native_series,
@@ -83,7 +82,8 @@ def validate_dataframe_comparand(index: Any, other: Any) -> Any:
if isinstance(other, PandasLikeSeries):
if other.len() == 1:
# broadcast
return other._native_series.iloc[0]
s = other._native_series
return s.__class__(s.iloc[0], index=index, dtype=s.dtype)
if other._native_series.index is not index:
return set_axis(
other._native_series,
@@ -294,7 +294,7 @@ def native_to_narwhals_dtype(native_column: Any, dtypes: DTypes) -> DType:
native_column.dtype.pyarrow_dtype.list_size,
)
if dtype.startswith("struct"):
return dtypes.Struct()
return arrow_native_to_narwhals_dtype(native_column.dtype.pyarrow_dtype, dtypes)
if dtype == "object":
if ( # pragma: no cover TODO(unassigned): why does this show as uncovered?
idx := getattr(native_column, "first_valid_index", lambda: None)()
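
The new broadcasting path builds a full-length series from the single value rather than returning a bare scalar, so the result keeps the left-hand index and dtype. A minimal pandas sketch of that constructor call (variable names are illustrative):

```python
import pandas as pd

index = pd.RangeIndex(3)           # index of the left-hand series
s = pd.Series([5], dtype="int64")  # length-1 comparand to broadcast

# Same shape as `s.__class__(s.iloc[0], index=index, dtype=s.dtype)` above:
broadcast = type(s)(s.iloc[0], index=index, dtype=s.dtype)
print(broadcast.tolist())  # [5, 5, 5]
```
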
7 changes: 6 additions & 1 deletion narwhals/_polars/utils.py
@@ -75,7 +75,12 @@ def native_to_narwhals_dtype(dtype: Any, dtypes: DTypes) -> DType:
du_time_unit: Literal["us", "ns", "ms"] = getattr(dtype, "time_unit", "us")
return dtypes.Duration(time_unit=du_time_unit)
if dtype == pl.Struct:
return dtypes.Struct()
return dtypes.Struct(
[
dtypes.Field(field_name, native_to_narwhals_dtype(field_type, dtypes))
for field_name, field_type in dtype
]
)
if dtype == pl.List:
return dtypes.List(native_to_narwhals_dtype(dtype.inner, dtypes))
if dtype == pl.Array:
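
The Polars branch iterates the `pl.Struct` dtype directly, relying on it yielding `(field_name, field_dtype)` pairs; a brief sketch, assuming a Polars version where that iteration behaviour holds:

```python
import polars as pl

dtype = pl.Struct({"a": pl.Int64, "b": pl.String})

# Iterating a Struct dtype is assumed to yield (name, dtype) pairs,
# which the conversion above maps to narwhals Field objects.
print(list(dtype))
```
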
42 changes: 21 additions & 21 deletions narwhals/dataframe.py
@@ -546,12 +546,12 @@ def write_csv(self, file: str | Path | BytesIO | None = None) -> Any:
We can pass any supported library such as pandas, Polars or PyArrow to `func`:
>>> func(df_pd) # doctest: +SKIP
'foo,bar,ham\n1,6.0,a\n2,7.0,b\n3,8.0,c\n'
>>> func(df_pl) # doctest: +SKIP
>>> func(df_pd)
'foo,bar,ham\n1,6.0,a\n2,7.0,b\n3,8.0,c\n'
>>> func(df_pa) # doctest: +SKIP
>>> func(df_pl)
'foo,bar,ham\n1,6.0,a\n2,7.0,b\n3,8.0,c\n'
>>> func(df_pa)
'"foo","bar","ham"\n1,6,"a"\n2,7,"b"\n3,8,"c"\n'
If we had passed a file name to `write_csv`, it would have been
written to that file.
@@ -582,9 +582,9 @@ def write_parquet(self, file: str | Path | BytesIO) -> Any:
We can then pass either pandas, Polars or PyArrow to `func`:
>>> func(df_pd) # doctest:+SKIP
>>> func(df_pl) # doctest:+SKIP
>>> func(df_pa) # doctest:+SKIP
>>> func(df_pd)
>>> func(df_pl)
>>> func(df_pa)
"""
self._compliant_frame.write_parquet(file)

@@ -1116,12 +1116,12 @@ def schema(self) -> Schema:
You can pass either pandas or Polars to `func`:
>>> df_pd_schema = func(df_pd)
>>> df_pd_schema # doctest:+SKIP
Schema({'foo': Int64, 'bar': Float64, 'ham', String})
>>> df_pd_schema
Schema({'foo': Int64, 'bar': Float64, 'ham': String})
>>> df_pl_schema = func(df_pl)
>>> df_pl_schema # doctest:+SKIP
Schema({'foo': Int64, 'bar': Float64, 'ham', String})
>>> df_pl_schema
Schema({'foo': Int64, 'bar': Float64, 'ham': String})
"""
return super().schema

@@ -1150,12 +1150,12 @@ def collect_schema(self: Self) -> Schema:
You can pass either pandas or Polars to `func`:
>>> df_pd_schema = func(df_pd)
>>> df_pd_schema # doctest:+SKIP
Schema({'foo': Int64, 'bar': Float64, 'ham', String})
>>> df_pd_schema
Schema({'foo': Int64, 'bar': Float64, 'ham': String})
>>> df_pl_schema = func(df_pl)
>>> df_pl_schema # doctest:+SKIP
Schema({'foo': Int64, 'bar': Float64, 'ham', String})
>>> df_pl_schema
Schema({'foo': Int64, 'bar': Float64, 'ham': String})
"""
return super().collect_schema()

@@ -2478,8 +2478,8 @@ def item(self: Self, row: int | None = None, column: int | str | None = None) ->
We can then pass either pandas or Polars to `func`:
>>> func(df_pd, 1, 1), func(df_pd, 2, "b") # doctest:+SKIP
(5, 6)
>>> func(df_pd, 1, 1), func(df_pd, 2, "b")
(np.int64(5), np.int64(6))
>>> func(df_pl, 1, 1), func(df_pl, 2, "b")
(5, 6)
@@ -2581,7 +2581,7 @@ def to_arrow(self: Self) -> pa.Table:
... def func(df):
... return df.to_arrow()
>>> func(df_pd) # doctest:+SKIP
>>> func(df_pd)
pyarrow.Table
foo: int64
bar: string
@@ -3010,7 +3010,7 @@ def schema(self) -> Schema:
... }
... )
>>> lf = nw.from_native(lf_pl)
>>> lf.schema # doctest:+SKIP
>>> lf.schema # doctest: +SKIP
Schema({'foo': Int64, 'bar': Float64, 'ham', String})
"""
return super().schema
@@ -3030,8 +3030,8 @@ def collect_schema(self: Self) -> Schema:
... }
... )
>>> lf = nw.from_native(lf_pl)
>>> lf.collect_schema() # doctest:+SKIP
Schema({'foo': Int64, 'bar': Float64, 'ham', String})
>>> lf.collect_schema()
Schema({'foo': Int64, 'bar': Float64, 'ham': String})
"""
return super().collect_schema()
