From 1817846bfd60d1ad0bf556afe9fbe86884838ac2 Mon Sep 17 00:00:00 2001 From: pierre-ubuntu Date: Tue, 12 Nov 2024 14:54:49 -0800 Subject: [PATCH 1/3] add ignore_index keyword parameter to Series and DF dropna and drop_duplicates --- pandas-stubs/core/frame.pyi | 24 +++++++++++++++++++++++- pandas-stubs/core/series.pyi | 21 ++++++++++++++++++--- tests/test_frame.py | 15 +++++++++++++-- tests/test_series.py | 10 ++++++++++ 4 files changed, 64 insertions(+), 6 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 9ba351f6..4cfff5b2 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -880,6 +880,7 @@ class DataFrame(NDFrame, OpsMixin): thresh: int | None = ..., subset: ListLikeU | Scalar | None = ..., inplace: Literal[True], + ignore_index: _bool = ..., ) -> None: ... @overload def dropna( @@ -890,6 +891,7 @@ class DataFrame(NDFrame, OpsMixin): thresh: int | None = ..., subset: ListLikeU | Scalar | None = ..., inplace: Literal[False] = ..., + ignore_index: _bool = ..., ) -> DataFrame: ... @overload def dropna( @@ -900,15 +902,35 @@ class DataFrame(NDFrame, OpsMixin): thresh: int | None = ..., subset: ListLikeU | Scalar | None = ..., inplace: _bool | None = ..., + ignore_index: _bool = ..., ) -> DataFrame | None: ... + @overload def drop_duplicates( self, subset: Hashable | Iterable[Hashable] | None = ..., *, keep: NaPosition | _bool = ..., - inplace: _bool = ..., + inplace: Literal[False] = ..., ignore_index: _bool = ..., ) -> DataFrame: ... + @overload + def drop_duplicates( + self, + subset: Hashable | Iterable[Hashable] | None = ..., + *, + keep: NaPosition | _bool = ..., + inplace: Literal[True] = ..., + ignore_index: _bool = ..., + ) -> None: ... + @overload + def drop_duplicates( + self, + subset: Hashable | Iterable[Hashable] | None = ..., + *, + keep: NaPosition | _bool = ..., + inplace: _bool = ..., + ignore_index: _bool = ..., + ) -> DataFrame | None: ... def duplicated( self, subset: Hashable | Iterable[Hashable] | None = ..., diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index 7dabbf76..d855bb7c 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -726,15 +726,27 @@ class Series(IndexOpsMixin[S1], NDFrame): def unique(self) -> np.ndarray: ... @overload def drop_duplicates( - self, *, keep: NaPosition | Literal[False] = ..., inplace: Literal[False] = ... + self, + *, + keep: NaPosition | Literal[False] = ..., + inplace: Literal[False] = ..., + ignore_index: _bool = ..., ) -> Series[S1]: ... @overload def drop_duplicates( - self, *, keep: NaPosition | Literal[False] = ..., inplace: Literal[True] + self, + *, + keep: NaPosition | Literal[False] = ..., + inplace: Literal[True], + ignore_index: _bool = ..., ) -> None: ... @overload def drop_duplicates( - self, *, keep: NaPosition | Literal[False] = ..., inplace: bool = ... + self, + *, + keep: NaPosition | Literal[False] = ..., + inplace: bool = ..., + ignore_index: _bool = ..., ) -> Series[S1] | None: ... def duplicated(self, keep: NaPosition | Literal[False] = ...) -> Series[_bool]: ... def idxmax( @@ -1148,6 +1160,7 @@ class Series(IndexOpsMixin[S1], NDFrame): axis: AxisIndex = ..., inplace: Literal[True], how: Literal["any", "all"] | None = ..., + ignore_index: _bool = ..., ) -> None: ... @overload def dropna( @@ -1156,6 +1169,7 @@ class Series(IndexOpsMixin[S1], NDFrame): axis: AxisIndex = ..., inplace: Literal[False] = ..., how: Literal["any", "all"] | None = ..., + ignore_index: _bool = ..., ) -> Series[S1]: ... @overload def dropna( @@ -1164,6 +1178,7 @@ class Series(IndexOpsMixin[S1], NDFrame): axis: AxisIndex = ..., inplace: _bool = ..., how: Literal["any", "all"] | None = ..., + ignore_index: _bool = ..., ) -> Series[S1] | None: ... def to_timestamp( self, diff --git a/tests/test_frame.py b/tests/test_frame.py index 33d09e11..76b3f12f 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -374,8 +374,12 @@ def test_arguments_drop() -> None: def test_types_dropna() -> None: df = pd.DataFrame(data={"col1": [np.nan, np.nan], "col2": [3, np.nan]}) res: pd.DataFrame = df.dropna() - res2: pd.DataFrame = df.dropna(axis=1, thresh=1) - res3: None = df.dropna(axis=0, how="all", subset=["col1"], inplace=True) + res2: pd.DataFrame = df.dropna(ignore_index=True) + res3: pd.DataFrame = df.dropna(axis=1, thresh=1) + res4: None = df.dropna(axis=0, how="all", subset=["col1"], inplace=True) + res5: None = df.dropna( + axis=0, how="all", subset=["col1"], inplace=True, ignore_index=False + ) def test_types_drop_duplicates() -> None: @@ -392,6 +396,13 @@ def test_types_drop_duplicates() -> None: check(assert_type(df.drop_duplicates(["AAA"]), pd.DataFrame), pd.DataFrame) check(assert_type(df.drop_duplicates(("AAA",)), pd.DataFrame), pd.DataFrame) check(assert_type(df.drop_duplicates("AAA"), pd.DataFrame), pd.DataFrame) + assert assert_type(df.drop_duplicates("AAA", inplace=True), None) is None + check( + assert_type( + df.drop_duplicates("AAA", inplace=False, ignore_index=True), pd.DataFrame + ), + pd.DataFrame, + ) if not PD_LTE_22: check(assert_type(df.drop_duplicates({"AAA"}), pd.DataFrame), pd.DataFrame) diff --git a/tests/test_series.py b/tests/test_series.py index 1ea1b717..8e9b60bd 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -308,10 +308,20 @@ def test_types_drop_multilevel() -> None: res: pd.Series = s.drop(labels="first", level=1) +def test_types_drop_duplicates() -> None: + s = pd.Series([1.0, 2.0, 2.0]) + check(assert_type(s.drop_duplicates(), "pd.Series[float]"), pd.Series, float) + assert assert_type(s.drop_duplicates(inplace=True), None) is None + assert ( + assert_type(s.drop_duplicates(inplace=True, ignore_index=False), None) is None + ) + + def test_types_dropna() -> None: s = pd.Series([1.0, np.nan, np.nan]) check(assert_type(s.dropna(), "pd.Series[float]"), pd.Series, float) assert assert_type(s.dropna(axis=0, inplace=True), None) is None + assert assert_type(s.dropna(axis=0, inplace=True, ignore_index=True), None) is None def test_pop() -> None: From 13cc4bb0d614c8f2af0145808c8e4187368b5e9e Mon Sep 17 00:00:00 2001 From: pierre-ubuntu Date: Tue, 12 Nov 2024 14:59:55 -0800 Subject: [PATCH 2/3] use assert_type instead --- tests/test_frame.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/test_frame.py b/tests/test_frame.py index 76b3f12f..f67294d5 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -373,12 +373,21 @@ def test_arguments_drop() -> None: def test_types_dropna() -> None: df = pd.DataFrame(data={"col1": [np.nan, np.nan], "col2": [3, np.nan]}) - res: pd.DataFrame = df.dropna() - res2: pd.DataFrame = df.dropna(ignore_index=True) - res3: pd.DataFrame = df.dropna(axis=1, thresh=1) - res4: None = df.dropna(axis=0, how="all", subset=["col1"], inplace=True) - res5: None = df.dropna( - axis=0, how="all", subset=["col1"], inplace=True, ignore_index=False + check(assert_type(df.dropna(), pd.DataFrame), pd.DataFrame) + check(assert_type(df.dropna(ignore_index=True), pd.DataFrame), pd.DataFrame) + check(assert_type(df.dropna(axis=1, thresh=1), pd.DataFrame), pd.DataFrame) + assert ( + assert_type(df.dropna(axis=0, how="all", subset=["col1"], inplace=True), None) + is None + ) + assert ( + assert_type( + df.dropna( + axis=0, how="all", subset=["col1"], inplace=True, ignore_index=False + ), + None, + ) + is None ) From 9652158abe4d1bf0d49a7445bc7ee7d43175f523 Mon Sep 17 00:00:00 2001 From: pierre-ubuntu Date: Wed, 13 Nov 2024 12:54:15 -0800 Subject: [PATCH 3/3] reverse overloads order, remove ellipsis when inplace=True --- pandas-stubs/core/frame.pyi | 8 ++++---- pandas-stubs/core/series.pyi | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 4cfff5b2..295aad8b 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -910,18 +910,18 @@ class DataFrame(NDFrame, OpsMixin): subset: Hashable | Iterable[Hashable] | None = ..., *, keep: NaPosition | _bool = ..., - inplace: Literal[False] = ..., + inplace: Literal[True], ignore_index: _bool = ..., - ) -> DataFrame: ... + ) -> None: ... @overload def drop_duplicates( self, subset: Hashable | Iterable[Hashable] | None = ..., *, keep: NaPosition | _bool = ..., - inplace: Literal[True] = ..., + inplace: Literal[False] = ..., ignore_index: _bool = ..., - ) -> None: ... + ) -> DataFrame: ... @overload def drop_duplicates( self, diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index d855bb7c..457db8de 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -729,17 +729,17 @@ class Series(IndexOpsMixin[S1], NDFrame): self, *, keep: NaPosition | Literal[False] = ..., - inplace: Literal[False] = ..., + inplace: Literal[True], ignore_index: _bool = ..., - ) -> Series[S1]: ... + ) -> None: ... @overload def drop_duplicates( self, *, keep: NaPosition | Literal[False] = ..., - inplace: Literal[True], + inplace: Literal[False] = ..., ignore_index: _bool = ..., - ) -> None: ... + ) -> Series[S1]: ... @overload def drop_duplicates( self,